wordcloud
This commit is contained in:
90
plots.py
90
plots.py
@@ -943,3 +943,93 @@ class JPMCPlotsMixin:
|
||||
|
||||
chart = self._save_plot(chart, title)
|
||||
return chart
|
||||
|
||||
def plot_traits_wordcloud(
    self,
    data: pl.LazyFrame | pl.DataFrame | None = None,
    column: str = 'Top_3_Traits',
    title: str = "Most Prominent Personality Traits",
    width: int = 1600,
    height: int = 800,
    background_color: str = 'white',
    random_state: int | None = None,
):
    """Create a word cloud visualization of personality traits from survey data.

    Each non-null cell of ``column`` is split on commas; every resulting
    trait is stripped of surrounding whitespace and counted. Empty
    fragments (e.g. from a trailing comma) are ignored. Word sizes in the
    cloud are driven by those counts.

    Args:
        data: Polars DataFrame or LazyFrame containing trait data. Passed
            through ``self._ensure_dataframe``.
        column: Name of column containing comma-separated traits.
        title: Title for the word cloud; also used to derive the file name
            when the figure is saved.
        width: Width of the word cloud image in pixels.
        height: Height of the word cloud image in pixels.
        background_color: Background color for the word cloud.
        random_state: Optional seed forwarded to ``WordCloud`` so that the
            layout and the colour assignment are reproducible. ``None``
            (the default) keeps the previous non-deterministic behaviour.

    Returns:
        matplotlib.figure.Figure: The word cloud figure for display in notebooks.

    Raises:
        ValueError: If ``column`` contains no non-empty traits.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud
    from collections import Counter
    import random

    df = self._ensure_dataframe(data)

    # Count every trait across all rows, skipping the empty strings that
    # trailing/double commas or blank cells would otherwise produce.
    trait_freq = Counter(
        trait
        for row in df[column].drop_nulls()
        for trait in (part.strip() for part in row.split(','))
        if trait
    )

    if not trait_freq:
        # WordCloud would raise a less helpful ValueError further down.
        raise ValueError(
            f"No traits found in column '{column}'; cannot build a word cloud."
        )

    # Brand palette the words are coloured from; built once, not per word.
    palette = (
        ColorPalette.PRIMARY,
        ColorPalette.RANK_1,
        ColorPalette.RANK_2,
        ColorPalette.RANK_3,
    )

    def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
        # Prefer the RNG WordCloud hands us so a seeded cloud is reproducible;
        # fall back to the global RNG when none is supplied.
        rng = random_state if random_state is not None else random
        return rng.choice(palette)

    # Generate word cloud
    wordcloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        color_func=color_func,
        relative_scaling=0.5,
        min_font_size=10,
        prefer_horizontal=0.7,
        collocations=False,  # Only relevant for raw text; kept for parity
        random_state=random_state,
    ).generate_from_frequencies(trait_freq)

    # Size the figure so that, at 100 dpi, it matches the requested pixel size.
    fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)

    # Operate on this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout(pad=0)

    # Save figure if directory specified (using same pattern as other plots)
    save_dir = getattr(self, 'fig_save_dir', None)
    if save_dir:
        # Figures are grouped in a sub-folder named after the filter slug.
        save_path = Path(save_dir) / self._get_filter_slug()
        save_path.mkdir(parents=True, exist_ok=True)

        # Use _sanitize_filename for consistency
        filepath = save_path / f"{self._sanitize_filename(title)}.png"

        # Save as PNG at high resolution
        fig.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white')
        print(f"Word cloud saved to: {filepath}")

    return fig
|
||||
|
||||
Reference in New Issue
Block a user