wordcloud

This commit is contained in:
2026-02-02 11:12:53 +01:00
parent d770645d8e
commit 45dd121d90
6 changed files with 314 additions and 8 deletions

View File

@@ -943,3 +943,93 @@ class JPMCPlotsMixin:
chart = self._save_plot(chart, title)
return chart
def plot_traits_wordcloud(
    self,
    data: pl.LazyFrame | pl.DataFrame | None = None,
    column: str = 'Top_3_Traits',
    title: str = "Most Prominent Personality Traits",
    width: int = 1600,
    height: int = 800,
    background_color: str = 'white',
):
    """Create a word cloud visualization of personality traits from survey data.

    Each non-null cell of ``column`` is split on commas, the pieces are
    stripped of surrounding whitespace, and the resulting trait frequencies
    drive the word sizes. Empty pieces (e.g. from a trailing comma) are
    ignored so they cannot skew the frequency scaling.

    Args:
        data: Polars DataFrame or LazyFrame containing trait data. It is
            passed through ``self._ensure_dataframe``; how ``None`` is
            resolved is up to that helper (presumably the instance's own
            data -- confirm against the helper).
        column: Name of column containing comma-separated traits.
        title: Title for the word cloud; also used (sanitized) as the
            file name when the figure is saved.
        width: Width of the word cloud image in pixels.
        height: Height of the word cloud image in pixels.
        background_color: Background color for the word cloud and for the
            saved image.

    Returns:
        matplotlib.figure.Figure: The word cloud figure for display in notebooks.

    Raises:
        ValueError: If ``column`` contains no non-empty traits.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud
    from collections import Counter
    import random

    df = self._ensure_dataframe(data)

    # Count traits, dropping empty tokens produced by inputs such as
    # "a, b," or "a,,b".
    trait_freq = Counter()
    for cell in df[column].drop_nulls():
        stripped = (piece.strip() for piece in cell.split(','))
        trait_freq.update(trait for trait in stripped if trait)

    if not trait_freq:
        # WordCloud would raise a generic ValueError further down; fail
        # early with a message that points at the offending column.
        raise ValueError(
            f"No traits found in column '{column}'; cannot build a word cloud."
        )

    # Built once instead of on every color_func call.
    palette = (
        ColorPalette.PRIMARY,
        ColorPalette.RANK_1,
        ColorPalette.RANK_2,
        ColorPalette.RANK_3,
    )

    def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
        # Prefer the generator WordCloud hands us so colors follow the
        # cloud's own random state; fall back to the global module.
        rng = random if random_state is None else random_state
        return rng.choice(palette)

    wordcloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        color_func=color_func,
        relative_scaling=0.5,
        min_font_size=10,
        prefer_horizontal=0.7,
        collocations=False,  # Treat each word independently
    ).generate_from_frequencies(trait_freq)

    # figsize is in inches: at dpi=100 this maps 1:1 onto width x height px.
    fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)
    fig.tight_layout(pad=0)

    # Save figure if a directory is configured on the instance.
    save_dir = getattr(self, 'fig_save_dir', None)
    if save_dir:
        # Saved under a per-filter subfolder.
        save_path = Path(save_dir) / self._get_filter_slug()
        save_path.mkdir(parents=True, exist_ok=True)

        filepath = save_path / f"{self._sanitize_filename(title)}.png"

        # High-resolution PNG; the frame color matches the cloud background
        # rather than being forced to white.
        fig.savefig(filepath, dpi=300, bbox_inches='tight', facecolor=background_color)
        print(f"Word cloud saved to: {filepath}")

    return fig