demographics section done
This commit is contained in:
98
plots.py
98
plots.py
@@ -1,6 +1,7 @@
|
||||
"""Plotting functions for Voice Branding analysis using Altair."""
|
||||
|
||||
import re
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import altair as alt
|
||||
@@ -728,8 +729,6 @@ class JPMCPlotsMixin:
|
||||
},
|
||||
width=width or 800,
|
||||
height=height or getattr(self, 'plot_height', 400)
|
||||
).configure_view(
|
||||
strokeWidth=0 # Remove frame which might obscure labels
|
||||
)
|
||||
|
||||
chart = self._save_plot(chart, title)
|
||||
@@ -794,6 +793,101 @@ class JPMCPlotsMixin:
|
||||
chart = self._save_plot(chart, title)
|
||||
return chart
|
||||
|
||||
def plot_demographic_distribution(
    self,
    column: str,
    data: pl.LazyFrame | pl.DataFrame | None = None,
    title: str | None = None,
    height: int | None = None,
    width: int | str | None = None,
    show_counts: bool = True,
) -> alt.Chart:
    """Create a horizontal bar chart showing the distribution of respondents by a demographic column.

    Designed to be compact so multiple charts (approx. 6) can fit on one slide.
    Uses horizontal bars for better readability with many categories.

    Null values are counted as their own category, labelled "(No Response)".
    Values are converted to strings before counting, since the axis is nominal.

    Parameters:
        column: The column name to analyze (e.g., 'Age', 'Gender', 'Race/Ethnicity').
        data: Optional frame, passed through ``self._ensure_dataframe``.
            NOTE(review): the helper is expected to fall back to the instance's
            filtered data when this is None -- confirm against the helper.
        title: Chart title. If None, auto-generated as "Distribution: <column>"
            with underscores replaced by spaces and '/' padded with spaces.
        height: Chart height in pixels. If falsy, computed as
            max(120, 18 px per category + 40 px for title/padding).
        width: Chart width in pixels. If falsy, defaults to 200 (compact layout).
        show_counts: If True, display count labels at the end of the bars.

    Returns:
        alt.Chart: An Altair horizontal bar chart showing the distribution
        (layered with a text chart when ``show_counts`` is True). If ``column``
        is missing or there is no data, a text-only chart carrying the message
        is returned instead; those early returns are not passed to
        ``self._save_plot``.
    """
    df = self._ensure_dataframe(data)

    if column not in df.columns:
        return alt.Chart(pd.DataFrame({'text': [f"Column '{column}' not found"]})).mark_text().encode(text='text:N')

    # Fixed internal field name for the category values. Referencing the raw
    # column name in Altair shorthand breaks for names containing '.', '[',
    # ']' or ':' (they are parsed as nested access / type separators).
    category_field = "category"

    # Count values in the column, including nulls. Cast to string first so the
    # string placeholder always matches the column dtype.
    stats_df = (
        df.select(
            pl.col(column)
            .cast(pl.Utf8)
            .fill_null("(No Response)")
            .alias(category_field)
        )
        .group_by(category_field)
        .agg(pl.len().alias("count"))
        .sort("count", descending=True)
        .to_pandas()
    )

    if stats_df.empty:
        return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N')

    # Calculate percentages (shown in the tooltip only)
    total = stats_df['count'].sum()
    stats_df['percentage'] = (stats_df['count'] / total * 100).round(1)

    # Generate title if not provided
    if title is None:
        clean_col = column.replace('_', ' ').replace('/', ' / ')
        title = f"Distribution: {clean_col}"

    # Calculate appropriate height based on number of categories
    num_categories = len(stats_df)
    bar_height = 18  # pixels per bar
    calculated_height = max(120, num_categories * bar_height + 40)  # min 120px, +40 for title/padding

    # Horizontal bar chart - categories on Y axis, counts on X axis
    bars = alt.Chart(stats_df).mark_bar(color=ColorPalette.PRIMARY).encode(
        x=alt.X('count:Q', title='Count', axis=alt.Axis(grid=False)),
        y=alt.Y(f'{category_field}:N', title=None, sort='-x', axis=alt.Axis(labelLimit=150)),
        tooltip=[
            # The tooltip keeps the caller's column name as its label.
            alt.Tooltip(f'{category_field}:N', title=column.replace('_', ' ')),
            alt.Tooltip('count:Q', title='Count'),
            alt.Tooltip('percentage:Q', title='Percentage', format='.1f'),
        ],
    )

    # Add count labels at end of bars
    if show_counts:
        text = alt.Chart(stats_df).mark_text(
            align='left',
            baseline='middle',
            dx=3,  # Offset from bar end
            fontSize=9,
            color=ColorPalette.TEXT,
        ).encode(
            x='count:Q',
            # Same sort as the bars so both layers share one Y ordering.
            y=alt.Y(f'{category_field}:N', sort='-x'),
            text='count:Q',
        )
        chart = (bars + text)
    else:
        chart = bars

    # Compact dimensions for 6-per-slide layout
    chart = chart.properties(
        title=self._process_title(title),
        width=width or 200,
        height=height or calculated_height,
    )

    chart = self._save_plot(chart, title)
    return chart
|
||||
|
||||
def plot_speaking_style_ranking_correlation(
|
||||
self,
|
||||
style_color: str,
|
||||
|
||||
Reference in New Issue
Block a user