demographics section done
This commit is contained in:
@@ -22,7 +22,6 @@ def _():
|
|||||||
initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File"
|
initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File"
|
||||||
)
|
)
|
||||||
file_browser
|
file_browser
|
||||||
|
|
||||||
return (file_browser,)
|
return (file_browser,)
|
||||||
|
|
||||||
|
|
||||||
@@ -117,7 +116,7 @@ def _(data_validated):
|
|||||||
data = data_validated
|
data = data_validated
|
||||||
|
|
||||||
data.collect()
|
data.collect()
|
||||||
return
|
return (data,)
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
@@ -130,6 +129,81 @@ def _():
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(S, data):
    # S.get_demographics(data) returns an indexable result; only its first
    # element is used here, and .collect() is called on it before display.
    _first_result = S.get_demographics(data)[0]
    demographics = _first_result.collect()
    demographics
    return (demographics,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell(hide_code=True)
def _():
    # Section heading for the cells that check respondents whose 'Consumer'
    # value is missing.
    mo.md(r"""
    ## Lucia confirmation missing 'Consumer' data
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(demographics):
    # Collect the '_recordId' values of every demographics row whose
    # 'Consumer' entry is null.
    _consumer_is_missing = pl.col('Consumer').is_null()
    _rows_without_consumer = demographics.filter(_consumer_is_missing)
    demographics_no_consumer = _rows_without_consumer['_recordId'].to_list()
    return (demographics_no_consumer,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(data_all, demographics_no_consumer):
    # Sanity check: every response whose 'Consumer' value is missing in the
    # demographics should have answered 'Yes' to QID4 (business owner), as
    # Lucia mentioned.
    _missing_consumer_rows = data_all.filter(
        pl.col('_recordId').is_in(demographics_no_consumer)
    ).collect()
    _answered_yes = _missing_consumer_rows['QID4'] == 'Yes'
    assert all(_answered_yes), "Not all respondents with missing 'Consumer' are business owners."
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(data_all):
    # Converse check: among rows with QID4 == 'Yes' (business owners), the
    # only distinct 'Consumer' value must be null.
    _owner_rows = data_all.filter(pl.col('QID4') == 'Yes').collect()
    _owner_consumer_values = _owner_rows['Consumer'].unique()
    assert all(_value is None for _value in _owner_consumer_values), "Not all business owners are missing 'Consumer type' in demographics."
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell(hide_code=True)
def _():
    # Section heading for the demographic distribution charts.
    mo.md(r"""
    ## Demographic Distributions
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _():
    # Demographic columns to chart, one distribution plot per entry.
    demo_plot_cols = [
        'Age',
        'Gender',
        # 'Race/Ethnicity',
        # NOTE(review): the 'Bussiness' spelling is kept on purpose here; the
        # plotting cell only corrects it when building chart titles, so this
        # presumably mirrors the actual column name - confirm against the data.
        'Bussiness_Owner',
        'Consumer'
    ]
    return (demo_plot_cols,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(S, demo_plot_cols, demographics):
    # Render one distribution chart per demographic column under a single
    # markdown heading.
    _sections = ["\n## Demographic Distributions\n\n"]

    # The loop variable is underscore-prefixed so it stays private to this
    # cell; a bare name such as `c` would be registered by marimo as a
    # notebook-wide definition and could clash with other cells.
    for _col in demo_plot_cols:
        # Human-readable label: fix the 'Bussiness' spelling used in the
        # column name and turn underscores into spaces.
        _label = _col.replace('Bussiness', 'Business').replace('_', ' ')
        _fig = S.plot_demographic_distribution(
            data=demographics,
            column=_col,
            title=f"{_label} Distribution of Survey Respondents",
        )
        _sections.append(f"{mo.ui.altair_chart(_fig)}\n\n")

    mo.md("".join(_sections))
    return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _():
|
def _():
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
|
|||||||
70
04_PPTX_Update_Images.py
Normal file
70
04_PPTX_Update_Images.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
import marimo
|
||||||
|
|
||||||
|
__generated_with = "0.19.2"
|
||||||
|
app = marimo.App(width="medium")
|
||||||
|
|
||||||
|
with app.setup:
|
||||||
|
import marimo as mo
|
||||||
|
from pathlib import Path
|
||||||
|
import utils
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _():
    # Heading for the first half of the notebook: tagging the images of an
    # existing presentation with alt-text.
    mo.md(r"""
    # Tag existing images with Alt-Text

    Based on image content
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _():
    # Paths handed to utils.update_ppt_alt_text in the next cell:
    # TAG_SOURCE is passed as ppt_path, TAG_TARGET as output_path and
    # TAG_IMAGE_DIR as image_source_dir.
    TAG_SOURCE = Path('data/reports/Perception-Research-Report.pptx')
    TAG_TARGET = Path('data/reports/Perception-Research-Report_tagged.pptx')
    TAG_IMAGE_DIR = Path('figures/OneDrive_2026-01-28/')
    return TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET):
    # Run the alt-text update on TAG_SOURCE using the images found under
    # TAG_IMAGE_DIR; the result path is TAG_TARGET.
    utils.update_ppt_alt_text(
        ppt_path=TAG_SOURCE,
        image_source_dir=TAG_IMAGE_DIR,
        output_path=TAG_TARGET,
    )
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell(hide_code=True)
def _():
    # Heading for the second half of the notebook: swapping images in a
    # presentation by their alt-text.
    mo.md(r"""
    # Replace Images using Alt-Text
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _():
    # Paths handed to utils.pptx_replace_named_image further down:
    # REPLACE_SOURCE is passed as presentation_path, REPLACE_TARGET as save_path.
    REPLACE_SOURCE = Path('data/test_replace_source.pptx')
    REPLACE_TARGET = Path('data/test_replace_target.pptx')
    return REPLACE_SOURCE, REPLACE_TARGET
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _():
    # Image whose path is used both to derive the alt-text tag and as the
    # replacement picture in the following cell. The original cell was saved
    # as unparsable because the Path(...) call was missing its closing
    # parenthesis, which left IMAGE_FILE undefined for the cell that needs it.
    IMAGE_FILE = Path('figures/OneDrive_2026-01-28/Cons-Early_Professional/cold_distant_approachable_familiar_warm.png')
    return (IMAGE_FILE,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(IMAGE_FILE, REPLACE_SOURCE, REPLACE_TARGET):
    # The tag to look for is derived from the image path itself, then the
    # image carrying that tag in REPLACE_SOURCE is swapped for IMAGE_FILE and
    # the result is saved to REPLACE_TARGET.
    _alt_text_tag = utils.image_alt_text_generator(IMAGE_FILE)
    utils.pptx_replace_named_image(
        presentation_path=REPLACE_SOURCE,
        target_tag=_alt_text_tag,
        new_image_path=IMAGE_FILE,
        save_path=REPLACE_TARGET,
    )
    return
|
||||||
|
|
||||||
|
|
||||||
|
# Run the marimo app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|
||||||
@@ -42,14 +42,6 @@ def _(survey):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
app._unparsable_cell(
|
|
||||||
r"""
|
|
||||||
data.
|
|
||||||
""",
|
|
||||||
name="_"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(mo):
|
def _(mo):
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
|
|||||||
98
plots.py
98
plots.py
@@ -1,6 +1,7 @@
|
|||||||
"""Plotting functions for Voice Branding analysis using Altair."""
|
"""Plotting functions for Voice Branding analysis using Altair."""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import math
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import altair as alt
|
import altair as alt
|
||||||
@@ -728,8 +729,6 @@ class JPMCPlotsMixin:
|
|||||||
},
|
},
|
||||||
width=width or 800,
|
width=width or 800,
|
||||||
height=height or getattr(self, 'plot_height', 400)
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
).configure_view(
|
|
||||||
strokeWidth=0 # Remove frame which might obscure labels
|
|
||||||
)
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title)
|
||||||
@@ -794,6 +793,101 @@ class JPMCPlotsMixin:
|
|||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
|
def plot_demographic_distribution(
    self,
    column: str,
    data: pl.LazyFrame | pl.DataFrame | None = None,
    title: str | None = None,
    height: int | None = None,
    width: int | str | None = None,
    show_counts: bool = True,
) -> alt.Chart:
    """Create a horizontal bar chart of respondent counts for one demographic column.

    Designed to be compact so multiple charts (approx. 6) can fit on one slide.
    Uses horizontal bars for better readability with many categories.

    Parameters:
        column: The column name to analyze (e.g., 'Age', 'Gender', 'Race/Ethnicity').
        data: Optional LazyFrame or DataFrame; it is resolved through
            ``self._ensure_dataframe``, which also decides the fallback used
            when this is None.
        title: Chart title. If None, ``"Distribution: <column>"`` is generated
            with underscores replaced by spaces and '/' padded with spaces.
        height: Chart height in pixels. Defaults to 18 px per category plus
            40 px of padding, with a minimum of 120 px.
        width: Chart width (default: 200 for compact layout).
        show_counts: If True, display count labels at the end of the bars.

    Returns:
        An Altair horizontal bar chart showing the distribution, passed
        through ``self._save_plot``. With ``show_counts`` the bars are layered
        with a text mark. If ``column`` is missing or there are no rows, a
        text-only placeholder chart is returned instead (and not saved).
    """
    df = self._ensure_dataframe(data)

    # Guard clause: unknown column -> placeholder chart carrying the message.
    if column not in df.columns:
        return alt.Chart(pd.DataFrame({'text': [f"Column '{column}' not found"]})).mark_text().encode(text='text:N')

    # Count values in the column, including nulls. The column is cast to
    # string first so the "(No Response)" placeholder can be filled in
    # regardless of the column's original dtype; the chart treats the values
    # as nominal categories anyway.
    stats_df = (
        df.select(pl.col(column).cast(pl.Utf8).fill_null("(No Response)"))
        .group_by(column)
        .agg(pl.len().alias("count"))
        .sort("count", descending=True)
        .to_pandas()
    )

    if stats_df.empty:
        return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N')

    # Percentage share of each category, rounded to one decimal (tooltip only).
    total = stats_df['count'].sum()
    stats_df['percentage'] = (stats_df['count'] / total * 100).round(1)

    # Generate title if not provided
    if title is None:
        clean_col = column.replace('_', ' ').replace('/', ' / ')
        title = f"Distribution: {clean_col}"

    # Height grows with the number of categories so bars keep a fixed thickness.
    num_categories = len(stats_df)
    bar_height = 18  # pixels per bar
    calculated_height = max(120, num_categories * bar_height + 40)  # min 120px, +40 for title/padding

    # Horizontal bar chart - categories on Y axis, counts on X axis.
    # sort='-x' orders the categories by descending count.
    bars = alt.Chart(stats_df).mark_bar(color=ColorPalette.PRIMARY).encode(
        x=alt.X('count:Q', title='Count', axis=alt.Axis(grid=False)),
        y=alt.Y(f'{column}:N', title=None, sort='-x', axis=alt.Axis(labelLimit=150)),
        tooltip=[
            alt.Tooltip(f'{column}:N', title=column.replace('_', ' ')),
            alt.Tooltip('count:Q', title='Count'),
            alt.Tooltip('percentage:Q', title='Percentage', format='.1f'),
        ],
    )

    if show_counts:
        # Count labels placed just past the end of each bar; the Y encoding
        # repeats the same sort so labels line up with their bars.
        text = alt.Chart(stats_df).mark_text(
            align='left',
            baseline='middle',
            dx=3,  # Offset from bar end
            fontSize=9,
            color=ColorPalette.TEXT,
        ).encode(
            x='count:Q',
            y=alt.Y(f'{column}:N', sort='-x'),
            text='count:Q',
        )
        chart = bars + text
    else:
        chart = bars

    # Compact dimensions for 6-per-slide layout
    chart = chart.properties(
        title=self._process_title(title),
        width=width or 200,
        height=height or calculated_height,
    )

    chart = self._save_plot(chart, title)
    return chart
|
||||||
|
|
||||||
def plot_speaking_style_ranking_correlation(
|
def plot_speaking_style_ranking_correlation(
|
||||||
self,
|
self,
|
||||||
style_color: str,
|
style_color: str,
|
||||||
|
|||||||
14
theme.py
14
theme.py
@@ -24,6 +24,20 @@ class ColorPalette:
|
|||||||
GRID = "lightgray"
|
GRID = "lightgray"
|
||||||
BACKGROUND = "white"
|
BACKGROUND = "white"
|
||||||
|
|
||||||
|
# Extended palette for categorical charts (e.g., pie charts with many categories)
|
||||||
|
CATEGORICAL = [
|
||||||
|
"#0077B6", # PRIMARY - Medium Blue
|
||||||
|
"#004C6D", # RANK_1 - Dark Blue
|
||||||
|
"#008493", # RANK_2 - Teal
|
||||||
|
"#5AAE95", # RANK_3 - Sea Green
|
||||||
|
"#9E9E9E", # RANK_4 - Grey
|
||||||
|
"#D3D3D3", # NEUTRAL - Light Grey
|
||||||
|
"#003049", # Dark Navy
|
||||||
|
"#669BBC", # Light Steel Blue
|
||||||
|
"#A8DADC", # Pale Cyan
|
||||||
|
"#457B9D", # Steel Blue
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def jpmc_altair_theme():
|
def jpmc_altair_theme():
|
||||||
"""JPMC brand theme for Altair charts."""
|
"""JPMC brand theme for Altair charts."""
|
||||||
|
|||||||
11
utils.py
11
utils.py
@@ -13,8 +13,12 @@ from pptx import Presentation
|
|||||||
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
||||||
|
|
||||||
|
|
||||||
def image_alt_text_generator(fpath):
|
def image_alt_text_generator(fpath, include_dataset_dirname=False) -> str:
|
||||||
"""convert image file path to alt text
|
"""convert image file path to alt text
|
||||||
|
|
||||||
|
Args:
|
||||||
|
fpath (str or Path): path to image file, must start with 'figures/'
|
||||||
|
include_dataset_dirname (bool): whether to include the dataset directory name in the alt text. Recommended to keep False so that images are not tied to a specific dataset export; setting it to True defeats the purpose of the alt text, which is to let images be updated when new datasets are exported.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not isinstance(fpath, Path):
|
if not isinstance(fpath, Path):
|
||||||
@@ -23,7 +27,10 @@ def image_alt_text_generator(fpath):
|
|||||||
fparts = fpath.parts
|
fparts = fpath.parts
|
||||||
assert fparts[0] == 'figures', "Image file path must start with 'figures'"
|
assert fparts[0] == 'figures', "Image file path must start with 'figures'"
|
||||||
|
|
||||||
return Path('/'.join(fparts[2:])).as_posix()
|
if include_dataset_dirname:
|
||||||
|
return Path('/'.join(fparts[1:])).as_posix()
|
||||||
|
else:
|
||||||
|
return Path('/'.join(fparts[2:])).as_posix()
|
||||||
|
|
||||||
def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save_path):
|
def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save_path):
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user