diff --git a/03_quant_report.py b/03_quant_report.py
index 42d3368..ae2dd86 100644
--- a/03_quant_report.py
+++ b/03_quant_report.py
@@ -22,7 +22,6 @@ def _():
         initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File"
     )
     file_browser
-
     return (file_browser,)
 
 
@@ -117,7 +116,7 @@ def _(data_validated):
     data = data_validated
 
     data.collect()
-    return
+    return (data,)
 
 
 @app.cell(hide_code=True)
@@ -130,6 +129,81 @@ def _():
     return
 
 
+@app.cell
+def _(S, data):
+    demographics = S.get_demographics(data)[0].collect()
+    demographics
+    return (demographics,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Lucia confirmation missing 'Consumer' data
+    """)
+    return
+
+
+@app.cell
+def _(demographics):
+    # Demographics where 'Consumer' is null
+    demographics_no_consumer = demographics.filter(pl.col('Consumer').is_null())['_recordId'].to_list()
+    # demographics_no_consumer
+    return (demographics_no_consumer,)
+
+
+@app.cell
+def _(data_all, demographics_no_consumer):
+    # check if the responses with missing 'Consumer type' in demographics are all business owners as Lucia mentioned
+    assert all(data_all.filter(pl.col('_recordId').is_in(demographics_no_consumer)).collect()['QID4'] == 'Yes'), "Not all respondents with missing 'Consumer' are business owners."
+    return
+
+
+@app.cell
+def _(data_all):
+    # Check if all business owners are missing a 'Consumer type' in demographics
+    assert all([a is None for a in data_all.filter(pl.col('QID4') == 'Yes').collect()['Consumer'].unique()]) , "Not all business owners are missing 'Consumer type' in demographics."
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Demographic Distributions
+    """)
+    return
+
+
+@app.cell
+def _():
+    demo_plot_cols = [
+        'Age',
+        'Gender',
+        # 'Race/Ethnicity',
+        'Bussiness_Owner',
+        'Consumer'
+    ]
+    return (demo_plot_cols,)
+
+
+@app.cell
+def _(S, demo_plot_cols, demographics):
+    _content = """
+    ## Demographic Distributions
+
+    """
+    for c in demo_plot_cols:
+        _fig = S.plot_demographic_distribution(
+            data=demographics,
+            column=c,
+            title=f"{c.replace('Bussiness', 'Business').replace('_', ' ')} Distribution of Survey Respondents"
+        )
+        _content += f"""{mo.ui.altair_chart(_fig)}\n\n"""
+
+    mo.md(_content)
+    return
+
+
 @app.cell(hide_code=True)
 def _():
     mo.md(r"""
diff --git a/04_PPTX_Update_Images.py b/04_PPTX_Update_Images.py
new file mode 100644
index 0000000..941e92e
--- /dev/null
+++ b/04_PPTX_Update_Images.py
@@ -0,0 +1,69 @@
+import marimo
+
+__generated_with = "0.19.2"
+app = marimo.App(width="medium")
+
+with app.setup:
+    import marimo as mo
+    from pathlib import Path
+    import utils
+
+
+@app.cell
+def _():
+    mo.md(r"""
+    # Tag existing images with Alt-Text
+
+    Based on image content
+    """)
+    return
+
+
+@app.cell
+def _():
+    TAG_SOURCE = Path('data/reports/Perception-Research-Report.pptx')
+    TAG_TARGET = Path('data/reports/Perception-Research-Report_tagged.pptx')
+    TAG_IMAGE_DIR = Path('figures/OneDrive_2026-01-28/')
+    return TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET
+
+
+@app.cell
+def _(TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET):
+    utils.update_ppt_alt_text(ppt_path=TAG_SOURCE, image_source_dir=TAG_IMAGE_DIR, output_path=TAG_TARGET)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    # Replace Images using Alt-Text
+    """)
+    return
+
+
+@app.cell
+def _():
+    REPLACE_SOURCE = Path('data/test_replace_source.pptx')
+    REPLACE_TARGET = Path('data/test_replace_target.pptx')
+    return REPLACE_SOURCE, REPLACE_TARGET
+
+
+@app.cell
+def _():
+    # Image to swap in; the next cell derives the alt-text tag from this same path.
+    IMAGE_FILE = Path('figures/OneDrive_2026-01-28/Cons-Early_Professional/cold_distant_approachable_familiar_warm.png')
+    return (IMAGE_FILE,)
+
+
+@app.cell
+def _(IMAGE_FILE, REPLACE_SOURCE, REPLACE_TARGET):
+    utils.pptx_replace_named_image(
+        presentation_path=REPLACE_SOURCE,
+        target_tag=utils.image_alt_text_generator(IMAGE_FILE),
+        new_image_path=IMAGE_FILE,
+        save_path=REPLACE_TARGET)
+    return
+
+
+if __name__ == "__main__":
+    app.run()
diff --git a/99_example_ingest_qualtrics_export.py b/99_example_ingest_qualtrics_export.py
index 094a5a6..54c0a23 100644
--- a/99_example_ingest_qualtrics_export.py
+++ b/99_example_ingest_qualtrics_export.py
@@ -42,14 +42,6 @@ def _(survey):
     return
 
 
-app._unparsable_cell(
-    r"""
-    data.
-    """,
-    name="_"
-)
-
-
 @app.cell
 def _(mo):
     mo.md(r"""
diff --git a/plots.py b/plots.py
index 897b07d..8b5ad4a 100644
--- a/plots.py
+++ b/plots.py
@@ -1,6 +1,7 @@
 """Plotting functions for Voice Branding analysis using Altair."""
 
 import re
+import math
 from pathlib import Path
 
 import altair as alt
@@ -728,8 +729,6 @@ class JPMCPlotsMixin:
             },
             width=width or 800,
             height=height or getattr(self, 'plot_height', 400)
-        ).configure_view(
-            strokeWidth=0  # Remove frame which might obscure labels
         )
 
         chart = self._save_plot(chart, title)
@@ -794,6 +793,101 @@ class JPMCPlotsMixin:
         chart = self._save_plot(chart, title)
         return chart
 
+    def plot_demographic_distribution(
+        self,
+        column: str,
+        data: pl.LazyFrame | pl.DataFrame | None = None,
+        title: str | None = None,
+        height: int | None = None,
+        width: int | str | None = None,
+        show_counts: bool = True,
+    ) -> alt.Chart:
+        """Create a horizontal bar chart showing the distribution of respondents by a demographic column.
+
+        Designed to be compact so multiple charts (approx. 6) can fit on one slide.
+        Uses horizontal bars for better readability with many categories.
+
+        Parameters:
+            column: The column name to analyze (e.g., 'Age', 'Gender', 'Race/Ethnicity').
+            data: Optional DataFrame. If None, uses self.data_filtered.
+            title: Chart title. If None, auto-generates based on column name.
+            height: Chart height in pixels (default: auto-sized based on categories).
+            width: Chart width in pixels (default: 200 for compact layout).
+            show_counts: If True, display count labels on the bars.
+
+        Returns:
+            alt.Chart: An Altair horizontal bar chart showing the distribution.
+        """
+        df = self._ensure_dataframe(data)
+
+        if column not in df.columns:
+            return alt.Chart(pd.DataFrame({'text': [f"Column '{column}' not found"]})).mark_text().encode(text='text:N')
+
+        # Count values per category, including nulls; cast to string first so the placeholder is valid for any dtype
+        stats_df = (
+            df.select(pl.col(column))
+            .with_columns(pl.col(column).cast(pl.Utf8).fill_null("(No Response)"))
+            .group_by(column)
+            .agg(pl.len().alias("count"))
+            .sort("count", descending=True)
+            .to_pandas()
+        )
+
+        if stats_df.empty:
+            return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N')
+
+        # Calculate percentages
+        total = stats_df['count'].sum()
+        stats_df['percentage'] = (stats_df['count'] / total * 100).round(1)
+
+        # Generate title if not provided
+        if title is None:
+            clean_col = column.replace('_', ' ').replace('/', ' / ')
+            title = f"Distribution: {clean_col}"
+
+        # Calculate appropriate height based on number of categories
+        num_categories = len(stats_df)
+        bar_height = 18  # pixels per bar
+        calculated_height = max(120, num_categories * bar_height + 40)  # min 120px, +40 for title/padding
+
+        # Horizontal bar chart - categories on Y axis, counts on X axis
+        bars = alt.Chart(stats_df).mark_bar(color=ColorPalette.PRIMARY).encode(
+            x=alt.X('count:Q', title='Count', axis=alt.Axis(grid=False)),
+            y=alt.Y(f'{column}:N', title=None, sort='-x', axis=alt.Axis(labelLimit=150)),
+            tooltip=[
+                alt.Tooltip(f'{column}:N', title=column.replace('_', ' ')),
+                alt.Tooltip('count:Q', title='Count'),
+                alt.Tooltip('percentage:Q', title='Percentage', format='.1f')
+            ]
+        )
+
+        # Add count labels at end of bars
+        if show_counts:
+            text = alt.Chart(stats_df).mark_text(
+                align='left',
+                baseline='middle',
+                dx=3,  # Offset from bar end
+                fontSize=9,
+                color=ColorPalette.TEXT
+            ).encode(
+                x='count:Q',
+                y=alt.Y(f'{column}:N', sort='-x'),
+                text='count:Q'
+            )
+            chart = (bars + text)
+        else:
+            chart = bars
+
+        # Compact dimensions for 6-per-slide layout
+        chart = chart.properties(
+            title=self._process_title(title),
+            width=width or 200,
+            height=height or calculated_height
+        )
+
+        chart = self._save_plot(chart, title)
+        return chart
+
     def plot_speaking_style_ranking_correlation(
         self,
         style_color: str,
diff --git a/theme.py b/theme.py
index aa15845..713c6f0 100644
--- a/theme.py
+++ b/theme.py
@@ -24,6 +24,20 @@ class ColorPalette:
     GRID = "lightgray"
     BACKGROUND = "white"
 
+    # Extended palette for categorical charts (e.g., pie charts with many categories)
+    CATEGORICAL = [
+        "#0077B6",  # PRIMARY - Medium Blue
+        "#004C6D",  # RANK_1 - Dark Blue
+        "#008493",  # RANK_2 - Teal
+        "#5AAE95",  # RANK_3 - Sea Green
+        "#9E9E9E",  # RANK_4 - Grey
+        "#D3D3D3",  # NEUTRAL - Light Grey
+        "#003049",  # Dark Navy
+        "#669BBC",  # Light Steel Blue
+        "#A8DADC",  # Pale Cyan
+        "#457B9D",  # Steel Blue
+    ]
+
 
 def jpmc_altair_theme():
     """JPMC brand theme for Altair charts."""
diff --git a/utils.py b/utils.py
index ebb309f..bd0081b 100644
--- a/utils.py
+++ b/utils.py
@@ -13,8 +13,12 @@ from pptx import Presentation
 from pptx.enum.shapes import MSO_SHAPE_TYPE
 
 
-def image_alt_text_generator(fpath):
+def image_alt_text_generator(fpath, include_dataset_dirname=False) -> str:
     """convert image file path to alt text
+
+    Args:
+        fpath (str or Path): path to image file, must start with 'figures/'
+        include_dataset_dirname (bool): whether to include the dataset directory name in the alt text. Recommended to keep False, so that the images do not get tied to a specific dataset export. (Defeats the purpose of assigning alt text to be able to update images when new datasets are exported.)
     """
 
     if not isinstance(fpath, Path):
@@ -23,7 +27,10 @@ def image_alt_text_generator(fpath):
     fparts = fpath.parts
     assert fparts[0] == 'figures', "Image file path must start with 'figures'"
 
-    return Path('/'.join(fparts[2:])).as_posix()
+    if include_dataset_dirname:
+        return Path('/'.join(fparts[1:])).as_posix()
+    else:
+        return Path('/'.join(fparts[2:])).as_posix()
 
 def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save_path):
     """