diff --git a/03_quant_report.py b/03_quant_report.py
index 17178bb..4172687 100644
--- a/03_quant_report.py
+++ b/03_quant_report.py
@@ -174,6 +174,11 @@ def _(data_validated):
     return (data,)
 
 
+@app.cell
+def _():
+    return
+
+
 @app.cell
 def _():
     # Check if all business owners are missing a 'Consumer type' in demographics
@@ -404,6 +409,39 @@ def _():
     return
 
 
+@app.cell
+def _():
+    mo.md(r"""
+    ## Character Ranking Points (per customer segment)
+    """)
+    return
+
+
+@app.cell
+def _(S, data):
+    _content = ""
+    for _consumer_group, _consumer_df in utils.split_consumer_groups(data).items():
+
+        _char_rank = S.get_character_ranking(_consumer_df)[0]
+        _char_rank_weighted = calculate_weighted_ranking_scores(_char_rank)
+
+        # Group keys use underscores (e.g. "Lower_Mass"); show spaces in the chart title.
+        _group_label = _consumer_group.replace("_", " ")
+        _plot = S.plot_weighted_ranking_score(
+            _char_rank_weighted,
+            title=f'Most Popular Character - Weighted Popularity Score - CONSUMER: "{_group_label}"\n(1st=3pts, 2nd=2pts, 3rd=1pt)',
+            x_label='Voice'
+        )
+
+        _content += f"""
+        {mo.ui.altair_chart(_plot)}
+
+        """
+
+    mo.md(_content)
+    return
+
+
 @app.cell(hide_code=True)
 def _():
     mo.md(r"""
@@ -579,7 +617,6 @@ def _(S, voice_1_10):
 
     {mo.ui.altair_chart(_summary)}
     """)
-    return
 
 
 
diff --git a/utils.py b/utils.py
index e4966dc..e5468ac 100644
--- a/utils.py
+++ b/utils.py
@@ -1716,3 +1716,49 @@ def process_voice_ranking_data(
     ])
 
     return result.collect()
+
+
+def split_consumer_groups(df: Union[pl.LazyFrame, pl.DataFrame]) -> dict[str, pl.DataFrame]:
+    """
+    Split dataframe into groups based on Consumer column, combining A/B subgroups.
+
+    Mappings:
+    - Mass_A, Mass_B -> Mass
+    - Lower_Mass_A, Lower_Mass_B -> Lower_Mass
+    - MassAffluent_A, MassAffluent_B -> MassAffluent
+    - Mix_of_Affluent..._A, ..._B -> Mix_of_Affluent...
+
+    Args:
+        df: Frame containing a 'Consumer' column; a LazyFrame is collected first.
+
+    Returns:
+        Mapping from cleaned group name to the rows of that group. Each frame
+        carries an extra 'Consumer_Group' column; rows with a null 'Consumer'
+        are not included in any group.
+
+    Raises:
+        ValueError: If the 'Consumer' column is missing.
+    """
+    if isinstance(df, pl.LazyFrame):
+        df = df.collect()
+
+    if "Consumer" not in df.columns:
+        raise ValueError("Column 'Consumer' not found in DataFrame")
+
+    # Clean Consumer column by removing _A or _B suffix
+    # Using regex replacement for trailing _A or _B
+    df_clean = df.with_columns(
+        pl.col("Consumer")
+        .str.replace(r"_[AB]$", "")
+        .alias("Consumer_Group")
+    )
+
+    # Split into dict
+    groups = {}
+    # maintain_order keeps first-seen order so the groups come out the same way on every run
+    unique_groups = df_clean["Consumer_Group"].drop_nulls().unique(maintain_order=True).to_list()
+
+    for group in unique_groups:
+        groups[group] = df_clean.filter(pl.col("Consumer_Group") == group)
+
+    return groups