From 8fbc11da7a68316f2a4ad3ef19681355ab5e4be4 Mon Sep 17 00:00:00 2001 From: Luigi Maiorano Date: Tue, 16 Dec 2025 23:42:25 -0800 Subject: [PATCH] Inline removal of keywords --- 02-B_Thematic-Processing.py | 55 ++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/02-B_Thematic-Processing.py b/02-B_Thematic-Processing.py index f3411c8..b9ebe04 100644 --- a/02-B_Thematic-Processing.py +++ b/02-B_Thematic-Processing.py @@ -345,25 +345,53 @@ def _(mo, tag_select): return -@app.cell(hide_code=True) +@app.cell def _(frequency_df, min_freq_select, mo): mo.stop('keyword' not in frequency_df.columns, "Waiting for keyword extraction to finish") MIN_FREQ = min_freq_select.value - freq_df_filtered = frequency_df.loc[frequency_df['frequency'] >= MIN_FREQ].copy() + _freq_df_filtered = frequency_df.loc[frequency_df['frequency'] >= MIN_FREQ].copy() - freq_df_filtered.reset_index(drop=True, inplace=True) - - keyword_freq_filtered = freq_df_filtered.set_index('keyword')['frequency'].to_dict() - - table_selection = mo.ui.table(freq_df_filtered, page_size=50) + table_selection = mo.ui.table(_freq_df_filtered, page_size=50) table_selection - return (keyword_freq_filtered,) + return MIN_FREQ, table_selection -@app.cell +@app.cell(hide_code=True) +def _(mo, table_selection): + remove_rows_btn = None + if len(table_selection.value) >0 : + remove_rows_btn = mo.ui.run_button(label="Click to remove selected keywords and update xlsx") + + remove_rows_btn + return (remove_rows_btn,) + + +@app.cell(hide_code=True) +def _(KEYWORD_FREQ_FPATH, frequency_df, remove_rows_btn, table_selection): + if remove_rows_btn is not None and remove_rows_btn.value: + # get selected rows + selected_rows = table_selection.value + if len(selected_rows) >0 : + rows_to_drop = table_selection.value.index.tolist() + + frequency_df.drop(index=rows_to_drop, inplace=True, axis=0) + + # Save updated frequencies back to xlsx + frequency_df.to_excel( + KEYWORD_FREQ_FPATH, + index=False + ) + + print(f"Updated keyword frequencies saved to: `{KEYWORD_FREQ_FPATH}`") + + print("GO TO STEP 4b) and reload data to continue refining the dataset.") + return + + +@app.cell(hide_code=True) def _(): IGNORE_WORDS = { 'chase as a brand': [ @@ -423,11 +451,12 @@ def _( IGNORE_WORDS, Image, ImageDraw, + MIN_FREQ, WordCloud, blue_color_func, buffer, canvas_size, - keyword_freq_filtered, + frequency_df, logo_switch, mo, np, @@ -438,6 +467,12 @@ def _( if run_wordcloud_btn.value: pass + freq_df_filtered = frequency_df.loc[frequency_df['frequency'] >= MIN_FREQ].copy() + + # freq_df_filtered.reset_index(drop=True, inplace=True) + + keyword_freq_filtered = freq_df_filtered.set_index('keyword')['frequency'].to_dict() + # remove specific keywords depending on selected tag if IGNORE_WORDS.get(tag_select.value.lower()): for word in IGNORE_WORDS[tag_select.value.lower()]: