Inline removal of keywords

This commit is contained in:
2025-12-16 23:42:25 -08:00
parent 50f9538dcf
commit 8fbc11da7a

View File

@@ -345,25 +345,53 @@ def _(mo, tag_select):
return
@app.cell(hide_code=True)
@app.cell
def _(frequency_df, min_freq_select, mo):
mo.stop('keyword' not in frequency_df.columns, "Waiting for keyword extraction to finish")
MIN_FREQ = min_freq_select.value
freq_df_filtered = frequency_df.loc[frequency_df['frequency'] >= MIN_FREQ].copy()
_freq_df_filtered = frequency_df.loc[frequency_df['frequency'] >= MIN_FREQ].copy()
freq_df_filtered.reset_index(drop=True, inplace=True)
keyword_freq_filtered = freq_df_filtered.set_index('keyword')['frequency'].to_dict()
table_selection = mo.ui.table(freq_df_filtered, page_size=50)
table_selection = mo.ui.table(_freq_df_filtered, page_size=50)
table_selection
return (keyword_freq_filtered,)
return MIN_FREQ, table_selection
@app.cell
@app.cell(hide_code=True)
def _(mo, table_selection):
    # Build the "remove selected keywords" button only while the table has at
    # least one selected row; otherwise this cell exports None.
    # len() is used instead of a truthiness test on purpose: the selection is
    # presumably a DataFrame (its .index is read elsewhere in this file), and a
    # DataFrame has no unambiguous truth value -- TODO confirm.
    _has_selection = len(table_selection.value) > 0
    remove_rows_btn = (
        mo.ui.run_button(label="Click to remove selected keywords and update xlsx")
        if _has_selection
        else None
    )
    # Bare expression kept as the last statement before the return; presumably
    # this is what the notebook renders as the cell's output.
    remove_rows_btn
    return (remove_rows_btn,)
# Cell: drop the rows currently selected in `table_selection` from
# `frequency_df` and write the resulting frame to KEYWORD_FREQ_FPATH.
# NOTE(review): indentation was lost in this view, so whether the save/print
# steps are nested under the inner `len(...) > 0` check or only under the
# outer button check cannot be confirmed from here.
@app.cell(hide_code=True)
def _(KEYWORD_FREQ_FPATH, frequency_df, remove_rows_btn, table_selection):
# Act only when a button object was provided (it can be None) and its value
# is truthy.
if remove_rows_btn is not None and remove_rows_btn.value:
# get selected rows
selected_rows = table_selection.value
# Skip the removal when nothing is selected.
if len(selected_rows) >0 :
# Index labels of the selected rows; assumes these labels match the index of
# `frequency_df` -- TODO confirm against the cell that builds the table.
rows_to_drop = table_selection.value.index.tolist()
# Mutates `frequency_df` in place (inplace=True); no new frame is created.
frequency_df.drop(index=rows_to_drop, inplace=True, axis=0)
# Save updated frequencies back to xlsx
# index=False: the DataFrame index is not written to the file.
frequency_df.to_excel(
KEYWORD_FREQ_FPATH,
index=False
)
# Tell the user where the file went and what to do next.
print(f"Updated keyword frequencies saved to: `{KEYWORD_FREQ_FPATH}`")
print("GO TO STEP 4b) and reload data to continue refining the dataset.")
# This cell defines no names for other cells.
return
@app.cell(hide_code=True)
def _():
IGNORE_WORDS = {
'chase as a brand': [
@@ -423,11 +451,12 @@ def _(
IGNORE_WORDS,
Image,
ImageDraw,
MIN_FREQ,
WordCloud,
blue_color_func,
buffer,
canvas_size,
keyword_freq_filtered,
frequency_df,
logo_switch,
mo,
np,
@@ -438,6 +467,12 @@ def _(
if run_wordcloud_btn.value:
pass
freq_df_filtered = frequency_df.loc[frequency_df['frequency'] >= MIN_FREQ].copy()
# freq_df_filtered.reset_index(drop=True, inplace=True)
keyword_freq_filtered = freq_df_filtered.set_index('keyword')['frequency'].to_dict()
# remove specific keywords depending on selected tag
if IGNORE_WORDS.get(tag_select.value.lower()):
for word in IGNORE_WORDS[tag_select.value.lower()]: