This commit is contained in:
2025-12-17 00:25:03 -08:00
parent d6b449e8c6
commit eee6947f01

View File

@@ -138,7 +138,7 @@ def _(WORKING_DIR, all_tags_df, mo, tag_select):
# filter all_tags_df to only the rows whose tag == tag_select.value
tags_df = all_tags_df.loc[all_tags_df['tag'] == tag_select.value].copy()
tags_df
tags_df.head()
return (
KEYWORDS_FPATH,
KEYWORD_FREQ_FPATH,
@@ -150,9 +150,9 @@ def _(WORKING_DIR, all_tags_df, mo, tag_select):
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
# 4) Keyword extraction
def _(KEYWORD_FREQ_FPATH, mo):
mo.md(rf"""
# 4) Keyword extraction {'(skippable, see 4b)' if KEYWORD_FREQ_FPATH.exists() else ''}
""")
return
@@ -267,7 +267,7 @@ def _(KEYWORD_FREQ_FPATH, mo, start_processing_btn):
load_existing_btn = None
if KEYWORD_FREQ_FPATH.exists():
load_existing_btn = mo.ui.run_button(label=f"Load keywords from `{KEYWORD_FREQ_FPATH.name}`")
load_existing_btn = mo.ui.run_button(label=f"Load keywords from `{KEYWORD_FREQ_FPATH.name}`", kind='warn')
load_existing_btn
return (load_existing_btn,)
@@ -349,7 +349,7 @@ def _(mo, tag_select):
return
@app.cell
@app.cell(hide_code=True)
def _(frequency_df, min_freq_select, mo):
mo.stop('keyword' not in frequency_df.columns, "Waiting for keyword extraction to finish")
@@ -384,7 +384,7 @@ def _(KEYWORD_FREQ_FPATH, frequency_df, mo, remove_rows_btn, table_selection):
try:
frequency_df.drop(index=rows_to_drop, inplace=True, axis=0)
except KeyError:
_s = mo.callout("GO TO STEP 4b) and reload data to continue refining the dataset.", kind='warn')
_s = mo.callout("GO BACK TO STEP 4b) and reload data to continue refining the dataset.", kind='warn')
else:
# Save updated frequencies back to xlsx
frequency_df.to_excel(
@@ -395,7 +395,7 @@ def _(KEYWORD_FREQ_FPATH, frequency_df, mo, remove_rows_btn, table_selection):
print(f"Updated keyword frequencies saved to: `{KEYWORD_FREQ_FPATH}`")
# mo.callout(f"Updated keyword frequencies saved to: `{KEYWORD_FREQ_FPATH}`", kind="success")
_s = mo.callout("GO TO STEP 4b) and reload data to continue refining the dataset.", kind='warn')
_s = mo.callout("GO BACK TO STEP 4b) and reload data to continue refining the dataset.", kind='warn')
_s
return
@@ -436,11 +436,13 @@ def _(mo):
canvas_size = (1200, 800)
logo_switch = mo.ui.switch(label="Include Chase Logo", value=False)
return buffer, canvas_size, logo_switch
n_words = mo.ui.slider(start=10, stop=200, step=1, value=40, debounce=True, show_value=True, label="Max number of words in WordCloud")
return buffer, canvas_size, logo_switch, n_words
@app.cell(hide_code=True)
def _(logo_switch, mo):
def _(logo_switch, mo, n_words):
run_wordcloud_btn = mo.ui.run_button(label="Generate WordCloud")
mo.vstack([
@@ -451,7 +453,7 @@ def _(logo_switch, mo):
When satisfied with the result, click 'Save WordCloud to File' to save the image."""),
mo.md('---'),
mo.hstack([logo_switch, run_wordcloud_btn], align='center', justify='space-around')]
mo.hstack([logo_switch, n_words, run_wordcloud_btn], align='center', justify='space-around')]
)
return (run_wordcloud_btn,)
@@ -469,6 +471,7 @@ def _(
frequency_df,
logo_switch,
mo,
n_words,
np,
plt,
run_wordcloud_btn,
@@ -532,7 +535,7 @@ def _(
width=canvas_size[0],
height=canvas_size[1],
max_font_size=100, # Increased font size for larger canvas
max_words=40, # Increased word count to fill space
max_words=n_words.value, # Increased word count to fill space
color_func=blue_color_func,
mask=chase_mask, # Apply the circular mask
contour_width=0,
@@ -546,7 +549,7 @@ def _(
width=canvas_size[0],
height=canvas_size[1],
max_font_size=150, # Increased font size for larger canvas
max_words=40, # Increased word count to fill space
max_words=n_words.value, # Increased word count to fill space
color_func=blue_color_func,
# mask=chase_mask, # Apply the circular mask
# contour_width=0,