progress apply

This commit is contained in:
2025-12-16 16:28:07 -08:00
parent 12e14e3c9b
commit 228a6daa59
5 changed files with 212 additions and 45 deletions

View File

@@ -7,8 +7,8 @@ app = marimo.App(width="medium")
@app.cell
def _():
import marimo as mo
# import pandas as pd
import modin.pandas as pd
import pandas as pd
import modin.pandas as mpd
from tqdm import tqdm
from pathlib import Path
from datetime import datetime
@@ -20,8 +20,7 @@ def _():
# initialize tqdm for pandas
tqdm.pandas()
from modin.config import ProgressBar
ProgressBar.enable()
client, _models = connect_qumo_ollama(OLLAMA_LOCATION, print_models=False)
@@ -134,6 +133,9 @@ def _(mo):
@app.cell
def _(mo, tag_select):
mo.stop(not tag_select.value, mo.md("Select tag to continue"))
# mdf = mpd.from_pandas(df)
start_processing_btn = mo.ui.button(
label="Start Keyword Extraction",
kind="warn",
@@ -144,13 +146,23 @@ def _(mo, tag_select):
@app.cell
def _(client, df, mo, model_select, pd, start_processing_btn):
from utils import ollama_keyword_extraction
def _(
WORKING_DIR,
client,
df,
mo,
model_select,
pd,
start_processing_btn,
tag_select,
):
from utils import ollama_keyword_extraction, worker_extraction
# Wait for start processing button
mo.stop(not start_processing_btn.value, "Click button above to start processing")
# Run keyword extraction
df['keywords'] = df.apply(
df['keywords'] = df.progress_apply(
lambda row: pd.Series(ollama_keyword_extraction(
content=row['content'],
tag=row['tag'],
@@ -159,17 +171,9 @@ def _(client, df, mo, model_select, pd, start_processing_btn):
)),
axis=1
)
return
@app.cell
def _(df):
df['keywords_txt'] = df['keywords'].progress_apply(lambda kws: ', '.join(kws))
return
@app.cell
def _(WORKING_DIR, df, tag_select):
df[['id', 'tag', 'content', 'keywords_txt']].to_csv(
WORKING_DIR / f'keywords_{tag_select.value.replace(" ", "-")}.csv',
index=False
@@ -214,7 +218,7 @@ def _(df):
else:
keyword_freq[kw] = 1
keyword_freq_filtered = {kw: freq for kw, freq in keyword_freq.items() if freq > MIN_FREQ}
keyword_freq_filtered = {kw: freq for kw, freq in keyword_freq.items() if freq >= MIN_FREQ}
# create list of keywords sorted by their frequencies. only store the keyword
sorted_keywords = sorted(keyword_freq_filtered.items(), key=lambda x: x[1], reverse=True)
@@ -231,12 +235,12 @@ def _(plt):
def blue_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random hex color taken from the darker part of the 'Blues' colormap.

    Only ``random_state`` is read; ``word``, ``font_size``, ``position``,
    ``orientation`` and any extra keyword arguments are accepted but unused
    (presumably to satisfy a word-cloud color-callback signature -- confirm
    against the caller).

    Args:
        random_state: Object exposing ``uniform(a, b)``. When it is missing
            (or falsy) the global ``random`` module is used instead.

    Returns:
        The hex string produced by ``mcolors.to_hex`` for the sampled color.
    """
    # On the 'Blues' colormap 0.0 is white/light and 1.0 is dark blue, so
    # sampling from [0.4, 1.0] keeps the result in the darker range.
    lower, upper = 0.4, 1.0

    # Prefer the supplied generator so results are reproducible when one is given.
    rng = random_state or random
    shade = rng.uniform(lower, upper)

    # Map the sampled position to an RGBA color, then to a hex string.
    return mcolors.to_hex(plt.cm.Blues(shade))