progress apply
This commit is contained in:
@@ -7,8 +7,8 @@ app = marimo.App(width="medium")
|
||||
@app.cell
|
||||
def _():
|
||||
import marimo as mo
|
||||
# import pandas as pd
|
||||
import modin.pandas as pd
|
||||
import pandas as pd
|
||||
import modin.pandas as mpd
|
||||
from tqdm import tqdm
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
@@ -20,8 +20,7 @@ def _():
|
||||
|
||||
# initialize tqdm for pandas
|
||||
tqdm.pandas()
|
||||
from modin.config import ProgressBar
|
||||
ProgressBar.enable()
|
||||
|
||||
|
||||
client, _models = connect_qumo_ollama(OLLAMA_LOCATION, print_models=False)
|
||||
|
||||
@@ -134,6 +133,9 @@ def _(mo):
|
||||
@app.cell
|
||||
def _(mo, tag_select):
|
||||
mo.stop(not tag_select.value, mo.md("Select tag to continue"))
|
||||
|
||||
# mdf = mpd.from_pandas(df)
|
||||
|
||||
start_processing_btn = mo.ui.button(
|
||||
label="Start Keyword Extraction",
|
||||
kind="warn",
|
||||
@@ -144,13 +146,23 @@ def _(mo, tag_select):
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(client, df, mo, model_select, pd, start_processing_btn):
|
||||
from utils import ollama_keyword_extraction
|
||||
def _(
|
||||
WORKING_DIR,
|
||||
client,
|
||||
df,
|
||||
mo,
|
||||
model_select,
|
||||
pd,
|
||||
start_processing_btn,
|
||||
tag_select,
|
||||
):
|
||||
from utils import ollama_keyword_extraction, worker_extraction
|
||||
# Wait for start processing button
|
||||
mo.stop(not start_processing_btn.value, "Click button above to start processing")
|
||||
|
||||
|
||||
# Run keyword extraction
|
||||
df['keywords'] = df.apply(
|
||||
df['keywords'] = df.progress_apply(
|
||||
lambda row: pd.Series(ollama_keyword_extraction(
|
||||
content=row['content'],
|
||||
tag=row['tag'],
|
||||
@@ -159,17 +171,9 @@ def _(client, df, mo, model_select, pd, start_processing_btn):
|
||||
)),
|
||||
axis=1
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(df):
|
||||
df['keywords_txt'] = df['keywords'].progress_apply(lambda kws: ', '.join(kws))
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(WORKING_DIR, df, tag_select):
|
||||
df[['id', 'tag', 'content', 'keywords_txt']].to_csv(
|
||||
WORKING_DIR / f'keywords_{tag_select.value.replace(" ", "-")}.csv',
|
||||
index=False
|
||||
@@ -214,7 +218,7 @@ def _(df):
|
||||
else:
|
||||
keyword_freq[kw] = 1
|
||||
|
||||
keyword_freq_filtered = {kw: freq for kw, freq in keyword_freq.items() if freq > MIN_FREQ}
|
||||
keyword_freq_filtered = {kw: freq for kw, freq in keyword_freq.items() if freq >= MIN_FREQ}
|
||||
|
||||
# create list of keywords sorted by their frequencies. only store the keyword
|
||||
sorted_keywords = sorted(keyword_freq_filtered.items(), key=lambda x: x[1], reverse=True)
|
||||
@@ -231,12 +235,12 @@ def _(plt):
|
||||
def blue_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a hex color sampled from the darker part of the 'Blues' colormap.

    Only ``random_state`` is used; ``word``, ``font_size``, ``position``,
    ``orientation`` and any extra keyword arguments are accepted but ignored.
    NOTE(review): the signature looks like a word-cloud color callback —
    confirm against the caller.

    Args:
        random_state: Object exposing ``uniform(a, b)``. When it is falsy
            (e.g. ``None``) the module-level ``random`` is used instead.

    Returns:
        The hex color string produced by ``mcolors.to_hex``.
    """
    # Prefer the supplied generator so callers can make the output reproducible.
    if random_state:
        rng = random_state
    else:
        rng = random

    # On the 'Blues' colormap 0.0 is white/light and 1.0 is dark blue, so the
    # sample is restricted to the darker 0.4-1.0 range.
    lower, upper = 0.4, 1.0
    shade = rng.uniform(lower, upper)

    # Look the sampled position up in the colormap and convert RGBA to hex.
    return mcolors.to_hex(plt.cm.Blues(shade))
|
||||
|
||||
Reference in New Issue
Block a user