# Marimo notebook: a reference / sandbox for talking to a remote Ollama
# server reachable over Tailscale. Demonstrates the two Ollama request
# styles — `generate` (single stateless prompt) vs. `chat` (message list
# with roles) — via the ollama-python client.
import marimo

__generated_with = "0.18.0"
app = marimo.App(width="medium")


@app.cell
def _():
    import marimo as mo
    from utils import connect_qumo_ollama

    # Target VM running Ollama; swap the commented line back in to use the
    # GPU host instead of the lightweight one.
    # VM_NAME = 'hiperf-gpu'
    VM_NAME = 'ollama-lite'
    # connect_qumo_ollama presumably returns an ollama.Client bound to the
    # VM's Tailscale address — confirm in utils if the API ever changes.
    client = connect_qumo_ollama(VM_NAME)
    return VM_NAME, client, mo


@app.cell(hide_code=True)
def _(VM_NAME, mo):
    # Static reference card rendered at the top of the notebook: links to the
    # web UI and python client docs, plus the chat-vs-generate vocabulary.
    mo.md(rf"""
    # Ollama Reference

    ## Ollama Web-UI:

    http://{VM_NAME}.tail44fa00.ts.net:3000

    Use the UI to modify system prompts, custom models, etc...

    **if the connection fails, make sure Tailscale is up**

    ## Ollama Python Docs:

    https://github.com/ollama/ollama-python

    Use the code below to programmatically interact with the models.
    E.g: create a small pipeline that loads a transcript and inserts it into the prompt.
    Helpful if we need to analyze 26 interviews...

    **Important Definitions:**

    - **Generate**: post a single message and get a response.
    - **Chat**: post a single message and the previous chat history, and get a response
    """)
    return


@app.cell
def _(client):
    # List the models currently available on the remote Ollama server.
    client.list().models
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # Sandbox

    Generate vs. Chat
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Chat
    """)
    return


@app.cell
def _(client):
    # Chat-style request: the full message history (here, a single user turn)
    # is sent with each call. NOTE(review): uses the 7b model while the
    # generate example below uses 32b — looks intentional for comparison,
    # but confirm.
    response_chat = client.chat(model='deepseek-r1:7b', messages=[
        {
            'role': 'user',
            'content': 'Why is the sky blue?',
        },
    ])
    return (response_chat,)


@app.cell
def _(mo, response_chat):
    # Render the assistant's reply as markdown.
    mo.md(rf"""
    {response_chat.message.content}
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Generate
    """)
    return


@app.cell
def _(client):
    # Generate-style request: one stateless prompt, no conversation history.
    response_generate = client.generate(model='deepseek-r1:32b', prompt='Why is the sky blue?')
    return (response_generate,)


@app.cell
def _(mo, response_generate):
    # Render the raw completion as markdown.
    mo.md(rf"""
    {response_generate.response}
    """)
    return


if __name__ == "__main__":
    app.run()