JPMC-quant/speech_data_correlation.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7174c11a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import polars as pl\n",
    "from utils import QualtricsSurvey, calculate_weighted_ranking_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d9d11d52",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input file locations (relative paths).\n",
    "# Per-voice speech measurements; parsed below as a ';'-separated CSV.\n",
    "SPEECH_DATA = 'data/speech_data/JPMC Speech data ab samples (Final Speech Data AB samples).csv'\n",
    "# Survey results export (labels CSV) and the survey definition (.qsf); both are passed to QualtricsSurvey.\n",
    "# NOTE(review): the .qsf path points at the 2026-01-21 soft-launch export while the results are dated\n",
    "# February 4, 2026 — confirm the survey definition did not change between the two.\n",
    "RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'\n",
    "QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "c8f06ff8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The speech export is semicolon-separated; columns that use a decimal comma are\n",
    "# converted to floats in a later cell.\n",
    "speech_df = pl.read_csv(SPEECH_DATA, separator=';')\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8da85898",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "e7ccd8ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _to_float(col_name: str, *, strip: tuple = (), strict: bool = True) -> pl.Expr:\n",
    "    \"\"\"Build an expression that parses a decimal-comma text column into Float64.\n",
    "\n",
    "    Args:\n",
    "        col_name: Column to convert.\n",
    "        strip: Literal substrings (e.g. ' ' or '%') removed everywhere before parsing.\n",
    "        strict: When False, values that still cannot be parsed (such as empty\n",
    "            strings) become null instead of raising.\n",
    "\n",
    "    Returns:\n",
    "        A polars expression yielding the column as Float64 under its original name.\n",
    "    \"\"\"\n",
    "    # Cast to Utf8 first so the cell can be re-run on an already-converted column.\n",
    "    expr = pl.col(col_name).cast(pl.Utf8)\n",
    "    for token in strip:\n",
    "        # replace_all: str.replace would only remove the first occurrence.\n",
    "        expr = expr.str.replace_all(token, \"\", literal=True)\n",
    "    # A number has a single decimal comma, so replacing the first match is enough.\n",
    "    return expr.str.replace(\",\", \".\", literal=True).cast(pl.Float64, strict=strict)\n",
    "\n",
    "\n",
    "# Required columns (no existence check): comma decimal -> period, strict cast so bad values raise.\n",
    "cols_to_convert = [\n",
    "    _to_float(name)\n",
    "    for name in (\n",
    "        'dur (s)',\n",
    "        'phonationtime (s)',\n",
    "        'articulation rate (nsyll / phonationtime)',\n",
    "        'speech rate words per minute',\n",
    "    )\n",
    "]\n",
    "\n",
    "# Optional columns: also strip spaces / percent signs; unparseable values become null.\n",
    "if \"Standard deviation pitch\" in speech_df.columns:\n",
    "    cols_to_convert.append(_to_float(\"Standard deviation pitch\", strip=(\" \",), strict=False))\n",
    "\n",
    "cols_to_convert.extend(\n",
    "    _to_float(name, strip=(\"%\", \" \"), strict=False)\n",
    "    for name in (\"Jitter (local)\", \"Shimmer (local)\")\n",
    "    if name in speech_df.columns\n",
    ")\n",
    "\n",
    "speech_df = speech_df.with_columns(cols_to_convert)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "450d1d29",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape: (18, 3)\n",
      "┌───────┬────────┬──────┐\n",
      "│ Voice ┆ Gender ┆ Age  │\n",
      "│ ---   ┆ ---    ┆ ---  │\n",
      "│ str   ┆ str    ┆ i64  │\n",
      "╞═══════╪════════╪══════╡\n",
      "│ V04   ┆ female ┆ 28   │\n",
      "│ V08   ┆ female ┆ 45   │\n",
      "│ V14   ┆ female ┆ 50   │\n",
      "│ V16   ┆ male   ┆ 40   │\n",
      "│ V34   ┆ male   ┆ 42   │\n",
      "│ …     ┆ …      ┆ …    │\n",
      "│ V82   ┆ female ┆ null │\n",
      "│ V86   ┆ male   ┆ 62   │\n",
      "│ V88   ┆ male   ┆ 42   │\n",
      "│ V89   ┆ female ┆ 32   │\n",
      "│ V91   ┆ female ┆ null │\n",
      "└───────┴────────┴──────┘\n"
     ]
    }
   ],
   "source": [
    "# The survey identifies voices with zero-padded IDs (\"V04\", \"V81\"), so the Voice\n",
    "# column from the speech file is rendered in the same format before joining.\n",
    "# concat_str propagates nulls, so a missing Voice stays null.\n",
    "padded_number = pl.col(\"Voice\").cast(pl.Utf8).str.zfill(2)\n",
    "speech_df = speech_df.with_columns(\n",
    "    pl.concat_str([pl.lit(\"V\"), padded_number]).alias(\"Voice\")\n",
    ")\n",
    "\n",
    "voice_overview = speech_df.select([\"Voice\", \"Gender\", \"Age\"]).sort(\"Voice\")\n",
    "print(voice_overview)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5fb615fe",
   "metadata": {},
   "source": [
    "# Get survey data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "bb4200ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the survey helper from the results export and the .qsf survey definition.\n",
    "S = QualtricsSurvey(RESULTS_FILE, QSF_FILE, figures_dir=None)\n",
    "# Load the survey data, then run it through the helper's filter step.\n",
    "# NOTE(review): filter_data is called without any criteria — presumably it applies project\n",
    "# defaults or is a pass-through; confirm in utils.\n",
    "data_all = S.load_data()\n",
    "data = S.filter_data(data_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "57243afd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape: (5, 2)\n",
      "┌───────────┬────────────────┐\n",
      "│ Character ┆ Weighted Score │\n",
      "│ ---       ┆ ---            │\n",
      "│ str       ┆ i64            │\n",
      "╞═══════════╪════════════════╡\n",
      "│ V14       ┆ 209            │\n",
      "│ V04       ┆ 209            │\n",
      "│ V08       ┆ 180            │\n",
      "│ V82       ┆ 172            │\n",
      "│ V77       ┆ 158            │\n",
      "└───────────┴────────────────┘\n"
     ]
    }
   ],
   "source": [
    "# Only the first element of the get_top_3_voices result is used for the ranking scores.\n",
    "top3_voices = S.get_top_3_voices(data)[0]\n",
    "# Yields one row per voice with columns 'Character' and 'Weighted Score' (see printed output).\n",
    "voices_weighted_rank = calculate_weighted_ranking_scores(top3_voices)\n",
    "print(voices_weighted_rank.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b38d21fc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape: (5, 18)\n",
      "┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐\n",
      "│ _recordId ┆ Voice_Sca ┆ Voice_Sca ┆ Voice_Sca ┆ … ┆ Voice_Sca ┆ Voice_Sca ┆ Voice_Sca ┆ Voice_Sc │\n",
      "│ ---       ┆ le_1_10__ ┆ le_1_10__ ┆ le_1_10__ ┆   ┆ le_1_10__ ┆ le_1_10__ ┆ le_1_10__ ┆ ale_1_10 │\n",
      "│ str       ┆ V14       ┆ V04       ┆ V08       ┆   ┆ V74       ┆ V81       ┆ V86       ┆ __V88    │\n",
      "│           ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │\n",
      "│           ┆ f64       ┆ f64       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ f64       ┆ f64      │\n",
      "╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡\n",
      "│ R_59pdrC3 ┆ null      ┆ null      ┆ null      ┆ … ┆ null      ┆ null      ┆ 5.5       ┆ null     │\n",
      "│ urLmZnbP  ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │\n",
      "│ R_3fJSKy5 ┆ 6.0       ┆ 5.0       ┆ null      ┆ … ┆ null      ┆ null      ┆ 6.0       ┆ null     │\n",
      "│ SVxmNdBC  ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │\n",
      "│ R_3g11G0u ┆ 9.5       ┆ null      ┆ 5.0       ┆ … ┆ null      ┆ null      ┆ null      ┆ 9.5      │\n",
      "│ pJ7iGt8Q  ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │\n",
      "│ R_3i3dGL7 ┆ 6.0       ┆ 9.0       ┆ 8.0       ┆ … ┆ null      ┆ 2.0       ┆ 3.0       ┆ null     │\n",
      "│ cfLOTgxb  ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │\n",
      "│ R_3BBF1fR ┆ null      ┆ null      ┆ null      ┆ … ┆ 6.0       ┆ null      ┆ 8.5       ┆ null     │\n",
      "│ WGGeButr  ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │\n",
      "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘\n"
     ]
    }
   ],
   "source": [
    "# The first element of the get_voice_scale_1_10 result is collected into an eager DataFrame.\n",
    "# Printed shape: a '_recordId' column plus one 'Voice_Scale_1_10__<voice>' column per voice,\n",
    "# with null where a record has no rating for that voice.\n",
    "voice_1_10 = S.get_voice_scale_1_10(data)[0].collect()\n",
    "print(voice_1_10.head())\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5b3e6ad0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape: (5, 2)\n",
      "┌───────┬────────────────────────┐\n",
      "│ Voice ┆ Avg Voice Score (1-10) │\n",
      "│ ---   ┆ ---                    │\n",
      "│ str   ┆ f64                    │\n",
      "╞═══════╪════════════════════════╡\n",
      "│ V08   ┆ 7.38172                │\n",
      "│ V82   ┆ 7.376984               │\n",
      "│ V89   ┆ 7.373206               │\n",
      "│ V86   ┆ 7.264444               │\n",
      "│ V69   ┆ 7.219577               │\n",
      "└───────┴────────────────────────┘\n"
     ]
    }
   ],
   "source": [
    "# --- Mean 1-10 rating per voice ---\n",
    "# Each rating column is named '<prefix><voice id>'; removing the prefix leaves the ID (e.g. \"V14\").\n",
    "scale_prefix = \"Voice_Scale_1_10__\"\n",
    "voice_cols = [name for name in voice_1_10.columns if name.startswith(scale_prefix)]\n",
    "\n",
    "# Column means skip nulls, so each voice is averaged over the records that rated it.\n",
    "avg_scores = [\n",
    "    {\n",
    "        \"Voice\": name.replace(scale_prefix, \"\"),\n",
    "        \"Avg Voice Score (1-10)\": voice_1_10[name].mean(),\n",
    "    }\n",
    "    for name in voice_cols\n",
    "]\n",
    "\n",
    "avg_voice_scores = pl.DataFrame(avg_scores)\n",
    "top_by_avg = avg_voice_scores.sort(\"Avg Voice Score (1-10)\", descending=True)\n",
    "print(top_by_avg.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "79626ffb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Merged rows: 18  (voices with both speech data and survey data)\n",
      "  → Voices missing Avg Voice Score: ['V46']\n",
      "shape: (5, 4)\n",
      "┌───────┬────────┬────────────────────────┬────────────────┐\n",
      "│ Voice ┆ Gender ┆ Avg Voice Score (1-10) ┆ Weighted Score │\n",
      "│ ---   ┆ ---    ┆ ---                    ┆ ---            │\n",
      "│ str   ┆ str    ┆ f64                    ┆ i64            │\n",
      "╞═══════╪════════╪════════════════════════╪════════════════╡\n",
      "│ V14   ┆ female ┆ 7.216279               ┆ 209            │\n",
      "│ V04   ┆ female ┆ 7.07971                ┆ 209            │\n",
      "│ V08   ┆ female ┆ 7.38172                ┆ 180            │\n",
      "│ V82   ┆ female ┆ 7.376984               ┆ 172            │\n",
      "│ V77   ┆ female ┆ 6.960894               ┆ 158            │\n",
      "└───────┴────────┴────────────────────────┴────────────────┘\n"
     ]
    }
   ],
   "source": [
    "# --- Attach both survey metrics to the speech attributes ---\n",
    "# The ranking table names its voice column \"Character\"; rename it so it can be joined on \"Voice\".\n",
    "weighted_rank = voices_weighted_rank.rename({\"Character\": \"Voice\"})\n",
    "\n",
    "# Step 1 (left join): keep every speech row; a voice without scale ratings (V46)\n",
    "# gets a null Avg Voice Score instead of being dropped.\n",
    "speech_with_avg = speech_df.join(avg_voice_scores, on=\"Voice\", how=\"left\")\n",
    "# Step 2 (inner join): keep only voices that also have a weighted ranking score.\n",
    "merged = speech_with_avg.join(weighted_rank, on=\"Voice\", how=\"inner\")\n",
    "\n",
    "missing_avg = merged.filter(pl.col('Avg Voice Score (1-10)').is_null())['Voice'].to_list()\n",
    "summary_cols = [\"Voice\", \"Gender\", \"Avg Voice Score (1-10)\", \"Weighted Score\"]\n",
    "\n",
    "print(f\"Merged rows: {merged.height}  (voices with both speech data and survey data)\")\n",
    "print(f\"  → Voices missing Avg Voice Score: {missing_avg}\")\n",
    "print(merged.select(summary_cols).head())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "932cfb9e",
   "metadata": {},
   "source": [
    "# Correlation: Speech Attributes vs Survey Metrics\n",
    "\n",
    "We correlate each speech characteristic (pitch, duration, jitter, etc.) against two survey metrics:\n",
    "\n",
    "| Metric | Type | Correlation Method | Why |\n",
    "|---|---|---|---|\n",
    "| **Avg Voice Score (1-10)** | Continuous | **Pearson** | Both variables are continuous and approximately interval-scaled — Pearson captures linear relationships well here. |\n",
    "| **Weighted Ranking Score** | Ordinal / count-based | **Spearman** | The weighted score (1st=3pts, 2nd=2pts, 3rd=1pt) is ordinal in nature with a small number of discrete values. Spearman is rank-based, making no assumptions about linearity or normality — more appropriate for this type of data. |\n",
    "\n",
    "> **Note:** With only ~17 voices, all correlations should be interpreted cautiously. Small samples amplify the influence of individual data points."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "77658327",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape: (39, 3)\n",
      "┌─────────────────────────────────┬─────────────────────────────────┬─────────────┐\n",
      "│ attribute                       ┆ metric                          ┆ correlation │\n",
      "│ ---                             ┆ ---                             ┆ ---         │\n",
      "│ str                             ┆ str                             ┆ f64         │\n",
      "╞═════════════════════════════════╪═════════════════════════════════╪═════════════╡\n",
      "│ Age                             ┆ Avg Voice Score (1-10) [Pearso… ┆ 0.386054    │\n",
      "│ Age                             ┆ Weighted Ranking Score [Spearm… ┆ 0.094086    │\n",
      "│ Mean pitch                      ┆ Avg Voice Score (1-10) [Pearso… ┆ 0.459684    │\n",
      "│ Mean pitch                      ┆ Weighted Ranking Score [Spearm… ┆ 0.63429     │\n",
      "│ Standard deviation pitch        ┆ Avg Voice Score (1-10) [Pearso… ┆ 0.664432    │\n",
      "│ …                               ┆ …                               ┆ …           │\n",
      "│ speechrate (nsyll/dur)          ┆ Weighted Ranking Score [Spearm… ┆ -0.336524   │\n",
      "│ articulation rate (nsyll / pho… ┆ Avg Voice Score (1-10) [Pearso… ┆ -0.456181   │\n",
      "│ articulation rate (nsyll / pho… ┆ Weighted Ranking Score [Spearm… ┆ -0.268239   │\n",
      "│ speech rate words per minute    ┆ Avg Voice Score (1-10) [Pearso… ┆ -0.26437    │\n",
      "│ speech rate words per minute    ┆ Weighted Ranking Score [Spearm… ┆ 0.252577    │\n",
      "└─────────────────────────────────┴─────────────────────────────────┴─────────────┘\n"
     ]
    }
   ],
   "source": [
    "# --- Compute correlations ---\n",
    "# Pearson for the continuous Avg Voice Score, Spearman for the ordinal Weighted Score.\n",
    "exclude_cols = {\"Voice\", \"Gender\", \"Avg Voice Score (1-10)\", \"Weighted Score\"}\n",
    "speech_attrs = [c for c in merged.columns if c not in exclude_cols]\n",
    "\n",
    "# (metric column in `merged`, correlation method, label written to the result table)\n",
    "metric_specs = [\n",
    "    (\"Avg Voice Score (1-10)\", \"pearson\", \"Avg Voice Score (1-10) [Pearson]\"),\n",
    "    (\"Weighted Score\", \"spearman\", \"Weighted Ranking Score [Spearman]\"),\n",
    "]\n",
    "\n",
    "rows = []\n",
    "for attr in speech_attrs:\n",
    "    for metric_col, method, label in metric_specs:\n",
    "        # Pairwise deletion: drop nulls for this attribute/metric pair only, so a voice with a\n",
    "        # null Avg Voice Score (V46) still contributes to the Weighted Score correlation.\n",
    "        pair = merged.select([attr, metric_col]).drop_nulls()\n",
    "        if pair.height > 2:\n",
    "            r = pair.select(pl.corr(attr, metric_col, method=method)).item()\n",
    "        else:\n",
    "            # Too few complete pairs for a meaningful coefficient.\n",
    "            r = None\n",
    "        rows.append({\"attribute\": attr, \"metric\": label, \"correlation\": r})\n",
    "\n",
    "# Keep only coefficients that could be computed (drop null and NaN results).\n",
    "# NOTE(review): the stored output has no Pearson row for 'speechrate (nsyll/dur)'; that column\n",
    "# is not among the decimal-comma columns converted earlier — check its dtype.\n",
    "corr_long = (\n",
    "    pl.DataFrame(rows)\n",
    "    .drop_nulls()\n",
    "    .filter(pl.col(\"correlation\").is_not_nan())\n",
    ")\n",
    "print(corr_long)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "ef4ceefc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Female Age correlation:\n",
      "shape: (2, 3)\n",
      "┌───────────┬─────────────────────────────────┬─────────────┐\n",
      "│ attribute ┆ metric                          ┆ correlation │\n",
      "│ ---       ┆ ---                             ┆ ---         │\n",
      "│ str       ┆ str                             ┆ f64         │\n",
      "╞═══════════╪═════════════════════════════════╪═════════════╡\n",
      "│ Age       ┆ Avg Voice Score (1-10) [Pearso… ┆ -0.023566   │\n",
      "│ Age       ┆ Weighted Ranking Score [Spearm… ┆ 0.231908    │\n",
      "└───────────┴─────────────────────────────────┴─────────────┘\n",
      "\n",
      "Female data (Age + Weighted Score):\n",
      "shape: (6, 3)\n",
      "┌───────┬─────┬────────────────┐\n",
      "│ Voice ┆ Age ┆ Weighted Score │\n",
      "│ ---   ┆ --- ┆ ---            │\n",
      "│ str   ┆ i64 ┆ i64            │\n",
      "╞═══════╪═════╪════════════════╡\n",
      "│ V04   ┆ 28  ┆ 209            │\n",
      "│ V89   ┆ 32  ┆ 130            │\n",
      "│ V48   ┆ 35  ┆ 144            │\n",
      "│ V08   ┆ 45  ┆ 180            │\n",
      "│ V77   ┆ 48  ┆ 158            │\n",
      "│ V14   ┆ 50  ┆ 209            │\n",
      "└───────┴─────┴────────────────┘\n"
     ]
    }
   ],
   "source": [
    "# Verify: Age correlation in female split should no longer be 1.0\n",
    "# NOTE(review): corr_female is not defined in any cell above this one — presumably it comes from\n",
    "# a later cell (this cell was executed out of order). On a fresh Restart & Run All this line\n",
    "# raises NameError unless the cell is moved below that definition.\n",
    "print(\"Female Age correlation:\")\n",
    "print(corr_female.filter(pl.col(\"attribute\") == \"Age\"))\n",
    "\n",
    "# Raw female rows behind the Age / Weighted Score correlation, sorted by Age for eyeballing.\n",
    "print(\"\\nFemale data (Age + Weighted Score):\")\n",
    "female_check = merged.filter(pl.col(\"Gender\") == \"female\").select([\"Voice\", \"Age\", \"Weighted Score\"]).drop_nulls().sort(\"Age\")\n",
    "print(female_check)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "0d9567ff",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved plot to figures/2-4-26/All_Respondents/speech_attr_vs_survey_correlation.png\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-edfa78fab61544c2b6298947b6780648.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-edfa78fab61544c2b6298947b6780648.vega-embed details,\n",
       "  #altair-viz-edfa78fab61544c2b6298947b6780648.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-edfa78fab61544c2b6298947b6780648\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-edfa78fab61544c2b6298947b6780648\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-edfa78fab61544c2b6298947b6780648\");\n",
       "    }\n",
       "\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@6?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@6.1.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@7?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      let deps = [\"vega-embed\"];\n",
       "      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"6\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"6.1.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"7\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 1000, \"continuousHeight\": 500, \"strokeWidth\": 0}, \"background\": \"white\", \"axis\": {\"grid\": true, \"gridColor\": \"lightgray\", \"labelFontSize\": 11, \"titleFontSize\": 12, \"labelColor\": \"black\", \"titleColor\": \"black\", \"labelLimit\": 200}, \"axisX\": {\"labelAngle\": -45, \"labelLimit\": 200}, \"axisY\": {\"labelAngle\": 0}, \"legend\": {\"orient\": \"top\", \"direction\": \"horizontal\", \"titleFontSize\": 11, \"labelFontSize\": 11}, \"title\": {\"fontSize\": 14, \"color\": \"black\", \"anchor\": \"start\", \"subtitleFontSize\": 10, \"subtitleColor\": \"gray\"}, \"bar\": {\"color\": \"#0077B6\"}}, \"layer\": [{\"data\": {\"name\": \"data-bf2d69abcead9f87074f0225263a6b70\"}, \"mark\": {\"type\": \"rect\", \"stroke\": \"white\", \"strokeWidth\": 1}, \"encoding\": {\"color\": {\"field\": \"correlation\", \"legend\": {\"title\": \"Pearson r\"}, \"scale\": {\"domain\": [-1, 1], \"scheme\": \"redblue\"}, \"type\": \"quantitative\"}, \"tooltip\": [{\"field\": \"metric\", \"title\": \"Metric\", \"type\": \"nominal\"}, {\"field\": \"attribute\", \"title\": \"Attribute\", \"type\": \"nominal\"}, {\"field\": \"correlation\", \"format\": \".3f\", \"title\": \"r\", \"type\": \"quantitative\"}], \"x\": {\"axis\": {\"grid\": false, \"labelAngle\": -45, \"labelLimit\": 180}, \"field\": \"attribute\", \"sort\": [\"Age\", \"Mean pitch\", \"Standard deviation pitch\", \"Minimum pitch\", \"Maximum pitch\", \"Number of voice breaks\", \"Jitter (local)\", \"Shimmer (local)\", \"Mean harmonics-to-noise ratio dB\", \"H1-minus-H2\", \"MinI(dB)\", \"MaxI(dB)\", \"MeanI(dB)\", \"SDI(dB)\", \"MeanAbsSlope(dB/s)\", \"dur (s)\", \"phonationtime (s)\", \"speechrate (nsyll/dur)\", \"articulation rate (nsyll / phonationtime)\", \"speech rate words per minute\"], \"title\": null, \"type\": \"nominal\"}, \"y\": {\"axis\": {\"grid\": false, \"labelLimit\": 200}, \"field\": \"metric\", \"sort\": [\"Avg Voice Score (1-10) 
[Pearson]\", \"Weighted Ranking Score [Spearman]\"], \"title\": null, \"type\": \"nominal\"}}}, {\"data\": {\"name\": \"data-289f8b6b88c4bd0d6c4cd2d7944b9204\"}, \"mark\": {\"type\": \"text\", \"color\": \"black\", \"fontSize\": 11, \"fontWeight\": \"normal\"}, \"encoding\": {\"text\": {\"field\": \"correlation\", \"format\": \".2f\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"attribute\", \"sort\": [\"Age\", \"Mean pitch\", \"Standard deviation pitch\", \"Minimum pitch\", \"Maximum pitch\", \"Number of voice breaks\", \"Jitter (local)\", \"Shimmer (local)\", \"Mean harmonics-to-noise ratio dB\", \"H1-minus-H2\", \"MinI(dB)\", \"MaxI(dB)\", \"MeanI(dB)\", \"SDI(dB)\", \"MeanAbsSlope(dB/s)\", \"dur (s)\", \"phonationtime (s)\", \"speechrate (nsyll/dur)\", \"articulation rate (nsyll / phonationtime)\", \"speech rate words per minute\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"metric\", \"sort\": [\"Avg Voice Score (1-10) [Pearson]\", \"Weighted Ranking Score [Spearman]\"], \"type\": \"nominal\"}}}, {\"data\": {\"name\": \"data-d9febc5cd73fea8c5b239d0e0fcc4730\"}, \"mark\": {\"type\": \"text\", \"color\": \"white\", \"fontSize\": 11, \"fontWeight\": \"normal\"}, \"encoding\": {\"text\": {\"field\": \"correlation\", \"format\": \".2f\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"attribute\", \"sort\": [\"Age\", \"Mean pitch\", \"Standard deviation pitch\", \"Minimum pitch\", \"Maximum pitch\", \"Number of voice breaks\", \"Jitter (local)\", \"Shimmer (local)\", \"Mean harmonics-to-noise ratio dB\", \"H1-minus-H2\", \"MinI(dB)\", \"MaxI(dB)\", \"MeanI(dB)\", \"SDI(dB)\", \"MeanAbsSlope(dB/s)\", \"dur (s)\", \"phonationtime (s)\", \"speechrate (nsyll/dur)\", \"articulation rate (nsyll / phonationtime)\", \"speech rate words per minute\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"metric\", \"sort\": [\"Avg Voice Score (1-10) [Pearson]\", \"Weighted Ranking Score [Spearman]\"], \"type\": \"nominal\"}}}], \"height\": 160, \"title\": {\"text\": 
[\"Speech Characteristics vs Survey Metrics\", \"Correlation per Voice (Pearson / Spearman)\"], \"subtitle\": [\"Sample size: 455\"], \"subtitleColor\": \"gray\", \"subtitleFontSize\": 10, \"anchor\": \"start\"}, \"width\": 1100, \"$schema\": \"https://vega.github.io/schema/vega-lite/v6.1.0.json\", \"datasets\": {\"data-bf2d69abcead9f87074f0225263a6b70\": [{\"attribute\": \"Age\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.3860542538211813}, {\"attribute\": \"Age\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.09408617259260567}, {\"attribute\": \"Mean pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4596840564490561}, {\"attribute\": \"Mean pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.6342900907774179}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6644317974322577}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.4621046136986293}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.1759369593953851}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.31052252703952476}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.5017986100730272}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.12950702549515725}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6245665417092515}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.4793226233717229}, {\"attribute\": \"Jitter (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.21256639799177363}, 
{\"attribute\": \"Jitter (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.5552282768777614}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.06278597743642035}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.47387797965273454}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.14672115371769612}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.478292991885524}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.2632790009685734}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.5974983221708392}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.13144303593482204}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.026509572901325478}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.08205229565874973}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.20014722121978848}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.2313204819112786}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.3949890534544011}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.287746169841669}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.0}, {\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.3573794459254656}, {\"attribute\": 
\"MeanAbsSlope(dB/s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.36203100308873504}, {\"attribute\": \"dur (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.25521688716065966}, {\"attribute\": \"dur (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.25257731958762886}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4556993131527357}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.26823883480858585}, {\"attribute\": \"speechrate (nsyll/dur)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.33652430044182624}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.45618077972667986}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.26823883480858585}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.26437048461934665}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.25257731958762886}], \"data-289f8b6b88c4bd0d6c4cd2d7944b9204\": [{\"attribute\": \"Age\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.3860542538211813}, {\"attribute\": \"Age\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.09408617259260567}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.1759369593953851}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.31052252703952476}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.12950702549515725}, {\"attribute\": 
\"Jitter (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.21256639799177363}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.06278597743642035}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.14672115371769612}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.2632790009685734}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.13144303593482204}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.026509572901325478}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.08205229565874973}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.20014722121978848}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.2313204819112786}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.3949890534544011}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.287746169841669}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.0}, {\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.3573794459254656}, {\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.36203100308873504}, {\"attribute\": \"dur (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.25521688716065966}, {\"attribute\": \"dur (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.25257731958762886}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Weighted 
Ranking Score [Spearman]\", \"correlation\": 0.26823883480858585}, {\"attribute\": \"speechrate (nsyll/dur)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.33652430044182624}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.26823883480858585}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.26437048461934665}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.25257731958762886}], \"data-d9febc5cd73fea8c5b239d0e0fcc4730\": [{\"attribute\": \"Mean pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4596840564490561}, {\"attribute\": \"Mean pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.6342900907774179}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6644317974322577}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.4621046136986293}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.5017986100730272}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6245665417092515}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.4793226233717229}, {\"attribute\": \"Jitter (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.5552282768777614}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.47387797965273454}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.478292991885524}, {\"attribute\": 
\"H1-minus-H2\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.5974983221708392}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4556993131527357}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.45618077972667986}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.LayerChart(...)"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# --- Plot correlation heatmap ---\n",
    "S.plot_speech_attribute_correlation(\n",
    "    corr_long,\n",
    "    title=\"Speech Characteristics vs Survey Metrics<br>Correlation per Voice (Pearson / Spearman)\",\n",
    "    filename=\"speech_attr_vs_survey_correlation\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "a173be9a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Male voices: 10\n",
      "Female voices: 8\n"
     ]
    }
   ],
   "source": [
    "# --- Compute correlations by voice gender ---\n",
    "def compute_corr_for_gender(merged_df: pl.DataFrame, gender: str) -> pl.DataFrame:\n",
    "    \"\"\"Compute Pearson (score) + Spearman (ranking) correlations for a gender subset.\"\"\"\n",
    "    subset = merged_df.filter(pl.col(\"Gender\") == gender)\n",
    "    exclude = {\"Voice\", \"Gender\", \"Avg Voice Score (1-10)\", \"Weighted Score\"}\n",
    "    attrs = [c for c in subset.columns if c not in exclude]\n",
    "\n",
    "    rows = []\n",
    "    for attr in attrs:\n",
    "        valid = subset.select([attr, \"Avg Voice Score (1-10)\", \"Weighted Score\"]).drop_nulls()\n",
    "        if valid.height > 2:\n",
    "            r_score = valid.select(pl.corr(attr, \"Avg Voice Score (1-10)\", method=\"pearson\")).item()\n",
    "            r_rank = valid.select(pl.corr(attr, \"Weighted Score\", method=\"spearman\")).item()\n",
    "        else:\n",
    "            r_score = None\n",
    "            r_rank = None\n",
    "        rows.append({\"attribute\": attr, \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": r_score})\n",
    "        rows.append({\"attribute\": attr, \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": r_rank})\n",
    "\n",
    "    return pl.DataFrame(rows).drop_nulls().filter(pl.col(\"correlation\").is_not_nan())\n",
    "\n",
    "corr_male = compute_corr_for_gender(merged, \"male\")\n",
    "corr_female = compute_corr_for_gender(merged, \"female\")\n",
    "\n",
    "print(f\"Male voices: {merged.filter(pl.col('Gender') == 'male').height}\")\n",
    "print(f\"Female voices: {merged.filter(pl.col('Gender') == 'female').height}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "84eaaff6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved plot to figures/2-4-26/All_Respondents/speech_attr_vs_survey_correlation_male.png\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-8bb6e49fc5024b13bd65cd65e7abcc26.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-8bb6e49fc5024b13bd65cd65e7abcc26.vega-embed details,\n",
       "  #altair-viz-8bb6e49fc5024b13bd65cd65e7abcc26.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-8bb6e49fc5024b13bd65cd65e7abcc26\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-8bb6e49fc5024b13bd65cd65e7abcc26\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-8bb6e49fc5024b13bd65cd65e7abcc26\");\n",
       "    }\n",
       "\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@6?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@6.1.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@7?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      let deps = [\"vega-embed\"];\n",
       "      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"6\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"6.1.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"7\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 1000, \"continuousHeight\": 500, \"strokeWidth\": 0}, \"background\": \"white\", \"axis\": {\"grid\": true, \"gridColor\": \"lightgray\", \"labelFontSize\": 11, \"titleFontSize\": 12, \"labelColor\": \"black\", \"titleColor\": \"black\", \"labelLimit\": 200}, \"axisX\": {\"labelAngle\": -45, \"labelLimit\": 200}, \"axisY\": {\"labelAngle\": 0}, \"legend\": {\"orient\": \"top\", \"direction\": \"horizontal\", \"titleFontSize\": 11, \"labelFontSize\": 11}, \"title\": {\"fontSize\": 14, \"color\": \"black\", \"anchor\": \"start\", \"subtitleFontSize\": 10, \"subtitleColor\": \"gray\"}, \"bar\": {\"color\": \"#0077B6\"}}, \"layer\": [{\"data\": {\"name\": \"data-294148e4f8046232fd1444ad2a71061b\"}, \"mark\": {\"type\": \"rect\", \"stroke\": \"white\", \"strokeWidth\": 1}, \"encoding\": {\"color\": {\"field\": \"correlation\", \"legend\": {\"title\": \"Pearson r\"}, \"scale\": {\"domain\": [-1, 1], \"scheme\": \"redblue\"}, \"type\": \"quantitative\"}, \"tooltip\": [{\"field\": \"metric\", \"title\": \"Metric\", \"type\": \"nominal\"}, {\"field\": \"attribute\", \"title\": \"Attribute\", \"type\": \"nominal\"}, {\"field\": \"correlation\", \"format\": \".3f\", \"title\": \"r\", \"type\": \"quantitative\"}], \"x\": {\"axis\": {\"grid\": false, \"labelAngle\": -45, \"labelLimit\": 180}, \"field\": \"attribute\", \"sort\": [\"Age\", \"Mean pitch\", \"Standard deviation pitch\", \"Minimum pitch\", \"Maximum pitch\", \"Number of voice breaks\", \"Jitter (local)\", \"Shimmer (local)\", \"Mean harmonics-to-noise ratio dB\", \"H1-minus-H2\", \"MinI(dB)\", \"MaxI(dB)\", \"MeanI(dB)\", \"SDI(dB)\", \"MeanAbsSlope(dB/s)\", \"dur (s)\", \"phonationtime (s)\", \"speechrate (nsyll/dur)\", \"articulation rate (nsyll / phonationtime)\", \"speech rate words per minute\"], \"title\": null, \"type\": \"nominal\"}, \"y\": {\"axis\": {\"grid\": false, \"labelLimit\": 200}, \"field\": \"metric\", \"sort\": [\"Avg Voice Score (1-10) 
[Pearson]\", \"Weighted Ranking Score [Spearman]\"], \"title\": null, \"type\": \"nominal\"}}}, {\"data\": {\"name\": \"data-831ec53dcaaeecdb39d6b0d9725a3191\"}, \"mark\": {\"type\": \"text\", \"color\": \"black\", \"fontSize\": 11, \"fontWeight\": \"normal\"}, \"encoding\": {\"text\": {\"field\": \"correlation\", \"format\": \".2f\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"attribute\", \"sort\": [\"Age\", \"Mean pitch\", \"Standard deviation pitch\", \"Minimum pitch\", \"Maximum pitch\", \"Number of voice breaks\", \"Jitter (local)\", \"Shimmer (local)\", \"Mean harmonics-to-noise ratio dB\", \"H1-minus-H2\", \"MinI(dB)\", \"MaxI(dB)\", \"MeanI(dB)\", \"SDI(dB)\", \"MeanAbsSlope(dB/s)\", \"dur (s)\", \"phonationtime (s)\", \"speechrate (nsyll/dur)\", \"articulation rate (nsyll / phonationtime)\", \"speech rate words per minute\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"metric\", \"sort\": [\"Avg Voice Score (1-10) [Pearson]\", \"Weighted Ranking Score [Spearman]\"], \"type\": \"nominal\"}}}, {\"data\": {\"name\": \"data-52bf66d21049bc948cf856353572dbed\"}, \"mark\": {\"type\": \"text\", \"color\": \"white\", \"fontSize\": 11, \"fontWeight\": \"normal\"}, \"encoding\": {\"text\": {\"field\": \"correlation\", \"format\": \".2f\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"attribute\", \"sort\": [\"Age\", \"Mean pitch\", \"Standard deviation pitch\", \"Minimum pitch\", \"Maximum pitch\", \"Number of voice breaks\", \"Jitter (local)\", \"Shimmer (local)\", \"Mean harmonics-to-noise ratio dB\", \"H1-minus-H2\", \"MinI(dB)\", \"MaxI(dB)\", \"MeanI(dB)\", \"SDI(dB)\", \"MeanAbsSlope(dB/s)\", \"dur (s)\", \"phonationtime (s)\", \"speechrate (nsyll/dur)\", \"articulation rate (nsyll / phonationtime)\", \"speech rate words per minute\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"metric\", \"sort\": [\"Avg Voice Score (1-10) [Pearson]\", \"Weighted Ranking Score [Spearman]\"], \"type\": \"nominal\"}}}], \"height\": 160, \"title\": {\"text\": 
[\"Speech Characteristics vs Survey Metrics\", \"Male Voices Only (Pearson / Spearman)\"], \"subtitle\": [\"Sample size: 455\"], \"subtitleColor\": \"gray\", \"subtitleFontSize\": 10, \"anchor\": \"start\"}, \"width\": 1100, \"$schema\": \"https://vega.github.io/schema/vega-lite/v6.1.0.json\", \"datasets\": {\"data-294148e4f8046232fd1444ad2a71061b\": [{\"attribute\": \"Age\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.7927291691160427}, {\"attribute\": \"Age\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.11765121295424251}, {\"attribute\": \"Mean pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.07387665732270982}, {\"attribute\": \"Mean pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.31666666666666665}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6034368859027126}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.016666666666666666}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.045593534007381364}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.5333333333333333}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6204407338182272}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.21666666666666667}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.3800450521306157}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.10084389681792215}, {\"attribute\": \"Jitter (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.31567445527294263}, 
{\"attribute\": \"Jitter (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.13333333333333333}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.8125424009949568}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.36666666666666664}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.7383607657633772}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.7666666666666667}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.11917236523792353}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.3}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.3263747881906577}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.2}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.06717033815227702}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.13333333333333333}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.031002439933730502}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.2928895930645031}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.5633028462359824}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.26666666666666666}, {\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4005768082277356}, {\"attribute\": 
\"MeanAbsSlope(dB/s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.23333333333333334}, {\"attribute\": \"dur (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4609220168794499}, {\"attribute\": \"dur (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.5}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4065438796132842}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.26666666666666666}, {\"attribute\": \"speechrate (nsyll/dur)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.4}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.39263591023580885}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.26666666666666666}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.46339784895869474}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.5}], \"data-831ec53dcaaeecdb39d6b0d9725a3191\": [{\"attribute\": \"Age\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.11765121295424251}, {\"attribute\": \"Mean pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.07387665732270982}, {\"attribute\": \"Mean pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.31666666666666665}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.016666666666666666}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.045593534007381364}, {\"attribute\": \"Maximum pitch\", \"metric\": 
\"Weighted Ranking Score [Spearman]\", \"correlation\": -0.21666666666666667}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.3800450521306157}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.10084389681792215}, {\"attribute\": \"Jitter (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.31567445527294263}, {\"attribute\": \"Jitter (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.13333333333333333}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.36666666666666664}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.11917236523792353}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.3}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.3263747881906577}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.2}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.06717033815227702}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.13333333333333333}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.031002439933730502}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.2928895930645031}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.26666666666666666}, {\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4005768082277356}, {\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Weighted Ranking Score 
[Spearman]\", \"correlation\": -0.23333333333333334}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4065438796132842}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.26666666666666666}, {\"attribute\": \"speechrate (nsyll/dur)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.4}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.39263591023580885}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.26666666666666666}], \"data-52bf66d21049bc948cf856353572dbed\": [{\"attribute\": \"Age\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.7927291691160427}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6034368859027126}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.5333333333333333}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6204407338182272}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.8125424009949568}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.7383607657633772}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.7666666666666667}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.5633028462359824}, {\"attribute\": \"dur (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.4609220168794499}, {\"attribute\": \"dur (s)\", \"metric\": \"Weighted Ranking Score 
[Spearman]\", \"correlation\": -0.5}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.46339784895869474}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.5}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.LayerChart(...)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# --- Plot: Male voices ---\n",
    "S.plot_speech_attribute_correlation(\n",
    "    corr_male,\n",
    "    title=\"Speech Characteristics vs Survey Metrics<br>Male Voices Only (Pearson / Spearman)\",\n",
    "    filename=\"speech_attr_vs_survey_correlation_male\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "d04225e1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved plot to figures/2-4-26/All_Respondents/speech_attr_vs_survey_correlation_female.png\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-1dc66c91a21f4b8d9e49612f81eebd9a.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-1dc66c91a21f4b8d9e49612f81eebd9a.vega-embed details,\n",
       "  #altair-viz-1dc66c91a21f4b8d9e49612f81eebd9a.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-1dc66c91a21f4b8d9e49612f81eebd9a\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-1dc66c91a21f4b8d9e49612f81eebd9a\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-1dc66c91a21f4b8d9e49612f81eebd9a\");\n",
       "    }\n",
       "\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@6?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@6.1.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@7?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      let deps = [\"vega-embed\"];\n",
       "      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"6\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"6.1.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"7\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 1000, \"continuousHeight\": 500, \"strokeWidth\": 0}, \"background\": \"white\", \"axis\": {\"grid\": true, \"gridColor\": \"lightgray\", \"labelFontSize\": 11, \"titleFontSize\": 12, \"labelColor\": \"black\", \"titleColor\": \"black\", \"labelLimit\": 200}, \"axisX\": {\"labelAngle\": -45, \"labelLimit\": 200}, \"axisY\": {\"labelAngle\": 0}, \"legend\": {\"orient\": \"top\", \"direction\": \"horizontal\", \"titleFontSize\": 11, \"labelFontSize\": 11}, \"title\": {\"fontSize\": 14, \"color\": \"black\", \"anchor\": \"start\", \"subtitleFontSize\": 10, \"subtitleColor\": \"gray\"}, \"bar\": {\"color\": \"#0077B6\"}}, \"layer\": [{\"data\": {\"name\": \"data-ff3d93a394a88d44a18ee07d1bece27c\"}, \"mark\": {\"type\": \"rect\", \"stroke\": \"white\", \"strokeWidth\": 1}, \"encoding\": {\"color\": {\"field\": \"correlation\", \"legend\": {\"title\": \"Pearson r\"}, \"scale\": {\"domain\": [-1, 1], \"scheme\": \"redblue\"}, \"type\": \"quantitative\"}, \"tooltip\": [{\"field\": \"metric\", \"title\": \"Metric\", \"type\": \"nominal\"}, {\"field\": \"attribute\", \"title\": \"Attribute\", \"type\": \"nominal\"}, {\"field\": \"correlation\", \"format\": \".3f\", \"title\": \"r\", \"type\": \"quantitative\"}], \"x\": {\"axis\": {\"grid\": false, \"labelAngle\": -45, \"labelLimit\": 180}, \"field\": \"attribute\", \"sort\": [\"Age\", \"Mean pitch\", \"Standard deviation pitch\", \"Minimum pitch\", \"Maximum pitch\", \"Number of voice breaks\", \"Jitter (local)\", \"Shimmer (local)\", \"Mean harmonics-to-noise ratio dB\", \"H1-minus-H2\", \"MinI(dB)\", \"MaxI(dB)\", \"MeanI(dB)\", \"SDI(dB)\", \"MeanAbsSlope(dB/s)\", \"dur (s)\", \"phonationtime (s)\", \"speechrate (nsyll/dur)\", \"articulation rate (nsyll / phonationtime)\", \"speech rate words per minute\"], \"title\": null, \"type\": \"nominal\"}, \"y\": {\"axis\": {\"grid\": false, \"labelLimit\": 200}, \"field\": \"metric\", \"sort\": [\"Avg Voice Score (1-10) 
[Pearson]\", \"Weighted Ranking Score [Spearman]\"], \"title\": null, \"type\": \"nominal\"}}}, {\"data\": {\"name\": \"data-46f3f309a17840278531f68da6ea94e1\"}, \"mark\": {\"type\": \"text\", \"color\": \"black\", \"fontSize\": 11, \"fontWeight\": \"normal\"}, \"encoding\": {\"text\": {\"field\": \"correlation\", \"format\": \".2f\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"attribute\", \"sort\": [\"Age\", \"Mean pitch\", \"Standard deviation pitch\", \"Minimum pitch\", \"Maximum pitch\", \"Number of voice breaks\", \"Jitter (local)\", \"Shimmer (local)\", \"Mean harmonics-to-noise ratio dB\", \"H1-minus-H2\", \"MinI(dB)\", \"MaxI(dB)\", \"MeanI(dB)\", \"SDI(dB)\", \"MeanAbsSlope(dB/s)\", \"dur (s)\", \"phonationtime (s)\", \"speechrate (nsyll/dur)\", \"articulation rate (nsyll / phonationtime)\", \"speech rate words per minute\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"metric\", \"sort\": [\"Avg Voice Score (1-10) [Pearson]\", \"Weighted Ranking Score [Spearman]\"], \"type\": \"nominal\"}}}, {\"data\": {\"name\": \"data-59f2b1b765dab085e4ba1e557bab9669\"}, \"mark\": {\"type\": \"text\", \"color\": \"white\", \"fontSize\": 11, \"fontWeight\": \"normal\"}, \"encoding\": {\"text\": {\"field\": \"correlation\", \"format\": \".2f\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"attribute\", \"sort\": [\"Age\", \"Mean pitch\", \"Standard deviation pitch\", \"Minimum pitch\", \"Maximum pitch\", \"Number of voice breaks\", \"Jitter (local)\", \"Shimmer (local)\", \"Mean harmonics-to-noise ratio dB\", \"H1-minus-H2\", \"MinI(dB)\", \"MaxI(dB)\", \"MeanI(dB)\", \"SDI(dB)\", \"MeanAbsSlope(dB/s)\", \"dur (s)\", \"phonationtime (s)\", \"speechrate (nsyll/dur)\", \"articulation rate (nsyll / phonationtime)\", \"speech rate words per minute\"], \"type\": \"nominal\"}, \"y\": {\"field\": \"metric\", \"sort\": [\"Avg Voice Score (1-10) [Pearson]\", \"Weighted Ranking Score [Spearman]\"], \"type\": \"nominal\"}}}], \"height\": 160, \"title\": {\"text\": 
[\"Speech Characteristics vs Survey Metrics\", \"Female Voices Only (Pearson / Spearman)\"], \"subtitle\": [\"Sample size: 455\"], \"subtitleColor\": \"gray\", \"subtitleFontSize\": 10, \"anchor\": \"start\"}, \"width\": 1100, \"$schema\": \"https://vega.github.io/schema/vega-lite/v6.1.0.json\", \"datasets\": {\"data-ff3d93a394a88d44a18ee07d1bece27c\": [{\"attribute\": \"Age\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.023565559711957996}, {\"attribute\": \"Age\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.23190841426097938}, {\"attribute\": \"Mean pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.19016169196951113}, {\"attribute\": \"Mean pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.19820624179302296}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.33721050044841366}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.12613124477737825}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.6155078897604899}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.21622499104693416}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.17733809220861338}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.6126374746329801}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6316973759292629}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.027524094128159014}, {\"attribute\": \"Jitter (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.13206426212662656}, 
{\"attribute\": \"Jitter (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.05405624776173354}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.3537250698256}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.21622499104693416}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.2539964392617356}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.018018749253911177}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.5810981615794052}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.3964124835860459}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.19824072563494607}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.41443123283995714}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.11491430922360052}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.5585812268712466}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.6893083031964921}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.6363636363636364}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.6585847686635248}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.12613124477737825}, {\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.4478442793358411}, 
{\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.3603749850782236}, {\"attribute\": \"dur (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.03526726467129442}, {\"attribute\": \"dur (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.0900937462695559}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.029359105916164314}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.1801874925391118}, {\"attribute\": \"speechrate (nsyll/dur)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.0900937462695559}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.027420506142136723}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.1801874925391118}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.031236055978832018}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.0900937462695559}], \"data-46f3f309a17840278531f68da6ea94e1\": [{\"attribute\": \"Age\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.023565559711957996}, {\"attribute\": \"Age\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.23190841426097938}, {\"attribute\": \"Mean pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.19016169196951113}, {\"attribute\": \"Mean pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.19820624179302296}, {\"attribute\": \"Standard deviation pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.33721050044841366}, 
{\"attribute\": \"Standard deviation pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.12613124477737825}, {\"attribute\": \"Minimum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.21622499104693416}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.17733809220861338}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.027524094128159014}, {\"attribute\": \"Jitter (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.13206426212662656}, {\"attribute\": \"Jitter (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.05405624776173354}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.3537250698256}, {\"attribute\": \"Shimmer (local)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.21622499104693416}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.2539964392617356}, {\"attribute\": \"Mean harmonics-to-noise ratio dB\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.018018749253911177}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.3964124835860459}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.19824072563494607}, {\"attribute\": \"MinI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.41443123283995714}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.11491430922360052}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.12613124477737825}, {\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", 
\"correlation\": -0.4478442793358411}, {\"attribute\": \"MeanAbsSlope(dB/s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.3603749850782236}, {\"attribute\": \"dur (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.03526726467129442}, {\"attribute\": \"dur (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.0900937462695559}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.029359105916164314}, {\"attribute\": \"phonationtime (s)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.1801874925391118}, {\"attribute\": \"speechrate (nsyll/dur)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.0900937462695559}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.027420506142136723}, {\"attribute\": \"articulation rate (nsyll / phonationtime)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.1801874925391118}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.031236055978832018}, {\"attribute\": \"speech rate words per minute\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.0900937462695559}], \"data-59f2b1b765dab085e4ba1e557bab9669\": [{\"attribute\": \"Minimum pitch\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.6155078897604899}, {\"attribute\": \"Maximum pitch\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": 0.6126374746329801}, {\"attribute\": \"Number of voice breaks\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": 0.6316973759292629}, {\"attribute\": \"H1-minus-H2\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.5810981615794052}, {\"attribute\": \"MaxI(dB)\", \"metric\": \"Weighted Ranking Score 
[Spearman]\", \"correlation\": -0.5585812268712466}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.6893083031964921}, {\"attribute\": \"MeanI(dB)\", \"metric\": \"Weighted Ranking Score [Spearman]\", \"correlation\": -0.6363636363636364}, {\"attribute\": \"SDI(dB)\", \"metric\": \"Avg Voice Score (1-10) [Pearson]\", \"correlation\": -0.6585847686635248}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.LayerChart(...)"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# --- Plot: female voices only (speech attributes vs survey metrics) ---\n",
    "# The call is the cell's last expression, so the returned chart is displayed.\n",
    "S.plot_speech_attribute_correlation(\n",
    "    corr_female,\n",
    "    filename=\"speech_attr_vs_survey_correlation_female\",\n",
    "    title=\"Speech Characteristics vs Survey Metrics<br>Female Voices Only (Pearson / Spearman)\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "8e2fbc25",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=== MALE | Age vs Avg Voice Score (1-10) [Pearson] | r = 0.793 ===\n",
      "shape: (9, 3)\n",
      "┌───────┬─────┬────────────────────────┐\n",
      "│ Voice ┆ Age ┆ Avg Voice Score (1-10) │\n",
      "│ ---   ┆ --- ┆ ---                    │\n",
      "│ str   ┆ i64 ┆ f64                    │\n",
      "╞═══════╪═════╪════════════════════════╡\n",
      "│ V81   ┆ 28  ┆ 6.683007               │\n",
      "│ V54   ┆ 36  ┆ 6.67734                │\n",
      "│ V16   ┆ 40  ┆ 6.921053               │\n",
      "│ V74   ┆ 41  ┆ 6.89196                │\n",
      "│ V34   ┆ 42  ┆ 7.153005               │\n",
      "│ V88   ┆ 42  ┆ 6.916667               │\n",
      "│ V69   ┆ 43  ┆ 7.219577               │\n",
      "│ V45   ┆ 43  ┆ 7.062189               │\n",
      "│ V86   ┆ 62  ┆ 7.264444               │\n",
      "└───────┴─────┴────────────────────────┘\n",
      "\n",
      "=== MALE | Shimmer (local) vs Avg Voice Score (1-10) [Pearson] | r = 0.813 ===\n",
      "shape: (9, 3)\n",
      "┌───────┬─────────────────┬────────────────────────┐\n",
      "│ Voice ┆ Shimmer (local) ┆ Avg Voice Score (1-10) │\n",
      "│ ---   ┆ ---             ┆ ---                    │\n",
      "│ str   ┆ f64             ┆ f64                    │\n",
      "╞═══════╪═════════════════╪════════════════════════╡\n",
      "│ V88   ┆ 9.07            ┆ 6.916667               │\n",
      "│ V74   ┆ 9.09            ┆ 6.89196                │\n",
      "│ V54   ┆ 9.11            ┆ 6.67734                │\n",
      "│ V81   ┆ 9.13            ┆ 6.683007               │\n",
      "│ V16   ┆ 9.38            ┆ 6.921053               │\n",
      "│ V34   ┆ 10.07           ┆ 7.153005               │\n",
      "│ V86   ┆ 10.28           ┆ 7.264444               │\n",
      "│ V45   ┆ 10.39           ┆ 7.062189               │\n",
      "│ V69   ┆ 11.52           ┆ 7.219577               │\n",
      "└───────┴─────────────────┴────────────────────────┘\n",
      "\n",
      "=== MALE | Mean harmonics-to-noise ratio dB vs Avg Voice Score (1-10) [Pearson] | r = -0.738 ===\n",
      "shape: (9, 3)\n",
      "┌───────┬─────────────────────────────────┬────────────────────────┐\n",
      "│ Voice ┆ Mean harmonics-to-noise ratio … ┆ Avg Voice Score (1-10) │\n",
      "│ ---   ┆ ---                             ┆ ---                    │\n",
      "│ str   ┆ f64                             ┆ f64                    │\n",
      "╞═══════╪═════════════════════════════════╪════════════════════════╡\n",
      "│ V16   ┆ 7.544                           ┆ 6.921053               │\n",
      "│ V34   ┆ 7.671                           ┆ 7.153005               │\n",
      "│ V69   ┆ 7.763                           ┆ 7.219577               │\n",
      "│ V86   ┆ 8.092                           ┆ 7.264444               │\n",
      "│ V45   ┆ 8.647                           ┆ 7.062189               │\n",
      "│ V74   ┆ 8.732                           ┆ 6.89196                │\n",
      "│ V88   ┆ 9.009                           ┆ 6.916667               │\n",
      "│ V81   ┆ 9.026                           ┆ 6.683007               │\n",
      "│ V54   ┆ 9.598                           ┆ 6.67734                │\n",
      "└───────┴─────────────────────────────────┴────────────────────────┘\n",
      "\n",
      "=== MALE | Mean harmonics-to-noise ratio dB vs Weighted Ranking Score [Spearman] | r = -0.767 ===\n",
      "shape: (10, 3)\n",
      "┌───────┬─────────────────────────────────┬────────────────┐\n",
      "│ Voice ┆ Mean harmonics-to-noise ratio … ┆ Weighted Score │\n",
      "│ ---   ┆ ---                             ┆ ---            │\n",
      "│ str   ┆ f64                             ┆ i64            │\n",
      "╞═══════╪═════════════════════════════════╪════════════════╡\n",
      "│ V16   ┆ 7.544                           ┆ 156            │\n",
      "│ V34   ┆ 7.671                           ┆ 128            │\n",
      "│ V69   ┆ 7.763                           ┆ 121            │\n",
      "│ V86   ┆ 8.092                           ┆ 76             │\n",
      "│ V46   ┆ 8.625                           ┆ 148            │\n",
      "│ V45   ┆ 8.647                           ┆ 118            │\n",
      "│ V74   ┆ 8.732                           ┆ 117            │\n",
      "│ V88   ┆ 9.009                           ┆ 97             │\n",
      "│ V81   ┆ 9.026                           ┆ 69             │\n",
      "│ V54   ┆ 9.598                           ┆ 116            │\n",
      "└───────┴─────────────────────────────────┴────────────────┘\n",
      "\n",
      "=== FEMALE: no correlations with |r| > 0.7 ===\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Drill-down: list the per-voice rows behind every gender-subgroup correlation\n",
    "# whose absolute value exceeds THRESHOLD.\n",
    "THRESHOLD = 0.7\n",
    "\n",
    "# Metric label used in the correlation tables -> column of `merged` that is printed for it.\n",
    "metric_col_map = {\n",
    "    \"Avg Voice Score (1-10) [Pearson]\": \"Avg Voice Score (1-10)\",\n",
    "    \"Weighted Ranking Score [Spearman]\": \"Weighted Score\",\n",
    "}\n",
    "\n",
    "for gender, corr_df in ((\"MALE\", corr_male), (\"FEMALE\", corr_female)):\n",
    "    strong = corr_df.filter(pl.col(\"correlation\").abs() > THRESHOLD)\n",
    "\n",
    "    if not strong.height:\n",
    "        print(f\"=== {gender}: no correlations with |r| > {THRESHOLD} ===\\n\")\n",
    "    else:\n",
    "        # Rows of `merged` are matched on the lower-cased gender label.\n",
    "        gender_rows = merged.filter(pl.col(\"Gender\") == gender.lower())\n",
    "        pairs = strong.select([\"attribute\", \"metric\", \"correlation\"])\n",
    "\n",
    "        for attr, metric_label, r in pairs.iter_rows():\n",
    "            survey_col = metric_col_map[metric_label]\n",
    "            columns = [\"Voice\", attr, survey_col]\n",
    "            # Drop voices missing either value, then order by the speech attribute.\n",
    "            table = gender_rows.select(columns).drop_nulls().sort(attr)\n",
    "\n",
    "            print(f\"=== {gender} | {attr} vs {metric_label} | r = {r:.3f} ===\")\n",
    "            print(table)\n",
    "            print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "959945f2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved 4 sheets to figures/2-4-26/strong_correlations_by_gender.xlsx\n"
     ]
    }
   ],
   "source": [
    "import xlsxwriter\n",
    "from pathlib import Path\n",
    "\n",
    "# Export one worksheet per strong (|r| > THRESHOLD) correlation in each gender\n",
    "# subgroup. THRESHOLD and metric_col_map come from the previous cell.\n",
    "\n",
    "# Characters Excel rejects in worksheet names; each is replaced with \"-\".\n",
    "INVALID_SHEET_CHARS = \"[]:*?/\\\\\"\n",
    "# Excel's maximum worksheet-name length.\n",
    "MAX_SHEET_NAME_LEN = 31\n",
    "\n",
    "\n",
    "def make_sheet_name(raw_name, taken):\n",
    "    \"\"\"Return an Excel-safe worksheet name that is not already in `taken`.\n",
    "\n",
    "    Args:\n",
    "        raw_name: Desired worksheet name.\n",
    "        taken: Set of lower-cased names already used in the workbook; the\n",
    "            returned name is added to it.\n",
    "\n",
    "    Returns:\n",
    "        `raw_name` with forbidden characters replaced by \"-\", cut to\n",
    "        MAX_SHEET_NAME_LEN characters, and given a \"_N\" suffix when the name\n",
    "        (compared case-insensitively) was already used.\n",
    "    \"\"\"\n",
    "    table_ = str.maketrans(INVALID_SHEET_CHARS, \"-\" * len(INVALID_SHEET_CHARS))\n",
    "    base = raw_name.translate(table_)[:MAX_SHEET_NAME_LEN]\n",
    "    name, n = base, 1\n",
    "    while name.lower() in taken:\n",
    "        n += 1\n",
    "        suffix = f\"_{n}\"\n",
    "        name = base[: MAX_SHEET_NAME_LEN - len(suffix)] + suffix\n",
    "    taken.add(name.lower())\n",
    "    return name\n",
    "\n",
    "\n",
    "out_path = Path(S.fig_save_dir) / \"strong_correlations_by_gender.xlsx\"\n",
    "# Make sure the target directory exists before the workbook is written.\n",
    "out_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "with xlsxwriter.Workbook(str(out_path)) as wb:\n",
    "    bold = wb.add_format({\"bold\": True})  # one shared format for every sheet\n",
    "    used_sheet_names = set()\n",
    "    sheet_count = 0\n",
    "\n",
    "    for gender, corr_df in [(\"Male\", corr_male), (\"Female\", corr_female)]:\n",
    "        strong = corr_df.filter(pl.col(\"correlation\").abs() > THRESHOLD)\n",
    "        if strong.height == 0:\n",
    "            continue\n",
    "\n",
    "        subset = merged.filter(pl.col(\"Gender\") == gender.lower())\n",
    "\n",
    "        for row in strong.iter_rows(named=True):\n",
    "            attr = row[\"attribute\"]\n",
    "            metric_label = row[\"metric\"]\n",
    "            r = row[\"correlation\"]\n",
    "            survey_col = metric_col_map[metric_label]\n",
    "\n",
    "            table = subset.select([\"Voice\", attr, survey_col]).drop_nulls().sort(attr)\n",
    "\n",
    "            # Attribute names can contain \"/\" (e.g. \"MeanAbsSlope(dB/s)\") and two\n",
    "            # attributes can share their first 18 characters, so the name is\n",
    "            # sanitised and de-duplicated before the sheet is created.\n",
    "            short_metric = \"Score\" if \"Pearson\" in metric_label else \"Rank\"\n",
    "            sheet_name = make_sheet_name(\n",
    "                f\"{gender}_{attr[:18]}_{short_metric}\", used_sheet_names\n",
    "            )\n",
    "\n",
    "            ws = wb.add_worksheet(sheet_name)\n",
    "            # Row 0: context line; row 1 is left blank; row 2: column headers.\n",
    "            ws.write(0, 0, f\"{gender} | {attr} vs {metric_label} | r = {r:.3f}\", bold)\n",
    "            ws.write_row(2, 0, table.columns, bold)\n",
    "\n",
    "            # Data rows start at row 3.\n",
    "            for ri, data_row in enumerate(table.iter_rows()):\n",
    "                ws.write_row(3 + ri, 0, data_row)\n",
    "\n",
    "            # Size each column to its longest header/value plus a little padding.\n",
    "            for ci, col_name in enumerate(table.columns):\n",
    "                max_len = max(len(str(col_name)), *(len(str(v)) for v in table.get_column(col_name).to_list()))\n",
    "                ws.set_column(ci, ci, max_len + 2)\n",
    "\n",
    "            sheet_count += 1\n",
    "\n",
    "print(f\"Saved {sheet_count} sheets to {out_path}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "phase-3-quant",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}