Character personality ranking complete

2026-01-23 10:50:26 +01:00
parent 5327b50ab0
commit 424355f4a1
5 changed files with 421 additions and 7 deletions
--- a/utils.py
+++ b/utils.py
@@ -55,6 +55,45 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe



+def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
+    """
+    Calculate weighted scores for character rankings.
+    Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
+
+    Parameters
+    ----------
+    df : pl.DataFrame
+        DataFrame containing character ranking columns, i.e. columns whose
+        name contains 'Character_Ranking'.
+
+    Returns
+    -------
+    pl.DataFrame
+        DataFrame with columns 'Character' and 'Weighted Score', sorted by
+        score (descending). Has zero rows when `df` has no ranking columns.
+    """
+    points_per_rank = {1: 3, 2: 2, 3: 1}
+    # Identify columns related to Character Ranking
+    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+
+    # Strip the column prefix and underscores to get a display name
+    characters = [
+        col.replace('Character_Ranking_', '').replace('_', ' ').strip()
+        for col in ranking_cols
+    ]
+    # Rows with a null or any other rank value match no filter: 0 points
+    weighted_scores = [
+        sum(df.filter(pl.col(col) == rank).height * points
+            for rank, points in points_per_rank.items())
+        for col in ranking_cols
+    ]
+
+    # Explicit schema keeps both columns (and the sort valid) when empty
+    schema = {'Character': pl.Utf8, 'Weighted Score': pl.Int64}
+    scores = {'Character': characters, 'Weighted Score': weighted_scores}
+    return pl.DataFrame(scores, schema=schema).sort('Weighted Score', descending=True)
+
+
 class JPMCSurvey:
    """Class to handle JPMorgan Chase survey data."""
    
@@ -249,9 +288,19 @@ class JPMCSurvey:
        rename_dict = {
            'QID29': '18-8_Set-A',
            'QID101': '18-8_Set-B',
-            'QID36_0_GROUP': '8-3_Ranked'
+            'QID36_0_GROUP': '3_Ranked'
        }
-        return self._get_subset(q, QIDs, rename_cols=False).rename(rename_dict), None
+        
+        subset = self._get_subset(q, QIDs, rename_cols=False).rename(rename_dict)
+        
+        # Combine 18-8 Set A and Set B into single column
+        subset = subset.with_columns(
+            pl.coalesce(['18-8_Set-A', '18-8_Set-B']).alias('8_Combined')
+        )
+        # Change order of columns
+        subset = subset.select(['_recordId', '18-8_Set-A', '18-8_Set-B', '8_Combined', '3_Ranked'])
+        
+        return subset, None
    
    
    def get_voice_scale_1_10(self, q: pl.LazyFrame) -> Union[pl.LazyFrame, None]:
@@ -363,4 +412,9 @@ class JPMCSurvey:
        """
        QIDs = ['QID44', 'QID97', 'QID95', 'QID96']
        
-        return self._get_subset(q, QIDs, rename_cols=True), None
+        return self._get_subset(q, QIDs, rename_cols=True), None
+    
+
+
+
+