Spaces:

pantdipendra
/

AdolescentsMentalHealthPrediction

Running

App Files Files Community

pantdipendra committed 18 days ago

Commit

e9e83fc

verified ·

1 Parent(s): c458985

v2_separate tabs categories in UI

Browse files

Files changed (1) hide show

app.py +236 -308

app.py CHANGED Viewed

@@ -27,53 +27,27 @@ class ModelPredictor:
         self.model_filenames = model_filenames
         self.models = self.load_models()
-        # The map from each label column to the textual meaning for 0 or 1
-        # (Some columns also mention '2' as positive, so adapt as needed).
         self.prediction_map = {
             "YOWRCONC": ["Did not have difficulty concentrating", "Had difficulty concentrating"],
             "YOSEEDOC": ["Did not feel the need to see a doctor", "Felt the need to see a doctor"],
             "YOWRHRS":  ["Did not have trouble sleeping", "Had trouble sleeping"],
             "YO_MDEA5": ["Others did not notice restlessness/lethargy", "Others noticed restlessness/lethargy"],
             "YOWRCHR":  ["Did not feel so sad nothing could cheer up", "Felt so sad that nothing could cheer up"],
-            "YOWRLSIN": [
-                "Did not feel bored / lose interest",
-                "Felt bored / lost interest in enjoyable things"
-            ],
             "YODPPROB": ["No other problems for 2+ weeks", "Had other problems for 2+ weeks"],
             "YOWRPROB": ["No 'worst time ever' feeling", "Had 'worst time ever' feeling"],
-            "YODPR2WK": [
-                "No periods with depressed feelings lasting 2+ weeks",
-                "Had depressed feelings for 2+ weeks"
-            ],
             "YOWRDEPR": ["Did not feel sad/depressed mostly everyday", "Felt sad/depressed mostly everyday"],
-            "YODPDISC": [
-                "Overall mood duration was not sad/depressed",
-                "Overall mood duration was sad/depressed"
-            ],
-            "YOLOSEV": [
-                "Did not lose interest in activities",
-                "Lost interest in enjoyable things"
-            ],
             "YOWRDCSN": ["Was able to make decisions", "Was unable to make decisions"],
-            "YODSMMDE": [
-                "Never had 2 weeks of depression symptoms",
-                "Had 2+ weeks of depression symptoms"
-            ],
-            "YO_MDEA3": [
-                "No changes in appetite/weight",
-                "Had changes in appetite or weight"
-            ],
-            "YODPLSIN": ["Never lost interest / felt bored", "Lost interest / felt bored"],
             "YOWRELES": ["Did not eat less than usual", "Ate less than usual"],
             "YODSCEV":  ["Fewer severe depression symptoms", "More severe depression symptoms"],
-            "YOPB2WK": [
-                "No uneasy feelings lasting every day for 2+ weeks",
-                "Uneasy feelings lasting 2+ weeks"
-            ],
-            "YO_MDEA2": [
-                "No physical/mental issues for 2+ weeks",
-                "Had physical/mental issues for 2+ weeks"
-            ]
         }
     def load_models(self):
@@ -85,10 +59,6 @@ class ModelPredictor:
         return loaded
     def make_predictions(self, user_input: pd.DataFrame):
-        """
-        Return a list of np.ndarrays, each of shape (1,) or (n_samples,),
-        one for each model in self.models, in the same order as model_filenames.
-        """
         predictions = []
         for model in self.models:
             out = model.predict(user_input)
@@ -96,19 +66,10 @@ class ModelPredictor:
         return predictions
     def get_majority_vote(self, predictions):
-        """
-        Flatten all predictions from each model into a single array
-        and compute the most common value (mode).
-        """
         combined = np.concatenate(predictions)
         return np.bincount(combined).argmax()
     def evaluate_severity(self, count_ones: int) -> str:
-        """
-        The user wanted a logic: if >=13 => Severe, >=9 => Moderate, >=5 => Low, else Very Low.
-        Here 'count_ones' is how many '1's (or '2's) across all model predictions.
-        Adjust logic if needed.
-        """
         if count_ones >= 13:
             return "Mental Health Severity: Severe"
         elif count_ones >= 9:
@@ -123,174 +84,175 @@ predictor = ModelPredictor(model_path, model_filenames)
 ######################################
-# 3) HELPER: NEAREST NEIGHBORS
 ######################################
-def get_nearest_neighbors_info(user_input_df: pd.DataFrame, k=5):
-    """
-    Given a single-row user_input_df (the 25 numeric features),
-    find the top-k nearest neighbors in df (using those same 25 columns).
-    Then build a textual summary for clinicians.
-    We assume df has the same numeric coding for these 25 features.
-    """
-    # 1) Ensure these columns exist in df
-    user_cols = user_input_df.columns
-    if not all(col in df.columns for col in user_cols):
-        return "Cannot compute nearest neighbors. Some columns not found in df."
-    # 2) We'll do a simple Euclidean distance
-    #    Subset df to these 25 columns
-    sub_df = df[list(user_cols)].copy()
-    # 3) Compute distance to the user input row
-    #    user_input_df has shape (1, 25). We'll broadcast to sub_df's shape
-    #    row by row. For performance, you might prefer scikit's NearestNeighbors,
-    #    but let's do a manual approach for clarity.
-    diffs = sub_df - user_input_df.iloc[0]  # shape (N,25)
-    dists = (diffs**2).sum(axis=1)**0.5     # Euclidean
-    # 4) Sort by distance, pick top k
-    nn_indices = dists.nsmallest(k).index
-    neighbors = df.loc[nn_indices]
-    # 5) Build a textual summary
-    #    We will look at each label in predictor.prediction_map,
-    #    see if it is a column in df. If so, see how many are 1 vs 0 (or 2) among neighbors.
-    #    Then map numeric -> text from prediction_map if possible.
-    summary_lines = [f"**Nearest Neighbors (k={k})**",
-                     f"Distances Range: {dists[nn_indices].min():.2f} to {dists[nn_indices].max():.2f}",
-                     ""]
-    for label_col, label_map in predictor.prediction_map.items():
-        if label_col not in neighbors.columns:
-            continue  # Not present in df
-        # Values among neighbors
-        vals = neighbors[label_col].value_counts().to_dict()
-        # Example: {0: 3, 1: 2}, or {2: 4, 1: 1}, etc.
-        line = f"{label_col} => "
-        parts = []
-        for val, count_ in vals.items():
-            # If we have a mapping, use it
-            if val in range(len(label_map)):
-                meaning = label_map[val]
-                parts.append(f"{count_} had {meaning}")
-            else:
-                parts.append(f"{count_} had numeric={val}")
-        line += "; ".join(parts)
-        summary_lines.append(line)
-    summary_lines.append("")
-    summary_text = "\n".join(summary_lines)
-    return summary_text
-######################################
-# 4) INPUT MAPPING
-######################################
 def validate_inputs(*args):
     """Return True only when every supplied value is truthy.

     An empty string or ``None`` (i.e. an unselected field) makes the
     check fail; calling with no arguments returns True.
     """
     return all(args)
-# Only keep the 25 features requested.
-input_mapping = {
-    'YMDESUD5ANYO': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
-    'YMDEHPO':      {"Yes": 1, "No": 0},
-    'YMDETXRX':     {"Yes": 1, "No": 0},
-    'LVLDIFMEM2':   {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
-    'YMSUD5YANY':   {"Yes": 1, "No": 0},
-    'YPSY2MDE':     {"Yes": 1, "No": 0},
-    'YMDELT':       {"Yes": 1, "No": 2},
-    'YDOCMDE':      {"Yes": 1, "No": 0},
-    'YMIMI5YANY':   {"Yes": 1, "No": 0},
-    'YMDEHARX':     {"Yes": 1, "No": 0},
-    'MDEIMPY':      {"Yes": 1, "No": 2},
-    'YRXMDEYR':     {"Yes": 1, "No": 0},
-    'YMDERSUD5ANY': {"Yes": 1, "No": 0},
-    'YMIMS5YANY':   {"Yes": 1, "No": 0},
-    'YMDEYR':       {"Yes": 1, "No": 2},
-    'YHLTMDE':      {"Yes": 1, "No": 0},
-    'YUSUIPLNYR':   {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YMDEHPRX':     {"Yes": 1, "No": 0},
-    'YUSUIPLN':     {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YPSY1MDE':     {"Yes": 1, "No": 0},
-    'YMIUD5YANY':   {"Yes": 1, "No": 0},
-    'YUSUITHK':     {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YTXMDEYR':     {"Yes": 1, "No": 0},
-    'YCOUNMDE':     {"Yes": 1, "No": 0},
-    'YUSUITHKYR':   {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4}
-}
 ######################################
-# 5) PREDICT FUNCTION (Prediction Tab)
 ######################################
 def predict(
-    # EXACT 25 features in this order:
-    YMDESUD5ANYO, YMDEHPO, YMDETXRX, LVLDIFMEM2, YMSUD5YANY,
-    YPSY2MDE, YMDELT, YDOCMDE, YMIMI5YANY, YMDEHARX,
-    MDEIMPY, YRXMDEYR, YMDERSUD5ANY, YMIMS5YANY, YMDEYR,
-    YHLTMDE, YUSUIPLNYR, YMDEHPRX, YUSUIPLN, YPSY1MDE,
-    YMIUD5YANY, YUSUITHK, YTXMDEYR, YCOUNMDE, YUSUITHKYR
 ):
-    # 1) Validate
     if not validate_inputs(
-        YMDESUD5ANYO, YMDEHPO, YMDETXRX, LVLDIFMEM2, YMSUD5YANY,
-        YPSY2MDE, YMDELT, YDOCMDE, YMIMI5YANY, YMDEHARX,
-        MDEIMPY, YRXMDEYR, YMDERSUD5ANY, YMIMS5YANY, YMDEYR,
-        YHLTMDE, YUSUIPLNYR, YMDEHPRX, YUSUIPLN, YPSY1MDE,
-        YMIUD5YANY, YUSUITHK, YTXMDEYR, YCOUNMDE, YUSUITHKYR
     ):
         return (
             "Please select all required fields.",  # 1) Prediction Results
-            "Validation Error",                    # 2) Mental Health Severity
-            "No data",                             # 3) Total Patient Count
-            "No nearest neighbors info",           # 4) Nearest Neighbors Summary
-            None,                                  # 5) Bar Chart (Input Feature)
-            None                                   # 6) Bar Chart (Predicted Labels)
         )
-    # 2) Map user-friendly -> numeric
     user_input_dict = {
         'YMDESUD5ANYO': input_mapping['YMDESUD5ANYO'][YMDESUD5ANYO],
-        'YMDEHPO':      input_mapping['YMDEHPO'][YMDEHPO],
-        'YMDETXRX':     input_mapping['YMDETXRX'][YMDETXRX],
-        'LVLDIFMEM2':   input_mapping['LVLDIFMEM2'][LVLDIFMEM2],
-        'YMSUD5YANY':   input_mapping['YMSUD5YANY'][YMSUD5YANY],
-        'YPSY2MDE':     input_mapping['YPSY2MDE'][YPSY2MDE],
         'YMDELT':       input_mapping['YMDELT'][YMDELT],
-        'YDOCMDE':      input_mapping['YDOCMDE'][YDOCMDE],
         'YMIMI5YANY':   input_mapping['YMIMI5YANY'][YMIMI5YANY],
         'YMDEHARX':     input_mapping['YMDEHARX'][YMDEHARX],
-        'MDEIMPY':      input_mapping['MDEIMPY'][MDEIMPY],
         'YRXMDEYR':     input_mapping['YRXMDEYR'][YRXMDEYR],
-        'YMDERSUD5ANY': input_mapping['YMDERSUD5ANY'][YMDERSUD5ANY],
-        'YMIMS5YANY':   input_mapping['YMIMS5YANY'][YMIMS5YANY],
-        'YMDEYR':       input_mapping['YMDEYR'][YMDEYR],
         'YHLTMDE':      input_mapping['YHLTMDE'][YHLTMDE],
-        'YUSUIPLNYR':   input_mapping['YUSUIPLNYR'][YUSUIPLNYR],
-        'YMDEHPRX':     input_mapping['YMDEHPRX'][YMDEHPRX],
-        'YUSUIPLN':     input_mapping['YUSUIPLN'][YUSUIPLN],
-        'YPSY1MDE':     input_mapping['YPSY1MDE'][YPSY1MDE],
-        'YMIUD5YANY':   input_mapping['YMIUD5YANY'][YMIUD5YANY],
-        'YUSUITHK':     input_mapping['YUSUITHK'][YUSUITHK],
         'YTXMDEYR':     input_mapping['YTXMDEYR'][YTXMDEYR],
         'YCOUNMDE':     input_mapping['YCOUNMDE'][YCOUNMDE],
-        'YUSUITHKYR':   input_mapping['YUSUITHKYR'][YUSUITHKYR]
     }
     user_df = pd.DataFrame(user_input_dict, index=[0])
-    # 3) Make predictions
-    predictions = predictor.make_predictions(user_df)  # list of arrays
-    # e.g. predictions[i][0] is the predicted label for model i
-    # Flatten them for counting ones
     all_preds = np.concatenate(predictions)
-    # In your logic, "1" might be a positive class, or "2" might be. Adapt if needed:
-    # For now, let's assume "1" is the relevant "positive" count:
     count_ones = sum(all_preds == 1)
     severity_msg = predictor.evaluate_severity(count_ones)
-    # 4) Format textual results grouped by domain
     groups = {
         "Concentration_and_Decision_Making": ["YOWRCONC", "YOWRDCSN"],
         "Sleep_and_Energy_Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
@@ -302,20 +264,20 @@ def predict(
             "YODPPROB", "YOWRPROB", "YODPR2WK", "YODSMMDE", "YOPB2WK"
         ]
     }
-    # Build text for each label in the order they appear in model_filenames
     group_text = {g: [] for g in groups}
     for i, arr in enumerate(predictions):
         label_col = model_filenames[i].split('.')[0]  # e.g. "YOWRCONC"
         val = arr[0]
-        # Map prediction to text if possible
         if label_col in predictor.prediction_map and val in range(len(predictor.prediction_map[label_col])):
             text_label = predictor.prediction_map[label_col][val]
         else:
             text_label = f"Prediction={val}"
-        # Place it in whichever group
-        for group_name, gcols in groups.items():
-            if label_col in gcols:
                 group_text[group_name].append(f"{label_col} => {text_label}")
                 break
@@ -325,20 +287,20 @@ def predict(
             gtitle = gname.replace("_", " ")
             final_str_parts.append(f"**{gtitle}**")
             final_str_parts.append("\n".join(lines))
-            final_str_parts.append("")  # blank line
     if not final_str_parts:
         final_str = "No predictions made or no matching group columns."
     else:
         final_str = "\n".join(final_str_parts)
-    # 5) Overall patient count
     total_count = len(df)
     total_count_md = f"We have **{total_count}** patients in the dataset."
-    # 6) Nearest Neighbors summary
     nn_md = get_nearest_neighbors_info(user_df, k=5)
-    # 7) Bar chart for input features
     input_counts = {}
     for col, val_ in user_input_dict.items():
         matched = len(df[df[col] == val_])
@@ -351,14 +313,12 @@ def predict(
     )
     fig_in.update_layout(width=1200, height=400)
-    # 8) Bar chart for predicted labels
-    #    For each model’s label_col, see how many in df have the same predicted value
     label_counts = {}
     for i, arr in enumerate(predictions):
         lbl = model_filenames[i].split('.')[0]
         pred_val = arr[0]
         if lbl in df.columns:
-            # How many in df have this same value
             label_counts[lbl] = len(df[df[lbl] == pred_val])
     if label_counts:
         bar_lbl_df = pd.DataFrame({
@@ -379,8 +339,8 @@ def predict(
         severity_msg,      # 2) Mental Health Severity
         total_count_md,    # 3) Total Patient Count
         nn_md,             # 4) Nearest Neighbors Summary
-        fig_in,            # 5) Bar Chart for input features
-        fig_lbl            # 6) Bar Chart for predicted labels
     )
@@ -388,9 +348,6 @@ def predict(
 # 6) EXTRA TABS / FUNCTIONS
 ######################################
 def distribution_plot(feature_col, label_col):
-    """
-    Creates a bar chart grouping by [feature_col, label_col], showing counts.
-    """
     if not feature_col or not label_col:
         return px.bar(title="Please select both Feature and Label.")
     if (feature_col not in df.columns) or (label_col not in df.columns):
@@ -409,9 +366,6 @@ def distribution_plot(feature_col, label_col):
 def co_occurrence_plot(feature1, feature2, label_col):
-    """
-    Similar approach but grouping by [feature1, feature2, label_col].
-    """
     if not feature1 or not feature2 or not label_col:
         return px.bar(title="Please select all three fields.")
     if feature1 not in df.columns or feature2 not in df.columns or label_col not in df.columns:
@@ -437,127 +391,104 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
     # ======== TAB 1: PREDICTION ========
     with gr.Tab("Prediction"):
-        inputs = [
-            gr.Dropdown(
-                list(input_mapping['YMDESUD5ANYO'].keys()),
-                label="YMDESUD5ANYO: ONLY MDE, ONLY SUD, BOTH, OR NEITHER-ANY"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMDEHPO'].keys()),
-                label="YMDEHPO: Saw health prof only for MDE in past years?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMDETXRX'].keys()),
-                label="YMDETXRX: Received treatment/counseling if saw doc/prof for MDE?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['LVLDIFMEM2'].keys()),
-                label="LVLDIFMEM2: Difficulty remembering / concentrating?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMSUD5YANY'].keys()),
-                label="YMSUD5YANY: Past-year MDE & substance use disorder?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YPSY2MDE'].keys()),
-                label="YPSY2MDE: Saw/talked to psychiatrist about MDE?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMDELT'].keys()),
-                label="YMDELT: Had major depressive episode in lifetime?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YDOCMDE'].keys()),
-                label="YDOCMDE: Saw/talked to general practitioner/family MD about MDE?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMIMI5YANY'].keys()),
-                label="YMIMI5YANY: Past-year MDE with severe impairment & illicit drug use?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMDEHARX'].keys()),
-                label="YMDEHARX: Saw health professional & received medication for MDE?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['MDEIMPY'].keys()),
-                label="MDEIMPY: MDE with severe role impairment?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YRXMDEYR'].keys()),
-                label="YRXMDEYR: Used received medication for MDE in past years?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMDERSUD5ANY'].keys()),
-                label="YMDERSUD5ANY: MDE or substance use disorder - past year?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMIMS5YANY'].keys()),
-                label="YMIMS5YANY: Past-year MDE + severe impairment + substance use?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMDEYR'].keys()),
-                label="YMDEYR: Past-year major depressive episode?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YHLTMDE'].keys()),
-                label="YHLTMDE: Saw/talk to health professional about MDE in past year?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YUSUIPLNYR'].keys()),
-                label="YUSUIPLNYR: Made plans to kill self in past year?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMDEHPRX'].keys()),
-                label="YMDEHPRX: Saw health prof or received med for MDE in past year?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YUSUIPLN'].keys()),
-                label="YUSUIPLN: Make plans to kill yourself in past 12 months?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YPSY1MDE'].keys()),
-                label="YPSY1MDE: Saw/talked to psychologist about MDE in past year?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YMIUD5YANY'].keys()),
-                label="YMIUD5YANY: Past-year MDE & illicit drug use disorder?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YUSUITHK'].keys()),
-                label="YUSUITHK: Youth seriously think about killing self in past 12 months?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YTXMDEYR'].keys()),
-                label="YTXMDEYR: Saw or talk to doc/health prof for MDE in past year?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YCOUNMDE'].keys()),
-                label="YCOUNMDE: Saw/talk to counselor about MDE in past year?"
-            ),
-            gr.Dropdown(
-                list(input_mapping['YUSUITHKYR'].keys()),
-                label="YUSUITHKYR: Seriously thought about killing self?"
-            )
         ]
         predict_btn = gr.Button("Predict")
-        # 6 outputs now
         out_pred_res = gr.Textbox(label="Prediction Results", lines=8)
         out_sev      = gr.Textbox(label="Mental Health Severity", lines=2)
         out_count    = gr.Markdown(label="Total Patient Count")
-        out_nn       = gr.Markdown(label="Nearest Neighbors Summary")
         out_bar_input= gr.Plot(label="Input Feature Counts")
         out_bar_label= gr.Plot(label="Predicted Label Counts")
-        # Wire up the button
         predict_btn.click(
             fn=predict,
-            inputs=inputs,
             outputs=[
                 out_pred_res,   # 1
                 out_sev,        # 2
@@ -571,10 +502,7 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
     # ======== TAB 2: Distribution Analysis ========
     with gr.Tab("Distribution Analysis"):
         gr.Markdown("## Distribution Plot\nSelect one feature and one label column to see bar counts.")
-        # 1) We gather the 'input features' from input_mapping keys:
         list_of_features = sorted(input_mapping.keys())
-        # 2) We gather the 'label columns' from predictor.prediction_map keys:
         list_of_labels = sorted(predictor.prediction_map.keys())
         feat_dd = gr.Dropdown(choices=list_of_features, label="Feature Column")
@@ -606,5 +534,5 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
             outputs=co_occ_output
         )
-# Finally, launch the Gradio interface
 demo.launch()

         self.model_filenames = model_filenames
         self.models = self.load_models()
         self.prediction_map = {
             "YOWRCONC": ["Did not have difficulty concentrating", "Had difficulty concentrating"],
             "YOSEEDOC": ["Did not feel the need to see a doctor", "Felt the need to see a doctor"],
             "YOWRHRS":  ["Did not have trouble sleeping", "Had trouble sleeping"],
             "YO_MDEA5": ["Others did not notice restlessness/lethargy", "Others noticed restlessness/lethargy"],
             "YOWRCHR":  ["Did not feel so sad nothing could cheer up", "Felt so sad that nothing could cheer up"],
+            "YOWRLSIN": ["Did not feel bored / lose interest", "Felt bored / lost interest"],
             "YODPPROB": ["No other problems for 2+ weeks", "Had other problems for 2+ weeks"],
             "YOWRPROB": ["No 'worst time ever' feeling", "Had 'worst time ever' feeling"],
+            "YODPR2WK": ["No periods with depressed feelings lasting 2+ weeks", "Had depressed feelings 2+ weeks"],
             "YOWRDEPR": ["Did not feel sad/depressed mostly everyday", "Felt sad/depressed mostly everyday"],
+            "YODPDISC": ["Overall mood not sad/depressed", "Overall mood was sad/depressed"],
+            "YOLOSEV":  ["Did not lose interest", "Lost interest in enjoyable things"],
             "YOWRDCSN": ["Was able to make decisions", "Was unable to make decisions"],
+            "YODSMMDE": ["Never had 2 weeks depression symptoms", "Had 2+ weeks of depression symptoms"],
+            "YO_MDEA3": ["No changes in appetite/weight", "Had changes in appetite/weight"],
+            "YODPLSIN": ["Never lost interest / felt bored", "Lost interest/felt bored"],
             "YOWRELES": ["Did not eat less than usual", "Ate less than usual"],
             "YODSCEV":  ["Fewer severe depression symptoms", "More severe depression symptoms"],
+            "YOPB2WK":  ["No uneasy feelings lasting 2+ weeks", "Uneasy feelings lasting 2+ weeks"],
+            "YO_MDEA2": ["No physical/mental issues (2+ weeks)", "Had physical/mental issues (2+ weeks)"]
         }
     def load_models(self):
         return loaded
     def make_predictions(self, user_input: pd.DataFrame):
         predictions = []
         for model in self.models:
             out = model.predict(user_input)
         return predictions
     def get_majority_vote(self, predictions):
         """Return the most frequent value across all model predictions.

         The per-model arrays are concatenated into one flat array and the
         value with the highest count is returned. ``np.bincount`` is used
         for counting, so the values must be non-negative integers.
         """
         flat = np.concatenate(predictions)
         tallies = np.bincount(flat)
         return tallies.argmax()
     def evaluate_severity(self, count_ones: int) -> str:
         if count_ones >= 13:
             return "Mental Health Severity: Severe"
         elif count_ones >= 9:
 ######################################
+# 3) FEATURE CATEGORIES + MAPPING
 ######################################
+categories_dict = {
+    "1. Depression & Substance Use Diagnosis": [
+        "YMDESUD5ANYO", "YMDELT", "YMDEYR", "YMDERSUD5ANY",
+        "YMSUD5YANY", "YMIUD5YANY", "YMIMS5YANY", "YMIMI5YANY"
+    ],
+    "2. Mental Health Treatment & Prof Consultation": [
+        "YMDEHPO", "YMDETXRX", "YMDEHARX", "YRXMDEYR", "YHLTMDE",
+        "YTXMDEYR", "YDOCMDE", "YPSY2MDE", "YPSY1MDE", "YCOUNMDE"
+    ],
+    "3. Functional & Cognitive Impairment": [
+        "MDEIMPY", "LVLDIFMEM2"
+    ],
+    "4. Suicidal Thoughts & Behaviors": [
+        "YUSUITHK", "YUSUITHKYR", "YUSUIPLNYR", "YUSUIPLN"
+    ]
+}
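+# Numeric mappings for each input feature.
+# NOTE(review): 24 features are mapped here, whereas the previous version
+# mapped 25 (YMDEHPRX is no longer present) - confirm the models expect this set.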
+input_mapping = {
+    'YMDESUD5ANYO': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
+    'YMDELT':       {"Yes": 1, "No": 2},
+    'YMDEYR':       {"Yes": 1, "No": 2},
+    'YMDERSUD5ANY': {"Yes": 1, "No": 0},
+    'YMSUD5YANY':   {"Yes": 1, "No": 0},
+    'YMIUD5YANY':   {"Yes": 1, "No": 0},
+    'YMIMS5YANY':   {"Yes": 1, "No": 0},
+    'YMIMI5YANY':   {"Yes": 1, "No": 0},
+    'YMDEHPO':      {"Yes": 1, "No": 0},
+    'YMDETXRX':     {"Yes": 1, "No": 0},
+    'YMDEHARX':     {"Yes": 1, "No": 0},
+    'YRXMDEYR':     {"Yes": 1, "No": 0},
+    'YHLTMDE':      {"Yes": 1, "No": 0},
+    'YTXMDEYR':     {"Yes": 1, "No": 0},
+    'YDOCMDE':      {"Yes": 1, "No": 0},
+    'YPSY2MDE':     {"Yes": 1, "No": 0},
+    'YPSY1MDE':     {"Yes": 1, "No": 0},
+    'YCOUNMDE':     {"Yes": 1, "No": 0},
+    'MDEIMPY':      {"Yes": 1, "No": 2},
+    'LVLDIFMEM2':   {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
+    'YUSUITHK':     {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+    'YUSUITHKYR':   {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+    'YUSUIPLNYR':   {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+    'YUSUIPLN':     {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+}
 def validate_inputs(*args):
     """Return True only when every supplied value is truthy.

     An empty string or ``None`` (i.e. an unselected field) makes the
     check fail; calling with no arguments returns True.
     """
     return all(args)
+######################################
+# 4) NEAREST NEIGHBORS (Grouped)
+######################################
+def get_nearest_neighbors_info(user_input_df: pd.DataFrame, k=5):
+    # Ensure columns exist in df
+    user_cols = user_input_df.columns
+    if not all(col in df.columns for col in user_cols):
+        return "Cannot compute nearest neighbors. Some columns not found in df."
+    # Subset df
+    sub_df = df[list(user_cols)].copy()
+    diffs = sub_df - user_input_df.iloc[0]
+    dists = (diffs**2).sum(axis=1)**0.5
+    nn_indices = dists.nsmallest(k).index
+    neighbors = df.loc[nn_indices]
+    lines = [f"**Nearest Neighbors (k={k})**",
+             f"Distances Range: {dists[nn_indices].min():.2f} to {dists[nn_indices].max():.2f}",
+             ""]
+    # Group the features by our categories_dict
+    for cat_name, cat_feats in categories_dict.items():
+        lines.append(f"### {cat_name}")
+        for feat in cat_feats:
+            if feat not in neighbors.columns:
+                continue
+            # Count how many neighbors had each numeric value
+            val_counts = neighbors[feat].value_counts().to_dict()
+            # Build string like: "**YMDESUD5ANYO** => 3 had '1'; 2 had '2'"
+            parts = []
+            for val_, count_ in val_counts.items():
+                parts.append(f"{count_} had '{val_}'")
+            joined = "; ".join(parts)
+            lines.append(f"**{feat}** => {joined}")
+        lines.append("")  # blank line
+    return "\n".join(lines)
 ######################################
+# 5) PREDICT FUNCTION
 ######################################
 def predict(
+    # 24 features, listed in the same order as categories_dict.
+    # NOTE(review): the previous version took 25 inputs; YMDEHPRX is no longer
+    # passed - confirm the models expect this feature set.
+    YMDESUD5ANYO, YMDELT, YMDEYR, YMDERSUD5ANY,
+    YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
+    YMDEHPO, YMDETXRX, YMDEHARX, YRXMDEYR, YHLTMDE,
+    YTXMDEYR, YDOCMDE, YPSY2MDE, YPSY1MDE, YCOUNMDE,
+    MDEIMPY, LVLDIFMEM2,
+    YUSUITHK, YUSUITHKYR, YUSUIPLNYR, YUSUIPLN
 ):
     if not validate_inputs(
+        YMDESUD5ANYO, YMDELT, YMDEYR, YMDERSUD5ANY,
+        YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
+        YMDEHPO, YMDETXRX, YMDEHARX, YRXMDEYR, YHLTMDE,
+        YTXMDEYR, YDOCMDE, YPSY2MDE, YPSY1MDE, YCOUNMDE,
+        MDEIMPY, LVLDIFMEM2,
+        YUSUITHK, YUSUITHKYR, YUSUIPLNYR, YUSUIPLN
     ):
         return (
             "Please select all required fields.",  # 1) Prediction Results
+            "Validation Error",                    # 2) Severity
+            "No data",                             # 3) Total Count
+            "No nearest neighbors info",           # 4) NN Summary
+            None,                                  # 5) Bar chart (Input)
+            None                                   # 6) Bar chart (Labels)
         )
+    # 1) Map user-friendly -> numeric
     user_input_dict = {
         'YMDESUD5ANYO': input_mapping['YMDESUD5ANYO'][YMDESUD5ANYO],
         'YMDELT':       input_mapping['YMDELT'][YMDELT],
+        'YMDEYR':       input_mapping['YMDEYR'][YMDEYR],
+        'YMDERSUD5ANY': input_mapping['YMDERSUD5ANY'][YMDERSUD5ANY],
+        'YMSUD5YANY':   input_mapping['YMSUD5YANY'][YMSUD5YANY],
+        'YMIUD5YANY':   input_mapping['YMIUD5YANY'][YMIUD5YANY],
+        'YMIMS5YANY':   input_mapping['YMIMS5YANY'][YMIMS5YANY],
         'YMIMI5YANY':   input_mapping['YMIMI5YANY'][YMIMI5YANY],
+        'YMDEHPO':      input_mapping['YMDEHPO'][YMDEHPO],
+        'YMDETXRX':     input_mapping['YMDETXRX'][YMDETXRX],
         'YMDEHARX':     input_mapping['YMDEHARX'][YMDEHARX],
         'YRXMDEYR':     input_mapping['YRXMDEYR'][YRXMDEYR],
         'YHLTMDE':      input_mapping['YHLTMDE'][YHLTMDE],
         'YTXMDEYR':     input_mapping['YTXMDEYR'][YTXMDEYR],
+        'YDOCMDE':      input_mapping['YDOCMDE'][YDOCMDE],
+        'YPSY2MDE':     input_mapping['YPSY2MDE'][YPSY2MDE],
+        'YPSY1MDE':     input_mapping['YPSY1MDE'][YPSY1MDE],
         'YCOUNMDE':     input_mapping['YCOUNMDE'][YCOUNMDE],
+        'MDEIMPY':      input_mapping['MDEIMPY'][MDEIMPY],
+        'LVLDIFMEM2':   input_mapping['LVLDIFMEM2'][LVLDIFMEM2],
+        'YUSUITHK':     input_mapping['YUSUITHK'][YUSUITHK],
+        'YUSUITHKYR':   input_mapping['YUSUITHKYR'][YUSUITHKYR],
+        'YUSUIPLNYR':   input_mapping['YUSUIPLNYR'][YUSUIPLNYR],
+        'YUSUIPLN':     input_mapping['YUSUIPLN'][YUSUIPLN]
     }
     user_df = pd.DataFrame(user_input_dict, index=[0])
+    # 2) Predict
+    predictions = predictor.make_predictions(user_df)
     all_preds = np.concatenate(predictions)
     count_ones = sum(all_preds == 1)
     severity_msg = predictor.evaluate_severity(count_ones)
+    # 3) Grouped textual results
     groups = {
         "Concentration_and_Decision_Making": ["YOWRCONC", "YOWRDCSN"],
         "Sleep_and_Energy_Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
             "YODPPROB", "YOWRPROB", "YODPR2WK", "YODSMMDE", "YOPB2WK"
         ]
     }
     group_text = {g: [] for g in groups}
+    # Predictions are matched to labels by position: predictions[i] pairs with model_filenames[i]
     for i, arr in enumerate(predictions):
         label_col = model_filenames[i].split('.')[0]  # e.g. "YOWRCONC"
         val = arr[0]
+        # If we have a textual map, use it
         if label_col in predictor.prediction_map and val in range(len(predictor.prediction_map[label_col])):
             text_label = predictor.prediction_map[label_col][val]
         else:
             text_label = f"Prediction={val}"
+        # Put in whichever group
+        for group_name, cols_ in groups.items():
+            if label_col in cols_:
                 group_text[group_name].append(f"{label_col} => {text_label}")
                 break
             gtitle = gname.replace("_", " ")
             final_str_parts.append(f"**{gtitle}**")
             final_str_parts.append("\n".join(lines))
+            final_str_parts.append("")
     if not final_str_parts:
         final_str = "No predictions made or no matching group columns."
     else:
         final_str = "\n".join(final_str_parts)
+    # 4) Additional info
     total_count = len(df)
     total_count_md = f"We have **{total_count}** patients in the dataset."
+    # 5) Nearest Neighbors
     nn_md = get_nearest_neighbors_info(user_df, k=5)
+    # 6) Bar chart for input features
     input_counts = {}
     for col, val_ in user_input_dict.items():
         matched = len(df[df[col] == val_])
     )
     fig_in.update_layout(width=1200, height=400)
+    # 7) Bar chart for predicted labels
     label_counts = {}
     for i, arr in enumerate(predictions):
         lbl = model_filenames[i].split('.')[0]
         pred_val = arr[0]
         if lbl in df.columns:
             label_counts[lbl] = len(df[df[lbl] == pred_val])
     if label_counts:
         bar_lbl_df = pd.DataFrame({
         severity_msg,      # 2) Mental Health Severity
         total_count_md,    # 3) Total Patient Count
         nn_md,             # 4) Nearest Neighbors Summary
+        fig_in,            # 5) Bar Chart (input features)
+        fig_lbl            # 6) Bar Chart (labels)
     )
 # 6) EXTRA TABS / FUNCTIONS
 ######################################
 def distribution_plot(feature_col, label_col):
     if not feature_col or not label_col:
         return px.bar(title="Please select both Feature and Label.")
     if (feature_col not in df.columns) or (label_col not in df.columns):
 def co_occurrence_plot(feature1, feature2, label_col):
     if not feature1 or not feature2 or not label_col:
         return px.bar(title="Please select all three fields.")
     if feature1 not in df.columns or feature2 not in df.columns or label_col not in df.columns:
     # ======== TAB 1: PREDICTION ========
     with gr.Tab("Prediction"):
+        gr.Markdown(
+            """
+            ### Please provide inputs in each of the four categories below.
+            *All fields are required.*
+            """
+        )
+        # For clarity, we define an ordered list of the features in the exact sequence
+        # matching our predict() function. We’ll group them under the same headings.
+        cat1_col_labels = [
+            ("YMDESUD5ANYO", "YMDESUD5ANYO: ONLY MDE, ONLY SUD, BOTH, OR NEITHER"),
+            ("YMDELT",       "YMDELT: Had major depressive episode in lifetime"),
+            ("YMDEYR",       "YMDEYR: Past-year major depressive episode"),
+            ("YMDERSUD5ANY", "YMDERSUD5ANY: MDE or substance use disorder - past year"),
+            ("YMSUD5YANY",   "YMSUD5YANY: Past-year MDE & substance use disorder"),
+            ("YMIUD5YANY",   "YMIUD5YANY: Past-year MDE & illicit drug use disorder"),
+            ("YMIMS5YANY",   "YMIMS5YANY: Past-year MDE + severe impairment + substance use"),
+            ("YMIMI5YANY",   "YMIMI5YANY: Past-year MDE with severe impairment & illicit drug use")
+        ]
+        cat2_col_labels = [
+            ("YMDEHPO",  "YMDEHPO: Saw health prof only for MDE in past year"),
+            ("YMDETXRX", "YMDETXRX: Received treatment/counseling if saw doc/prof for MDE"),
+            ("YMDEHARX", "YMDEHARX: Saw health professional & received medication for MDE"),
+            ("YRXMDEYR", "YRXMDEYR: Used received medication for MDE in past years"),
+            ("YHLTMDE",  "YHLTMDE: Saw/talked to health professional about MDE in past year"),
+            ("YTXMDEYR", "YTXMDEYR: Saw or talked to doc/health prof for MDE in past year"),
+            ("YDOCMDE",  "YDOCMDE: Saw/talked to general practitioner/family MD about MDE"),
+            ("YPSY2MDE", "YPSY2MDE: Saw/talked to psychiatrist about MDE"),
+            ("YPSY1MDE", "YPSY1MDE: Saw/talked to psychologist about MDE"),
+            ("YCOUNMDE", "YCOUNMDE: Saw/talked to counselor about MDE")
+        ]
+        cat3_col_labels = [
+            ("MDEIMPY",    "MDEIMPY: MDE with severe role impairment"),
+            ("LVLDIFMEM2", "LVLDIFMEM2: Difficulty remembering/concentrating")
+        ]
+        cat4_col_labels = [
+            ("YUSUITHK",   "YUSUITHK: Youth seriously think about killing self in past 12 months"),
+            ("YUSUITHKYR", "YUSUITHKYR: Seriously thought about killing self"),
+            ("YUSUIPLNYR", "YUSUIPLNYR: Made plans to kill self in past year"),
+            ("YUSUIPLN",   "YUSUIPLN: Made plans to kill yourself in past 12 months")
         ]
+        # Category 1 block
+        gr.Markdown("#### 1. Depression & Substance Use Diagnosis")
+        cat1_inputs = []
+        for col, label_text in cat1_col_labels:
+            dd = gr.Dropdown(
+                choices=list(input_mapping[col].keys()),
+                label=label_text
+            )
+            cat1_inputs.append(dd)
+        # Category 2 block
+        gr.Markdown("#### 2. Mental Health Treatment & Professional Consultation")
+        cat2_inputs = []
+        for col, label_text in cat2_col_labels:
+            dd = gr.Dropdown(
+                choices=list(input_mapping[col].keys()),
+                label=label_text
+            )
+            cat2_inputs.append(dd)
+        # Category 3 block
+        gr.Markdown("#### 3. Functional & Cognitive Impairment")
+        cat3_inputs = []
+        for col, label_text in cat3_col_labels:
+            dd = gr.Dropdown(
+                choices=list(input_mapping[col].keys()),
+                label=label_text
+            )
+            cat3_inputs.append(dd)
+        # Category 4 block
+        gr.Markdown("#### 4. Suicidal Thoughts & Behaviors")
+        cat4_inputs = []
+        for col, label_text in cat4_col_labels:
+            dd = gr.Dropdown(
+                choices=list(input_mapping[col].keys()),
+                label=label_text
+            )
+            cat4_inputs.append(dd)
+        # Gradio passes inputs positionally, so this list must follow the parameter order of `predict()`
+        all_inputs = cat1_inputs + cat2_inputs + cat3_inputs + cat4_inputs
         predict_btn = gr.Button("Predict")
+        # 6 outputs
         out_pred_res = gr.Textbox(label="Prediction Results", lines=8)
         out_sev      = gr.Textbox(label="Mental Health Severity", lines=2)
         out_count    = gr.Markdown(label="Total Patient Count")
+        out_nn       = gr.Markdown(label="Nearest Neighbors Summary (Grouped by Category)")
         out_bar_input= gr.Plot(label="Input Feature Counts")
         out_bar_label= gr.Plot(label="Predicted Label Counts")
         predict_btn.click(
             fn=predict,
+            inputs=all_inputs,
             outputs=[
                 out_pred_res,   # 1
                 out_sev,        # 2
     # ======== TAB 2: Distribution Analysis ========
     with gr.Tab("Distribution Analysis"):
         gr.Markdown("## Distribution Plot\nSelect one feature and one label column to see bar counts.")
         list_of_features = sorted(input_mapping.keys())
         list_of_labels = sorted(predictor.prediction_map.keys())
         feat_dd = gr.Dropdown(choices=list_of_features, label="Feature Column")
             outputs=co_occ_output
         )
+# Finally, launch
 demo.launch()