Spaces:

pantdipendra
/

AdolescentsMentalHealthPrediction

Running

App Files Community

pantdipendra committed 19 days ago

Commit

3b96ce2

verified ·

1 Parent(s): 69090fc

Update app.py

Browse files

Files changed (1) hide show

app.py +283 -395

app.py CHANGED Viewed

@@ -1,14 +1,24 @@
 import pickle
-import gradio as gr
 import numpy as np
 import pandas as pd
 import plotly.express as px
-# Load the training CSV once.
 df = pd.read_csv("X_train_Y_Train_merged_train.csv")
 ######################################
-# 1) MODEL PREDICTOR CLASS
 ######################################
 class ModelPredictor:
     def __init__(self, model_path, model_filenames):
@@ -17,106 +27,83 @@ class ModelPredictor:
         self.models = self.load_models()
         # Mapping from label column to human-readable strings for 0/1
         self.prediction_map = {
-            "YOWRCONC": ["No difficulty concentrating", "Had difficulty concentrating"],
-            "YOSEEDOC": ["No need to see doctor", "Needed to see doctor"],
-            "YOWRHRS": ["No trouble sleeping", "Had trouble sleeping"],
-            "YO_MDEA5": ["Others didn't notice restlessness", "Others noticed restlessness"],
-            "YOWRCHR": ["Not sad beyond cheering", "Felt so sad no one could cheer up"],
-            "YOWRLSIN": ["Never felt bored/lost interest", "Felt bored/lost interest"],
             "YODPPROB": ["No other problems for 2+ weeks", "Had other problems for 2+ weeks"],
-            "YOWRPROB": ["No worst time feeling", "Felt worst time ever"],
-            "YODPR2WK": ["No depressed feelings for 2+ wks", "Depressed feelings for 2+ wks"],
-            "YOWRDEPR": ["Not sad or depressed most days", "Sad or depressed most days"],
             "YODPDISC": ["Mood not depressed overall", "Mood depressed overall (discrepancy)"],
-            "YOLOSEV": ["Did not lose interest in activities", "Lost interest in activities"],
-            "YOWRDCSN": ["Could make decisions", "Could not make decisions"],
-            "YODSMMDE": ["No 2+ week depression episodes", "Had 2+ week depression episodes"],
-            "YO_MDEA3": ["No appetite/weight changes", "Yes appetite/weight changes"],
-            "YODPLSIN": ["Never bored/lost interest", "Often bored/lost interest"],
-            "YOWRELES": ["Did not eat less", "Ate less than usual"],
             "YODSCEV": ["Fewer severe symptoms", "More severe symptoms"],
-            "YOPB2WK": ["No uneasy feelings daily 2+ wks", "Uneasy feelings daily 2+ wks"],
-            "YO_MDEA2": ["No issues physical/mental daily", "Issues physical/mental daily 2+ wks"]
         }
     def load_models(self):
         models = []
-        for fn in self.model_filenames:
-            filepath = self.model_path + fn
-            with open(filepath, "rb") as file:
-                models.append(pickle.load(file))
         return models
     def make_predictions(self, user_input):
-        """Return list of numpy arrays, each array either [0] or [1]."""
-        preds = []
-        for m in self.models:
-            out = m.predict(user_input)
-            preds.append(np.array(out).flatten())
-        return preds
     def get_majority_vote(self, predictions):
-        """Flatten all predictions and find 0 or 1 with majority."""
         combined = np.concatenate(predictions)
-        return np.bincount(combined).argmax()
     def evaluate_severity(self, majority_vote_count):
-        """Heuristic: Based on 16 total models, 0-4=Very Low, 5-8=Low, 9-12=Moderate, 13-16=Severe."""
         if majority_vote_count >= 13:
-            return "Mental health severity: Severe"
         elif majority_vote_count >= 9:
-            return "Mental health severity: Moderate"
         elif majority_vote_count >= 5:
-            return "Mental health severity: Low"
         else:
-            return "Mental health severity: Very Low"
-######################################
-# 2) CONFIGURATIONS
-######################################
-model_filenames = [
-    "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
-    "YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
-    "YODPDISC.pkl", "YOLOSEV.pkl", "YOWRDCSN.pkl", "YODSMMDE.pkl",
-    "YO_MDEA3.pkl", "YODPLSIN.pkl", "YOWRELES.pkl", "YOPB2WK.pkl"
-]
-model_path = "models/"
-predictor = ModelPredictor(model_path, model_filenames)
 ######################################
-# 3) INPUT VALIDATION
 ######################################
 def validate_inputs(*args):
-    # Just ensure all required (non-co-occurrence) fields are picked
     for arg in args:
         if arg == '' or arg is None:
             return False
     return True
 ######################################
-# 4) PREDICTION FUNCTION
 ######################################
 def predict(
-    # Original required features
     YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
     YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
     YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
     YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
-    YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR,
-    # **New** optional picks for co-occurrence
-    co_occ_feature1, co_occ_feature2, co_occ_label
 ):
-    """
-    Main function that:
-      - Predicts with the 16 models
-      - Aggregates results
-      - Produces severity
-      - Returns distribution & bar charts
-      - Finds K=2 Nearest Neighbors
-      - Produces *one* co-occurrence plot based on user-chosen columns
-    """
-    # 1) Build user_input for models
     user_input_data = {
         'YNURSMDE': [int(YNURSMDE)],
         'YMDEYR': [int(YMDEYR)],
@@ -150,21 +137,21 @@ def predict(
     }
     user_input = pd.DataFrame(user_input_data)
-    # 2) Model Predictions
     predictions = predictor.make_predictions(user_input)
     majority_vote = predictor.get_majority_vote(predictions)
-    majority_vote_count = np.sum(np.concatenate(predictions) == 1)
-    severity = predictor.evaluate_severity(majority_vote_count)
-    # 3) Summarize textual results
-    results_by_group = {
-        "Concentration_and_Decision_Making": [],
-        "Sleep_and_Energy_Levels": [],
-        "Mood_and_Emotional_State": [],
-        "Appetite_and_Weight_Changes": [],
-        "Duration_and_Severity_of_Depression_Symptoms": []
-    }
-    group_map = {
         "Concentration_and_Decision_Making": ["YOWRCONC", "YOWRDCSN"],
         "Sleep_and_Energy_Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
         "Mood_and_Emotional_State": ["YOWRCHR", "YOWRLSIN", "YOWRDEPR", "YODPDISC",
@@ -174,199 +161,139 @@ def predict(
                                                          "YODPR2WK", "YODSMMDE",
                                                          "YOPB2WK"]
     }
-    # Convert each model's 0/1 to text
-    grouped_output_lines = []
-    for i, pred_array in enumerate(predictions):
-        col_name = model_filenames[i].split(".")[0]  # e.g., "YOWRCONC"
-        val = pred_array[0]
-        if col_name in predictor.prediction_map and val in [0, 1]:
-            text = predictor.prediction_map[col_name][val]
-            out_line = f"{col_name}: {text}"
         else:
-            out_line = f"{col_name}: Prediction={val}"
-        # Find group
-        placed = False
-        for g_key, g_cols in group_map.items():
-            if col_name in g_cols:
-                results_by_group[g_key].append(out_line)
-                placed = True
                 break
-        if not placed:
-            # If it didn't fall into any known group, skip or handle
             pass
-    # Format into a single string
-    for group_label, pred_lines in results_by_group.items():
-        if pred_lines:
-            grouped_output_lines.append(f"Group {group_label}:")
-            grouped_output_lines.append("\n".join(pred_lines))
-            grouped_output_lines.append("")
-    if len(grouped_output_lines) == 0:
-        final_result_text = "No predictions made. Check inputs."
-    else:
-        final_result_text = "\n".join(grouped_output_lines).strip()
-    # 4) Additional Features
-    # A) Total patient count
     total_patients = len(df)
-    total_count_md = (
-        "### Total Patient Count\n"
-        f"**{total_patients}** total patients in the dataset."
-    )
-    # B) Bar chart of how many have same inputs
-    input_counts = {}
-    for c in user_input_data.keys():
-        v = user_input_data[c][0]
-        input_counts[c] = len(df[df[c] == v])
-    df_input_counts = pd.DataFrame({"Feature": list(input_counts.keys()), "Count": list(input_counts.values())})
-    fig_input_bar = px.bar(
-        df_input_counts,
-        x="Feature",
-        y="Count",
-        title="Number of Patients with the Same Value for Each Input Feature"
     )
-    fig_input_bar.update_layout(xaxis={"categoryorder": "total descending"})
-    # C) Bar chart for predicted labels
     label_counts = {}
-    for i, pred_array in enumerate(predictions):
-        col_name = model_filenames[i].split(".")[0]
-        val = pred_array[0]
-        if val in [0,1]:
-            label_counts[col_name] = len(df[df[col_name] == val])
-    if len(label_counts) > 0:
-        df_label_counts = pd.DataFrame({
-            "Label Column": list(label_counts.keys()),
-            "Count": list(label_counts.values())
-        })
-        fig_label_bar = px.bar(
-            df_label_counts,
-            x="Label Column",
-            y="Count",
-            title="Number of Patients with the Same Predicted Label"
-        )
     else:
-        fig_label_bar = px.bar(title="No valid predicted labels to display")
-    # D) Simple Distribution Plot (demo for first 3 labels & 4 inputs)
-    # (Unchanged from prior approach; you can remove if you prefer.)
-    sample_feats = list(user_input_data.keys())[:31]
-    sample_labels = [fn.split(".")[0] for fn in model_filenames[:15]]
-    dist_segments = []
-    for feat in sample_feats:
         if feat not in df.columns:
             continue
-        for lbl in sample_labels:
-            if lbl not in df.columns:
                 continue
-            temp_g = df.groupby([feat,lbl]).size().reset_index(name="count")
-            temp_g["feature"] = feat
-            temp_g["label"] = lbl
-            dist_segments.append(temp_g)
-    if len(dist_segments) > 0:
-        big_dist_df = pd.concat(dist_segments, ignore_index=True)
-        fig_dist = px.bar(
-            big_dist_df,
-            x=big_dist_df.columns[0],
-            y="count",
-            color=big_dist_df.columns[1],
-            facet_row="feature",
-            facet_col="label",
-            title="Sample Distribution Plot (first 4 features vs first 3 labels)"
-        )
-        fig_dist.update_layout(height=700)
-    else:
-        fig_dist = px.bar(title="No distribution plot generated (columns not found).")
-    # E) Nearest Neighbors with K=2
-    # We keep K=2, but for *all* label columns, we show their actual 0/1 or mapped text
-    # (same approach as before).
-    #  ... [omitted here for brevity, or replicate your existing code for K=2 nearest neighbors] ...
-    # We'll do a short version to keep focus on co-occ:
-    # ---------------------------------------------------------------------
-    # Build Hamming distance across user_input columns
-    columns_for_distance = list(user_input.columns)
-    sub_df = df[columns_for_distance].copy()
-    user_row = user_input.iloc[0]
-    distances = []
-    for idx, row_ in sub_df.iterrows():
-        dist_ = sum(row_[col] != user_row[col] for col in columns_for_distance)
-        distances.append(dist_)
-    df_dist = df.copy()
-    df_dist["distance"] = distances
-    # Sort ascending, pick K=2
-    K = 2
-    nearest_neighbors = df_dist.sort_values("distance", ascending=True).head(K)
-    # Summarize in Markdown
-    nn_md = ["### Nearest Neighbors (K=2)"]
-    nn_md.append("(In a real application, you'd refine which features matter, how to encode them, etc.)\n")
-    for irow in nearest_neighbors.itertuples():
-        nn_md.append(f"- **Neighbor ID {irow.Index}**: distance={irow.distance}")
-    nn_md_str = "\n".join(nn_md)
-    # F) Co-occurrence Plot for user-chosen feature1, feature2, label
-    #    If the user picks "None" or doesn't pick valid columns, skip or fallback.
-    if (co_occ_feature1 is not None and co_occ_feature1 != "None" and
-        co_occ_feature2 is not None and co_occ_feature2 != "None" and
-        co_occ_label is not None and co_occ_label != "None"):
-        # Check if these columns are in df
-        if (co_occ_feature1 in df.columns and
-            co_occ_feature2 in df.columns and
-            co_occ_label in df.columns):
-            # Group by [co_occ_feature1, co_occ_feature2, co_occ_label]
-            co_data = df.groupby([co_occ_feature1, co_occ_feature2, co_occ_label]).size().reset_index(name="count")
-            fig_co_occ = px.bar(
-                co_data,
-                x=co_occ_feature1,
-                y="count",
-                color=co_occ_label,
-                facet_col=co_occ_feature2,
-                title=f"Co-occurrence: {co_occ_feature1} & {co_occ_feature2} vs {co_occ_label}"
-            )
-        else:
-            fig_co_occ = px.bar(title="One or more selected columns not found in dataframe.")
     else:
-        fig_co_occ = px.bar(title="No co-occurrence plot (choose two features + one label).")
-    # Return all 8 outputs
     return (
-        final_result_text,  # (1) Predictions
-        severity,           # (2) Severity
-        total_count_md,     # (3) Total patient count
-        fig_dist,           # (4) Distribution Plot
-        nn_md_str,          # (5) Nearest Neighbors
-        fig_co_occ,         # (6) Co-occurrence
-        fig_input_bar,      # (7) Bar Chart (input features)
-        fig_label_bar       # (8) Bar Chart (labels)
     )
 ######################################
-# 5) MAPPING (user -> int)
 ######################################
 input_mapping = {
     'YNURSMDE': {"Yes": 1, "No": 0},
     'YMDEYR': {"Yes": 1, "No": 2},
     'YSOCMDE': {"Yes": 1, "No": 0},
-    'YMDESUD5ANYO': {"SUD only": 1, "MDE only": 2, "SUD & MDE": 3, "Neither": 4},
     'YMSUD5YANY': {"Yes": 1, "No": 0},
-    'YUSUITHK': {"Yes": 1, "No": 2, "Unsure": 3, "Don't want to answer": 4},
     'YMDETXRX': {"Yes": 1, "No": 0},
-    'YUSUITHKYR': {"Yes": 1, "No": 2, "Unsure": 3, "Don't want to answer": 4},
     'YMDERSUD5ANY': {"Yes": 1, "No": 0},
-    'YUSUIPLNYR': {"Yes": 1, "No": 2, "Unsure": 3, "Don't want to answer": 4},
     'YCOUNMDE': {"Yes": 1, "No": 0},
     'YPSY1MDE': {"Yes": 1, "No": 0},
     'YHLTMDE': {"Yes": 1, "No": 0},
     'YDOCMDE': {"Yes": 1, "No": 0},
     'YPSY2MDE': {"Yes": 1, "No": 0},
     'YMDEHARX': {"Yes": 1, "No": 0},
-    'LVLDIFMEM2': {"No Difficulty": 1, "Some Difficulty": 2, "A lot or cannot do": 3},
     'MDEIMPY': {"Yes": 1, "No": 2},
     'YMDEHPO': {"Yes": 1, "No": 0},
     'YMIMS5YANY': {"Yes": 1, "No": 0},
@@ -374,7 +301,7 @@ input_mapping = {
     'YMIUD5YANY': {"Yes": 1, "No": 0},
     'YMDEHPRX': {"Yes": 1, "No": 0},
     'YMIMI5YANY': {"Yes": 1, "No": 0},
-    'YUSUIPLN': {"Yes": 1, "No": 2, "Unsure": 3, "Don't want to answer": 4},
     'YTXMDEYR': {"Yes": 1, "No": 0},
     'YMDEAUD5YR': {"Yes": 1, "No": 0},
     'YRXMDEYR': {"Yes": 1, "No": 0},
@@ -382,166 +309,127 @@ input_mapping = {
 }
 ######################################
-# 6) THE GRADIO INTERFACE
 ######################################
-import gradio as gr
-# (A) The original required inputs
-original_inputs = [
-    gr.Dropdown(list(input_mapping['YMDEYR'].keys()), label="YMDEYR: Past Year MDE?"),
-    gr.Dropdown(list(input_mapping['YMDERSUD5ANY'].keys()), label="YMDERSUD5ANY: MDE or SUD - ANY?"),
-    gr.Dropdown(list(input_mapping['YMDEIMAD5YR'].keys()), label="YMDEIMAD5YR: MDE + ALCOHOL?"),
-    gr.Dropdown(list(input_mapping['YMIMS5YANY'].keys()), label="YMIMS5YANY: MDE + SUBSTANCE?"),
-    gr.Dropdown(list(input_mapping['YMDELT'].keys()), label="YMDELT: MDE in Lifetime?"),
-    gr.Dropdown(list(input_mapping['YMDEHARX'].keys()), label="YMDEHARX: Saw Health Prof + Meds?"),
-    gr.Dropdown(list(input_mapping['YMDEHPRX'].keys()), label="YMDEHPRX: Saw Health Prof or Meds?"),
-    gr.Dropdown(list(input_mapping['YMDETXRX'].keys()), label="YMDETXRX: Received Treatment?"),
-    gr.Dropdown(list(input_mapping['YMDEHPO'].keys()), label="YMDEHPO: Saw Health Prof Only?"),
-    gr.Dropdown(list(input_mapping['YMDEAUD5YR'].keys()), label="YMDEAUD5YR: MDE + Alcohol Use?"),
-    gr.Dropdown(list(input_mapping['YMIMI5YANY'].keys()), label="YMIMI5YANY: MDE + ILL Drug Use?"),
-    gr.Dropdown(list(input_mapping['YMIUD5YANY'].keys()), label="YMIUD5YANY: MDE + ILL Drug Use?"),
-    gr.Dropdown(list(input_mapping['YMDESUD5ANYO'].keys()), label="YMDESUD5ANYO: MDE vs SUD vs BOTH vs NEITHER"),
-    # Consultations
-    gr.Dropdown(list(input_mapping['YNURSMDE'].keys()), label="YNURSMDE: Nurse/OT about MDE?"),
-    gr.Dropdown(list(input_mapping['YSOCMDE'].keys()), label="YSOCMDE: Social Worker?"),
-    gr.Dropdown(list(input_mapping['YCOUNMDE'].keys()), label="YCOUNMDE: Counselor?"),
-    gr.Dropdown(list(input_mapping['YPSY1MDE'].keys()), label="YPSY1MDE: Psychologist?"),
-    gr.Dropdown(list(input_mapping['YPSY2MDE'].keys()), label="YPSY2MDE: Psychiatrist?"),
-    gr.Dropdown(list(input_mapping['YHLTMDE'].keys()), label="YHLTMDE: Health Prof?"),
-    gr.Dropdown(list(input_mapping['YDOCMDE'].keys()), label="YDOCMDE: GP/Family MD?"),
-    gr.Dropdown(list(input_mapping['YTXMDEYR'].keys()), label="YTXMDEYR: Doctor/Health Prof?"),
-    # Suicidal
-    gr.Dropdown(list(input_mapping['YUSUITHKYR'].keys()), label="YUSUITHKYR: Serious Suicide Thoughts?"),
-    gr.Dropdown(list(input_mapping['YUSUIPLNYR'].keys()), label="YUSUIPLNYR: Made Plans?"),
-    gr.Dropdown(list(input_mapping['YUSUITHK'].keys()), label="YUSUITHK: Suicide Thoughts (12 mo)?"),
-    gr.Dropdown(list(input_mapping['YUSUIPLN'].keys()), label="YUSUIPLN: Made Plans (12 mo)?"),
-    # Impairments
-    gr.Dropdown(list(input_mapping['MDEIMPY'].keys()), label="MDEIMPY: Severe Role Impairment?"),
-    gr.Dropdown(list(input_mapping['LVLDIFMEM2'].keys()), label="LVLDIFMEM2: Difficulty Remembering/Concentrating?"),
-    gr.Dropdown(list(input_mapping['YMSUD5YANY'].keys()), label="YMSUD5YANY: MDE + Substance?"),
-    gr.Dropdown(list(input_mapping['YRXMDEYR'].keys()), label="YRXMDEYR: Used Meds for MDE (12 mo)?"),
-]
-# (B) The new co-occurrence inputs
-# We'll give them defaults of "None" to indicate no selection.
-all_cols = ["None"] + df.columns.tolist()  # 'None' plus the actual columns from your df
-co_occ_feature1 = gr.Dropdown(all_cols, label="Co-Occ Feature 1", value="None")
-co_occ_feature2 = gr.Dropdown(all_cols, label="Co-Occ Feature 2", value="None")
-all_label_cols = ["None"] + list(predictor.prediction_map.keys())  # e.g., "YOWRCONC", "YOWRHRS", ...
-co_occ_label = gr.Dropdown(all_label_cols, label="Co-Occ Label", value="None")
-# Combine them into a single input list
-inputs = original_inputs + [co_occ_feature1, co_occ_feature2, co_occ_label]
-# 8 outputs as before
-outputs = [
-    gr.Textbox(label="Prediction Results", lines=15),
-    gr.Textbox(label="Mental Health Severity", lines=2),
-    gr.Markdown(label="Total Patient Count"),
-    gr.Plot(label="Distribution Plot (Sample)"),
-    gr.Markdown(label="Nearest Neighbors (K=2)"),
-    gr.Plot(label="Co-occurrence Plot"),
-    gr.Plot(label="Same Value Bar (Inputs)"),
-    gr.Plot(label="Predicted Label Bar")
-]
 ######################################
-# 7) WRAPPER
 ######################################
-def predict_with_text(
-    # match the function signature exactly (29 required + 3 for co-occ)
-    YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
-    YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
-    YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
-    YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
-    YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR,
-    co_occ_feature1, co_occ_feature2, co_occ_label
-):
-    # Validate the original 29 fields
-    valid = validate_inputs(
-        YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
-        YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
-        YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
-        YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
-        YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
-    )
-    if not valid:
-        return (
-            "Please select all required fields.",
-            "Validation Error",
-            "No data",
-            None,
-            "No data",
-            None,
-            None,
-            None
         )
-    # Map to numeric
-    user_inputs = {
-        'YNURSMDE': input_mapping['YNURSMDE'][YNURSMDE],
-        'YMDEYR': input_mapping['YMDEYR'][YMDEYR],
-        'YSOCMDE': input_mapping['YSOCMDE'][YSOCMDE],
-        'YMDESUD5ANYO': input_mapping['YMDESUD5ANYO'][YMDESUD5ANYO],
-        'YMSUD5YANY': input_mapping['YMSUD5YANY'][YMSUD5YANY],
-        'YUSUITHK': input_mapping['YUSUITHK'][YUSUITHK],
-        'YMDETXRX': input_mapping['YMDETXRX'][YMDETXRX],
-        'YUSUITHKYR': input_mapping['YUSUITHKYR'][YUSUITHKYR],
-        'YMDERSUD5ANY': input_mapping['YMDERSUD5ANY'][YMDERSUD5ANY],
-        'YUSUIPLNYR': input_mapping['YUSUIPLNYR'][YUSUIPLNYR],
-        'YCOUNMDE': input_mapping['YCOUNMDE'][YCOUNMDE],
-        'YPSY1MDE': input_mapping['YPSY1MDE'][YPSY1MDE],
-        'YHLTMDE': input_mapping['YHLTMDE'][YHLTMDE],
-        'YDOCMDE': input_mapping['YDOCMDE'][YDOCMDE],
-        'YPSY2MDE': input_mapping['YPSY2MDE'][YPSY2MDE],
-        'YMDEHARX': input_mapping['YMDEHARX'][YMDEHARX],
-        'LVLDIFMEM2': input_mapping['LVLDIFMEM2'][LVLDIFMEM2],
-        'MDEIMPY': input_mapping['MDEIMPY'][MDEIMPY],
-        'YMDEHPO': input_mapping['YMDEHPO'][YMDEHPO],
-        'YMIMS5YANY': input_mapping['YMIMS5YANY'][YMIMS5YANY],
-        'YMDEIMAD5YR': input_mapping['YMDEIMAD5YR'][YMDEIMAD5YR],
-        'YMIUD5YANY': input_mapping['YMIUD5YANY'][YMIUD5YANY],
-        'YMDEHPRX': input_mapping['YMDEHPRX'][YMDEHPRX],
-        'YMIMI5YANY': input_mapping['YMIMI5YANY'][YMIMI5YANY],
-        'YUSUIPLN': input_mapping['YUSUIPLN'][YUSUIPLN],
-        'YTXMDEYR': input_mapping['YTXMDEYR'][YTXMDEYR],
-        'YMDEAUD5YR': input_mapping['YMDEAUD5YR'][YMDEAUD5YR],
-        'YRXMDEYR': input_mapping['YRXMDEYR'][YRXMDEYR],
-        'YMDELT': input_mapping['YMDELT'][YMDELT]
-    }
-    # Call the core predict function with the co-occ choices as well
-    return predict(
-        **user_inputs,
-        co_occ_feature1=co_occ_feature1,
-        co_occ_feature2=co_occ_feature2,
-        co_occ_label=co_occ_label
-    )
 custom_css = """
-.gradio-container * {
-    color: #1B1212 !important;
-}
 """
-interface = gr.Interface(
-    fn=predict_with_text,
-    inputs=inputs,
-    outputs=outputs,
-    title="Mental Health Screening (NSDUH) with Selective Co-Occurrence",
-    css=custom_css,
-    description="""
-    **Instructions**:
-    1. Fill out all required fields regarding MDE/Substance Use/Consultations/Suicidal/Impairments.
-    2. (Optional) Choose 2 features and 1 label for the *Co-occurrence* plot.
-       - If you do not select them (or leave them as "None"), that plot will be skipped.
-    3. Click "Submit" to get predictions, severity, distribution plots, nearest neighbors, and your custom co-occurrence chart.
-    """
-)
-if __name__ == "__main__":
-    interface.launch()

 import pickle
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import gradio as gr
+######################################
+# 1) Load Data & Prepare
+######################################
 df = pd.read_csv("X_train_Y_Train_merged_train.csv")
+model_filenames = [
+    "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
+    "YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
+    "YODPDISC.pkl", "YOLOSEV.pkl", "YOWRDCSN.pkl", "YODSMMDE.pkl",
+    "YO_MDEA3.pkl", "YODPLSIN.pkl", "YOWRELES.pkl", "YOPB2WK.pkl"
+]
+model_path = "models/"
 ######################################
+# 2) Model Predictor
 ######################################
 class ModelPredictor:
     def __init__(self, model_path, model_filenames):
         self.models = self.load_models()
         # Mapping from label column to human-readable strings for 0/1
         self.prediction_map = {
+            "YOWRCONC": ["Did not have difficulty concentrating", "Had difficulty concentrating"],
+            "YOSEEDOC": ["Did not feel the need to see a doctor", "Felt the need to see a doctor"],
+            "YOWRHRS": ["Did not have trouble sleeping", "Had trouble sleeping"],
+            "YO_MDEA5": ["Others did not notice restlessness/lethargy", "Others noticed restlessness/lethargy"],
+            "YOWRCHR": ["Did not feel so sad", "Felt so sad nothing could cheer up"],
+            "YOWRLSIN": ["Did not feel bored and lose interest", "Felt bored and lost interest"],
             "YODPPROB": ["No other problems for 2+ weeks", "Had other problems for 2+ weeks"],
+            "YOWRPROB": ["Did not have the worst time ever feeling", "Had the worst time ever feeling"],
+            "YODPR2WK": ["No periods of 2+ weeks feelings", "Had periods of 2+ weeks feelings"],
+            "YOWRDEPR": ["Did not feel depressed mostly everyday", "Felt depressed mostly everyday"],
             "YODPDISC": ["Mood not depressed overall", "Mood depressed overall (discrepancy)"],
+            "YOLOSEV": ["Did not lose interest in enjoyable things", "Lost interest in enjoyable things"],
+            "YOWRDCSN": ["Was able to make decisions", "Was unable to make decisions"],
+            "YODSMMDE": ["Never had depression for 2+ weeks", "Had depression for 2+ weeks"],
+            "YO_MDEA3": ["No appetite/weight changes", "Had appetite/weight changes"],
+            "YODPLSIN": ["Never bored/lost interest", "Felt bored/lost interest"],
+            "YOWRELES": ["Did not eat less than usual", "Ate less than usual"],
             "YODSCEV": ["Fewer severe symptoms", "More severe symptoms"],
+            "YOPB2WK": ["No uneasy feelings 2+ weeks", "Had uneasy feelings 2+ weeks"],
+            "YO_MDEA2": ["No issues w/ physical/mental well-being", "Issues w/ physical/mental well-being"]
         }
     def load_models(self):
         models = []
+        for filename in model_filenames:
+            filepath = self.model_path + filename
+            with open(filepath, 'rb') as file:
+                model = pickle.load(file)
+            models.append(model)
         return models
     def make_predictions(self, user_input):
+        # Each model => returns array of [0] or [1]
+        predictions = []
+        for model in self.models:
+            pred = model.predict(user_input)
+            predictions.append(pred.flatten())
+        return predictions
     def get_majority_vote(self, predictions):
         combined = np.concatenate(predictions)
+        # 0 or 1 with highest frequency
+        majority_vote = np.bincount(combined).argmax()
+        return majority_vote
     def evaluate_severity(self, majority_vote_count):
+        # Simple threshold approach
         if majority_vote_count >= 13:
+            return "Mental Health Severity: Severe"
         elif majority_vote_count >= 9:
+            return "Mental Health Severity: Moderate"
         elif majority_vote_count >= 5:
+            return "Mental Health Severity: Low"
         else:
+            return "Mental Health Severity: Very Low"
 ######################################
+# 3) Validate Inputs
 ######################################
 def validate_inputs(*args):
     for arg in args:
         if arg == '' or arg is None:
             return False
     return True
 ######################################
+# 4) Core Prediction
 ######################################
+predictor = ModelPredictor(model_path, model_filenames)
 def predict(
     YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
     YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
     YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
     YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
+    YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
 ):
     user_input_data = {
         'YNURSMDE': [int(YNURSMDE)],
         'YMDEYR': [int(YMDEYR)],
     }
     user_input = pd.DataFrame(user_input_data)
+    # 1) Predict
     predictions = predictor.make_predictions(user_input)
+    # 2) Majority vote
     majority_vote = predictor.get_majority_vote(predictions)
+    # 3) Count how many are '1'
+    num_ones = sum(np.concatenate(predictions) == 1)
+    # 4) Severity
+    severity = predictor.evaluate_severity(num_ones)
+    # 5) Grouped textual results
+    #    [Same grouping logic as before, or adapt as needed]
+    groups = {
         "Concentration_and_Decision_Making": ["YOWRCONC", "YOWRDCSN"],
         "Sleep_and_Energy_Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
         "Mood_and_Emotional_State": ["YOWRCHR", "YOWRLSIN", "YOWRDEPR", "YODPDISC",
                                                          "YODPR2WK", "YODSMMDE",
                                                          "YOPB2WK"]
     }
+    grouped_text = {k: [] for k in groups}
+    for i, pred in enumerate(predictions):
+        col_name = model_filenames[i].split('.')[0]
+        pred_val = pred[0]
+        if col_name in predictor.prediction_map and pred_val in [0,1]:
+            text_val = predictor.prediction_map[col_name][pred_val]
         else:
+            text_val = f"Prediction={pred_val}"
+        # Find which group
+        assigned = False
+        for gname, gcols in groups.items():
+            if col_name in gcols:
+                grouped_text[gname].append(f"{col_name} => {text_val}")
+                assigned = True
                 break
+        if not assigned:
+            # Or skip
             pass
+    final_str = []
+    for gname, items in grouped_text.items():
+        if items:
+            final_str.append(f"**{gname.replace('_',' ')}**")
+            final_str.append("\n".join(items))
+            final_str.append("\n")
+    final_str = "\n".join(final_str).strip()
+    if not final_str:
+        final_str = "No predictions made. Please check inputs."
+    # 6) Additional charts: total patients, distribution for input features, etc.
     total_patients = len(df)
+    total_patient_markdown = (
+        f"### Total Patient Count\nThere are **{total_patients}** patients in the dataset."
     )
+    # A) Bar chart for input features
+    same_val_counts = {}
+    for col, val_list in user_input_data.items():
+        val_ = val_list[0]
+        same_val_counts[col] = len(df[df[col] == val_])
+    bar_input_df = pd.DataFrame({"Feature": list(same_val_counts.keys()),
+                                 "Count": list(same_val_counts.values())})
+    fig_bar_input = px.bar(bar_input_df, x="Feature", y="Count",
+                           title="Number of Patients with Same Input Feature Values")
+    fig_bar_input.update_layout(width=800, height=500)
+    # B) Bar chart for predicted labels
     label_counts = {}
+    all_preds_flat = np.concatenate(predictions)
+    for i, arr in enumerate(predictions):
+        lbl_col = model_filenames[i].split('.')[0]
+        pred_val = arr[0]
+        if pred_val in [0,1]:
+            label_counts[lbl_col] = len(df[df[lbl_col] == pred_val])
+    if label_counts:
+        bar_label_df = pd.DataFrame({"Label": list(label_counts.keys()),
+                                     "Count": list(label_counts.values())})
+        fig_bar_labels = px.bar(bar_label_df, x="Label", y="Count",
+                                title="Number of Patients with the Same Predicted Label")
+        fig_bar_labels.update_layout(width=800, height=500)
     else:
+        fig_bar_labels = px.bar(title="No valid predicted labels to display.")
+        fig_bar_labels.update_layout(width=800, height=500)
+    # C) Distribution Plot (small sample)
+    #    We'll pick the first 4 user_input columns & first 3 labels
+    subset_input_cols = list(user_input_data.keys())[:4]
+    subset_labels = [fn.split('.')[0] for fn in model_filenames[:3]]
+    dist_rows = []
+    for feat in subset_input_cols:
         if feat not in df.columns:
             continue
+        for label_col in subset_labels:
+            if label_col not in df.columns:
                 continue
+            tmp = df.groupby([feat, label_col]).size().reset_index(name="count")
+            tmp["feature"] = feat
+            tmp["label"] = label_col
+            dist_rows.append(tmp)
+    if dist_rows:
+        big_dist_df = pd.concat(dist_rows, ignore_index=True)
+        fig_dist = px.bar(big_dist_df,
+                          x=big_dist_df.columns[0],
+                          y="count",
+                          color=big_dist_df.columns[1],
+                          facet_row="feature",
+                          facet_col="label",
+                          title="Distribution of Sample Input Features vs. Sample Predicted Labels")
+        fig_dist.update_layout(width=1000, height=700)
     else:
+        fig_dist = px.bar(title="Distribution plot not generated.")
+    # D) Nearest Neighbors (K=2) [Optional as before]
+    #    ... omitted for brevity if you want to keep from prior code ...
+    #    or keep it.
+    # For now, let's produce an empty markdown
+    nearest_neighbors_markdown = "Nearest neighbors omitted here for brevity..."
+    # We won't produce a default co-occurrence plot here, since we do it in a separate tab.
+    # Return 8 items
     return (
+        final_str,
+        severity,
+        total_patient_markdown,
+        fig_dist,
+        nearest_neighbors_markdown,
+        None,  # placeholder for a single co-occurrence plot
+        fig_bar_input,
+        fig_bar_labels
     )
 ######################################
+# 5) Input Mapping
 ######################################
 # Maps each input column name to a dict that translates the answer text shown
 # to the user into an integer code. The inner dict keys are what the Gradio
 # dropdowns below use as their choices (list(input_mapping[col].keys())).
 # Note that "No" is coded 0 for some columns and 2 for others, so the inner
 # dicts are not interchangeable.
 # Presumably the integer codes match the values stored in the training CSV;
 # confirm against predict() and the dataset codebook.
 # NOTE(review): the UI section also looks up 'YMDEIMAD5YR' and 'YMDELT', which
 # are not among the keys visible here - confirm both are defined, otherwise
 # building the dropdowns raises KeyError.
 input_mapping = {
     'YNURSMDE': {"Yes": 1, "No": 0},
     'YMDEYR': {"Yes": 1, "No": 2},
     'YSOCMDE': {"Yes": 1, "No": 0},
+    'YMDESUD5ANYO': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
     'YMSUD5YANY': {"Yes": 1, "No": 0},
+    'YUSUITHK': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
     'YMDETXRX': {"Yes": 1, "No": 0},
+    'YUSUITHKYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
     'YMDERSUD5ANY': {"Yes": 1, "No": 0},
+    'YUSUIPLNYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
     'YCOUNMDE': {"Yes": 1, "No": 0},
     'YPSY1MDE': {"Yes": 1, "No": 0},
     'YHLTMDE': {"Yes": 1, "No": 0},
     'YDOCMDE': {"Yes": 1, "No": 0},
     'YPSY2MDE': {"Yes": 1, "No": 0},
     'YMDEHARX': {"Yes": 1, "No": 0},
+    'LVLDIFMEM2': {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
     'MDEIMPY': {"Yes": 1, "No": 2},
     'YMDEHPO': {"Yes": 1, "No": 0},
     'YMIMS5YANY': {"Yes": 1, "No": 0},
     'YMIUD5YANY': {"Yes": 1, "No": 0},
     'YMDEHPRX': {"Yes": 1, "No": 0},
     'YMIMI5YANY': {"Yes": 1, "No": 0},
+    'YUSUIPLN': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
     'YTXMDEYR': {"Yes": 1, "No": 0},
     'YMDEAUD5YR': {"Yes": 1, "No": 0},
     'YRXMDEYR': {"Yes": 1, "No": 0},
 }
 ######################################
+# 6) Co-Occurrence Function (Separate)
 ######################################
+def co_occurrence_plot(feature1, feature2, label_col):
+    """
+    Generate a single co-occurrence bar chart grouping by [feature1, feature2, label_col].
+    We set a custom width/height so it's clearly visible.
+    """
+    if not feature1 or not feature2 or not label_col:
+        return px.bar(title="Please select all three fields.")
+    if feature1 not in df.columns or feature2 not in df.columns or label_col not in df.columns:
+        return px.bar(title="Selected columns not found in the dataset.")
+    # Group
+    grouped_df = df.groupby([feature1, feature2, label_col]).size().reset_index(name="count")
+    fig = px.bar(
+        grouped_df,
+        x=feature1,
+        y="count",
+        color=label_col,
+        facet_col=feature2,
+        title=f"Co-Occurrence Plot: {feature1} & {feature2} vs. {label_col}"
+    )
+    fig.update_layout(width=1000, height=600)
+    return fig
 ######################################
+# 7) Gradio with Tabs
 ######################################
 # Builds the Gradio interface as two tabs:
 #   * "Prediction": one dropdown per input column (choices are the keys of
 #     input_mapping[col]), eight output widgets and a button wired to predict().
 #   * "Co-occurrence": three column pickers and a button wired to
 #     co_occurrence_plot().
 # The inputs=[...] and outputs=[...] lists are positional, so their order must
 # stay in sync with predict()'s parameters and return values.
+with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
+    with gr.Tab("Prediction"):
+        # Inputs (same order as function signature)
+        YMDEYR_dd = gr.Dropdown(list(input_mapping['YMDEYR'].keys()), label="YMDEYR")
+        YMDERSUD5ANY_dd = gr.Dropdown(list(input_mapping['YMDERSUD5ANY'].keys()), label="YMDERSUD5ANY")
+        YMDEIMAD5YR_dd = gr.Dropdown(list(input_mapping['YMDEIMAD5YR'].keys()), label="YMDEIMAD5YR")
+        YMIMS5YANY_dd = gr.Dropdown(list(input_mapping['YMIMS5YANY'].keys()), label="YMIMS5YANY")
+        YMDELT_dd = gr.Dropdown(list(input_mapping['YMDELT'].keys()), label="YMDELT")
+        YMDEHARX_dd = gr.Dropdown(list(input_mapping['YMDEHARX'].keys()), label="YMDEHARX")
+        YMDEHPRX_dd = gr.Dropdown(list(input_mapping['YMDEHPRX'].keys()), label="YMDEHPRX")
+        YMDETXRX_dd = gr.Dropdown(list(input_mapping['YMDETXRX'].keys()), label="YMDETXRX")
+        YMDEHPO_dd = gr.Dropdown(list(input_mapping['YMDEHPO'].keys()), label="YMDEHPO")
+        YMDEAUD5YR_dd = gr.Dropdown(list(input_mapping['YMDEAUD5YR'].keys()), label="YMDEAUD5YR")
+        YMIMI5YANY_dd = gr.Dropdown(list(input_mapping['YMIMI5YANY'].keys()), label="YMIMI5YANY")
+        YMIUD5YANY_dd = gr.Dropdown(list(input_mapping['YMIUD5YANY'].keys()), label="YMIUD5YANY")
+        YMDESUD5ANYO_dd = gr.Dropdown(list(input_mapping['YMDESUD5ANYO'].keys()), label="YMDESUD5ANYO")
+        # Consultations
+        YNURSMDE_dd = gr.Dropdown(list(input_mapping['YNURSMDE'].keys()), label="YNURSMDE")
+        YSOCMDE_dd = gr.Dropdown(list(input_mapping['YSOCMDE'].keys()), label="YSOCMDE")
+        YCOUNMDE_dd = gr.Dropdown(list(input_mapping['YCOUNMDE'].keys()), label="YCOUNMDE")
+        YPSY1MDE_dd = gr.Dropdown(list(input_mapping['YPSY1MDE'].keys()), label="YPSY1MDE")
+        YPSY2MDE_dd = gr.Dropdown(list(input_mapping['YPSY2MDE'].keys()), label="YPSY2MDE")
+        YHLTMDE_dd = gr.Dropdown(list(input_mapping['YHLTMDE'].keys()), label="YHLTMDE")
+        YDOCMDE_dd = gr.Dropdown(list(input_mapping['YDOCMDE'].keys()), label="YDOCMDE")
+        YTXMDEYR_dd = gr.Dropdown(list(input_mapping['YTXMDEYR'].keys()), label="YTXMDEYR")
+        # Suicidal thoughts/plans
+        YUSUITHKYR_dd = gr.Dropdown(list(input_mapping['YUSUITHKYR'].keys()), label="YUSUITHKYR")
+        YUSUIPLNYR_dd = gr.Dropdown(list(input_mapping['YUSUIPLNYR'].keys()), label="YUSUIPLNYR")
+        YUSUITHK_dd = gr.Dropdown(list(input_mapping['YUSUITHK'].keys()), label="YUSUITHK")
+        YUSUIPLN_dd = gr.Dropdown(list(input_mapping['YUSUIPLN'].keys()), label="YUSUIPLN")
+        # Impairments
+        MDEIMPY_dd = gr.Dropdown(list(input_mapping['MDEIMPY'].keys()), label="MDEIMPY")
+        LVLDIFMEM2_dd = gr.Dropdown(list(input_mapping['LVLDIFMEM2'].keys()), label="LVLDIFMEM2")
+        YMSUD5YANY_dd = gr.Dropdown(list(input_mapping['YMSUD5YANY'].keys()), label="YMSUD5YANY")
+        YRXMDEYR_dd = gr.Dropdown(list(input_mapping['YRXMDEYR'].keys()), label="YRXMDEYR")
+        # 8 outputs
         # Presumably these mirror, in order, the 8-item tuple returned by
         # predict(); out_cooc looks like the slot that receives the None
         # placeholder, since the real co-occurrence plot lives in the second
         # tab - confirm against predict()'s return statement.
+        out_pred_res = gr.Textbox(label="Prediction Results", lines=8)
+        out_sev = gr.Textbox(label="Mental Health Severity", lines=2)
+        out_count = gr.Markdown(label="Total Patient Count")
+        out_distplot = gr.Plot(label="Distribution Plot")
+        out_nn = gr.Markdown(label="Nearest Neighbors Summary")
+        out_cooc = gr.Plot(label="Co-occurrence Plot Placeholder")
+        out_bar_input = gr.Plot(label="Input Feature Counts")
+        out_bar_labels = gr.Plot(label="Predicted Label Counts")
+        # Button
+        predict_btn = gr.Button("Predict")
+        # Link button to the function
         # 29 dropdown values are passed positionally to predict(), in the
         # order listed here.
+        predict_btn.click(
+            fn=predict,
+            inputs=[
+                YMDEYR_dd, YMDERSUD5ANY_dd, YMDEIMAD5YR_dd, YMIMS5YANY_dd, YMDELT_dd, YMDEHARX_dd,
+                YMDEHPRX_dd, YMDETXRX_dd, YMDEHPO_dd, YMDEAUD5YR_dd, YMIMI5YANY_dd, YMIUD5YANY_dd,
+                YMDESUD5ANYO_dd, YNURSMDE_dd, YSOCMDE_dd, YCOUNMDE_dd, YPSY1MDE_dd, YPSY2MDE_dd,
+                YHLTMDE_dd, YDOCMDE_dd, YTXMDEYR_dd, YUSUITHKYR_dd, YUSUIPLNYR_dd, YUSUITHK_dd,
+                YUSUIPLN_dd, MDEIMPY_dd, LVLDIFMEM2_dd, YMSUD5YANY_dd, YRXMDEYR_dd
+            ],
+            outputs=[
+                out_pred_res, out_sev, out_count, out_distplot, out_nn, out_cooc, out_bar_input, out_bar_labels
+            ]
         )
+    with gr.Tab("Co-occurrence"):
+        gr.Markdown("## Generate a Co-Occurrence Plot on Demand\nSelect two features and one label:")
         # All three pickers offer every column of df, sorted by name, so
         # nothing stops the user from choosing the same column more than once.
+        with gr.Row():
+            feature1_dd = gr.Dropdown(sorted(df.columns), label="Feature 1")
+            feature2_dd = gr.Dropdown(sorted(df.columns), label="Feature 2")
+            label_dd = gr.Dropdown(sorted(df.columns), label="Label Column")
+        out_co_occ_plot = gr.Plot(label="Co-occurrence Plot")
+        co_occ_btn = gr.Button("Generate Plot")
+        # Link to co_occurrence_plot function
+        co_occ_btn.click(
+            fn=co_occurrence_plot,
+            inputs=[feature1_dd, feature2_dd, label_dd],
+            outputs=out_co_occ_plot
+        )
+# Optional custom CSS for bigger container
 # NOTE(review): this string is assigned after the gr.Blocks(...) context has
 # already been built with its own inline css argument, and nothing in the
 # visible code reads custom_css - as written it appears to have no effect.
 # Confirm whether it was meant to be passed as css=custom_css.
 custom_css = """
+    .gradio-container {
+        max-width: 1200px;
+        margin-left: auto;
+        margin-right: auto;
+    }
 """
+# Launch
 # Starts the Gradio server for the Blocks app defined above, listening on
 # host "0.0.0.0" and port 7860. This runs at import time (there is no
 # __main__ guard in the visible code).
+demo.launch(server_name="0.0.0.0", server_port=7860)