Spaces:

pantdipendra
/

AdolescentsMentalHealthPrediction

Running

App Files Community

pantdipendra committed 19 days ago

Commit

16ca108

verified ·

1 Parent(s): 6749d1f

Update app.py

Browse files

Files changed (1) hide show

app.py +216 -242

app.py CHANGED Viewed

@@ -5,11 +5,10 @@ import plotly.express as px
 import gradio as gr
 ######################################
-# 1) Load Data & Prepare
 ######################################
-df = pd.read_csv("X_train_Y_Train_merged_train.csv")
-# List of model filenames (adjust if needed)
 model_filenames = [
     "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
     "YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
@@ -18,70 +17,60 @@ model_filenames = [
 ]
 model_path = "models/"
-######################################
-# 2) Model Predictor
-######################################
 class ModelPredictor:
     def __init__(self, model_path, model_filenames):
         self.model_path = model_path
         self.model_filenames = model_filenames
         self.models = self.load_models()
-        # Mapping from label column to human-readable strings for 0/1
         self.prediction_map = {
-            "YOWRCONC": ["Did not have difficulty concentrating", "Had difficulty concentrating"],
-            "YOSEEDOC": ["Did not feel the need to see a doctor", "Felt the need to see a doctor"],
-            "YOWRHRS": ["Did not have trouble sleeping", "Had trouble sleeping"],
-            "YO_MDEA5": ["Others did not notice restlessness/lethargy", "Others noticed restlessness/lethargy"],
-            "YOWRCHR": ["Did not feel so sad", "Felt so sad nothing could cheer up"],
-            "YOWRLSIN": ["Did not feel bored and lose interest", "Felt bored and lost interest"],
-            "YODPPROB": ["No other problems for 2+ weeks", "Had other problems for 2+ weeks"],
-            "YOWRPROB": ["Did not have the worst time ever feeling", "Had the worst time ever feeling"],
-            "YODPR2WK": ["No periods of 2+ weeks feelings", "Had periods of 2+ weeks feelings"],
             "YOWRDEPR": ["Did not feel depressed mostly everyday", "Felt depressed mostly everyday"],
-            "YODPDISC": ["Mood not depressed overall", "Mood depressed overall (discrepancy)"],
-            "YOLOSEV": ["Did not lose interest in enjoyable things", "Lost interest in enjoyable things"],
             "YOWRDCSN": ["Was able to make decisions", "Was unable to make decisions"],
-            "YODSMMDE": ["Never had depression for 2+ weeks", "Had depression for 2+ weeks"],
             "YO_MDEA3": ["No appetite/weight changes", "Had appetite/weight changes"],
             "YODPLSIN": ["Never bored/lost interest", "Felt bored/lost interest"],
             "YOWRELES": ["Did not eat less than usual", "Ate less than usual"],
             "YODSCEV": ["Fewer severe symptoms", "More severe symptoms"],
-            "YOPB2WK": ["No uneasy feelings 2+ weeks", "Had uneasy feelings 2+ weeks"],
-            "YO_MDEA2": ["No issues w/ physical/mental well-being", "Issues w/ physical/mental well-being"]
         }
     def load_models(self):
-        models = []
-        for filename in model_filenames:
-            filepath = self.model_path + filename
-            with open(filepath, 'rb') as file:
-                model = pickle.load(file)
-            models.append(model)
-        return models
-    def make_predictions(self, user_input):
         """
-        Returns a list of numpy arrays, each array is [0] or [1].
-        The i-th array corresponds to the i-th model in self.models.
         """
         predictions = []
         for model in self.models:
-            pred = model.predict(user_input)
-            predictions.append(pred.flatten())
         return predictions
     def get_majority_vote(self, predictions):
-        """
-        Flatten all predictions from all models, combine them,
-        then find the majority class (0 or 1).
-        """
         combined = np.concatenate(predictions)
-        majority = np.bincount(combined).argmax()
-        return majority
-    # Simple threshold approach (0-4 => Very Low, 5-8 => Low, etc.)
-    def evaluate_severity(self, majority_vote_count):
         if majority_vote_count >= 13:
             return "Mental Health Severity: Severe"
         elif majority_vote_count >= 9:
@@ -91,22 +80,52 @@ class ModelPredictor:
         else:
             return "Mental Health Severity: Very Low"
 ######################################
-# 3) Validate Inputs
 ######################################
 def validate_inputs(*args):
     for arg in args:
-        if arg == '' or arg is None:
             return False
     return True
 ######################################
-# 4) Core Prediction
 ######################################
-predictor = ModelPredictor(model_path, model_filenames)
 def predict(
     YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
     YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
@@ -114,7 +133,7 @@ def predict(
     YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
     YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
 ):
-    # Validate
     if not validate_inputs(
         YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
         YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
@@ -133,53 +152,50 @@ def predict(
             None
         )
-    # Build dataframe from user inputs
-    user_input_data = {
-        'YNURSMDE': [int(YNURSMDE)],
-        'YMDEYR': [int(YMDEYR)],
-        'YSOCMDE': [int(YSOCMDE)],
-        'YMDESUD5ANYO': [int(YMDESUD5ANYO)],
-        'YMSUD5YANY': [int(YMSUD5YANY)],
-        'YUSUITHK': [int(YUSUITHK)],
-        'YMDETXRX': [int(YMDETXRX)],
-        'YUSUITHKYR': [int(YUSUITHKYR)],
-        'YMDERSUD5ANY': [int(YMDERSUD5ANY)],
-        'YUSUIPLNYR': [int(YUSUIPLNYR)],
-        'YCOUNMDE': [int(YCOUNMDE)],
-        'YPSY1MDE': [int(YPSY1MDE)],
-        'YHLTMDE': [int(YHLTMDE)],
-        'YDOCMDE': [int(YDOCMDE)],
-        'YPSY2MDE': [int(YPSY2MDE)],
-        'YMDEHARX': [int(YMDEHARX)],
-        'LVLDIFMEM2': [int(LVLDIFMEM2)],
-        'MDEIMPY': [int(MDEIMPY)],
-        'YMDEHPO': [int(YMDEHPO)],
-        'YMIMS5YANY': [int(YMIMS5YANY)],
-        'YMDEIMAD5YR': [int(YMDEIMAD5YR)],
-        'YMIUD5YANY': [int(YMIUD5YANY)],
-        'YMDEHPRX': [int(YMDEHPRX)],
-        'YMIMI5YANY': [int(YMIMI5YANY)],
-        'YUSUIPLN': [int(YUSUIPLN)],
-        'YTXMDEYR': [int(YTXMDEYR)],
-        'YMDEAUD5YR': [int(YMDEAUD5YR)],
-        'YRXMDEYR': [int(YRXMDEYR)],
-        'YMDELT': [int(YMDELT)]
     }
-    user_input = pd.DataFrame(user_input_data)
-    # 1) Predictions
-    predictions = predictor.make_predictions(user_input)
-    # 2) Majority vote
     majority_vote = predictor.get_majority_vote(predictions)
-    # 3) Count of '1's
-    num_ones = sum(np.concatenate(predictions) == 1)
-    # 4) Severity
-    severity = predictor.evaluate_severity(num_ones)
-    # 5) Group textual results
     groups = {
         "Concentration_and_Decision_Making": ["YOWRCONC", "YOWRDCSN"],
         "Sleep_and_Energy_Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
@@ -191,186 +207,145 @@ def predict(
                                                          "YOPB2WK"]
     }
-    grouped_text = {k: [] for k in groups}
     for i, arr in enumerate(predictions):
-        col_name = model_filenames[i].split('.')[0]
-        pred_val = arr[0]
-        if col_name in predictor.prediction_map and pred_val in [0,1]:
-            text_val = predictor.prediction_map[col_name][pred_val]
         else:
-            text_val = f"Prediction={pred_val}"
-        found_group = False
         for gname, gcols in groups.items():
-            if col_name in gcols:
-                grouped_text[gname].append(f"{col_name} => {text_val}")
-                found_group = True
                 break
-        # If not found_group, we do nothing (skip or put in a "misc" group)
-    final_str = []
-    for gname, items in grouped_text.items():
-        if items:
-            final_str.append(f"**{gname.replace('_',' ')}**")
-            final_str.append("\n".join(items))
-            final_str.append("\n")
-    final_str = "\n".join(final_str).strip()
-    if not final_str:
-        final_str = "No predictions made. Please check inputs."
-    # Additional info
-    total_patients = len(df)
-    total_patient_markdown = (
-        f"### Total Patient Count\nThere are **{total_patients}** patients in the dataset."
-    )
-    # A) Bar chart for input features
-    same_val_counts = {}
-    for col, val_list in user_input_data.items():
-        val_ = val_list[0]
-        same_val_counts[col] = len(df[df[col] == val_])
-    bar_input_df = pd.DataFrame({"Feature": list(same_val_counts.keys()),
-                                 "Count": list(same_val_counts.values())})
-    fig_bar_input = px.bar(
-        bar_input_df, x="Feature", y="Count",
-        title="Number of Patients with Same Input Feature Values"
-    )
-    fig_bar_input.update_layout(width=800, height=500)
-    # B) Bar chart for predicted labels
     label_counts = {}
     for i, arr in enumerate(predictions):
-        lbl_col = model_filenames[i].split('.')[0]
         pred_val = arr[0]
         if pred_val in [0,1]:
-            label_counts[lbl_col] = len(df[df[lbl_col] == pred_val])
     if label_counts:
-        bar_label_df = pd.DataFrame({"Label": list(label_counts.keys()),
-                                     "Count": list(label_counts.values())})
-        fig_bar_labels = px.bar(bar_label_df, x="Label", y="Count",
-                                title="Number of Patients with the Same Predicted Label")
-        fig_bar_labels.update_layout(width=800, height=500)
     else:
-        fig_bar_labels = px.bar(title="No valid predicted labels to display.")
-        fig_bar_labels.update_layout(width=800, height=500)
-    # C) Distribution Plot (small sample)
-    subset_input_cols = list(user_input_data.keys())[:4]  # first 4 input columns
-    subset_labels = [fn.split('.')[0] for fn in model_filenames[:3]]  # first 3 label columns
-    dist_rows = []
-    for feat in subset_input_cols:
-        if feat not in df.columns:
             continue
-        for label_col in subset_labels:
-            if label_col not in df.columns:
                 continue
-            tmp = df.groupby([feat, label_col]).size().reset_index(name="count")
-            tmp["feature"] = feat
-            tmp["label"] = label_col
-            dist_rows.append(tmp)
-    if dist_rows:
-        big_dist_df = pd.concat(dist_rows, ignore_index=True)
         fig_dist = px.bar(
-            big_dist_df,
-            x=big_dist_df.columns[0],
             y="count",
-            color=big_dist_df.columns[1],
             facet_row="feature",
             facet_col="label",
-            title="Distribution of Sample Input Features vs. Sample Predicted Labels"
         )
-        fig_dist.update_layout(width=1000, height=700)
     else:
         fig_dist = px.bar(title="Distribution plot not generated.")
-    # D) Nearest neighbors (placeholder or your own logic)
-    nearest_neighbors_markdown = "Nearest neighbors omitted or placed here if needed..."
-    # We won't produce a co-occurrence plot by default here, so set to None
-    co_occurrence_placeholder = None
-    # Return the 8 outputs
     return (
-        final_str,           # 1) Prediction Results
-        severity,            # 2) Mental Health Severity
-        total_patient_markdown,  # 3) Total Patient Count
-        fig_dist,            # 4) Distribution Plot
-        nearest_neighbors_markdown, # 5) Nearest Neighbors
-        co_occurrence_placeholder,  # 6) Co-occurrence Plot placeholder
-        fig_bar_input,       # 7) Bar Chart for input features
-        fig_bar_labels       # 8) Bar Chart for predicted labels
     )
-######################################
-# 5) Input Mapping
-######################################
-input_mapping = {
-    'YNURSMDE': {"Yes": 1, "No": 0},
-    'YMDEYR': {"Yes": 1, "No": 2},
-    'YSOCMDE': {"Yes": 1, "No": 0},
-    'YMDESUD5ANYO': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
-    'YMSUD5YANY': {"Yes": 1, "No": 0},
-    'YUSUITHK': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YMDETXRX': {"Yes": 1, "No": 0},
-    'YUSUITHKYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YMDERSUD5ANY': {"Yes": 1, "No": 0},
-    'YUSUIPLNYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YCOUNMDE': {"Yes": 1, "No": 0},
-    'YPSY1MDE': {"Yes": 1, "No": 0},
-    'YHLTMDE': {"Yes": 1, "No": 0},
-    'YDOCMDE': {"Yes": 1, "No": 0},
-    'YPSY2MDE': {"Yes": 1, "No": 0},
-    'YMDEHARX': {"Yes": 1, "No": 0},
-    'LVLDIFMEM2': {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
-    'MDEIMPY': {"Yes": 1, "No": 2},
-    'YMDEHPO': {"Yes": 1, "No": 0},
-    'YMIMS5YANY': {"Yes": 1, "No": 0},
-    'YMDEIMAD5YR': {"Yes": 1, "No": 0},
-    'YMIUD5YANY': {"Yes": 1, "No": 0},
-    'YMDEHPRX': {"Yes": 1, "No": 0},
-    'YMIMI5YANY': {"Yes": 1, "No": 0},
-    'YUSUIPLN': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YTXMDEYR': {"Yes": 1, "No": 0},
-    'YMDEAUD5YR': {"Yes": 1, "No": 0},
-    'YRXMDEYR': {"Yes": 1, "No": 0},
-    'YMDELT': {"Yes": 1, "No": 2}
-}
 ######################################
-# 6) Co-Occurrence Function
 ######################################
 def co_occurrence_plot(feature1, feature2, label_col):
     """
-    Generate a single co-occurrence bar chart grouping by [feature1, feature2, label_col].
     """
-    if not feature1 or not feature2 or not label_col:
         return px.bar(title="Please select all three fields.")
     if feature1 not in df.columns or feature2 not in df.columns or label_col not in df.columns:
         return px.bar(title="Selected columns not found in the dataset.")
-    grouped_df = df.groupby([feature1, feature2, label_col]).size().reset_index(name="count")
     fig = px.bar(
-        grouped_df,
         x=feature1,
         y="count",
         color=label_col,
         facet_col=feature2,
-        title=f"Co-Occurrence Plot: {feature1} & {feature2} vs. {label_col}"
     )
-    fig.update_layout(width=1000, height=600)
     return fig
 ######################################
-# 7) Gradio Interface with Tabs
 ######################################
-with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
     with gr.Tab("Prediction"):
-        # --------- INPUT FIELDS --------- #
         YMDEYR_dd = gr.Dropdown(list(input_mapping['YMDEYR'].keys()), label="YMDEYR")
         YMDERSUD5ANY_dd = gr.Dropdown(list(input_mapping['YMDERSUD5ANY'].keys()), label="YMDERSUD5ANY")
         YMDEIMAD5YR_dd = gr.Dropdown(list(input_mapping['YMDEIMAD5YR'].keys()), label="YMDEIMAD5YR")
@@ -395,7 +370,7 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
         YDOCMDE_dd = gr.Dropdown(list(input_mapping['YDOCMDE'].keys()), label="YDOCMDE")
         YTXMDEYR_dd = gr.Dropdown(list(input_mapping['YTXMDEYR'].keys()), label="YTXMDEYR")
-        # Suicidal thoughts/plans
         YUSUITHKYR_dd = gr.Dropdown(list(input_mapping['YUSUITHKYR'].keys()), label="YUSUITHKYR")
         YUSUIPLNYR_dd = gr.Dropdown(list(input_mapping['YUSUIPLNYR'].keys()), label="YUSUIPLNYR")
         YUSUITHK_dd = gr.Dropdown(list(input_mapping['YUSUITHK'].keys()), label="YUSUITHK")
@@ -407,10 +382,10 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
         YMSUD5YANY_dd = gr.Dropdown(list(input_mapping['YMSUD5YANY'].keys()), label="YMSUD5YANY")
         YRXMDEYR_dd = gr.Dropdown(list(input_mapping['YRXMDEYR'].keys()), label="YRXMDEYR")
-        # --------- PREDICT BUTTON (BEFORE OUTPUTS) --------- #
         predict_btn = gr.Button("Predict")
-        # --------- OUTPUTS (IN THE SAME ORDER AS THE RETURN TUPLE) --------- #
         out_pred_res = gr.Textbox(label="Prediction Results", lines=8)
         out_sev = gr.Textbox(label="Mental Health Severity", lines=2)
         out_count = gr.Markdown(label="Total Patient Count")
@@ -420,7 +395,7 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
         out_bar_input = gr.Plot(label="Input Feature Counts")
         out_bar_labels = gr.Plot(label="Predicted Label Counts")
-        # Link button to the function
         predict_btn.click(
             fn=predict,
             inputs=[
@@ -436,21 +411,20 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
             ]
         )
-    # ------------- SECOND TAB (CO-OCCURRENCE) -------------
     with gr.Tab("Co-occurrence"):
-        gr.Markdown("## Generate a Co-Occurrence Plot on Demand\nSelect two features and one label:")
         with gr.Row():
-            feature1_dd = gr.Dropdown(sorted(df.columns), label="Feature 1")
-            feature2_dd = gr.Dropdown(sorted(df.columns), label="Feature 2")
             label_dd = gr.Dropdown(sorted(df.columns), label="Label Column")
-        out_co_occ_plot = gr.Plot(label="Co-occurrence Plot")
-        co_occ_btn = gr.Button("Generate Plot")
-        co_occ_btn.click(
             fn=co_occurrence_plot,
-            inputs=[feature1_dd, feature2_dd, label_dd],
-            outputs=out_co_occ_plot
         )
-# Optionally, you can customize your CSS or server launch parameters
-demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 ######################################
+# 1) LOAD DATA & MODELS
 ######################################
+df = pd.read_csv("X_train_Y_Train_merged_train.csv")  # Make sure the CSV is present
 model_filenames = [
     "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
     "YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
 ]
 model_path = "models/"
 class ModelPredictor:
     def __init__(self, model_path, model_filenames):
         # Stores the model location and filenames, then eagerly loads every
         # model: constructing a predictor performs file I/O via load_models().
         self.model_path = model_path
         self.model_filenames = model_filenames
         self.models = self.load_models()
+        # Maps each label name to [text shown for prediction 0, text shown for prediction 1].
         # predict() indexes these lists with the predicted value and falls
         # back to "Prediction=<val>" for labels or values not covered here.
         self.prediction_map = {
+            "YOWRCONC": ["No difficulty concentrating", "Had difficulty concentrating"],
+            "YOSEEDOC": ["Did not feel need for doctor", "Felt need for doctor"],
+            "YOWRHRS": ["No trouble sleeping", "Had trouble sleeping"],
+            "YO_MDEA5": ["No restlessness/lethargy noted", "Others noticed restlessness/lethargy"],
+            "YOWRCHR": ["Did not feel so sad", "Felt so sad that nothing cheered up"],
+            "YOWRLSIN": ["No boredom/loss of interest", "Bored/lost interest in everything"],
+            "YODPPROB": ["No other 2+ week problems", "Had other 2+ week problems"],
+            "YOWRPROB": ["Did not have worst feeling ever", "Had worst time feeling"],
+            "YODPR2WK": ["No 2+ weeks of these feelings", "Had 2+ weeks of these feelings"],
             "YOWRDEPR": ["Did not feel depressed mostly everyday", "Felt depressed mostly everyday"],
+            "YODPDISC": ["Mood not depressed overall", "Mood depressed overall discrepancy"],
+            "YOLOSEV": ["No loss of interest in enjoyable things", "Lost interest in enjoyable things"],
             "YOWRDCSN": ["Was able to make decisions", "Was unable to make decisions"],
+            "YODSMMDE": ["No 2+ week depression episodes", "Had 2+ week depression episodes"],
             "YO_MDEA3": ["No appetite/weight changes", "Had appetite/weight changes"],
             "YODPLSIN": ["Never bored/lost interest", "Felt bored/lost interest"],
             "YOWRELES": ["Did not eat less than usual", "Ate less than usual"],
             "YODSCEV": ["Fewer severe symptoms", "More severe symptoms"],
+            "YOPB2WK": ["No uneasy feelings for 2+ weeks", "Had uneasy feelings for 2+ weeks"],
+            "YO_MDEA2": ["No daily well-being issues", "Daily well-being issues for 2+ weeks"]
         }
     def load_models(self):
         # Unpickles every file named in self.model_filenames and returns the
         # loaded objects in that same order.
         # Paths are built by plain string concatenation, so self.model_path
         # must already end with a path separator.
         # SECURITY: pickle.load can execute arbitrary code while loading;
         # only point this at model files from a trusted source.
         # No errors are handled here: a missing or corrupt file propagates
         # from open() / pickle.load().
+        loaded = []
+        for fname in self.model_filenames:
+            with open(self.model_path + fname, "rb") as f:
+                model = pickle.load(f)
+            loaded.append(model)
+        return loaded
+    def make_predictions(self, user_input: pd.DataFrame):
         """
+        Return list of arrays, each array is [0] or [1].
         """
         # Calls predict() on each entry of self.models with the same
         # user_input and collects the flattened outputs, so the i-th array
         # belongs to the i-th loaded model.
         # NOTE(review): "[0] or [1]" presumably assumes binary classifiers
         # and a single-row frame; nothing here enforces it - confirm.
         predictions = []
         for model in self.models:
+            out = model.predict(user_input)
+            predictions.append(out.flatten())
         return predictions
     def get_majority_vote(self, predictions):
         # Pools all per-model prediction arrays into one array and returns
         # the value that occurs most often across them.
         combined = np.concatenate(predictions)
+        # bincount tallies each non-negative integer value; argmax picks the most frequent (lowest value on a tie)
         # NOTE(review): np.bincount rejects float or negative input and
         # np.concatenate rejects an empty list - presumably the models emit
         # integer 0/1 labels; confirm.
+        return np.bincount(combined).argmax()
+    def evaluate_severity(self, majority_vote_count: int) -> str:
+        # Simple thresholds
         if majority_vote_count >= 13:
             return "Mental Health Severity: Severe"
         elif majority_vote_count >= 9:
         else:
             return "Mental Health Severity: Very Low"
+predictor = ModelPredictor(model_path, model_filenames)
 ######################################
+# 2) VALIDATION, INPUT MAPPING
 ######################################
 def validate_inputs(*args):
     # Returns True only when every positional argument is truthy, and
     # False as soon as one falsy argument is found.
     for arg in args:
+        if not arg:  # falsy: None and "", but also 0, False and empty containers
             return False
     return True
+input_mapping = {
     # Maps each input feature name to {dropdown label: numeric code}.
     # The dropdowns use the inner keys as choices, and predict() looks the
     # selected label up here to build the numeric row passed to the models.
     # Encodings are not uniform: most Yes/No features use 1/0, but YMDEYR,
     # MDEIMPY and YMDELT use 1/2, and the YUSUI* features use codes 1-4.
     # NOTE(review): presumably these codes mirror the coding used in the
     # training CSV - confirm before changing any value.
+    'YNURSMDE': {"Yes": 1, "No": 0},
+    'YMDEYR': {"Yes": 1, "No": 2},
+    'YSOCMDE': {"Yes": 1, "No": 0},
+    'YMDESUD5ANYO': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
+    'YMSUD5YANY': {"Yes": 1, "No": 0},
+    'YUSUITHK': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+    'YMDETXRX': {"Yes": 1, "No": 0},
+    'YUSUITHKYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+    'YMDERSUD5ANY': {"Yes": 1, "No": 0},
+    'YUSUIPLNYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+    'YCOUNMDE': {"Yes": 1, "No": 0},
+    'YPSY1MDE': {"Yes": 1, "No": 0},
+    'YHLTMDE': {"Yes": 1, "No": 0},
+    'YDOCMDE': {"Yes": 1, "No": 0},
+    'YPSY2MDE': {"Yes": 1, "No": 0},
+    'YMDEHARX': {"Yes": 1, "No": 0},
+    'LVLDIFMEM2': {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
+    'MDEIMPY': {"Yes": 1, "No": 2},
+    'YMDEHPO': {"Yes": 1, "No": 0},
+    'YMIMS5YANY': {"Yes": 1, "No": 0},
+    'YMDEIMAD5YR': {"Yes": 1, "No": 0},
+    'YMIUD5YANY': {"Yes": 1, "No": 0},
+    'YMDEHPRX': {"Yes": 1, "No": 0},
+    'YMIMI5YANY': {"Yes": 1, "No": 0},
+    'YUSUIPLN': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+    'YTXMDEYR': {"Yes": 1, "No": 0},
+    'YMDEAUD5YR': {"Yes": 1, "No": 0},
+    'YRXMDEYR': {"Yes": 1, "No": 0},
+    'YMDELT': {"Yes": 1, "No": 2}
+}
 ######################################
+# 3) PREDICT FUNCTION
 ######################################
 def predict(
     YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
     YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
     YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
     YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
 ):
+    # 1) Validate
     if not validate_inputs(
         YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
         YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
             None
         )
+    # 2) Map user-friendly -> numeric
+    user_input_dict = {
+        'YNURSMDE': input_mapping['YNURSMDE'][YNURSMDE],
+        'YMDEYR': input_mapping['YMDEYR'][YMDEYR],
+        'YSOCMDE': input_mapping['YSOCMDE'][YSOCMDE],
+        'YMDESUD5ANYO': input_mapping['YMDESUD5ANYO'][YMDESUD5ANYO],
+        'YMSUD5YANY': input_mapping['YMSUD5YANY'][YMSUD5YANY],
+        'YUSUITHK': input_mapping['YUSUITHK'][YUSUITHK],
+        'YMDETXRX': input_mapping['YMDETXRX'][YMDETXRX],
+        'YUSUITHKYR': input_mapping['YUSUITHKYR'][YUSUITHKYR],
+        'YMDERSUD5ANY': input_mapping['YMDERSUD5ANY'][YMDERSUD5ANY],
+        'YUSUIPLNYR': input_mapping['YUSUIPLNYR'][YUSUIPLNYR],
+        'YCOUNMDE': input_mapping['YCOUNMDE'][YCOUNMDE],
+        'YPSY1MDE': input_mapping['YPSY1MDE'][YPSY1MDE],
+        'YHLTMDE': input_mapping['YHLTMDE'][YHLTMDE],
+        'YDOCMDE': input_mapping['YDOCMDE'][YDOCMDE],
+        'YPSY2MDE': input_mapping['YPSY2MDE'][YPSY2MDE],
+        'YMDEHARX': input_mapping['YMDEHARX'][YMDEHARX],
+        'LVLDIFMEM2': input_mapping['LVLDIFMEM2'][LVLDIFMEM2],
+        'MDEIMPY': input_mapping['MDEIMPY'][MDEIMPY],
+        'YMDEHPO': input_mapping['YMDEHPO'][YMDEHPO],
+        'YMIMS5YANY': input_mapping['YMIMS5YANY'][YMIMS5YANY],
+        'YMDEIMAD5YR': input_mapping['YMDEIMAD5YR'][YMDEIMAD5YR],
+        'YMIUD5YANY': input_mapping['YMIUD5YANY'][YMIUD5YANY],
+        'YMDEHPRX': input_mapping['YMDEHPRX'][YMDEHPRX],
+        'YMIMI5YANY': input_mapping['YMIMI5YANY'][YMIMI5YANY],
+        'YUSUIPLN': input_mapping['YUSUIPLN'][YUSUIPLN],
+        'YTXMDEYR': input_mapping['YTXMDEYR'][YTXMDEYR],
+        'YMDEAUD5YR': input_mapping['YMDEAUD5YR'][YMDEAUD5YR],
+        'YRXMDEYR': input_mapping['YRXMDEYR'][YRXMDEYR],
+        'YMDELT': input_mapping['YMDELT'][YMDELT]
     }
+    user_df = pd.DataFrame(user_input_dict, index=[0])
+    # 3) Make predictions
+    predictions = predictor.make_predictions(user_df)
+    # majority
     majority_vote = predictor.get_majority_vote(predictions)
+    # how many are '1'
+    count_ones = sum(np.concatenate(predictions) == 1)
+    # severity
+    severity_msg = predictor.evaluate_severity(count_ones)
+    # 4) Format textual results for each group (just as an example)
     groups = {
         "Concentration_and_Decision_Making": ["YOWRCONC", "YOWRDCSN"],
         "Sleep_and_Energy_Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
                                                          "YOPB2WK"]
     }
+    group_text = {g: [] for g in groups}
     for i, arr in enumerate(predictions):
+        label_col = model_filenames[i].split('.')[0]  # e.g. 'YOWRCONC'
+        val = arr[0]
+        if label_col in predictor.prediction_map and val in [0,1]:
+            text_label = predictor.prediction_map[label_col][val]
         else:
+            text_label = f"Prediction={val}"
+        # see which group
+        found = False
         for gname, gcols in groups.items():
+            if label_col in gcols:
+                group_text[gname].append(f"{label_col} => {text_label}")
+                found = True
                 break
+    # build final results
+    final_str_parts = []
+    for gname, lines in group_text.items():
+        if lines:
+            final_str_parts.append(f"**{gname.replace('_',' ')}**")
+            final_str_parts.append("\n".join(lines))
+            final_str_parts.append("")
+    if not final_str_parts:
+        final_str = "No predictions made or no matching group columns."
+    else:
+        final_str = "\n".join(final_str_parts)
+    # 5) Additional features
+    # total patients
+    total_count = len(df)
+    total_count_md = f"### Total Patient Count\nWe have **{total_count}** patients in the dataset."
+    # bar chart for input features
+    input_counts = {}
+    for col, val_ in user_input_dict.items():
+        # only 1 item
+        v = val_
+        # how many have that value?
+        matched = len(df[df[col] == v])
+        input_counts[col] = matched
+    bar_in_df = pd.DataFrame({"Feature": list(input_counts.keys()),
+                              "Count": list(input_counts.values())})
+    fig_in = px.bar(bar_in_df, x="Feature", y="Count",
+                    title="Number of Patients with Same Input Feature Values")
+    fig_in.update_layout(width=700, height=400)
+    # bar chart for predicted labels
     label_counts = {}
     for i, arr in enumerate(predictions):
+        lblcol = model_filenames[i].split('.')[0]
         pred_val = arr[0]
         if pred_val in [0,1]:
+            # how many in df have that label?
+            label_counts[lblcol] = len(df[df[lblcol] == pred_val])
     if label_counts:
+        bar_lbl_df = pd.DataFrame({"Label": list(label_counts.keys()),
+                                   "Count": list(label_counts.values())})
+        fig_lbl = px.bar(bar_lbl_df, x="Label", y="Count",
+                         title="Number of Patients with the Same Predicted Label")
+        fig_lbl.update_layout(width=700, height=400)
     else:
+        fig_lbl = px.bar(title="No valid predicted labels to display.")
+        fig_lbl.update_layout(width=700, height=400)
+    # distribution plot (just a small sample)
+    feat_sample = list(user_input_dict.keys())[:3]
+    label_sample = [mf.split('.')[0] for mf in model_filenames[:2]]
+    rows = []
+    for f_ in feat_sample:
+        if f_ not in df.columns:
             continue
+        for l_ in label_sample:
+            if l_ not in df.columns:
                 continue
+            sub_g = df.groupby([f_, l_]).size().reset_index(name="count")
+            sub_g["feature"] = f_
+            sub_g["label"] = l_
+            rows.append(sub_g)
+    if rows:
+        big_df = pd.concat(rows, ignore_index=True)
         fig_dist = px.bar(
+            big_df,
+            x=big_df.columns[0],  # feature value
             y="count",
+            color=big_df.columns[1],  # label value
             facet_row="feature",
             facet_col="label",
+            title="Distribution (Sample Input Features vs Sample Labels)"
         )
+        fig_dist.update_layout(width=900, height=600)
     else:
         fig_dist = px.bar(title="Distribution plot not generated.")
+    # nearest neighbors or co-occ placeholder
+    nn_md = "Nearest neighbors / advanced metrics not implemented in this version."
+    co_occ_placeholder = None
     return (
+        final_str,               # 1) Prediction Results
+        severity_msg,            # 2) Mental Health Severity
+        total_count_md,          # 3) Total Patient Count
+        fig_dist,                # 4) Distribution Plot
+        nn_md,                   # 5) Nearest Neighbors (Markdown)
+        co_occ_placeholder,      # 6) Co-occurrence Plot
+        fig_in,                  # 7) Bar Chart for input features
+        fig_lbl                  # 8) Bar Chart for predicted labels
     )
 ######################################
+# 4) CO-OCCURRENCE FUNCTION
 ######################################
 def co_occurrence_plot(feature1, feature2, label_col):
     """
+    Create a bar chart for co-occurrence among feature1, feature2, and label_col.
     """
     # Reads the module-level DataFrame `df`. Always returns a plotly figure:
     # invalid selections yield an empty bar figure whose title carries the
     # message instead of raising.
+    if (not feature1) or (not feature2) or (not label_col):
         return px.bar(title="Please select all three fields.")
     if feature1 not in df.columns or feature2 not in df.columns or label_col not in df.columns:
         return px.bar(title="Selected columns not found in the dataset.")
     # One row per distinct (feature1, feature2, label_col) combination, with
     # the number of matching rows in a "count" column.
     # NOTE(review): choosing the same column in two dropdowns passes
     # duplicate keys to groupby, which is not guarded here - confirm.
+    grouped = df.groupby([feature1, feature2, label_col]).size().reset_index(name="count")
     # feature1 on the x-axis, one facet column per feature2 value, bars
     # coloured by label_col.
     fig = px.bar(
+        grouped,
         x=feature1,
         y="count",
         color=label_col,
         facet_col=feature2,
+        title=f"Co-occurrence: {feature1}, {feature2} vs {label_col}"
     )
+    fig.update_layout(width=900, height=600)
     return fig
 ######################################
+# 5) BUILD GRADIO UI
 ######################################
+with gr.Blocks(css=".gradio-container {max-width: 1100px;}") as demo:
     with gr.Tab("Prediction"):
+        # Input fields in the same order as predict(...)
         YMDEYR_dd = gr.Dropdown(list(input_mapping['YMDEYR'].keys()), label="YMDEYR")
         YMDERSUD5ANY_dd = gr.Dropdown(list(input_mapping['YMDERSUD5ANY'].keys()), label="YMDERSUD5ANY")
         YMDEIMAD5YR_dd = gr.Dropdown(list(input_mapping['YMDEIMAD5YR'].keys()), label="YMDEIMAD5YR")
         YDOCMDE_dd = gr.Dropdown(list(input_mapping['YDOCMDE'].keys()), label="YDOCMDE")
         YTXMDEYR_dd = gr.Dropdown(list(input_mapping['YTXMDEYR'].keys()), label="YTXMDEYR")
+        # Suicidal
         YUSUITHKYR_dd = gr.Dropdown(list(input_mapping['YUSUITHKYR'].keys()), label="YUSUITHKYR")
         YUSUIPLNYR_dd = gr.Dropdown(list(input_mapping['YUSUIPLNYR'].keys()), label="YUSUIPLNYR")
         YUSUITHK_dd = gr.Dropdown(list(input_mapping['YUSUITHK'].keys()), label="YUSUITHK")
         YMSUD5YANY_dd = gr.Dropdown(list(input_mapping['YMSUD5YANY'].keys()), label="YMSUD5YANY")
         YRXMDEYR_dd = gr.Dropdown(list(input_mapping['YRXMDEYR'].keys()), label="YRXMDEYR")
+        # Button
         predict_btn = gr.Button("Predict")
+        # 8 outputs
         out_pred_res = gr.Textbox(label="Prediction Results", lines=8)
         out_sev = gr.Textbox(label="Mental Health Severity", lines=2)
         out_count = gr.Markdown(label="Total Patient Count")
         out_bar_input = gr.Plot(label="Input Feature Counts")
         out_bar_labels = gr.Plot(label="Predicted Label Counts")
+        # Connect
         predict_btn.click(
             fn=predict,
             inputs=[
             ]
         )
     with gr.Tab("Co-occurrence"):
+        gr.Markdown("## Co-Occurrence Plot\nSelect two features + one label to see a distribution.")
         with gr.Row():
+            feat1_dd = gr.Dropdown(sorted(df.columns), label="Feature 1")
+            feat2_dd = gr.Dropdown(sorted(df.columns), label="Feature 2")
             label_dd = gr.Dropdown(sorted(df.columns), label="Label Column")
+        generate_btn = gr.Button("Generate Plot")
+        co_occ_output = gr.Plot()
+        generate_btn.click(
             fn=co_occurrence_plot,
+            inputs=[feat1_dd, feat2_dd, label_dd],
+            outputs=co_occ_output
         )
+# Launch
+demo.launch()