Spaces:

pantdipendra
/

AdolescentsMentalHealthPrediction

Running

App Files Files Community

pantdipendra committed 17 days ago

Commit

ebac442

verified ·

1 Parent(s): 6b501f6

v4

Browse files

Files changed (1) hide show

app.py +140 -83

app.py CHANGED Viewed

@@ -7,8 +7,14 @@ import plotly.express as px
 ######################################
 # 1) LOAD DATA & MODELS
 ######################################
 df = pd.read_csv("X_train_test_combined_dataset_Filtered_dataset.csv")
 model_filenames = [
     "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
     "YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
@@ -17,7 +23,6 @@ model_filenames = [
 ]
 model_path = "models/"
 ######################################
 # 2) MODEL PREDICTOR
 ######################################
@@ -38,7 +43,7 @@ class ModelPredictor:
             "YODPPROB": ["No other problems for 2+ weeks",         "Had other problems for 2+ weeks"],
             "YOWRPROB": ["No 'worst time ever' feeling",           "Had 'worst time ever' feeling"],
             "YODPR2WK": ["No depressed feelings for 2+ wks",       "Had depressed feelings for 2+ wks"],
-            "YOWRDEPR": ["Did NOT feel sad/depressed daily",       "Felt sad/depressed mostly everyday"],
             "YODPDISC": ["Overall mood not sad/depressed",         "Overall mood was sad/depressed"],
             "YOLOSEV":  ["Did NOT lose interest in things",        "Lost interest in enjoyable things"],
             "YOWRDCSN": ["Was able to make decisions",             "Was unable to make decisions"],
@@ -54,9 +59,14 @@ class ModelPredictor:
     def load_models(self):
         loaded = []
         for fname in self.model_filenames:
-            with open(self.model_path + fname, "rb") as f:
-                model = pickle.load(f)
-            loaded.append(model)
         return loaded
     def make_predictions(self, user_input: pd.DataFrame):
@@ -91,17 +101,14 @@ class ModelPredictor:
         else:
             return "Mental Health Severity: Very Low"
 predictor = ModelPredictor(model_path, model_filenames)
 ######################################
 # 3) FEATURE CATEGORIES + MAPPING
 ######################################
-#  Replaced 'YMDESUD5ANYO' with 'YMDESUD5ANY' to match your CSV
 categories_dict = {
     "1. Depression & Substance Use Diagnosis": [
-        "YMDESUD5ANY", "YMDELT", "YMDEYR", "YMDERSUD5ANY",
         "YMSUD5YANY", "YMIUD5YANY", "YMIMS5YANY", "YMIMI5YANY"
     ],
     "2. Mental Health Treatment & Prof Consultation": [
@@ -116,9 +123,13 @@ categories_dict = {
     ]
 }
-# Again, replaced 'YMDESUD5ANYO' with 'YMDESUD5ANY'
 input_mapping = {
-    'YMDESUD5ANY': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
     'YMDELT':       {"Yes": 1, "No": 2},
     'YMDEYR':       {"Yes": 1, "No": 2},
     'YMDERSUD5ANY': {"Yes": 1, "No": 0},
@@ -140,7 +151,11 @@ input_mapping = {
     'YCOUNMDE':     {"Yes": 1, "No": 0},
     'MDEIMPY':      {"Yes": 1, "No": 2},
-    'LVLDIFMEM2':   {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
     'YUSUITHK':     {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
     'YUSUITHKYR':   {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
@@ -148,10 +163,9 @@ input_mapping = {
     'YUSUIPLN':     {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4}
 }
 def validate_inputs(*args):
     for arg in args:
-        if not arg:  # empty or None
             return False
     return True
@@ -209,13 +223,12 @@ def get_nearest_neighbors_info(user_input_df: pd.DataFrame, k=5):
     lines.append("")
     return "\n".join(lines)
 ######################################
 # 5) PREDICT FUNCTION
 ######################################
 def predict(
     # Category 1 (8):
-    YMDESUD5ANY, YMDELT, YMDEYR, YMDERSUD5ANY,
     YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
     # Category 2 (11):
     YMDEHPO, YMDETXRX, YMDEHARX, YMDEHPRX, YRXMDEYR,
@@ -227,7 +240,7 @@ def predict(
 ):
     # 1) Validate
     if not validate_inputs(
-        YMDESUD5ANY, YMDELT, YMDEYR, YMDERSUD5ANY,
         YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
         YMDEHPO, YMDETXRX, YMDEHARX, YMDEHPRX, YRXMDEYR,
         YHLTMDE, YTXMDEYR, YDOCMDE, YPSY2MDE, YPSY1MDE, YCOUNMDE,
@@ -235,49 +248,71 @@ def predict(
         YUSUITHK, YUSUITHKYR, YUSUIPLNYR, YUSUIPLN
     ):
         return (
-            "Please select all required fields.",
-            "Validation Error",
             "No data",
             "No nearest neighbors info",
             None,
             None
         )
-    # 2) Convert text -> numeric
-    user_input_dict = {
-        'YMDESUD5ANY': input_mapping['YMDESUD5ANY'][YMDESUD5ANY],
-        'YMDELT':       input_mapping['YMDELT'][YMDELT],
-        'YMDEYR':       input_mapping['YMDEYR'][YMDEYR],
-        'YMDERSUD5ANY': input_mapping['YMDERSUD5ANY'][YMDERSUD5ANY],
-        'YMSUD5YANY':   input_mapping['YMSUD5YANY'][YMSUD5YANY],
-        'YMIUD5YANY':   input_mapping['YMIUD5YANY'][YMIUD5YANY],
-        'YMIMS5YANY':   input_mapping['YMIMS5YANY'][YMIMS5YANY],
-        'YMIMI5YANY':   input_mapping['YMIMI5YANY'][YMIMI5YANY],
-        'YMDEHPO':      input_mapping['YMDEHPO'][YMDEHPO],
-        'YMDETXRX':     input_mapping['YMDETXRX'][YMDETXRX],
-        'YMDEHARX':     input_mapping['YMDEHARX'][YMDEHARX],
-        'YMDEHPRX':     input_mapping['YMDEHPRX'][YMDEHPRX],
-        'YRXMDEYR':     input_mapping['YRXMDEYR'][YRXMDEYR],
-        'YHLTMDE':      input_mapping['YHLTMDE'][YHLTMDE],
-        'YTXMDEYR':     input_mapping['YTXMDEYR'][YTXMDEYR],
-        'YDOCMDE':      input_mapping['YDOCMDE'][YDOCMDE],
-        'YPSY2MDE':     input_mapping['YPSY2MDE'][YPSY2MDE],
-        'YPSY1MDE':     input_mapping['YPSY1MDE'][YPSY1MDE],
-        'YCOUNMDE':     input_mapping['YCOUNMDE'][YCOUNMDE],
-        'MDEIMPY':      input_mapping['MDEIMPY'][MDEIMPY],
-        'LVLDIFMEM2':   input_mapping['LVLDIFMEM2'][LVLDIFMEM2],
-        'YUSUITHK':     input_mapping['YUSUITHK'][YUSUITHK],
-        'YUSUITHKYR':   input_mapping['YUSUITHKYR'][YUSUITHKYR],
-        'YUSUIPLNYR':   input_mapping['YUSUIPLNYR'][YUSUIPLNYR],
-        'YUSUIPLN':     input_mapping['YUSUIPLN'][YUSUIPLN]
-    }
     user_df = pd.DataFrame(user_input_dict, index=[0])
     # 3) Make predictions
-    preds, probs = predictor.make_predictions(user_df)
     # Flatten predictions for severity count
     all_preds = np.concatenate(preds)
@@ -295,13 +330,13 @@ def predict(
     # Group them by domain
     domain_groups = {
-        "Concentration_and_Decision_Making": ["YOWRCONC", "YOWRDCSN"],
-        "Sleep_and_Energy_Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
-        "Mood_and_Emotional_State": [
             "YOWRCHR", "YOWRLSIN", "YOWRDEPR", "YODPDISC", "YOLOSEV", "YODPLSIN", "YODSCEV"
         ],
-        "Appetite_and_Weight_Changes": ["YO_MDEA3", "YOWRELES"],
-        "Duration_and_Severity_of_Depression_Symptoms": [
             "YODPPROB", "YOWRPROB", "YODPR2WK", "YODSMMDE", "YOPB2WK"
         ]
     }
@@ -320,14 +355,13 @@ def predict(
                 if not np.isnan(prob_val):
                     text_prob = f"(Prob= {prob_val:.2f})"
                 else:
-                    text_prob = "(No prob available)"
                 group_lines.append(f"{lbl} => {text_pred} {text_prob}")
         if group_lines:
-            gtitle = gname.replace("_", " ")
-            final_str_parts.append(f"**{gtitle}**")
             final_str_parts.append("\n".join(group_lines))
-            final_str_parts.append("")
     if final_str_parts:
         final_str = "\n".join(final_str_parts)
@@ -345,8 +379,10 @@ def predict(
     for col, val_ in user_input_dict.items():
         matched = len(df[df[col] == val_])
         input_counts[col] = matched
-    bar_in_df = pd.DataFrame({"Feature": list(input_counts.keys()),
-                              "Count": list(input_counts.values())})
     fig_in = px.bar(
         bar_in_df, x="Feature", y="Count",
         title="Number of Patients with the Same Input Feature Values"
@@ -376,12 +412,11 @@ def predict(
         final_str,         # 1) Prediction Results
         severity_msg,      # 2) Mental Health Severity
         total_count_md,    # 3) Total Patient Count
-        nn_md,             # 4) Nearest Neighbors
         fig_in,            # 5) Bar Chart (input features)
         fig_lbl            # 6) Bar Chart (labels)
     )
 ######################################
 # 6) UNIFIED DISTRIBUTION/CO-OCCURRENCE
 ######################################
@@ -399,8 +434,13 @@ def combined_plot(feature_list, label_col):
         if f_ not in df.columns or label_col not in df.columns:
             return px.bar(title="Selected columns not found in the dataset.")
         grouped = df.groupby([f_, label_col]).size().reset_index(name="count")
-        fig = px.bar(grouped, x=f_, y="count", color=label_col,
-                     title=f"Distribution of {f_} vs {label_col}")
         fig.update_layout(width=1200, height=600)
         return fig
@@ -410,8 +450,12 @@ def combined_plot(feature_list, label_col):
             return px.bar(title="Selected columns not found in the dataset.")
         grouped = df.groupby([f1, f2, label_col]).size().reset_index(name="count")
         fig = px.bar(
-            grouped, x=f1, y="count", color=label_col,
-            facet_col=f2, title=f"Co-occurrence: {f1}, {f2} vs {label_col}"
         )
         fig.update_layout(width=1200, height=600)
         return fig
@@ -419,20 +463,19 @@ def combined_plot(feature_list, label_col):
     else:
         return px.bar(title="Please select exactly 1 or 2 features.")
 ######################################
 # 7) BUILD GRADIO UI
 ######################################
 with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
-    # TAB 1: Prediction
     with gr.Tab("Prediction"):
         gr.Markdown("### Please provide inputs in each of the four categories below. All fields are required.")
-        # Category 1
         gr.Markdown("#### 1. Depression & Substance Use Diagnosis")
         cat1_col_labels = [
-            ("YMDESUD5ANY", "YMDESUD5ANY: ONLY MDE, ONLY SUD, BOTH, OR NEITHER"),
             ("YMDELT",       "YMDELT: Had major depressive episode in lifetime"),
             ("YMDEYR",       "YMDEYR: Past-year major depressive episode"),
             ("YMDERSUD5ANY", "YMDERSUD5ANY: MDE or SUD in past year?"),
@@ -444,10 +487,13 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
         cat1_inputs = []
         for col, label_text in cat1_col_labels:
             cat1_inputs.append(
-                gr.Dropdown(choices=list(input_mapping[col].keys()), label=label_text)
             )
-        # Category 2
         gr.Markdown("#### 2. Mental Health Treatment & Professional Consultation")
         cat2_col_labels = [
             ("YMDEHPO",   "YMDEHPO: Saw health prof only for MDE"),
@@ -465,10 +511,13 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
         cat2_inputs = []
         for col, label_text in cat2_col_labels:
             cat2_inputs.append(
-                gr.Dropdown(choices=list(input_mapping[col].keys()), label=label_text)
             )
-        # Category 3
         gr.Markdown("#### 3. Functional & Cognitive Impairment")
         cat3_col_labels = [
             ("MDEIMPY",    "MDEIMPY: MDE with severe role impairment?"),
@@ -477,10 +526,13 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
         cat3_inputs = []
         for col, label_text in cat3_col_labels:
             cat3_inputs.append(
-                gr.Dropdown(choices=list(input_mapping[col].keys()), label=label_text)
             )
-        # Category 4
         gr.Markdown("#### 4. Suicidal Thoughts & Behaviors")
         cat4_col_labels = [
             ("YUSUITHK",   "YUSUITHK: Thought of killing self (past 12 months)?"),
@@ -491,12 +543,16 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
         cat4_inputs = []
         for col, label_text in cat4_col_labels:
             cat4_inputs.append(
-                gr.Dropdown(choices=list(input_mapping[col].keys()), label=label_text)
             )
-        # Combine in the same order
         all_inputs = cat1_inputs + cat2_inputs + cat3_inputs + cat4_inputs
         predict_btn = gr.Button("Predict")
         out_pred_res = gr.Textbox(label="Prediction Results (with Probability)", lines=8)
@@ -506,6 +562,7 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
         out_bar_input= gr.Plot(label="Input Feature Counts")
         out_bar_label= gr.Plot(label="Predicted Label Counts")
         predict_btn.click(
             fn=predict,
             inputs=all_inputs,
@@ -522,8 +579,8 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
     # ======== TAB 2: Unified Distribution/Co-occurrence ========
     with gr.Tab("Distribution/Co-occurrence"):
         gr.Markdown("### Select 1 or 2 features + 1 label to see a bar chart.")
-        # Possibly you want only columns from input_mapping or from df
-        # We'll let user pick from df.columns:
         list_of_features = sorted(df.columns)
         list_of_labels = sorted(predictor.prediction_map.keys())
@@ -545,5 +602,5 @@ with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
             outputs=combined_output
         )
-# Finally, launch
 demo.launch()

 ######################################
 # 1) LOAD DATA & MODELS
 ######################################
+# Load your dataset
 df = pd.read_csv("X_train_test_combined_dataset_Filtered_dataset.csv")
+# Ensure 'YMDESUD5ANYO' exists in your DataFrame
+if 'YMDESUD5ANYO' not in df.columns:
+    raise ValueError("The column 'YMDESUD5ANYO' is missing from the dataset. Please check your CSV file.")
+# List of model filenames
 model_filenames = [
     "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
     "YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
 ]
 model_path = "models/"
 ######################################
 # 2) MODEL PREDICTOR
 ######################################
             "YODPPROB": ["No other problems for 2+ weeks",         "Had other problems for 2+ weeks"],
             "YOWRPROB": ["No 'worst time ever' feeling",           "Had 'worst time ever' feeling"],
             "YODPR2WK": ["No depressed feelings for 2+ wks",       "Had depressed feelings for 2+ wks"],
+            "YOWRDEPR": ["Did NOT feel sad/depressed daily",        "Felt sad/depressed mostly everyday"],
             "YODPDISC": ["Overall mood not sad/depressed",         "Overall mood was sad/depressed"],
             "YOLOSEV":  ["Did NOT lose interest in things",        "Lost interest in enjoyable things"],
             "YOWRDCSN": ["Was able to make decisions",             "Was unable to make decisions"],
     def load_models(self):
         loaded = []
         for fname in self.model_filenames:
+            try:
+                with open(self.model_path + fname, "rb") as f:
+                    model = pickle.load(f)
+                loaded.append(model)
+            except FileNotFoundError:
+                raise FileNotFoundError(f"Model file '{fname}' not found in path '{self.model_path}'.")
+            except Exception as e:
+                raise Exception(f"Error loading model '{fname}': {e}")
         return loaded
     def make_predictions(self, user_input: pd.DataFrame):
         else:
             return "Mental Health Severity: Very Low"
 predictor = ModelPredictor(model_path, model_filenames)
 ######################################
 # 3) FEATURE CATEGORIES + MAPPING
 ######################################
 categories_dict = {
     "1. Depression & Substance Use Diagnosis": [
+        "YMDESUD5ANYO", "YMDELT", "YMDEYR", "YMDERSUD5ANY",
         "YMSUD5YANY", "YMIUD5YANY", "YMIMS5YANY", "YMIMI5YANY"
     ],
     "2. Mental Health Treatment & Prof Consultation": [
     ]
 }
 input_mapping = {
+    'YMDESUD5ANYO': {
+        "SUD only, no MDE": 1,
+        "MDE only, no SUD": 2,
+        "SUD and MDE": 3,
+        "Neither SUD or MDE": 4
+    },
     'YMDELT':       {"Yes": 1, "No": 2},
     'YMDEYR':       {"Yes": 1, "No": 2},
     'YMDERSUD5ANY': {"Yes": 1, "No": 0},
     'YCOUNMDE':     {"Yes": 1, "No": 0},
     'MDEIMPY':      {"Yes": 1, "No": 2},
+    'LVLDIFMEM2':   {
+        "No Difficulty": 1,
+        "Some difficulty": 2,
+        "A lot of difficulty or cannot do at all": 3
+    },
     'YUSUITHK':     {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
     'YUSUITHKYR':   {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
     'YUSUIPLN':     {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4}
 }
 def validate_inputs(*args):
     for arg in args:
+        if arg is None or arg == "":
             return False
     return True
     lines.append("")
     return "\n".join(lines)
 ######################################
 # 5) PREDICT FUNCTION
 ######################################
 def predict(
     # Category 1 (8):
+    YMDESUD5ANYO, YMDELT, YMDEYR, YMDERSUD5ANY,
     YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
     # Category 2 (11):
     YMDEHPO, YMDETXRX, YMDEHARX, YMDEHPRX, YRXMDEYR,
 ):
     # 1) Validate
     if not validate_inputs(
+        YMDESUD5ANYO, YMDELT, YMDEYR, YMDERSUD5ANY,
         YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
         YMDEHPO, YMDETXRX, YMDEHARX, YMDEHPRX, YRXMDEYR,
         YHLTMDE, YTXMDEYR, YDOCMDE, YPSY2MDE, YPSY1MDE, YCOUNMDE,
         YUSUITHK, YUSUITHKYR, YUSUIPLNYR, YUSUIPLN
     ):
         return (
+            "Please select all required fields.",  # 1) Prediction Results
+            "Validation Error",                    # 2) Severity
+            "No data",                             # 3) Total Count
+            "No nearest neighbors info",           # 4) NN Summary
+            None,                                  # 5) Bar chart (Input)
+            None                                   # 6) Bar chart (Labels)
+        )
+    # 2) Convert text -> numeric
+    try:
+        user_input_dict = {
+            'YMDESUD5ANYO': input_mapping['YMDESUD5ANYO'][YMDESUD5ANYO],
+            'YMDELT':       input_mapping['YMDELT'][YMDELT],
+            'YMDEYR':       input_mapping['YMDEYR'][YMDEYR],
+            'YMDERSUD5ANY': input_mapping['YMDERSUD5ANY'][YMDERSUD5ANY],
+            'YMSUD5YANY':   input_mapping['YMSUD5YANY'][YMSUD5YANY],
+            'YMIUD5YANY':   input_mapping['YMIUD5YANY'][YMIUD5YANY],
+            'YMIMS5YANY':   input_mapping['YMIMS5YANY'][YMIMS5YANY],
+            'YMIMI5YANY':   input_mapping['YMIMI5YANY'][YMIMI5YANY],
+            'YMDEHPO':      input_mapping['YMDEHPO'][YMDEHPO],
+            'YMDETXRX':     input_mapping['YMDETXRX'][YMDETXRX],
+            'YMDEHARX':     input_mapping['YMDEHARX'][YMDEHARX],
+            'YMDEHPRX':     input_mapping['YMDEHPRX'][YMDEHPRX],
+            'YRXMDEYR':     input_mapping['YRXMDEYR'][YRXMDEYR],
+            'YHLTMDE':      input_mapping['YHLTMDE'][YHLTMDE],
+            'YTXMDEYR':     input_mapping['YTXMDEYR'][YTXMDEYR],
+            'YDOCMDE':      input_mapping['YDOCMDE'][YDOCMDE],
+            'YPSY2MDE':     input_mapping['YPSY2MDE'][YPSY2MDE],
+            'YPSY1MDE':     input_mapping['YPSY1MDE'][YPSY1MDE],
+            'YCOUNMDE':     input_mapping['YCOUNMDE'][YCOUNMDE],
+            'MDEIMPY':      input_mapping['MDEIMPY'][MDEIMPY],
+            'LVLDIFMEM2':   input_mapping['LVLDIFMEM2'][LVLDIFMEM2],
+            'YUSUITHK':     input_mapping['YUSUITHK'][YUSUITHK],
+            'YUSUITHKYR':   input_mapping['YUSUITHKYR'][YUSUITHKYR],
+            'YUSUIPLNYR':   input_mapping['YUSUIPLNYR'][YUSUIPLNYR],
+            'YUSUIPLN':     input_mapping['YUSUIPLN'][YUSUIPLN]
+        }
+    except KeyError as e:
+        missing_key = e.args[0]
+        return (
+            f"Input mapping missing for key: {missing_key}. Please check your `input_mapping` dictionary.",
+            "Mapping Error",
             "No data",
             "No nearest neighbors info",
             None,
             None
         )
     user_df = pd.DataFrame(user_input_dict, index=[0])
     # 3) Make predictions
+    try:
+        preds, probs = predictor.make_predictions(user_df)
+    except Exception as e:
+        return (
+            f"Error during prediction: {e}",
+            "Prediction Error",
+            "No data",
+            "No nearest neighbors info",
+            None,
+            None
+        )
     # Flatten predictions for severity count
     all_preds = np.concatenate(preds)
     # Group them by domain
     domain_groups = {
+        "Concentration and Decision Making": ["YOWRCONC", "YOWRDCSN"],
+        "Sleep and Energy Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
+        "Mood and Emotional State": [
             "YOWRCHR", "YOWRLSIN", "YOWRDEPR", "YODPDISC", "YOLOSEV", "YODPLSIN", "YODSCEV"
         ],
+        "Appetite and Weight Changes": ["YO_MDEA3", "YOWRELES"],
+        "Duration and Severity of Depression Symptoms": [
             "YODPPROB", "YOWRPROB", "YODPR2WK", "YODSMMDE", "YOPB2WK"
         ]
     }
                 if not np.isnan(prob_val):
                     text_prob = f"(Prob= {prob_val:.2f})"
                 else:
+                    text_prob = "(No probability available)"
                 group_lines.append(f"{lbl} => {text_pred} {text_prob}")
         if group_lines:
+            final_str_parts.append(f"**{gname}**")
             final_str_parts.append("\n".join(group_lines))
+            final_str_parts.append("")  # Add an empty line for spacing
     if final_str_parts:
         final_str = "\n".join(final_str_parts)
     for col, val_ in user_input_dict.items():
         matched = len(df[df[col] == val_])
         input_counts[col] = matched
+    bar_in_df = pd.DataFrame({
+        "Feature": list(input_counts.keys()),
+        "Count": list(input_counts.values())
+    })
     fig_in = px.bar(
         bar_in_df, x="Feature", y="Count",
         title="Number of Patients with the Same Input Feature Values"
         final_str,         # 1) Prediction Results
         severity_msg,      # 2) Mental Health Severity
         total_count_md,    # 3) Total Patient Count
+        nn_md,             # 4) Nearest Neighbors Summary
         fig_in,            # 5) Bar Chart (input features)
         fig_lbl            # 6) Bar Chart (labels)
     )
 ######################################
 # 6) UNIFIED DISTRIBUTION/CO-OCCURRENCE
 ######################################
         if f_ not in df.columns or label_col not in df.columns:
             return px.bar(title="Selected columns not found in the dataset.")
         grouped = df.groupby([f_, label_col]).size().reset_index(name="count")
+        fig = px.bar(
+            grouped,
+            x=f_,
+            y="count",
+            color=label_col,
+            title=f"Distribution of {f_} vs {label_col}"
+        )
         fig.update_layout(width=1200, height=600)
         return fig
             return px.bar(title="Selected columns not found in the dataset.")
         grouped = df.groupby([f1, f2, label_col]).size().reset_index(name="count")
         fig = px.bar(
+            grouped,
+            x=f1,
+            y="count",
+            color=label_col,
+            facet_col=f2,
+            title=f"Co-occurrence: {f1}, {f2} vs {label_col}"
         )
         fig.update_layout(width=1200, height=600)
         return fig
     else:
         return px.bar(title="Please select exactly 1 or 2 features.")
 ######################################
 # 7) BUILD GRADIO UI
 ######################################
 with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
+    # ======== TAB 1: Prediction ========
     with gr.Tab("Prediction"):
         gr.Markdown("### Please provide inputs in each of the four categories below. All fields are required.")
+        # Category 1: Depression & Substance Use Diagnosis (8 features)
         gr.Markdown("#### 1. Depression & Substance Use Diagnosis")
         cat1_col_labels = [
+            ("YMDESUD5ANYO", "YMDESUD5ANYO: ONLY MDE, ONLY SUD, BOTH, OR NEITHER"),
             ("YMDELT",       "YMDELT: Had major depressive episode in lifetime"),
             ("YMDEYR",       "YMDEYR: Past-year major depressive episode"),
             ("YMDERSUD5ANY", "YMDERSUD5ANY: MDE or SUD in past year?"),
         cat1_inputs = []
         for col, label_text in cat1_col_labels:
             cat1_inputs.append(
+                gr.Dropdown(
+                    choices=list(input_mapping[col].keys()),
+                    label=label_text
+                )
             )
+        # Category 2: Mental Health Treatment & Professional Consultation (11 features)
         gr.Markdown("#### 2. Mental Health Treatment & Professional Consultation")
         cat2_col_labels = [
             ("YMDEHPO",   "YMDEHPO: Saw health prof only for MDE"),
         cat2_inputs = []
         for col, label_text in cat2_col_labels:
             cat2_inputs.append(
+                gr.Dropdown(
+                    choices=list(input_mapping[col].keys()),
+                    label=label_text
+                )
             )
+        # Category 3: Functional & Cognitive Impairment (2 features)
         gr.Markdown("#### 3. Functional & Cognitive Impairment")
         cat3_col_labels = [
             ("MDEIMPY",    "MDEIMPY: MDE with severe role impairment?"),
         cat3_inputs = []
         for col, label_text in cat3_col_labels:
             cat3_inputs.append(
+                gr.Dropdown(
+                    choices=list(input_mapping[col].keys()),
+                    label=label_text
+                )
             )
+        # Category 4: Suicidal Thoughts & Behaviors (4 features)
         gr.Markdown("#### 4. Suicidal Thoughts & Behaviors")
         cat4_col_labels = [
             ("YUSUITHK",   "YUSUITHK: Thought of killing self (past 12 months)?"),
         cat4_inputs = []
         for col, label_text in cat4_col_labels:
             cat4_inputs.append(
+                gr.Dropdown(
+                    choices=list(input_mapping[col].keys()),
+                    label=label_text
+                )
             )
+        # Combine all inputs in the correct order
         all_inputs = cat1_inputs + cat2_inputs + cat3_inputs + cat4_inputs
+        # Output components
         predict_btn = gr.Button("Predict")
         out_pred_res = gr.Textbox(label="Prediction Results (with Probability)", lines=8)
         out_bar_input= gr.Plot(label="Input Feature Counts")
         out_bar_label= gr.Plot(label="Predicted Label Counts")
+        # Connect the predict button to the predict function
         predict_btn.click(
             fn=predict,
             inputs=all_inputs,
     # ======== TAB 2: Unified Distribution/Co-occurrence ========
     with gr.Tab("Distribution/Co-occurrence"):
         gr.Markdown("### Select 1 or 2 features + 1 label to see a bar chart.")
+        # Features can be selected from the dataset's columns
         list_of_features = sorted(df.columns)
         list_of_labels = sorted(predictor.prediction_map.keys())
             outputs=combined_output
         )
+# Finally, launch the Gradio app
 demo.launch()