Spaces:

pantdipendra
/

AdolescentsMentalHealthPrediction

Running

App Files Files Community

pantdipendra committed 19 days ago

Commit

cf4c3a5

verified ·

1 Parent(s): 67c356a

Update app.py

Browse files

Files changed (1) hide show

app.py +372 -295

app.py CHANGED Viewed

@@ -8,17 +8,16 @@ import plotly.express as px
 # Load the training CSV once (outside the functions so it is read only once).
 df = pd.read_csv("X_train_Y_Train_merged_train.csv")
-###############################################################################
-# 1) Model Predictor class
-###############################################################################
 class ModelPredictor:
     def __init__(self, model_path, model_filenames):
         self.model_path = model_path
         self.model_filenames = model_filenames
         self.models = self.load_models()
-        # For each model name, define the mapping from 0->..., 1->...
-        # If you have more labels, expand this dictionary accordingly.
         self.prediction_map = {
             "YOWRCONC": ["Did not have difficulty concentrating", "Had difficulty concentrating"],
             "YOSEEDOC": ["Did not feel the need to see a doctor", "Felt the need to see a doctor"],
@@ -95,9 +94,9 @@ class ModelPredictor:
         else:
             return "Mental health severity: Very Low"
-###############################################################################
-# 2) Model Filenames & Predictor
-###############################################################################
 model_filenames = [
     "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
     "YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
@@ -107,60 +106,18 @@ model_filenames = [
 model_path = "models/"
 predictor = ModelPredictor(model_path, model_filenames)
-###############################################################################
-# 3) Validate Inputs
-###############################################################################
 def validate_inputs(*args):
     """Report whether every supplied input has been selected.

     An input counts as unselected when it equals the empty string or is
     None. Returns False as soon as one such input is found, True
     otherwise (including when no inputs are supplied).
     """
     return not any(value == '' or value is None for value in args)
-###############################################################################
-# 4) Reverse Lookup (numeric -> user-friendly text) for input columns
-###############################################################################
-# We'll define the forward mapping here. The reverse mapping is constructed below.
-input_mapping = {
-    'YNURSMDE': {"Yes": 1, "No": 0},
-    'YMDEYR': {"Yes": 1, "No": 2},
-    'YSOCMDE': {"Yes": 1, "No": 0},
-    'YMDESUD5ANYO': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
-    'YMSUD5YANY': {"Yes": 1, "No": 0},
-    'YUSUITHK': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YMDETXRX': {"Yes": 1, "No": 0},
-    'YUSUITHKYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YMDERSUD5ANY': {"Yes": 1, "No": 0},
-    'YUSUIPLNYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YCOUNMDE': {"Yes": 1, "No": 0},
-    'YPSY1MDE': {"Yes": 1, "No": 0},
-    'YHLTMDE': {"Yes": 1, "No": 0},
-    'YDOCMDE': {"Yes": 1, "No": 0},
-    'YPSY2MDE': {"Yes": 1, "No": 0},
-    'YMDEHARX': {"Yes": 1, "No": 0},
-    'LVLDIFMEM2': {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
-    'MDEIMPY': {"Yes": 1, "No": 2},
-    'YMDEHPO': {"Yes": 1, "No": 0},
-    'YMIMS5YANY': {"Yes": 1, "No": 0},
-    'YMDEIMAD5YR': {"Yes": 1, "No": 0},
-    'YMIUD5YANY': {"Yes": 1, "No": 0},
-    'YMDEHPRX': {"Yes": 1, "No": 0},
-    'YMIMI5YANY': {"Yes": 1, "No": 0},
-    'YUSUIPLN': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
-    'YTXMDEYR': {"Yes": 1, "No": 0},
-    'YMDEAUD5YR': {"Yes": 1, "No": 0},
-    'YRXMDEYR': {"Yes": 1, "No": 0},
-    'YMDELT': {"Yes": 1, "No": 2}
-}
-# Build reverse mapping: { "YNURSMDE": {1: "Yes", 0: "No"}, ... } etc.
-reverse_mapping = {}
-for col, mapping_dict in input_mapping.items():
-    rev = {v: k for k, v in mapping_dict.items()}  # invert dict
-    reverse_mapping[col] = rev
-###############################################################################
-# 5) Main Predict Function
-###############################################################################
 def predict(
     YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
     YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
@@ -168,17 +125,7 @@ def predict(
     YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
     YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
 ):
-    """
-    Core prediction function that:
-      1) Predicts with each model
-      2) Aggregates results
-      3) Produces an overall 'severity'
-      4) Returns detailed per-model predictions
-      5) Creates a distribution plot for ALL input features vs. a chosen label
-      6) Nearest neighbor logic (with disclaimers), mapping numeric -> user text
-    """
-    # 1) Prepare user_input dataframe
     user_input_data = {
         'YNURSMDE': [int(YNURSMDE)],
         'YMDEYR': [int(YMDEYR)],
@@ -212,20 +159,20 @@ def predict(
     }
     user_input = pd.DataFrame(user_input_data)
-    # 2) Make predictions
     predictions = predictor.make_predictions(user_input)
-    # 3) Calculate majority vote (0 or 1) across all models
     majority_vote = predictor.get_majority_vote(predictions)
-    # 4) Count how many 1's in all predictions combined
     majority_vote_count = sum([1 for pred in np.concatenate(predictions) if pred == 1])
-    # 5) Evaluate severity
     severity = predictor.evaluate_severity(majority_vote_count)
-    # 6) Prepare per-model predictions
-    #    We'll group them just like before
     results = {
         "Concentration_and_Decision_Making": [],
         "Sleep_and_Energy_Levels": [],
@@ -245,17 +192,18 @@ def predict(
                                                          "YOPB2WK"]
     }
-    # We'll keep a record of which model => which predicted label
     for i, pred in enumerate(predictions):
-        model_name = predictor.model_filenames[i].split('.')[0]
         pred_value = pred[0]
         # Map the prediction value to a human-readable string
         if model_name in predictor.prediction_map and pred_value in [0, 1]:
             result_text = f"Model {model_name}: {predictor.prediction_map[model_name][pred_value]}"
         else:
-            result_text = f"Model {model_name}: Unknown or out-of-range"
-        # Append to the appropriate group
         found_group = False
         for group_name, group_models in prediction_groups.items():
             if model_name in group_models:
@@ -263,10 +211,10 @@ def predict(
                 found_group = True
                 break
         if not found_group:
-            # If no group matches, skip or store in "Other"
             pass
-    # 7) Nicely format the results
     formatted_results = []
     for group, preds in results.items():
         if preds:
@@ -274,184 +222,366 @@ def predict(
             formatted_results.append("\n".join(preds))
             formatted_results.append("\n")
     formatted_results = "\n".join(formatted_results).strip()
-    if len(formatted_results) == 0:
         formatted_results = "No predictions made. Please check your inputs."
-    # 8) Additional disclaimers if there's a large fraction of unknown
-    num_unknown = sum(1 for group, preds in results.items() if any("Unknown or out-of-range" in p for p in preds))
-    if num_unknown > len(predictor.model_filenames) / 2:
         severity += " (Unknown prediction count is high. Please consult with a human.)"
-    ############################################################################
     # A) Total Patient Count
-    ############################################################################
     total_patients = len(df)
     total_patient_count_markdown = (
         "### Total Patient Count\n"
-        f"There are **{total_patients}** total patients in the dataset.\n\n"
-        "This number helps you understand the size of the dataset used."
     )
-    ############################################################################
-    # B) Distribution Plot: All Input Features vs. a single predicted label
-    ############################################################################
-    # For demonstration, let's pick "YOWRCONC" if it exists in df:
-    # We'll melt the dataset so that each input feature is in a "FeatureName" column,
-    # and each distinct category is in "FeatureValue". We'll group by those + label to get counts.
-    chosen_label = "YOWRCONC"
-    if chosen_label in df.columns:
-        # 1) Narrow down to the columns of interest
-        #    We'll only use the input features that exist in df
-        input_cols_in_df = [c for c in user_input_data.keys() if c in df.columns]
-        # 2) We'll create a "melted" version of these input features
-        #    i.e., row per (patient_id, FeatureName, FeatureValue)
-        sub_df = df[input_cols_in_df + [chosen_label]].copy()
-        # Melt them
-        melted = sub_df.melt(
-            id_vars=[chosen_label],
-            var_name="FeatureName",
-            value_name="FeatureValue"
-        )
-        # 3) Group by (FeatureName, FeatureValue, chosen_label) to get size
-        dist_data = melted.groupby(["FeatureName", "FeatureValue", chosen_label]).size().reset_index(name="count")
-        # 4) We'll try to map FeatureValue from numeric -> user-friendly text if possible
-        #    We'll do it only if FeatureName is in reverse_mapping.
-        def map_value(row):
-            fn = row["FeatureName"]
-            fv = row["FeatureValue"]
-            if fn in reverse_mapping:
-                if fv in reverse_mapping[fn]:
-                    return reverse_mapping[fn][fv]  # e.g. 1->"Yes"
-            return fv  # fallback
-        dist_data["FeatureValueText"] = dist_data.apply(map_value, axis=1)
-        # 5) Similarly, map chosen_label (0 or 1) to text if in predictor.prediction_map
-        if chosen_label in predictor.prediction_map:
-            def map_label(val):
-                if val in [0, 1]:
-                    return predictor.prediction_map[chosen_label][val]
-                return f"Unknown label {val}"
-            dist_data["LabelText"] = dist_data[chosen_label].apply(map_label)
-        else:
-            dist_data["LabelText"] = dist_data[chosen_label].astype(str)
-        # 6) Now produce a bar chart with facet_col = FeatureName
-        fig_distribution = px.bar(
-            dist_data,
-            x="FeatureValueText",
             y="count",
-            color="LabelText",
-            facet_col="FeatureName",
-            facet_col_wrap=4,     # how many facets per row
-            title=f"Distribution of All Input Features vs. {chosen_label}",
-            height=800
         )
-        fig_distribution.update_layout(legend=dict(title=chosen_label))
-        # (Optional) Adjust layout or text angle if you have many categories
-        fig_distribution.update_xaxes(tickangle=45)
     else:
-        # Fallback
-        fig_distribution = px.bar(title=f"Label {chosen_label} not found in dataset. Distribution not available.")
-    ############################################################################
-    # C) Nearest Neighbors (Hamming Distance) with disclaimers & user-friendly text
-    ############################################################################
-    # "Nearest neighbor” methods for high-dimensional or purely categorical data can be non-trivial.
-    # This demo uses a Hamming distance over all input features, picks K=5.
-    # In real practice, you'd refine which features to use, how to encode them, etc.
-    # 1) Build a DataFrame to compare with the user_input
-    features_to_compare = [col for col in user_input_data if col in df.columns]
     user_series = user_input.iloc[0]
-    # 2) Compute distances
     distances = []
-    for idx, row in df[features_to_compare].iterrows():
-        d = 0
-        for col in features_to_compare:
-            if row[col] != user_series[col]:
-                d += 1
-        distances.append(d)
     df_with_dist = df.copy()
     df_with_dist["distance"] = distances
-    # 3) Sort and pick top K=5
     K = 5
     nearest_neighbors = df_with_dist.sort_values("distance", ascending=True).head(K)
-    # 4) Show how many had the chosen_label=0 vs 1, but also map them
-    #    We'll also demonstrate showing user-friendly text for each neighbor's feature values.
-    #    However, if you have large K or many features, this can be big.
-    if chosen_label in nearest_neighbors.columns:
-        nn_label_0 = len(nearest_neighbors[nearest_neighbors[chosen_label] == 0])
-        nn_label_1 = len(nearest_neighbors[nearest_neighbors[chosen_label] == 1])
-        if chosen_label in predictor.prediction_map:
-            label0_text = predictor.prediction_map[chosen_label][0]
-            label1_text = predictor.prediction_map[chosen_label][1]
-        else:
-            label0_text = "Label=0"
-            label1_text = "Label=1"
-    else:
-        nn_label_0 = nn_label_1 = 0
-        label0_text = "Label=0"
-        label1_text = "Label=1"
-    # 5) Build an example table of those neighbors in user-friendly text
-    neighbor_text_rows = []
-    for idx, nn_row in nearest_neighbors.iterrows():
-        # For each feature, map numeric -> user text
-        row_str_parts = []
-        row_str_parts.append(f"distance={nn_row['distance']}")
-        for fcol in features_to_compare:
-            val = nn_row[fcol]
-            # try to map
-            if fcol in reverse_mapping and val in reverse_mapping[fcol]:
-                val_str = reverse_mapping[fcol][val]
             else:
-                val_str = str(val)
-            row_str_parts.append(f"{fcol}={val_str}")
-        # For the label
-        if chosen_label in nn_row:
-            lbl_val = nn_row[chosen_label]
-            if chosen_label in predictor.prediction_map and lbl_val in [0, 1]:
-                lbl_str = predictor.prediction_map[chosen_label][lbl_val]
             else:
-                lbl_str = str(lbl_val)
-            row_str_parts.append(f"{chosen_label}={lbl_str}")
-        neighbor_text_rows.append(" | ".join(row_str_parts))
-    neighbor_text_block = "\n".join(neighbor_text_rows)
     similar_patient_markdown = (
         "### Nearest Neighbors (Simple Hamming Distance)\n"
-        "“Nearest neighbor” methods for high-dimensional or purely categorical data can be non-trivial. "
-        "This demo simply uses a Hamming distance over all input features and picks **K=5** neighbors.\n\n"
         "In a real application, you would refine which features are most relevant, how to encode them, "
         "and how many neighbors to select.\n\n"
-        f"Among these **{K}** nearest neighbors:\n"
-        f"- **{nn_label_0}** had {label0_text}\n"
-        f"- **{nn_label_1}** had {label1_text}\n\n"
-        "Below is a breakdown of each neighbor's key features in user-friendly text:\n\n"
-        f"```\n{neighbor_text_block}\n```"
     )
-    ############################################################################
-    # Return 8 outputs
-    ############################################################################
     return (
-        formatted_results,              # 1) Prediction results (Textbox)
-        severity,                       # 2) Mental Health Severity (Textbox)
-        total_patient_count_markdown,   # 3) Total Patient Count (Markdown)
-        fig_distribution,               # 4) Distribution Plot (Plot)
-        similar_patient_markdown,       # 5) Nearest Neighbor Summary (Markdown)
-        None,                           # 6) Placeholder if you need more plots
-        None,                           # 7) Another placeholder
-        None                            # 8) Another placeholder
     )
-###############################################################################
-# 6) Gradio Interface: We'll keep 8 outputs, but only use some in this demo
-###############################################################################
 def predict_with_text(
     YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
     YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
@@ -459,7 +589,7 @@ def predict_with_text(
     YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
     YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
 ):
-    # Validate that all required inputs are selected
     if not validate_inputs(
         YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
         YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
@@ -468,15 +598,17 @@ def predict_with_text(
         YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
     ):
         return (
-            "Please select all required fields.",  # Prediction Results
-            "Validation Error",                    # Severity
-            "No data",                             # Total Patient Count
-            None,                                  # Distribution Plot
-            "No data",                             # Nearest Neighbors
-            None, None, None                       # Placeholders
         )
-    # Map from user-friendly text to int
     user_inputs = {
         'YNURSMDE': input_mapping['YNURSMDE'][YNURSMDE],
         'YMDEYR': input_mapping['YMDEYR'][YMDEYR],
@@ -508,68 +640,11 @@ def predict_with_text(
         'YRXMDEYR': input_mapping['YRXMDEYR'][YRXMDEYR],
         'YMDELT': input_mapping['YMDELT'][YMDELT]
     }
-    # Pass our mapped values into the original 'predict' function
-    return predict(**user_inputs)
-###############################################################################
-# 7) Define and Launch Gradio Interface
-###############################################################################
-import sys
-# We have 8 outputs (some are placeholders)
-outputs = [
-    gr.Textbox(label="Prediction Results", lines=30),
-    gr.Textbox(label="Mental Health Severity", lines=4),
-    gr.Markdown(label="Total Patient Count"),
-    gr.Plot(label="Distribution of All Input Features vs. One Label"),
-    gr.Markdown(label="Nearest Neighbors Summary"),
-    gr.Plot(label="Placeholder Plot"),
-    gr.Plot(label="Placeholder Plot"),
-    gr.Plot(label="Placeholder Plot")
-]
-# Define the inputs
-inputs = [
-    # Major Depressive Episode (MDE) questions
-    gr.Dropdown(list(input_mapping['YMDEYR'].keys()), label="YMDEYR: PAST YEAR MDE?"),
-    gr.Dropdown(list(input_mapping['YMDERSUD5ANY'].keys()), label="YMDERSUD5ANY: MDE OR SUBSTANCE USE DISORDER - ANY"),
-    gr.Dropdown(list(input_mapping['YMDEIMAD5YR'].keys()), label="YMDEIMAD5YR: MDE + ALCOHOL USE DISORDER?"),
-    gr.Dropdown(list(input_mapping['YMIMS5YANY'].keys()), label="YMIMS5YANY: MDE + SUBSTANCE USE DISORDER?"),
-    gr.Dropdown(list(input_mapping['YMDELT'].keys()), label="YMDELT: EVER HAD MDE LIFETIME?"),
-    gr.Dropdown(list(input_mapping['YMDEHARX'].keys()), label="YMDEHARX: SAW HEALTH PROF + MEDS FOR MDE"),
-    gr.Dropdown(list(input_mapping['YMDEHPRX'].keys()), label="YMDEHPRX: SAW HEALTH PROF OR MEDS FOR MDE"),
-    gr.Dropdown(list(input_mapping['YMDETXRX'].keys()), label="YMDETXRX: TREATMENT/COUNSELING FOR MDE"),
-    gr.Dropdown(list(input_mapping['YMDEHPO'].keys()), label="YMDEHPO: HEALTH PROF ONLY FOR MDE"),
-    gr.Dropdown(list(input_mapping['YMDEAUD5YR'].keys()), label="YMDEAUD5YR: MDE + ALCOHOL USE DISORDER"),
-    gr.Dropdown(list(input_mapping['YMIMI5YANY'].keys()), label="YMIMI5YANY: MDE + ILL DRUG USE DISORDER"),
-    gr.Dropdown(list(input_mapping['YMIUD5YANY'].keys()), label="YMIUD5YANY: MDE + ILL DRUG USE DISORDER"),
-    gr.Dropdown(list(input_mapping['YMDESUD5ANYO'].keys()), label="YMDESUD5ANYO: MDE vs. SUD vs. BOTH vs. NEITHER"),
-    # Consultations
-    gr.Dropdown(list(input_mapping['YNURSMDE'].keys()), label="YNURSMDE: NURSE / OT FOR MDE"),
-    gr.Dropdown(list(input_mapping['YSOCMDE'].keys()), label="YSOCMDE: SOCIAL WORKER FOR MDE"),
-    gr.Dropdown(list(input_mapping['YCOUNMDE'].keys()), label="YCOUNMDE: COUNSELOR FOR MDE"),
-    gr.Dropdown(list(input_mapping['YPSY1MDE'].keys()), label="YPSY1MDE: PSYCHOLOGIST FOR MDE"),
-    gr.Dropdown(list(input_mapping['YPSY2MDE'].keys()), label="YPSY2MDE: PSYCHIATRIST FOR MDE"),
-    gr.Dropdown(list(input_mapping['YHLTMDE'].keys()), label="YHLTMDE: HEALTH PROF FOR MDE"),
-    gr.Dropdown(list(input_mapping['YDOCMDE'].keys()), label="YDOCMDE: GP/FAMILY MD FOR MDE"),
-    gr.Dropdown(list(input_mapping['YTXMDEYR'].keys()), label="YTXMDEYR: DOCTOR/HEALTH PROF FOR MDE THIS YEAR"),
-    # Suicidal thoughts / plans
-    gr.Dropdown(list(input_mapping['YUSUITHKYR'].keys()), label="YUSUITHKYR: SERIOUSLY THOUGHT ABOUT KILLING SELF"),
-    gr.Dropdown(list(input_mapping['YUSUIPLNYR'].keys()), label="YUSUIPLNYR: MADE PLANS TO KILL SELF"),
-    gr.Dropdown(list(input_mapping['YUSUITHK'].keys()), label="YUSUITHK: THINK ABOUT KILLING SELF (12 MONTHS)"),
-    gr.Dropdown(list(input_mapping['YUSUIPLN'].keys()), label="YUSUIPLN: MADE PLANS TO KILL SELF (12 MONTHS)"),
-    # Impairment
-    gr.Dropdown(list(input_mapping['MDEIMPY'].keys()), label="MDEIMPY: MDE WITH SEVERE ROLE IMPAIRMENT?"),
-    gr.Dropdown(list(input_mapping['LVLDIFMEM2'].keys()), label="LVLDIFMEM2: DIFFICULTY REMEMBERING/CONCENTRATING"),
-    gr.Dropdown(list(input_mapping['YMSUD5YANY'].keys()), label="YMSUD5YANY: MDE + SUBSTANCE USE DISORDER?"),
-    gr.Dropdown(list(input_mapping['YRXMDEYR'].keys()), label="YRXMDEYR: USED MEDS FOR MDE IN PAST YEAR?")
-]
-# Custom CSS (optional)
 custom_css = """
     .gradio-container * {
         color: #1B1212 !important;
@@ -587,13 +662,15 @@ custom_css = """
     }
 """
-# Build the interface
 interface = gr.Interface(
-    fn=predict_with_text,
-    inputs=inputs,
-    outputs=outputs,
-    title="Adolescents with Substance Use Mental Health Screening (NSDUH Data)",
-    css=custom_css,
 )
 if __name__ == "__main__":

 # Load the training CSV once (outside the functions so it is read only once).
 df = pd.read_csv("X_train_Y_Train_merged_train.csv")
+######################################
+# 1) MODEL PREDICTOR CLASS
+######################################
 class ModelPredictor:
     def __init__(self, model_path, model_filenames):
         self.model_path = model_path
         self.model_filenames = model_filenames
         self.models = self.load_models()
+        # Mapping from label column to human-readable strings for 0/1
+        # (Adjust as needed for the columns you actually have.)
         self.prediction_map = {
             "YOWRCONC": ["Did not have difficulty concentrating", "Had difficulty concentrating"],
             "YOSEEDOC": ["Did not feel the need to see a doctor", "Felt the need to see a doctor"],
         else:
             return "Mental health severity: Very Low"
+######################################
+# 2) MODEL & DATA
+######################################
 model_filenames = [
     "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
     "YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
 model_path = "models/"
 predictor = ModelPredictor(model_path, model_filenames)
+######################################
+# 3) INPUT VALIDATION
+######################################
 def validate_inputs(*args):
     """Report whether every supplied input has been selected.

     An input counts as unselected when it equals the empty string or is
     None. Returns False as soon as one such input is found, True
     otherwise (including when no inputs are supplied).
     """
     return not any(value == '' or value is None for value in args)
+######################################
+# 4) MAIN PREDICTION FUNCTION
+######################################
 def predict(
     YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
     YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
     YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
     YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
 ):
+    # Prepare user_input dataframe for prediction
     user_input_data = {
         'YNURSMDE': [int(YNURSMDE)],
         'YMDEYR': [int(YMDEYR)],
     }
     user_input = pd.DataFrame(user_input_data)
+    # 1) Make predictions with each model
     predictions = predictor.make_predictions(user_input)
+    # 2) Calculate majority vote (0 or 1) across all models
     majority_vote = predictor.get_majority_vote(predictions)
+    # 3) Count how many 1's in all predictions combined
     majority_vote_count = sum([1 for pred in np.concatenate(predictions) if pred == 1])
+    # 4) Evaluate severity
     severity = predictor.evaluate_severity(majority_vote_count)
+    # 5) Prepare detailed results (group them)
+    #    We keep the old grouping as an example, but you can adapt as needed.
     results = {
         "Concentration_and_Decision_Making": [],
         "Sleep_and_Energy_Levels": [],
                                                          "YOPB2WK"]
     }
+    # For textual results
     for i, pred in enumerate(predictions):
+        model_name = model_filenames[i].split('.')[0]
         pred_value = pred[0]
         # Map the prediction value to a human-readable string
         if model_name in predictor.prediction_map and pred_value in [0, 1]:
             result_text = f"Model {model_name}: {predictor.prediction_map[model_name][pred_value]}"
         else:
+            # Fallback
+            result_text = f"Model {model_name}: Prediction = {pred_value} (unmapped)"
+        # Append to the appropriate group if matched
         found_group = False
         for group_name, group_models in prediction_groups.items():
             if model_name in group_models:
                 found_group = True
                 break
         if not found_group:
+            # If it doesn't match any group, skip or handle differently
             pass
+    # Format the grouped results
     formatted_results = []
     for group, preds in results.items():
         if preds:
             formatted_results.append("\n".join(preds))
             formatted_results.append("\n")
     formatted_results = "\n".join(formatted_results).strip()
+    if not formatted_results:
         formatted_results = "No predictions made. Please check your inputs."
+    # If too many unknown predictions, add a note
+    num_unknown = len([p for group_preds in results.values() for p in group_preds if "(unmapped)" in p])
+    if num_unknown > len(model_filenames) / 2:
         severity += " (Unknown prediction count is high. Please consult with a human.)"
+    # =============== ADDITIONAL FEATURES ===============
     # A) Total Patient Count
     total_patients = len(df)
     total_patient_count_markdown = (
         "### Total Patient Count\n"
+        f"There are **{total_patients}** total patients in the dataset.\n"
+        "All subsequent analyses refer to these patients."
     )
+    # B) Bar Chart for input features (how many share same value as user_input)
+    input_counts = {}
+    for col in user_input_data.keys():
+        val = user_input_data[col][0]
+        same_val_count = len(df[df[col] == val])
+        input_counts[col] = same_val_count
+    bar_input_data = pd.DataFrame({
+        "Feature": list(input_counts.keys()),
+        "Count": list(input_counts.values())
+    })
+    fig_bar_input = px.bar(
+        bar_input_data,
+        x="Feature",
+        y="Count",
+        title="Number of Patients with the Same Value for Each Input Feature",
+        labels={"Feature": "Input Feature", "Count": "Number of Patients"}
+    )
+    fig_bar_input.update_layout(xaxis={'categoryorder':'total descending'})
+    # C) Bar Chart for predicted labels (distribution in df)
+    label_counts = {}
+    for i, pred in enumerate(predictions):
+        model_name = model_filenames[i].split('.')[0]
+        pred_value = pred[0]
+        if pred_value in [0, 1]:
+            label_counts[model_name] = len(df[df[model_name] == pred_value])
+    if len(label_counts) > 0:
+        bar_label_data = pd.DataFrame({
+            "Model": list(label_counts.keys()),
+            "Count": list(label_counts.values())
+        })
+        fig_bar_labels = px.bar(
+            bar_label_data,
+            x="Model",
+            y="Count",
+            title="Number of Patients with the Same Predicted Label",
+            labels={"Model": "Predicted Column", "Count": "Patient Count"}
+        )
+    else:
+        # Fallback if no valid predictions
+        fig_bar_labels = px.bar(title="No valid predicted labels to display")
+    # D) Distribution Plot: All Input Features vs. All Predicted Labels
+    #    This can create MANY subplots if you have many features & labels.
+    #    We'll do a small demonstration with a subset of input features & model columns
+    #    to avoid overwhelming the UI.
+    demonstration_features = list(user_input_data.keys())[:4]  # first 4 features as a sample
+    demonstration_labels = [fn.split('.')[0] for fn in model_filenames[:3]]  # first 3 labels as a sample
+    # We'll build a single figure with "facet_col" = label and "facet_row" = feature (small sample)
+    # The approach: for each (feature, label), group by (feature_value, label_value) -> count.
+    # Then we combine them into one big DataFrame with "feature" & "label" columns for Plotly facets.
+    dist_rows = []
+    for feat in demonstration_features:
+        if feat not in df.columns:
+            continue
+        for lbl in demonstration_labels:
+            if lbl not in df.columns:
+                continue
+            tmp_df = df.groupby([feat, lbl]).size().reset_index(name="count")
+            tmp_df["feature"] = feat
+            tmp_df["label"] = lbl
+            dist_rows.append(tmp_df)
+    if len(dist_rows) > 0:
+        big_dist_df = pd.concat(dist_rows, ignore_index=True)
+        # We can re-map numeric to user-friendly text for "feat" if desired, but each feature might have a different mapping.
+        # For now, we just show numeric codes. Real usage would do a reverse mapping if feasible.
+        # For the label (0,1), we can map to short strings if we want (like "Label0" / "Label1"), or a direct numeric.
+        fig_dist = px.bar(
+            big_dist_df,
+            x=big_dist_df.columns[0],  # the feature's value is the 0-th col in groupby
             y="count",
+            color=big_dist_df.columns[1],  # the label's value is the 1st col in groupby
+            facet_row="feature",
+            facet_col="label",
+            title="Distribution of Sample Input Features vs. Sample Predicted Labels (Demo)",
+            labels={
+                big_dist_df.columns[0]: "Feature Value",
+                big_dist_df.columns[1]: "Label Value"
+            }
         )
+        fig_dist.update_layout(height=800)
     else:
+        fig_dist = px.bar(title="No distribution plot could be generated (check feature/label columns).")
+    # E) Nearest Neighbors: Hamming Distance, K=5, with disclaimers & user-friendly text
+    #    "Nearest neighbor" methods for high-dimensional or purely categorical data can be non-trivial.
+    #    This demo simply uses a Hamming distance over all input features and picks K=5 neighbors.
+    #    In a real application, you would refine which features are most relevant, how to encode them,
+    #    and how many neighbors to select.
+    #    We also show how to revert numeric codes -> user-friendly text.
+    # 1. Invert the user-friendly text mapping (for inputs).
+    #    We'll assume input_mapping is consistent. We build a reverse mapping for each column.
+    reverse_input_mapping = {}
+    # We'll build it after the code block below for each column.
+    # 2. Invert label mappings from predictor.prediction_map if needed
+    #    For each label column, 0 => first string, 1 => second string
+    #    We'll store them in a dict: reverse_label_mapping[label_col][0 or 1] => string
+    reverse_label_mapping = {}
+    for lbl, str_list in predictor.prediction_map.items():
+        # str_list[0] => for 0, str_list[1] => for 1
+        reverse_label_mapping[lbl] = {
+            0: str_list[0],
+            1: str_list[1]
+        }
+    # Build the reverse input mapping from the provided dictionary
+    # We'll define that dictionary below to ensure we can invert it:
+    input_mapping = {
+        'YNURSMDE': {"Yes": 1, "No": 0},
+        'YMDEYR': {"Yes": 1, "No": 2},
+        'YSOCMDE': {"Yes": 1, "No": 0},
+        'YMDESUD5ANYO': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
+        'YMSUD5YANY': {"Yes": 1, "No": 0},
+        'YUSUITHK': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+        'YMDETXRX': {"Yes": 1, "No": 0},
+        'YUSUITHKYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+        'YMDERSUD5ANY': {"Yes": 1, "No": 0},
+        'YUSUIPLNYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+        'YCOUNMDE': {"Yes": 1, "No": 0},
+        'YPSY1MDE': {"Yes": 1, "No": 0},
+        'YHLTMDE': {"Yes": 1, "No": 0},
+        'YDOCMDE': {"Yes": 1, "No": 0},
+        'YPSY2MDE': {"Yes": 1, "No": 0},
+        'YMDEHARX': {"Yes": 1, "No": 0},
+        'LVLDIFMEM2': {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
+        'MDEIMPY': {"Yes": 1, "No": 2},
+        'YMDEHPO': {"Yes": 1, "No": 0},
+        'YMIMS5YANY': {"Yes": 1, "No": 0},
+        'YMDEIMAD5YR': {"Yes": 1, "No": 0},
+        'YMIUD5YANY': {"Yes": 1, "No": 0},
+        'YMDEHPRX': {"Yes": 1, "No": 0},
+        'YMIMI5YANY': {"Yes": 1, "No": 0},
+        'YUSUIPLN': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
+        'YTXMDEYR': {"Yes": 1, "No": 0},
+        'YMDEAUD5YR': {"Yes": 1, "No": 0},
+        'YRXMDEYR': {"Yes": 1, "No": 0},
+        'YMDELT': {"Yes": 1, "No": 2}
+    }
+    # Build the reverse mapping for each column
+    for col, fwd_map in input_mapping.items():
+        reverse_input_mapping[col] = {v: k for k, v in fwd_map.items()}
+    # 3. Calculate Hamming distance for each row
+    #    We'll consider the columns in user_input for comparison
+    features_to_compare = list(user_input.columns)
+    subset_df = df[features_to_compare].copy()
     user_series = user_input.iloc[0]
     distances = []
+    for idx, row in subset_df.iterrows():
+        dist = sum(row[col] != user_series[col] for col in features_to_compare)
+        distances.append(dist)
     df_with_dist = df.copy()
     df_with_dist["distance"] = distances
+    # 4. Sort by distance ascending, pick top K=5
     K = 5
     nearest_neighbors = df_with_dist.sort_values("distance", ascending=True).head(K)
+    # 5. Summarize neighbor info in user-friendly text
+    #    For demonstration, let's show a small table with each neighbor's values
+    #    for the same features. We'll also show a label or two.
+    #    We'll do this in Markdown format.
+    nn_rows = []
+    for idx, nr in nearest_neighbors.iterrows():
+        # Convert each feature to text if possible
+        row_text = []
+        for col in features_to_compare:
+            val_numeric = nr[col]
+            if col in reverse_input_mapping:
+                row_text.append(f"{col}={reverse_input_mapping[col].get(val_numeric, val_numeric)}")
             else:
+                row_text.append(f"{col}={val_numeric}")
+        # Let's also show YOWRCONC as an example label (if present)
+        if "YOWRCONC" in nearest_neighbors.columns:
+            label_val = nr["YOWRCONC"]
+            if "YOWRCONC" in reverse_label_mapping:
+                label_str = reverse_label_mapping["YOWRCONC"].get(label_val, label_val)
+                row_text.append(f"YOWRCONC={label_str}")
             else:
+                row_text.append(f"YOWRCONC={label_val}")
+        nn_rows.append(f"- **Neighbor ID {idx}** (distance={nr['distance']}): " + ", ".join(row_text))
     similar_patient_markdown = (
         "### Nearest Neighbors (Simple Hamming Distance)\n"
+        f"We searched for the top **{K}** patients whose features most closely match your input.\n\n"
+        "> **Note**: “Nearest neighbor” methods for high-dimensional or purely categorical data can be non-trivial. "
+        "This demo simply uses a Hamming distance over all input features and picks K=5 neighbors. "
         "In a real application, you would refine which features are most relevant, how to encode them, "
         "and how many neighbors to select.\n\n"
+        "Below is a brief overview of each neighbor's input-feature values and one example label (`YOWRCONC`).\n\n"
+        + "\n".join(nn_rows)
     )
+    # F) Co-occurrence Plot from the previous example (kept for completeness)
+    if all(col in df.columns for col in ["YMDEYR", "YMDERSUD5ANY", "YOWRCONC"]):
+        co_occ_data = df.groupby(["YMDEYR", "YMDERSUD5ANY", "YOWRCONC"]).size().reset_index(name="count")
+        fig_co_occ = px.bar(
+            co_occ_data,
+            x="YMDEYR",
+            y="count",
+            color="YOWRCONC",
+            facet_col="YMDERSUD5ANY",
+            title="Co-Occurrence Plot: YMDEYR and YMDERSUD5ANY vs YOWRCONC"
+        )
+    else:
+        fig_co_occ = px.bar(title="Co-occurrence plot not available (check columns).")
+    # =======================
+    # RETURN EVERYTHING
+    # We have 8 outputs:
+    #  1) Prediction Results (Textbox)
+    #  2) Mental Health Severity (Textbox)
+    #  3) Total Patient Count (Markdown)
+    #  4) Distribution Plot (for multiple input features vs. multiple labels)
+    #  5) Nearest Neighbors Summary (Markdown)
+    #  6) Co-Occurrence Plot
+    #  7) Bar Chart for input features
+    #  8) Bar Chart for predicted labels
+    # =======================
     return (
+        formatted_results,
+        severity,
+        total_patient_count_markdown,
+        fig_dist,
+        similar_patient_markdown,
+        fig_co_occ,
+        fig_bar_input,
+        fig_bar_labels
     )
+######################################
+# 5) MAPPING user-friendly text => numeric
+######################################
def _build_input_mapping():
    """Return the per-column table that turns a displayed answer into its numeric code.

    The result maps each survey column name to a dict of
    ``{displayed answer text: numeric code}``. Several columns share the same
    coding, so the codings are spelled out once here and every column receives
    its own fresh copy (no dict object is shared between two columns).
    """
    # Each coding is an ordered sequence of (displayed text, numeric code).
    yes1_no0 = (("Yes", 1), ("No", 0))
    yes1_no2 = (("Yes", 1), ("No", 2))
    yes_no_unsure_refuse = (
        ("Yes", 1),
        ("No", 2),
        ("I'm not sure", 3),
        ("I don't want to answer", 4),
    )
    sud_mde_combination = (
        ("SUD only, no MDE", 1),
        ("MDE only, no SUD", 2),
        ("SUD and MDE", 3),
        ("Neither SUD or MDE", 4),
    )
    difficulty_level = (
        ("No Difficulty", 1),
        ("Some difficulty", 2),
        ("A lot of difficulty or cannot do at all", 3),
    )

    # Column -> coding. The order of this table is the key order of the result.
    coding_by_column = (
        ('YNURSMDE', yes1_no0),
        ('YMDEYR', yes1_no2),
        ('YSOCMDE', yes1_no0),
        ('YMDESUD5ANYO', sud_mde_combination),
        ('YMSUD5YANY', yes1_no0),
        ('YUSUITHK', yes_no_unsure_refuse),
        ('YMDETXRX', yes1_no0),
        ('YUSUITHKYR', yes_no_unsure_refuse),
        ('YMDERSUD5ANY', yes1_no0),
        ('YUSUIPLNYR', yes_no_unsure_refuse),
        ('YCOUNMDE', yes1_no0),
        ('YPSY1MDE', yes1_no0),
        ('YHLTMDE', yes1_no0),
        ('YDOCMDE', yes1_no0),
        ('YPSY2MDE', yes1_no0),
        ('YMDEHARX', yes1_no0),
        ('LVLDIFMEM2', difficulty_level),
        ('MDEIMPY', yes1_no2),
        ('YMDEHPO', yes1_no0),
        ('YMIMS5YANY', yes1_no0),
        ('YMDEIMAD5YR', yes1_no0),
        ('YMIUD5YANY', yes1_no0),
        ('YMDEHPRX', yes1_no0),
        ('YMIMI5YANY', yes1_no0),
        ('YUSUIPLN', yes_no_unsure_refuse),
        ('YTXMDEYR', yes1_no0),
        ('YMDEAUD5YR', yes1_no0),
        ('YRXMDEYR', yes1_no0),
        ('YMDELT', yes1_no2),
    )
    return {column: dict(pairs) for column, pairs in coding_by_column}


# Displayed answer text -> numeric code, keyed by survey column name.
input_mapping = _build_input_mapping()
+######################################
+# 6) GRADIO INTERFACE
+######################################
+# We have 8 outputs in total:
+#   1) Prediction Results
+#   2) Mental Health Severity
+#   3) Total Patient Count
+#   4) Distribution Plot
+#   5) Nearest Neighbors
+#   6) Co-Occurrence Plot
+#   7) Bar Chart for input features
+#   8) Bar Chart for predicted labels
+import gradio as gr
# One dropdown per survey column. The list order is the positional order in
# which the selected values are handed to the callback, so it must mirror the
# callback's parameter order. Each dropdown offers the answer texts that
# input_mapping defines for its column, in that mapping's order.
inputs = [
    gr.Dropdown(list(input_mapping[column]), label=caption)
    for column, caption in (
        ('YMDEYR', "YMDEYR: PAST YEARS MAJOR DEPRESSIVE EPISODE"),
        ('YMDERSUD5ANY', "YMDERSUD5ANY: MDE OR SUBSTANCE USE DISORDER - ANY"),
        ('YMDEIMAD5YR', "YMDEIMAD5YR: MDE WITH SEV. IMP + ALCOHOL USE DISORDER"),
        ('YMIMS5YANY', "YMIMS5YANY: MDE W/ SEV. IMP + SUBSTANCE USE DISORDER"),
        ('YMDELT', "YMDELT: HAD MAJOR DEPRESSIVE EPISODE IN LIFETIME"),
        ('YMDEHARX', "YMDEHARX: SAW HEALTH PROF + MEDS FOR MDE"),
        ('YMDEHPRX', "YMDEHPRX: SAW HEALTH PROF OR MEDS FOR MDE"),
        ('YMDETXRX', "YMDETXRX: RECEIVED TREATMENT/COUNSELING FOR MDE"),
        ('YMDEHPO', "YMDEHPO: SAW HEALTH PROF ONLY FOR MDE"),
        ('YMDEAUD5YR', "YMDEAUD5YR: MDE + ALCOHOL USE DISORDER"),
        ('YMIMI5YANY', "YMIMI5YANY: MDE W/ ILL DRUG USE DISORDER"),
        ('YMIUD5YANY', "YMIUD5YANY: MDE + ILL DRUG USE DISORDER"),
        ('YMDESUD5ANYO', "YMDESUD5ANYO: MDE vs. SUD vs. BOTH vs. NEITHER"),
        # Consultations
        ('YNURSMDE', "YNURSMDE: SAW/TALK TO NURSE/OT ABOUT MDE"),
        ('YSOCMDE', "YSOCMDE: SAW/TALK TO SOCIAL WORKER ABOUT MDE"),
        ('YCOUNMDE', "YCOUNMDE: SAW/TALK TO COUNSELOR ABOUT MDE"),
        ('YPSY1MDE', "YPSY1MDE: SAW/TALK TO PSYCHOLOGIST ABOUT MDE"),
        ('YPSY2MDE', "YPSY2MDE: SAW/TALK TO PSYCHIATRIST ABOUT MDE"),
        ('YHLTMDE', "YHLTMDE: SAW/TALK TO HEALTH PROFESSIONAL ABOUT MDE"),
        ('YDOCMDE', "YDOCMDE: SAW/TALK TO GP/FAMILY MD ABOUT MDE"),
        ('YTXMDEYR', "YTXMDEYR: SAW/TALK DOCTOR/HEALTH PROF FOR MDE"),
        # Suicidal thoughts/plans
        ('YUSUITHKYR', "YUSUITHKYR: SERIOUSLY THOUGHT ABOUT KILLING SELF"),
        ('YUSUIPLNYR', "YUSUIPLNYR: MADE PLANS TO KILL SELF"),
        ('YUSUITHK', "YUSUITHK: THINK ABOUT KILLING SELF (12 MONTHS)"),
        ('YUSUIPLN', "YUSUIPLN: MADE PLANS TO KILL SELF (12 MONTHS)"),
        # Impairments
        ('MDEIMPY', "MDEIMPY: MDE W/ SEVERE ROLE IMPAIRMENT"),
        ('LVLDIFMEM2', "LVLDIFMEM2: LEVEL OF DIFFICULTY REMEMBERING/CONCENTRATING"),
        ('YMSUD5YANY', "YMSUD5YANY: MDE + SUBSTANCE USE DISORDER - ANY"),
        ('YRXMDEYR', "YRXMDEYR: USED MEDS FOR MDE IN PAST YEAR"),
    )
]
# The 8 output components, listed in the same order as the 8-tuple returned
# by the callback: two text boxes, Markdown, plot, Markdown, then three plots.
outputs = [
    component(**settings)
    for component, settings in (
        (gr.Textbox, {"label": "Prediction Results", "lines": 30}),
        (gr.Textbox, {"label": "Mental Health Severity", "lines": 4}),
        (gr.Markdown, {"label": "Total Patient Count"}),
        (gr.Plot, {"label": "Distribution Plot (Sample of Features & Labels)"}),
        (gr.Markdown, {"label": "Nearest Neighbors Summary"}),
        (gr.Plot, {"label": "Co-Occurrence Plot"}),
        (gr.Plot, {"label": "Number of Patients per Input Feature"}),
        (gr.Plot, {"label": "Number of Patients with Predicted Labels"}),
    )
]
+######################################
+# 7) WRAPPER FOR PREDICT
+######################################
 def predict_with_text(
     YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
     YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
     YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
     YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
 ):
+    # Validate user inputs
     if not validate_inputs(
         YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
         YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
         YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
     ):
         return (
+            "Please select all required fields.",
+            "Validation Error",
+            "No data",
+            None,
+            "No data",
+            None,
+            None,
+            None
         )
+    # Map user-friendly text to numeric
     user_inputs = {
         'YNURSMDE': input_mapping['YNURSMDE'][YNURSMDE],
         'YMDEYR': input_mapping['YMDEYR'][YMDEYR],
         'YRXMDEYR': input_mapping['YRXMDEYR'][YRXMDEYR],
         'YMDELT': input_mapping['YMDELT'][YMDELT]
     }
+    # Pass these mapped values into the core predict function
+    return predict(**user_inputs)
# Optional custom CSS: force one dark text colour on every element inside
# the Gradio container.
custom_css = (
    "\n"
    "    .gradio-container * {\n"
    "        color: #1B1212 !important;\n"
    "    }\n"
)
+######################################
+# 8) LAUNCH
+######################################
 interface = gr.Interface(
+    fn=predict_with_text,
+    inputs=inputs,
+    outputs=outputs,
+    title="Adolescents with Substance Use Mental Health Screening (NSDUH Data)",
+    css=custom_css
 )
 if __name__ == "__main__":