pantdipendra committed
Commit 1fd21ae · verified · 1 Parent(s): cf4c3a5

Update app.py

Files changed (1): app.py (+288, -418)
app.py CHANGED
@@ -1,11 +1,10 @@
1
  import pickle
2
-
3
  import gradio as gr
4
  import numpy as np
5
  import pandas as pd
6
  import plotly.express as px
7
 
8
- # Load the training CSV once (outside the functions so it is read only once).
9
  df = pd.read_csv("X_train_Y_Train_merged_train.csv")
10
 
11
  ######################################
@@ -17,74 +16,52 @@ class ModelPredictor:
17
  self.model_filenames = model_filenames
18
  self.models = self.load_models()
19
  # Mapping from label column to human-readable strings for 0/1
20
- # (Adjust as needed for the columns you actually have.)
21
  self.prediction_map = {
22
- "YOWRCONC": ["Did not have difficulty concentrating", "Had difficulty concentrating"],
23
- "YOSEEDOC": ["Did not feel the need to see a doctor", "Felt the need to see a doctor"],
24
- "YOWRHRS": ["Did not have trouble sleeping", "Had trouble sleeping"],
25
- "YO_MDEA5": ["Others did not notice restlessness/lethargy", "Others noticed restlessness/lethargy"],
26
- "YOWRCHR": ["Did not feel so sad that nothing could cheer up", "Felt so sad that nothing could cheer up"],
27
- "YOWRLSIN": ["Did not feel bored and lose interest in all enjoyable things",
28
- "Felt bored and lost interest in all enjoyable things"],
29
- "YODPPROB": ["Did not have other problems for 2+ weeks", "Had other problems for 2+ weeks"],
30
- "YOWRPROB": ["Did not have the worst time ever feeling", "Had the worst time ever feeling"],
31
- "YODPR2WK": ["Did not have periods where feelings lasted 2+ weeks",
32
- "Had periods where feelings lasted 2+ weeks"],
33
- "YOWRDEPR": ["Did not feel sad/depressed mostly everyday", "Felt sad/depressed mostly everyday"],
34
- "YODPDISC": ["Overall mood duration was not sad/depressed",
35
- "Overall mood duration was sad/depressed (discrepancy)"],
36
- "YOLOSEV": ["Did not lose interest in enjoyable things and activities",
37
- "Lost interest in enjoyable things and activities"],
38
- "YOWRDCSN": ["Was able to make decisions", "Was unable to make decisions"],
39
- "YODSMMDE": ["Never had depression symptoms lasting 2 weeks or longer",
40
- "Had depression symptoms lasting 2 weeks or longer"],
41
- "YO_MDEA3": ["Did not experience changes in appetite or weight",
42
- "Experienced changes in appetite or weight"],
43
- "YODPLSIN": ["Never lost interest and felt bored", "Lost interest and felt bored"],
44
- "YOWRELES": ["Did not eat less than usual", "Ate less than usual"],
45
- "YODSCEV": ["Had fewer severe symptoms of depression", "Had more severe symptoms of depression"],
46
- "YOPB2WK": ["Did not experience uneasy feelings lasting every day for 2+ weeks or longer",
47
- "Experienced uneasy feelings lasting every day for 2+ weeks or longer"],
48
- "YO_MDEA2": ["Did not have issues with physical and mental well-being every day for 2 weeks or longer",
49
- "Had issues with physical and mental well-being every day for 2 weeks or longer"]
50
  }
51
 
52
  def load_models(self):
53
  models = []
54
- for filename in self.model_filenames:
55
- filepath = self.model_path + filename
56
- with open(filepath, 'rb') as file:
57
- model = pickle.load(file)
58
- models.append(model)
59
  return models
60
 
61
  def make_predictions(self, user_input):
62
- """
63
- Returns a list of numpy arrays, each array is [0] or [1].
64
- The i-th array corresponds to the i-th model in self.models.
65
- """
66
- predictions = []
67
- for model in self.models:
68
- pred = model.predict(user_input)
69
- pred = np.array(pred).flatten()
70
- predictions.append(pred)
71
- return predictions
72
 
73
  def get_majority_vote(self, predictions):
74
- """
75
- Flatten all predictions from all models, combine them into a single array,
76
- then find the majority class (0 or 1) across all of them.
77
- """
78
- combined_predictions = np.concatenate(predictions)
79
- majority_vote = np.bincount(combined_predictions).argmax()
80
- return majority_vote
81
-
82
- # Based on Equal Interval and Percentage-Based Method
83
- # Severe: 13 to 16 votes (upper 25%)
84
- # Moderate: 9 to 12 votes (upper-middle 25%)
85
- # Low: 5 to 8 votes (lower-middle 25%)
86
- # Very Low: 0 to 4 votes (lower 25%)
87
  def evaluate_severity(self, majority_vote_count):
 
88
  if majority_vote_count >= 13:
89
  return "Mental health severity: Severe"
90
  elif majority_vote_count >= 9:
@@ -95,7 +72,7 @@ class ModelPredictor:
95
  return "Mental health severity: Very Low"
96
 
97
  ######################################
98
- # 2) MODEL & DATA
99
  ######################################
100
  model_filenames = [
101
  "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
@@ -110,22 +87,36 @@ predictor = ModelPredictor(model_path, model_filenames)
110
  # 3) INPUT VALIDATION
111
  ######################################
112
  def validate_inputs(*args):
 
113
  for arg in args:
114
- if arg == '' or arg is None: # Assuming empty string or None as unselected
115
  return False
116
  return True
117
 
118
  ######################################
119
- # 4) MAIN PREDICTION FUNCTION
120
  ######################################
121
  def predict(
 
122
  YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
123
  YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
124
  YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
125
  YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
126
- YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
 
 
127
  ):
128
- # Prepare user_input dataframe for prediction
129
  user_input_data = {
130
  'YNURSMDE': [int(YNURSMDE)],
131
  'YMDEYR': [int(YMDEYR)],
@@ -159,29 +150,21 @@ def predict(
159
  }
160
  user_input = pd.DataFrame(user_input_data)
161
 
162
- # 1) Make predictions with each model
163
  predictions = predictor.make_predictions(user_input)
164
-
165
- # 2) Calculate majority vote (0 or 1) across all models
166
  majority_vote = predictor.get_majority_vote(predictions)
167
-
168
- # 3) Count how many 1's in all predictions combined
169
- majority_vote_count = sum([1 for pred in np.concatenate(predictions) if pred == 1])
170
-
171
- # 4) Evaluate severity
172
  severity = predictor.evaluate_severity(majority_vote_count)
173
 
174
- # 5) Prepare detailed results (group them)
175
- # We keep the old grouping as an example, but you can adapt as needed.
176
- results = {
177
  "Concentration_and_Decision_Making": [],
178
  "Sleep_and_Energy_Levels": [],
179
  "Mood_and_Emotional_State": [],
180
  "Appetite_and_Weight_Changes": [],
181
  "Duration_and_Severity_of_Depression_Symptoms": []
182
  }
183
-
184
- prediction_groups = {
185
  "Concentration_and_Decision_Making": ["YOWRCONC", "YOWRDCSN"],
186
  "Sleep_and_Energy_Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
187
  "Mood_and_Emotional_State": ["YOWRCHR", "YOWRLSIN", "YOWRDEPR", "YODPDISC",
@@ -192,313 +175,198 @@ def predict(
192
  "YOPB2WK"]
193
  }
194
 
195
- # For textual results
196
- for i, pred in enumerate(predictions):
197
- model_name = model_filenames[i].split('.')[0]
198
- pred_value = pred[0]
199
- # Map the prediction value to a human-readable string
200
- if model_name in predictor.prediction_map and pred_value in [0, 1]:
201
- result_text = f"Model {model_name}: {predictor.prediction_map[model_name][pred_value]}"
 
202
  else:
203
- # Fallback
204
- result_text = f"Model {model_name}: Prediction = {pred_value} (unmapped)"
205
-
206
- # Append to the appropriate group if matched
207
- found_group = False
208
- for group_name, group_models in prediction_groups.items():
209
- if model_name in group_models:
210
- results[group_name].append(result_text)
211
- found_group = True
212
  break
213
- if not found_group:
214
- # If it doesn't match any group, skip or handle differently
215
  pass
216
 
217
- # Format the grouped results
218
- formatted_results = []
219
- for group, preds in results.items():
220
- if preds:
221
- formatted_results.append(f"Group {group.replace('_', ' ')}:")
222
- formatted_results.append("\n".join(preds))
223
- formatted_results.append("\n")
224
- formatted_results = "\n".join(formatted_results).strip()
225
- if not formatted_results:
226
- formatted_results = "No predictions made. Please check your inputs."
227
-
228
- # If too many unknown predictions, add a note
229
- num_unknown = len([p for group_preds in results.values() for p in group_preds if "(unmapped)" in p])
230
- if num_unknown > len(model_filenames) / 2:
231
- severity += " (Unknown prediction count is high. Please consult with a human.)"
232
-
233
- # =============== ADDITIONAL FEATURES ===============
234
-
235
- # A) Total Patient Count
236
  total_patients = len(df)
237
- total_patient_count_markdown = (
238
  "### Total Patient Count\n"
239
- f"There are **{total_patients}** total patients in the dataset.\n"
240
- "All subsequent analyses refer to these patients."
241
  )
242
 
243
- # B) Bar Chart for input features (how many share same value as user_input)
244
  input_counts = {}
245
- for col in user_input_data.keys():
246
- val = user_input_data[col][0]
247
- same_val_count = len(df[df[col] == val])
248
- input_counts[col] = same_val_count
249
- bar_input_data = pd.DataFrame({
250
- "Feature": list(input_counts.keys()),
251
- "Count": list(input_counts.values())
252
- })
253
- fig_bar_input = px.bar(
254
- bar_input_data,
255
- x="Feature",
256
- y="Count",
257
- title="Number of Patients with the Same Value for Each Input Feature",
258
- labels={"Feature": "Input Feature", "Count": "Number of Patients"}
259
  )
260
- fig_bar_input.update_layout(xaxis={'categoryorder':'total descending'})
261
 
262
- # C) Bar Chart for predicted labels (distribution in df)
263
  label_counts = {}
264
- for i, pred in enumerate(predictions):
265
- model_name = model_filenames[i].split('.')[0]
266
- pred_value = pred[0]
267
- if pred_value in [0, 1]:
268
- label_counts[model_name] = len(df[df[model_name] == pred_value])
 
269
  if len(label_counts) > 0:
270
- bar_label_data = pd.DataFrame({
271
- "Model": list(label_counts.keys()),
272
  "Count": list(label_counts.values())
273
  })
274
- fig_bar_labels = px.bar(
275
- bar_label_data,
276
- x="Model",
277
  y="Count",
278
- title="Number of Patients with the Same Predicted Label",
279
- labels={"Model": "Predicted Column", "Count": "Patient Count"}
280
  )
281
  else:
282
- # Fallback if no valid predictions
283
- fig_bar_labels = px.bar(title="No valid predicted labels to display")
284
-
285
- # D) Distribution Plot: All Input Features vs. All Predicted Labels
286
- # This can create MANY subplots if you have many features & labels.
287
- # We'll do a small demonstration with a subset of input features & model columns
288
- # to avoid overwhelming the UI.
289
- demonstration_features = list(user_input_data.keys())[:4] # first 4 features as a sample
290
- demonstration_labels = [fn.split('.')[0] for fn in model_filenames[:3]] # first 3 labels as a sample
291
-
292
- # We'll build a single figure with "facet_col" = label and "facet_row" = feature (small sample)
293
- # The approach: for each (feature, label), group by (feature_value, label_value) -> count.
294
- # Then we combine them into one big DataFrame with "feature" & "label" columns for Plotly facets.
295
- dist_rows = []
296
- for feat in demonstration_features:
297
  if feat not in df.columns:
298
  continue
299
- for lbl in demonstration_labels:
300
  if lbl not in df.columns:
301
  continue
302
- tmp_df = df.groupby([feat, lbl]).size().reset_index(name="count")
303
- tmp_df["feature"] = feat
304
- tmp_df["label"] = lbl
305
- dist_rows.append(tmp_df)
306
- if len(dist_rows) > 0:
307
- big_dist_df = pd.concat(dist_rows, ignore_index=True)
308
- # We can re-map numeric to user-friendly text for "feat" if desired, but each feature might have a different mapping.
309
- # For now, we just show numeric codes. Real usage would do a reverse mapping if feasible.
310
-
311
- # For the label (0,1), we can map to short strings if we want (like "Label0" / "Label1"), or a direct numeric.
312
  fig_dist = px.bar(
313
  big_dist_df,
314
- x=big_dist_df.columns[0], # the feature's value is the 0-th col in groupby
315
  y="count",
316
- color=big_dist_df.columns[1], # the label's value is the 1st col in groupby
317
  facet_row="feature",
318
  facet_col="label",
319
- title="Distribution of Sample Input Features vs. Sample Predicted Labels (Demo)",
320
- labels={
321
- big_dist_df.columns[0]: "Feature Value",
322
- big_dist_df.columns[1]: "Label Value"
323
- }
324
  )
325
- fig_dist.update_layout(height=800)
326
  else:
327
- fig_dist = px.bar(title="No distribution plot could be generated (check feature/label columns).")
328
-
329
- # E) Nearest Neighbors: Hamming Distance, K=5, with disclaimers & user-friendly text
330
- # "Nearest neighbor” methods for high-dimensional or purely categorical data can be non-trivial.
331
- # This demo simply uses a Hamming distance over all input features and picks K=5 neighbors.
332
- # In a real application, you would refine which features are most relevant, how to encode them,
333
- # and how many neighbors to select.
334
- # We also show how to revert numeric codes -> user-friendly text.
335
-
336
- # 1. Invert the user-friendly text mapping (for inputs).
337
- # We'll assume input_mapping is consistent. We build a reverse mapping for each column.
338
- reverse_input_mapping = {}
339
- # We'll build it after the code block below for each column.
340
-
341
- # 2. Invert label mappings from predictor.prediction_map if needed
342
- # For each label column, 0 => first string, 1 => second string
343
- # We'll store them in a dict: reverse_label_mapping[label_col][0 or 1] => string
344
- reverse_label_mapping = {}
345
- for lbl, str_list in predictor.prediction_map.items():
346
- # str_list[0] => for 0, str_list[1] => for 1
347
- reverse_label_mapping[lbl] = {
348
- 0: str_list[0],
349
- 1: str_list[1]
350
- }
351
-
352
- # Build the reverse input mapping from the provided dictionary
353
- # We'll define that dictionary below to ensure we can invert it:
354
- input_mapping = {
355
- 'YNURSMDE': {"Yes": 1, "No": 0},
356
- 'YMDEYR': {"Yes": 1, "No": 2},
357
- 'YSOCMDE': {"Yes": 1, "No": 0},
358
- 'YMDESUD5ANYO': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
359
- 'YMSUD5YANY': {"Yes": 1, "No": 0},
360
- 'YUSUITHK': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
361
- 'YMDETXRX': {"Yes": 1, "No": 0},
362
- 'YUSUITHKYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
363
- 'YMDERSUD5ANY': {"Yes": 1, "No": 0},
364
- 'YUSUIPLNYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
365
- 'YCOUNMDE': {"Yes": 1, "No": 0},
366
- 'YPSY1MDE': {"Yes": 1, "No": 0},
367
- 'YHLTMDE': {"Yes": 1, "No": 0},
368
- 'YDOCMDE': {"Yes": 1, "No": 0},
369
- 'YPSY2MDE': {"Yes": 1, "No": 0},
370
- 'YMDEHARX': {"Yes": 1, "No": 0},
371
- 'LVLDIFMEM2': {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
372
- 'MDEIMPY': {"Yes": 1, "No": 2},
373
- 'YMDEHPO': {"Yes": 1, "No": 0},
374
- 'YMIMS5YANY': {"Yes": 1, "No": 0},
375
- 'YMDEIMAD5YR': {"Yes": 1, "No": 0},
376
- 'YMIUD5YANY': {"Yes": 1, "No": 0},
377
- 'YMDEHPRX': {"Yes": 1, "No": 0},
378
- 'YMIMI5YANY': {"Yes": 1, "No": 0},
379
- 'YUSUIPLN': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
380
- 'YTXMDEYR': {"Yes": 1, "No": 0},
381
- 'YMDEAUD5YR': {"Yes": 1, "No": 0},
382
- 'YRXMDEYR': {"Yes": 1, "No": 0},
383
- 'YMDELT': {"Yes": 1, "No": 2}
384
- }
385
-
386
- # Build the reverse mapping for each column
387
- for col, fwd_map in input_mapping.items():
388
- reverse_input_mapping[col] = {v: k for k, v in fwd_map.items()}
389
-
390
- # 3. Calculate Hamming distance for each row
391
- # We'll consider the columns in user_input for comparison
392
- features_to_compare = list(user_input.columns)
393
- subset_df = df[features_to_compare].copy()
394
- user_series = user_input.iloc[0]
395
-
396
  distances = []
397
- for idx, row in subset_df.iterrows():
398
- dist = sum(row[col] != user_series[col] for col in features_to_compare)
399
- distances.append(dist)
400
-
401
- df_with_dist = df.copy()
402
- df_with_dist["distance"] = distances
403
-
404
- # 4. Sort by distance ascending, pick top K=5
405
- K = 5
406
- nearest_neighbors = df_with_dist.sort_values("distance", ascending=True).head(K)
407
-
408
- # 5. Summarize neighbor info in user-friendly text
409
- # For demonstration, let's show a small table with each neighbor's values
410
- # for the same features. We'll also show a label or two.
411
- # We'll do this in Markdown format.
412
- nn_rows = []
413
- for idx, nr in nearest_neighbors.iterrows():
414
- # Convert each feature to text if possible
415
- row_text = []
416
- for col in features_to_compare:
417
- val_numeric = nr[col]
418
- if col in reverse_input_mapping:
419
- row_text.append(f"{col}={reverse_input_mapping[col].get(val_numeric, val_numeric)}")
420
- else:
421
- row_text.append(f"{col}={val_numeric}")
422
- # Let's also show YOWRCONC as an example label (if present)
423
- if "YOWRCONC" in nearest_neighbors.columns:
424
- label_val = nr["YOWRCONC"]
425
- if "YOWRCONC" in reverse_label_mapping:
426
- label_str = reverse_label_mapping["YOWRCONC"].get(label_val, label_val)
427
- row_text.append(f"YOWRCONC={label_str}")
428
- else:
429
- row_text.append(f"YOWRCONC={label_val}")
430
-
431
- nn_rows.append(f"- **Neighbor ID {idx}** (distance={nr['distance']}): " + ", ".join(row_text))
432
-
433
- similar_patient_markdown = (
434
- "### Nearest Neighbors (Simple Hamming Distance)\n"
435
- f"We searched for the top **{K}** patients whose features most closely match your input.\n\n"
436
- "> **Note**: “Nearest neighbor” methods for high-dimensional or purely categorical data can be non-trivial. "
437
- "This demo simply uses a Hamming distance over all input features and picks K=5 neighbors. "
438
- "In a real application, you would refine which features are most relevant, how to encode them, "
439
- "and how many neighbors to select.\n\n"
440
- "Below is a brief overview of each neighbor's input-feature values and one example label (`YOWRCONC`).\n\n"
441
- + "\n".join(nn_rows)
442
- )
443
-
444
- # F) Co-occurrence Plot from the previous example (kept for completeness)
445
- if all(col in df.columns for col in ["YMDEYR", "YMDERSUD5ANY", "YOWRCONC"]):
446
- co_occ_data = df.groupby(["YMDEYR", "YMDERSUD5ANY", "YOWRCONC"]).size().reset_index(name="count")
447
- fig_co_occ = px.bar(
448
- co_occ_data,
449
- x="YMDEYR",
450
- y="count",
451
- color="YOWRCONC",
452
- facet_col="YMDERSUD5ANY",
453
- title="Co-Occurrence Plot: YMDEYR and YMDERSUD5ANY vs YOWRCONC"
454
- )
455
  else:
456
- fig_co_occ = px.bar(title="Co-occurrence plot not available (check columns).")
457
-
458
- # =======================
459
- # RETURN EVERYTHING
460
- # We have 8 outputs:
461
- # 1) Prediction Results (Textbox)
462
- # 2) Mental Health Severity (Textbox)
463
- # 3) Total Patient Count (Markdown)
464
- # 4) Distribution Plot (for multiple input features vs. multiple labels)
465
- # 5) Nearest Neighbors Summary (Markdown)
466
- # 6) Co-Occurrence Plot
467
- # 7) Bar Chart for input features
468
- # 8) Bar Chart for predicted labels
469
- # =======================
470
  return (
471
- formatted_results,
472
- severity,
473
- total_patient_count_markdown,
474
- fig_dist,
475
- similar_patient_markdown,
476
- fig_co_occ,
477
- fig_bar_input,
478
- fig_bar_labels
479
  )
480
 
481
  ######################################
482
- # 5) MAPPING user-friendly text => numeric
483
  ######################################
484
  input_mapping = {
485
  'YNURSMDE': {"Yes": 1, "No": 0},
486
  'YMDEYR': {"Yes": 1, "No": 2},
487
  'YSOCMDE': {"Yes": 1, "No": 0},
488
- 'YMDESUD5ANYO': {"SUD only, no MDE": 1, "MDE only, no SUD": 2, "SUD and MDE": 3, "Neither SUD or MDE": 4},
489
  'YMSUD5YANY': {"Yes": 1, "No": 0},
490
- 'YUSUITHK': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
491
  'YMDETXRX': {"Yes": 1, "No": 0},
492
- 'YUSUITHKYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
493
  'YMDERSUD5ANY': {"Yes": 1, "No": 0},
494
- 'YUSUIPLNYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
495
  'YCOUNMDE': {"Yes": 1, "No": 0},
496
  'YPSY1MDE': {"Yes": 1, "No": 0},
497
  'YHLTMDE': {"Yes": 1, "No": 0},
498
  'YDOCMDE': {"Yes": 1, "No": 0},
499
  'YPSY2MDE': {"Yes": 1, "No": 0},
500
  'YMDEHARX': {"Yes": 1, "No": 0},
501
- 'LVLDIFMEM2': {"No Difficulty": 1, "Some difficulty": 2, "A lot of difficulty or cannot do at all": 3},
502
  'MDEIMPY': {"Yes": 1, "No": 2},
503
  'YMDEHPO': {"Yes": 1, "No": 0},
504
  'YMIMS5YANY': {"Yes": 1, "No": 0},
@@ -506,7 +374,7 @@ input_mapping = {
506
  'YMIUD5YANY': {"Yes": 1, "No": 0},
507
  'YMDEHPRX': {"Yes": 1, "No": 0},
508
  'YMIMI5YANY': {"Yes": 1, "No": 0},
509
- 'YUSUIPLN': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
510
  'YTXMDEYR': {"Yes": 1, "No": 0},
511
  'YMDEAUD5YR': {"Yes": 1, "No": 0},
512
  'YRXMDEYR': {"Yes": 1, "No": 0},
@@ -514,89 +382,93 @@ input_mapping = {
514
  }
515
 
516
  ######################################
517
- # 6) GRADIO INTERFACE
518
  ######################################
519
- # We have 8 outputs in total:
520
- # 1) Prediction Results
521
- # 2) Mental Health Severity
522
- # 3) Total Patient Count
523
- # 4) Distribution Plot
524
- # 5) Nearest Neighbors
525
- # 6) Co-Occurrence Plot
526
- # 7) Bar Chart for input features
527
- # 8) Bar Chart for predicted labels
528
-
529
  import gradio as gr
530
 
531
- # Define the inputs in the same order as function signature
532
- inputs = [
533
- gr.Dropdown(list(input_mapping['YMDEYR'].keys()), label="YMDEYR: PAST YEARS MAJOR DEPRESSIVE EPISODE"),
534
- gr.Dropdown(list(input_mapping['YMDERSUD5ANY'].keys()), label="YMDERSUD5ANY: MDE OR SUBSTANCE USE DISORDER - ANY"),
535
- gr.Dropdown(list(input_mapping['YMDEIMAD5YR'].keys()), label="YMDEIMAD5YR: MDE WITH SEV. IMP + ALCOHOL USE DISORDER"),
536
- gr.Dropdown(list(input_mapping['YMIMS5YANY'].keys()), label="YMIMS5YANY: MDE W/ SEV. IMP + SUBSTANCE USE DISORDER"),
537
- gr.Dropdown(list(input_mapping['YMDELT'].keys()), label="YMDELT: HAD MAJOR DEPRESSIVE EPISODE IN LIFETIME"),
538
- gr.Dropdown(list(input_mapping['YMDEHARX'].keys()), label="YMDEHARX: SAW HEALTH PROF + MEDS FOR MDE"),
539
- gr.Dropdown(list(input_mapping['YMDEHPRX'].keys()), label="YMDEHPRX: SAW HEALTH PROF OR MEDS FOR MDE"),
540
- gr.Dropdown(list(input_mapping['YMDETXRX'].keys()), label="YMDETXRX: RECEIVED TREATMENT/COUNSELING FOR MDE"),
541
- gr.Dropdown(list(input_mapping['YMDEHPO'].keys()), label="YMDEHPO: SAW HEALTH PROF ONLY FOR MDE"),
542
- gr.Dropdown(list(input_mapping['YMDEAUD5YR'].keys()), label="YMDEAUD5YR: MDE + ALCOHOL USE DISORDER"),
543
- gr.Dropdown(list(input_mapping['YMIMI5YANY'].keys()), label="YMIMI5YANY: MDE W/ ILL DRUG USE DISORDER"),
544
- gr.Dropdown(list(input_mapping['YMIUD5YANY'].keys()), label="YMIUD5YANY: MDE + ILL DRUG USE DISORDER"),
545
- gr.Dropdown(list(input_mapping['YMDESUD5ANYO'].keys()), label="YMDESUD5ANYO: MDE vs. SUD vs. BOTH vs. NEITHER"),
546
 
547
  # Consultations
548
- gr.Dropdown(list(input_mapping['YNURSMDE'].keys()), label="YNURSMDE: SAW/TALK TO NURSE/OT ABOUT MDE"),
549
- gr.Dropdown(list(input_mapping['YSOCMDE'].keys()), label="YSOCMDE: SAW/TALK TO SOCIAL WORKER ABOUT MDE"),
550
- gr.Dropdown(list(input_mapping['YCOUNMDE'].keys()), label="YCOUNMDE: SAW/TALK TO COUNSELOR ABOUT MDE"),
551
- gr.Dropdown(list(input_mapping['YPSY1MDE'].keys()), label="YPSY1MDE: SAW/TALK TO PSYCHOLOGIST ABOUT MDE"),
552
- gr.Dropdown(list(input_mapping['YPSY2MDE'].keys()), label="YPSY2MDE: SAW/TALK TO PSYCHIATRIST ABOUT MDE"),
553
- gr.Dropdown(list(input_mapping['YHLTMDE'].keys()), label="YHLTMDE: SAW/TALK TO HEALTH PROFESSIONAL ABOUT MDE"),
554
- gr.Dropdown(list(input_mapping['YDOCMDE'].keys()), label="YDOCMDE: SAW/TALK TO GP/FAMILY MD ABOUT MDE"),
555
- gr.Dropdown(list(input_mapping['YTXMDEYR'].keys()), label="YTXMDEYR: SAW/TALK DOCTOR/HEALTH PROF FOR MDE"),
556
-
557
- # Suicidal thoughts/plans
558
- gr.Dropdown(list(input_mapping['YUSUITHKYR'].keys()), label="YUSUITHKYR: SERIOUSLY THOUGHT ABOUT KILLING SELF"),
559
- gr.Dropdown(list(input_mapping['YUSUIPLNYR'].keys()), label="YUSUIPLNYR: MADE PLANS TO KILL SELF"),
560
- gr.Dropdown(list(input_mapping['YUSUITHK'].keys()), label="YUSUITHK: THINK ABOUT KILLING SELF (12 MONTHS)"),
561
- gr.Dropdown(list(input_mapping['YUSUIPLN'].keys()), label="YUSUIPLN: MADE PLANS TO KILL SELF (12 MONTHS)"),
562
 
563
  # Impairments
564
- gr.Dropdown(list(input_mapping['MDEIMPY'].keys()), label="MDEIMPY: MDE W/ SEVERE ROLE IMPAIRMENT"),
565
- gr.Dropdown(list(input_mapping['LVLDIFMEM2'].keys()), label="LVLDIFMEM2: LEVEL OF DIFFICULTY REMEMBERING/CONCENTRATING"),
566
- gr.Dropdown(list(input_mapping['YMSUD5YANY'].keys()), label="YMSUD5YANY: MDE + SUBSTANCE USE DISORDER - ANY"),
567
- gr.Dropdown(list(input_mapping['YRXMDEYR'].keys()), label="YRXMDEYR: USED MEDS FOR MDE IN PAST YEAR"),
568
  ]
569
 
570
- # The 8 outputs
 
 
 
 
 
 
 
 
 
 
 
571
  outputs = [
572
- gr.Textbox(label="Prediction Results", lines=30),
573
- gr.Textbox(label="Mental Health Severity", lines=4),
574
  gr.Markdown(label="Total Patient Count"),
575
- gr.Plot(label="Distribution Plot (Sample of Features & Labels)"),
576
- gr.Markdown(label="Nearest Neighbors Summary"),
577
- gr.Plot(label="Co-Occurrence Plot"),
578
- gr.Plot(label="Number of Patients per Input Feature"),
579
- gr.Plot(label="Number of Patients with Predicted Labels")
580
  ]
581
 
582
  ######################################
583
- # 7) WRAPPER FOR PREDICT
584
  ######################################
585
  def predict_with_text(
 
586
  YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
587
  YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
588
  YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
589
  YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
590
- YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
 
591
  ):
592
- # Validate user inputs
593
- if not validate_inputs(
594
  YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
595
  YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
596
  YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
597
  YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
598
  YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
599
- ):
 
600
  return (
601
  "Please select all required fields.",
602
  "Validation Error",
@@ -608,7 +480,7 @@ def predict_with_text(
608
  None
609
  )
610
 
611
- # Map user-friendly text to numeric
612
  user_inputs = {
613
  'YNURSMDE': input_mapping['YNURSMDE'][YNURSMDE],
614
  'YMDEYR': input_mapping['YMDEYR'][YMDEYR],
@@ -641,36 +513,34 @@ def predict_with_text(
641
  'YMDELT': input_mapping['YMDELT'][YMDELT]
642
  }
643
 
644
- # Pass these mapped values into the core predict function
645
- return predict(**user_inputs)
646
 
647
- # Optional custom CSS
648
  custom_css = """
649
- .gradio-container * {
650
- color: #1B1212 !important;
651
- }
652
- .gradio-container .form .form-group label {
653
- color: #1B1212 !important;
654
- }
655
- .gradio-container .output-textbox,
656
- .gradio-container .output-textbox textarea {
657
- color: #1B1212 !important;
658
- }
659
- .gradio-container .label,
660
- .gradio-container .input-label {
661
- color: #1B1212 !important;
662
- }
663
  """
664
 
665
- ######################################
666
- # 8) LAUNCH
667
- ######################################
668
  interface = gr.Interface(
669
- fn=predict_with_text,
670
  inputs=inputs,
671
- outputs=outputs,
672
- title="Adolescents with Substance Use Mental Health Screening (NSDUH Data)",
673
- css=custom_css
674
  )
675
 
676
  if __name__ == "__main__":
 
1
  import pickle
 
2
  import gradio as gr
3
  import numpy as np
4
  import pandas as pd
5
  import plotly.express as px
6
 
7
+ # Load the training CSV once.
8
  df = pd.read_csv("X_train_Y_Train_merged_train.csv")
9
 
10
  ######################################
 
16
  self.model_filenames = model_filenames
17
  self.models = self.load_models()
18
  # Mapping from label column to human-readable strings for 0/1
 
19
  self.prediction_map = {
20
+ "YOWRCONC": ["No difficulty concentrating", "Had difficulty concentrating"],
21
+ "YOSEEDOC": ["No need to see doctor", "Needed to see doctor"],
22
+ "YOWRHRS": ["No trouble sleeping", "Had trouble sleeping"],
23
+ "YO_MDEA5": ["Others didn't notice restlessness", "Others noticed restlessness"],
24
+ "YOWRCHR": ["Not sad beyond cheering", "Felt so sad no one could cheer up"],
25
+ "YOWRLSIN": ["Never felt bored/lost interest", "Felt bored/lost interest"],
26
+ "YODPPROB": ["No other problems for 2+ weeks", "Had other problems for 2+ weeks"],
27
+ "YOWRPROB": ["No worst time feeling", "Felt worst time ever"],
28
+ "YODPR2WK": ["No depressed feelings for 2+ wks", "Depressed feelings for 2+ wks"],
29
+ "YOWRDEPR": ["Not sad or depressed most days", "Sad or depressed most days"],
30
+ "YODPDISC": ["Mood not depressed overall", "Mood depressed overall (discrepancy)"],
31
+ "YOLOSEV": ["Did not lose interest in activities", "Lost interest in activities"],
32
+ "YOWRDCSN": ["Could make decisions", "Could not make decisions"],
33
+ "YODSMMDE": ["No 2+ week depression episodes", "Had 2+ week depression episodes"],
34
+ "YO_MDEA3": ["No appetite/weight changes", "Yes appetite/weight changes"],
35
+ "YODPLSIN": ["Never bored/lost interest", "Often bored/lost interest"],
36
+ "YOWRELES": ["Did not eat less", "Ate less than usual"],
37
+ "YODSCEV": ["Fewer severe symptoms", "More severe symptoms"],
38
+ "YOPB2WK": ["No uneasy feelings daily 2+ wks", "Uneasy feelings daily 2+ wks"],
39
+ "YO_MDEA2": ["No issues physical/mental daily", "Issues physical/mental daily 2+ wks"]
 
 
 
 
 
 
 
 
40
  }
41
 
42
  def load_models(self):
43
  models = []
44
+ for fn in self.model_filenames:
45
+ filepath = self.model_path + fn
46
+ with open(filepath, "rb") as file:
47
+ models.append(pickle.load(file))
 
48
  return models
49
 
50
  def make_predictions(self, user_input):
51
+ """Return list of numpy arrays, each array either [0] or [1]."""
52
+ preds = []
53
+ for m in self.models:
54
+ out = m.predict(user_input)
55
+ preds.append(np.array(out).flatten())
56
+ return preds
 
 
 
 
57
 
58
  def get_majority_vote(self, predictions):
59
+ """Flatten all predictions and find 0 or 1 with majority."""
60
+ combined = np.concatenate(predictions)
61
+ return np.bincount(combined).argmax()
62
+
63
  def evaluate_severity(self, majority_vote_count):
64
+ """Heuristic: Based on 16 total models, 0-4=Very Low, 5-8=Low, 9-12=Moderate, 13-16=Severe."""
65
  if majority_vote_count >= 13:
66
  return "Mental health severity: Severe"
67
  elif majority_vote_count >= 9:
 
72
  return "Mental health severity: Very Low"
73
 
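For reference, the voting pipeline above boils down to three NumPy calls. A minimal standalone sketch, assuming 16 binary models as configured below (the sample predictions are made up):

    import numpy as np

    # Hypothetical outputs from 16 single-row binary classifiers.
    predictions = [np.array([1]), np.array([1]), np.array([1])] + [np.array([0])] * 13

    combined = np.concatenate(predictions)           # shape (16,)
    majority_class = np.bincount(combined).argmax()  # most common class -> 0 here
    positive_votes = int((combined == 1).sum())      # 3 -> "Very Low" bucket (0-4)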
74
  ######################################
75
+ # 2) CONFIGURATIONS
76
  ######################################
77
  model_filenames = [
78
  "YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
 
87
  # 3) INPUT VALIDATION
88
  ######################################
89
  def validate_inputs(*args):
90
+ # Just ensure all required (non-co-occurrence) fields are picked
91
  for arg in args:
92
+ if arg == '' or arg is None:
93
  return False
94
  return True
95
 
96
  ######################################
97
+ # 4) PREDICTION FUNCTION
98
  ######################################
99
  def predict(
100
+ # Original required features
101
  YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
102
  YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
103
  YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
104
  YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
105
+ YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR,
106
+ # **New** optional picks for co-occurrence
107
+ co_occ_feature1, co_occ_feature2, co_occ_label
108
  ):
109
+ """
110
+ Main function that:
111
+ - Predicts with the 16 models
112
+ - Aggregates results
113
+ - Produces severity
114
+ - Returns distribution & bar charts
115
+ - Finds K=2 Nearest Neighbors
116
+ - Produces *one* co-occurrence plot based on user-chosen columns
117
+ """
118
+
119
+ # 1) Build user_input for models
120
  user_input_data = {
121
  'YNURSMDE': [int(YNURSMDE)],
122
  'YMDEYR': [int(YMDEYR)],
 
150
  }
151
  user_input = pd.DataFrame(user_input_data)
152
 
153
+ # 2) Model Predictions
154
  predictions = predictor.make_predictions(user_input)
 
 
155
  majority_vote = predictor.get_majority_vote(predictions)
156
+ majority_vote_count = np.sum(np.concatenate(predictions) == 1)
157
  severity = predictor.evaluate_severity(majority_vote_count)
158
 
159
+ # 3) Summarize textual results
160
+ results_by_group = {
 
161
  "Concentration_and_Decision_Making": [],
162
  "Sleep_and_Energy_Levels": [],
163
  "Mood_and_Emotional_State": [],
164
  "Appetite_and_Weight_Changes": [],
165
  "Duration_and_Severity_of_Depression_Symptoms": []
166
  }
167
+ group_map = {
 
168
  "Concentration_and_Decision_Making": ["YOWRCONC", "YOWRDCSN"],
169
  "Sleep_and_Energy_Levels": ["YOWRHRS", "YO_MDEA5", "YOWRELES", "YO_MDEA2"],
170
  "Mood_and_Emotional_State": ["YOWRCHR", "YOWRLSIN", "YOWRDEPR", "YODPDISC",
 
175
  "YOPB2WK"]
176
  }
177
 
178
+ # Convert each model's 0/1 to text
179
+ grouped_output_lines = []
180
+ for i, pred_array in enumerate(predictions):
181
+ col_name = model_filenames[i].split(".")[0] # e.g., "YOWRCONC"
182
+ val = pred_array[0]
183
+ if col_name in predictor.prediction_map and val in [0, 1]:
184
+ text = predictor.prediction_map[col_name][val]
185
+ out_line = f"{col_name}: {text}"
186
  else:
187
+ out_line = f"{col_name}: Prediction={val}"
188
+
189
+ # Find group
190
+ placed = False
191
+ for g_key, g_cols in group_map.items():
192
+ if col_name in g_cols:
193
+ results_by_group[g_key].append(out_line)
194
+ placed = True
 
195
  break
196
+ if not placed:
197
+ # If it didn't fall into any known group, skip or handle
198
  pass
199
 
200
+ # Format into a single string
201
+ for group_label, pred_lines in results_by_group.items():
202
+ if pred_lines:
203
+ grouped_output_lines.append(f"Group {group_label}:")
204
+ grouped_output_lines.append("\n".join(pred_lines))
205
+ grouped_output_lines.append("")
206
+
207
+ if len(grouped_output_lines) == 0:
208
+ final_result_text = "No predictions made. Check inputs."
209
+ else:
210
+ final_result_text = "\n".join(grouped_output_lines).strip()
211
+
212
+ # 4) Additional Features
213
+ # A) Total patient count
214
  total_patients = len(df)
215
+ total_count_md = (
216
  "### Total Patient Count\n"
217
+ f"**{total_patients}** total patients in the dataset."
 
218
  )
219
 
220
+ # B) Bar chart of how many have same inputs
221
  input_counts = {}
222
+ for c in user_input_data.keys():
223
+ v = user_input_data[c][0]
224
+ input_counts[c] = len(df[df[c] == v])
225
+ df_input_counts = pd.DataFrame({"Feature": list(input_counts.keys()), "Count": list(input_counts.values())})
226
+ fig_input_bar = px.bar(
227
+ df_input_counts,
228
+ x="Feature",
229
+ y="Count",
230
+ title="Number of Patients with the Same Value for Each Input Feature"
 
 
 
 
 
231
  )
232
+ fig_input_bar.update_layout(xaxis={"categoryorder": "total descending"})
233
 
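As an aside, len(df[df[c] == v]) materializes a filtered copy just to measure it; a boolean sum gives the same count more cheaply. A small sketch (the column and value are illustrative only):

    # Equivalent row count without building the filtered DataFrame.
    same_count = int((df["YMDEYR"] == 1).sum())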
234
+ # C) Bar chart for predicted labels
235
  label_counts = {}
236
+ for i, pred_array in enumerate(predictions):
237
+ col_name = model_filenames[i].split(".")[0]
238
+ val = pred_array[0]
239
+ if val in [0,1]:
240
+ label_counts[col_name] = len(df[df[col_name] == val])
241
+
242
  if len(label_counts) > 0:
243
+ df_label_counts = pd.DataFrame({
244
+ "Label Column": list(label_counts.keys()),
245
  "Count": list(label_counts.values())
246
  })
247
+ fig_label_bar = px.bar(
248
+ df_label_counts,
249
+ x="Label Column",
250
  y="Count",
251
+ title="Number of Patients with the Same Predicted Label"
 
252
  )
253
  else:
254
+ fig_label_bar = px.bar(title="No valid predicted labels to display")
255
+
256
+ # D) Simple Distribution Plot (demo for first 3 labels & 4 inputs)
257
+ # (Unchanged from prior approach; you can remove if you prefer.)
258
+ sample_feats = list(user_input_data.keys())[:4]
259
+ sample_labels = [fn.split(".")[0] for fn in model_filenames[:3]]
260
+ dist_segments = []
261
+ for feat in sample_feats:
 
 
 
 
 
 
 
262
  if feat not in df.columns:
263
  continue
264
+ for lbl in sample_labels:
265
  if lbl not in df.columns:
266
  continue
267
+ temp_g = df.groupby([feat,lbl]).size().reset_index(name="count")
268
+ temp_g["feature"] = feat
269
+ temp_g["label"] = lbl
270
+ dist_segments.append(temp_g)
271
+ if len(dist_segments) > 0:
272
+ big_dist_df = pd.concat(dist_segments, ignore_index=True)
273
  fig_dist = px.bar(
274
  big_dist_df,
275
+ x=big_dist_df.columns[0],
276
  y="count",
277
+ color=big_dist_df.columns[1],
278
  facet_row="feature",
279
  facet_col="label",
280
+ title="Sample Distribution Plot (first 4 features vs first 3 labels)"
 
 
 
 
281
  )
282
+ fig_dist.update_layout(height=700)
283
  else:
284
+ fig_dist = px.bar(title="No distribution plot generated (columns not found).")
285
+
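The faceted chart above is driven by a "long" count table with one row per (feature value, label value) pair. A toy sketch of the same data shape, using invented column names:

    import pandas as pd
    import plotly.express as px

    toy = pd.DataFrame({"feat_a": [0, 0, 1, 1], "label_x": [0, 1, 1, 1]})
    counts = toy.groupby(["feat_a", "label_x"]).size().reset_index(name="count")
    counts["feature"] = "feat_a"   # facet_row key, as in the app code
    counts["label"] = "label_x"    # facet_col key
    fig = px.bar(counts, x="feat_a", y="count", color="label_x",
                 facet_row="feature", facet_col="label")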
286
+ # E) Nearest Neighbors with K=2
287
+ # We keep K=2, but for *all* label columns, we show their actual 0/1 or mapped text
288
+ # (same approach as before).
289
+ # ... [omitted here for brevity, or replicate your existing code for K=2 nearest neighbors] ...
290
+ # We'll do a short version to keep focus on co-occ:
291
+ # ---------------------------------------------------------------------
292
+ # Build Hamming distance across user_input columns
293
+ columns_for_distance = list(user_input.columns)
294
+ sub_df = df[columns_for_distance].copy()
295
+ user_row = user_input.iloc[0]
296
  distances = []
297
+ for idx, row_ in sub_df.iterrows():
298
+ dist_ = sum(row_[col] != user_row[col] for col in columns_for_distance)
299
+ distances.append(dist_)
300
+ df_dist = df.copy()
301
+ df_dist["distance"] = distances
302
+ # Sort ascending, pick K=2
303
+ K = 2
304
+ nearest_neighbors = df_dist.sort_values("distance", ascending=True).head(K)
305
+
306
+ # Summarize in Markdown
307
+ nn_md = ["### Nearest Neighbors (K=2)"]
308
+ nn_md.append("(In a real application, you'd refine which features matter, how to encode them, etc.)\n")
309
+ for irow in nearest_neighbors.itertuples():
310
+ nn_md.append(f"- **Neighbor ID {irow.Index}**: distance={irow.distance}")
311
+ nn_md_str = "\n".join(nn_md)
312
+
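The iterrows() loop above computes a Hamming distance one row at a time; pandas can do the same comparison in a single vectorized step. A sketch, assuming df, user_input and columns_for_distance as defined above:

    # Column-aligned comparison of every patient row against the user's row.
    mismatches = df[columns_for_distance].ne(user_input.iloc[0][columns_for_distance])
    distances = mismatches.sum(axis=1)                      # Hamming distance per row
    nearest = df.assign(distance=distances).nsmallest(2, "distance")  # K = 2, as below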
313
+ # F) Co-occurrence Plot for user-chosen feature1, feature2, label
314
+ # If the user picks "None" or doesn't pick valid columns, skip or fallback.
315
+ if (co_occ_feature1 is not None and co_occ_feature1 != "None" and
316
+ co_occ_feature2 is not None and co_occ_feature2 != "None" and
317
+ co_occ_label is not None and co_occ_label != "None"):
318
+ # Check if these columns are in df
319
+ if (co_occ_feature1 in df.columns and
320
+ co_occ_feature2 in df.columns and
321
+ co_occ_label in df.columns):
322
+ # Group by [co_occ_feature1, co_occ_feature2, co_occ_label]
323
+ co_data = df.groupby([co_occ_feature1, co_occ_feature2, co_occ_label]).size().reset_index(name="count")
324
+ fig_co_occ = px.bar(
325
+ co_data,
326
+ x=co_occ_feature1,
327
+ y="count",
328
+ color=co_occ_label,
329
+ facet_col=co_occ_feature2,
330
+ title=f"Co-occurrence: {co_occ_feature1} & {co_occ_feature2} vs {co_occ_label}"
331
+ )
332
+ else:
333
+ fig_co_occ = px.bar(title="One or more selected columns not found in dataframe.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  else:
335
+ fig_co_occ = px.bar(title="No co-occurrence plot (choose two features + one label).")
336
+
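Stripped of the Gradio plumbing, the user-driven co-occurrence chart is a guarded three-column group-by. A minimal sketch with hypothetical dropdown picks:

    # Hypothetical selections; in the app these arrive via the three new dropdowns.
    f1, f2, lbl = "YMDEYR", "YMDERSUD5ANY", "YOWRCONC"
    if all(c in df.columns for c in (f1, f2, lbl)):
        co = df.groupby([f1, f2, lbl]).size().reset_index(name="count")
        fig = px.bar(co, x=f1, y="count", color=lbl, facet_col=f2)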
337
+ # Return all 8 outputs
338
  return (
339
+ final_result_text, # (1) Predictions
340
+ severity, # (2) Severity
341
+ total_count_md, # (3) Total patient count
342
+ fig_dist, # (4) Distribution Plot
343
+ nn_md_str, # (5) Nearest Neighbors
344
+ fig_co_occ, # (6) Co-occurrence
345
+ fig_input_bar, # (7) Bar Chart (input features)
346
+ fig_label_bar # (8) Bar Chart (labels)
347
  )
348
 
349
  ######################################
350
+ # 5) MAPPING (user -> int)
351
  ######################################
352
  input_mapping = {
353
  'YNURSMDE': {"Yes": 1, "No": 0},
354
  'YMDEYR': {"Yes": 1, "No": 2},
355
  'YSOCMDE': {"Yes": 1, "No": 0},
356
+ 'YMDESUD5ANYO': {"SUD only": 1, "MDE only": 2, "SUD & MDE": 3, "Neither": 4},
357
  'YMSUD5YANY': {"Yes": 1, "No": 0},
358
+ 'YUSUITHK': {"Yes": 1, "No": 2, "Unsure": 3, "Don't want to answer": 4},
359
  'YMDETXRX': {"Yes": 1, "No": 0},
360
+ 'YUSUITHKYR': {"Yes": 1, "No": 2, "Unsure": 3, "Don't want to answer": 4},
361
  'YMDERSUD5ANY': {"Yes": 1, "No": 0},
362
+ 'YUSUIPLNYR': {"Yes": 1, "No": 2, "Unsure": 3, "Don't want to answer": 4},
363
  'YCOUNMDE': {"Yes": 1, "No": 0},
364
  'YPSY1MDE': {"Yes": 1, "No": 0},
365
  'YHLTMDE': {"Yes": 1, "No": 0},
366
  'YDOCMDE': {"Yes": 1, "No": 0},
367
  'YPSY2MDE': {"Yes": 1, "No": 0},
368
  'YMDEHARX': {"Yes": 1, "No": 0},
369
+ 'LVLDIFMEM2': {"No Difficulty": 1, "Some Difficulty": 2, "A lot or cannot do": 3},
370
  'MDEIMPY': {"Yes": 1, "No": 2},
371
  'YMDEHPO': {"Yes": 1, "No": 0},
372
  'YMIMS5YANY': {"Yes": 1, "No": 0},
 
374
  'YMIUD5YANY': {"Yes": 1, "No": 0},
375
  'YMDEHPRX': {"Yes": 1, "No": 0},
376
  'YMIMI5YANY': {"Yes": 1, "No": 0},
377
+ 'YUSUIPLN': {"Yes": 1, "No": 2, "Unsure": 3, "Don't want to answer": 4},
378
  'YTXMDEYR': {"Yes": 1, "No": 0},
379
  'YMDEAUD5YR': {"Yes": 1, "No": 0},
380
  'YRXMDEYR': {"Yes": 1, "No": 0},
 
382
  }
383
 
384
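Each dropdown hands back the human-readable key, and the wrapper below looks it up in input_mapping to get the numeric NSDUH code. A small sketch of that lookup with made-up answers:

    # Example: turning two dropdown answers into model-ready integers.
    answers = {"YMDEYR": "Yes", "YUSUITHK": "Unsure"}
    numeric = {col: input_mapping[col][choice] for col, choice in answers.items()}
    # -> {"YMDEYR": 1, "YUSUITHK": 3}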
  ######################################
385
+ # 6) THE GRADIO INTERFACE
386
  ######################################
 
 
 
 
 
 
 
 
 
 
387
  import gradio as gr
388
 
389
+ # (A) The original required inputs
390
+ original_inputs = [
391
+ gr.Dropdown(list(input_mapping['YMDEYR'].keys()), label="YMDEYR: Past Year MDE?"),
392
+ gr.Dropdown(list(input_mapping['YMDERSUD5ANY'].keys()), label="YMDERSUD5ANY: MDE or SUD - ANY?"),
393
+ gr.Dropdown(list(input_mapping['YMDEIMAD5YR'].keys()), label="YMDEIMAD5YR: MDE + ALCOHOL?"),
394
+ gr.Dropdown(list(input_mapping['YMIMS5YANY'].keys()), label="YMIMS5YANY: MDE + SUBSTANCE?"),
395
+ gr.Dropdown(list(input_mapping['YMDELT'].keys()), label="YMDELT: MDE in Lifetime?"),
396
+ gr.Dropdown(list(input_mapping['YMDEHARX'].keys()), label="YMDEHARX: Saw Health Prof + Meds?"),
397
+ gr.Dropdown(list(input_mapping['YMDEHPRX'].keys()), label="YMDEHPRX: Saw Health Prof or Meds?"),
398
+ gr.Dropdown(list(input_mapping['YMDETXRX'].keys()), label="YMDETXRX: Received Treatment?"),
399
+ gr.Dropdown(list(input_mapping['YMDEHPO'].keys()), label="YMDEHPO: Saw Health Prof Only?"),
400
+ gr.Dropdown(list(input_mapping['YMDEAUD5YR'].keys()), label="YMDEAUD5YR: MDE + Alcohol Use?"),
401
+ gr.Dropdown(list(input_mapping['YMIMI5YANY'].keys()), label="YMIMI5YANY: MDE + ILL Drug Use?"),
402
+ gr.Dropdown(list(input_mapping['YMIUD5YANY'].keys()), label="YMIUD5YANY: MDE + ILL Drug Use?"),
403
+ gr.Dropdown(list(input_mapping['YMDESUD5ANYO'].keys()), label="YMDESUD5ANYO: MDE vs SUD vs BOTH vs NEITHER"),
404
 
405
  # Consultations
406
+ gr.Dropdown(list(input_mapping['YNURSMDE'].keys()), label="YNURSMDE: Nurse/OT about MDE?"),
407
+ gr.Dropdown(list(input_mapping['YSOCMDE'].keys()), label="YSOCMDE: Social Worker?"),
408
+ gr.Dropdown(list(input_mapping['YCOUNMDE'].keys()), label="YCOUNMDE: Counselor?"),
409
+ gr.Dropdown(list(input_mapping['YPSY1MDE'].keys()), label="YPSY1MDE: Psychologist?"),
410
+ gr.Dropdown(list(input_mapping['YPSY2MDE'].keys()), label="YPSY2MDE: Psychiatrist?"),
411
+ gr.Dropdown(list(input_mapping['YHLTMDE'].keys()), label="YHLTMDE: Health Prof?"),
412
+ gr.Dropdown(list(input_mapping['YDOCMDE'].keys()), label="YDOCMDE: GP/Family MD?"),
413
+ gr.Dropdown(list(input_mapping['YTXMDEYR'].keys()), label="YTXMDEYR: Doctor/Health Prof?"),
414
+
415
+ # Suicidal
416
+ gr.Dropdown(list(input_mapping['YUSUITHKYR'].keys()), label="YUSUITHKYR: Serious Suicide Thoughts?"),
417
+ gr.Dropdown(list(input_mapping['YUSUIPLNYR'].keys()), label="YUSUIPLNYR: Made Plans?"),
418
+ gr.Dropdown(list(input_mapping['YUSUITHK'].keys()), label="YUSUITHK: Suicide Thoughts (12 mo)?"),
419
+ gr.Dropdown(list(input_mapping['YUSUIPLN'].keys()), label="YUSUIPLN: Made Plans (12 mo)?"),
420
 
421
  # Impairments
422
+ gr.Dropdown(list(input_mapping['MDEIMPY'].keys()), label="MDEIMPY: Severe Role Impairment?"),
423
+ gr.Dropdown(list(input_mapping['LVLDIFMEM2'].keys()), label="LVLDIFMEM2: Difficulty Remembering/Concentrating?"),
424
+ gr.Dropdown(list(input_mapping['YMSUD5YANY'].keys()), label="YMSUD5YANY: MDE + Substance?"),
425
+ gr.Dropdown(list(input_mapping['YRXMDEYR'].keys()), label="YRXMDEYR: Used Meds for MDE (12 mo)?"),
426
  ]
427
 
428
+ # (B) The new co-occurrence inputs
429
+ # We'll give them defaults of "None" to indicate no selection.
430
+ all_cols = ["None"] + df.columns.tolist() # 'None' plus the actual columns from your df
431
+ co_occ_feature1 = gr.Dropdown(all_cols, label="Co-Occ Feature 1", value="None")
432
+ co_occ_feature2 = gr.Dropdown(all_cols, label="Co-Occ Feature 2", value="None")
433
+ all_label_cols = ["None"] + list(predictor.prediction_map.keys()) # e.g., "YOWRCONC", "YOWRHRS", ...
434
+ co_occ_label = gr.Dropdown(all_label_cols, label="Co-Occ Label", value="None")
435
+
436
+ # Combine them into a single input list
437
+ inputs = original_inputs + [co_occ_feature1, co_occ_feature2, co_occ_label]
438
+
439
+ # 8 outputs as before
440
  outputs = [
441
+ gr.Textbox(label="Prediction Results", lines=15),
442
+ gr.Textbox(label="Mental Health Severity", lines=2),
443
  gr.Markdown(label="Total Patient Count"),
444
+ gr.Plot(label="Distribution Plot (Sample)"),
445
+ gr.Markdown(label="Nearest Neighbors (K=2)"),
446
+ gr.Plot(label="Co-occurrence Plot"),
447
+ gr.Plot(label="Same Value Bar (Inputs)"),
448
+ gr.Plot(label="Predicted Label Bar")
449
  ]
450
 
451
  ######################################
452
+ # 7) WRAPPER
453
  ######################################
454
  def predict_with_text(
455
+ # match the function signature exactly (29 required + 3 for co-occ)
456
  YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
457
  YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
458
  YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
459
  YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
460
+ YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR,
461
+ co_occ_feature1, co_occ_feature2, co_occ_label
462
  ):
463
+ # Validate the original 29 fields
464
+ valid = validate_inputs(
465
  YMDEYR, YMDERSUD5ANY, YMDEIMAD5YR, YMIMS5YANY, YMDELT, YMDEHARX,
466
  YMDEHPRX, YMDETXRX, YMDEHPO, YMDEAUD5YR, YMIMI5YANY, YMIUD5YANY,
467
  YMDESUD5ANYO, YNURSMDE, YSOCMDE, YCOUNMDE, YPSY1MDE, YPSY2MDE,
468
  YHLTMDE, YDOCMDE, YTXMDEYR, YUSUITHKYR, YUSUIPLNYR, YUSUITHK,
469
  YUSUIPLN, MDEIMPY, LVLDIFMEM2, YMSUD5YANY, YRXMDEYR
470
+ )
471
+ if not valid:
472
  return (
473
  "Please select all required fields.",
474
  "Validation Error",
 
480
  None
481
  )
482
 
483
+ # Map to numeric
484
  user_inputs = {
485
  'YNURSMDE': input_mapping['YNURSMDE'][YNURSMDE],
486
  'YMDEYR': input_mapping['YMDEYR'][YMDEYR],
 
513
  'YMDELT': input_mapping['YMDELT'][YMDELT]
514
  }
515
 
516
+ # Call the core predict function with the co-occ choices as well
517
+ return predict(
518
+ **user_inputs,
519
+ co_occ_feature1=co_occ_feature1,
520
+ co_occ_feature2=co_occ_feature2,
521
+ co_occ_label=co_occ_label
522
+ )
523
+
524
 
 
525
  custom_css = """
526
+ .gradio-container * {
527
+ color: #1B1212 !important;
528
+ }
529
  """
530
 
 
 
 
531
  interface = gr.Interface(
532
+ fn=predict_with_text,
533
  inputs=inputs,
534
+ outputs=outputs,
535
+ title="Mental Health Screening (NSDUH) with Selective Co-Occurrence",
536
+ css=custom_css,
537
+ description="""
538
+ **Instructions**:
539
+ 1. Fill out all required fields regarding MDE/Substance Use/Consultations/Suicidal/Impairments.
540
+ 2. (Optional) Choose 2 features and 1 label for the *Co-occurrence* plot.
541
+ - If you do not select them (or leave them as "None"), that plot will be skipped.
542
+ 3. Click "Submit" to get predictions, severity, distribution plots, nearest neighbors, and your custom co-occurrence chart.
543
+ """
544
  )
545
 
546
  if __name__ == "__main__":
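For completeness: a gr.Interface built this way is normally served by calling launch() inside the main guard; the call below is an assumption for illustration, not part of this commit.

    interface.launch()  # assumed launch call; options such as share=True can be added as needed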