import pickle
import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
######################################
# 1) LOAD DATA & MODELS
######################################
# Load your dataset
df = pd.read_csv("X_train_test_combined_dataset_Filtered_dataset.csv")
# Ensure 'YMDESUD5ANYO' exists in your DataFrame
if 'YMDESUD5ANYO' not in df.columns:
raise ValueError("The column 'YMDESUD5ANYO' is missing from the dataset. Please check your CSV file.")
# List of model filenames
model_filenames = [
"YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
"YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
"YODPDISC.pkl", "YOLOSEV.pkl", "YOWRDCSN.pkl", "YODSMMDE.pkl",
"YO_MDEA3.pkl", "YODPLSIN.pkl", "YOWRELES.pkl", "YOPB2WK.pkl"
]
model_path = "models/"
######################################
# 2) MODEL PREDICTOR
######################################
class ModelPredictor:
def __init__(self, model_path, model_filenames):
self.model_path = model_path
self.model_filenames = model_filenames
self.models = self.load_models()
# Mapping each label (column) to textual meaning for 0/1
self.prediction_map = {
"YOWRCONC": ["Did NOT have difficulty concentrating", "Had difficulty concentrating"],
"YOSEEDOC": ["Did NOT feel the need to see a doctor", "Felt the need to see a doctor"],
"YO_MDEA5": ["No restlessness/lethargy noticed", "Others noticed restlessness/lethargy"],
"YOWRLSIN": ["Did NOT feel bored/lose interest", "Felt bored/lost interest"],
"YODPPROB": ["No other problems for 2+ weeks", "Had other problems for 2+ weeks"],
"YOWRPROB": ["No 'worst time ever' feeling", "Had 'worst time ever' feeling"],
"YODPR2WK": ["No depressed feelings for 2+ wks", "Had depressed feelings for 2+ wks"],
"YOWRDEPR": ["Did NOT feel sad/depressed daily", "Felt sad/depressed mostly everyday"],
"YODPDISC": ["Overall mood not sad/depressed", "Overall mood was sad/depressed"],
"YOLOSEV": ["Did NOT lose interest in things", "Lost interest in enjoyable things"],
"YOWRDCSN": ["Was able to make decisions", "Was unable to make decisions"],
"YODSMMDE": ["No 2+ wks depression symptoms", "Had 2+ wks depression symptoms"],
"YO_MDEA3": ["No appetite/weight changes", "Had changes in appetite/weight"],
"YODPLSIN": ["Never lost interest/felt bored", "Lost interest/felt bored"],
"YOWRELES": ["Did NOT eat less than usual", "Ate less than usual"],
"YOPB2WK": ["No uneasy feelings 2+ weeks", "Uneasy feelings 2+ weeks"]
}
def load_models(self):
loaded = []
for fname in self.model_filenames:
try:
with open(self.model_path + fname, "rb") as f:
model = pickle.load(f)
loaded.append(model)
except FileNotFoundError:
raise FileNotFoundError(f"Model file '{fname}' not found in path '{self.model_path}'.")
except Exception as e:
raise Exception(f"Error loading model '{fname}': {e}")
return loaded
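    # Note: files are opened as `model_path + fname`, so model_path is assumed
    # to end with a slash; os.path.join(model_path, fname) would be a more
    # robust alternative.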
def make_predictions(self, user_input: pd.DataFrame):
"""
Return:
- A list of np.array [0/1], one for each model
- A list of np.array [prob_of_1], if predict_proba is available, else np.nan
"""
preds = []
probs = []
for model in self.models:
y_pred = model.predict(user_input)
preds.append(y_pred.flatten())
if hasattr(model, "predict_proba"):
y_prob = model.predict_proba(user_input)[:, 1] # Probability that label=1
probs.append(y_prob)
else:
probs.append(np.full(len(user_input), np.nan))
return preds, probs
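    # For a single-row user_input, each preds[i] is a length-1 array of 0/1 and
    # probs[i][0] is that model's estimated probability of class 1 (NaN when
    # the estimator exposes no predict_proba).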
def evaluate_severity(self, count_ones: int) -> str:
"""
Evaluate severity based on total # of '1' predictions across all labels.
"""
if count_ones >= 13:
return "Mental Health Severity: Severe"
elif count_ones >= 9:
return "Mental Health Severity: Moderate"
elif count_ones >= 5:
return "Mental Health Severity: Low"
else:
return "Mental Health Severity: Very Low"
predictor = ModelPredictor(model_path, model_filenames)
######################################
# 3) FEATURE CATEGORIES + MAPPING
######################################
categories_dict = {
"1. Depression & Substance Use Diagnosis": [
"YMDESUD5ANYO", "YMDELT", "YMDEYR", "YMDERSUD5ANY",
"YMSUD5YANY", "YMIUD5YANY", "YMIMS5YANY", "YMIMI5YANY"
],
"2. Mental Health Treatment & Prof Consultation": [
"YMDEHPO", "YMDETXRX", "YMDEHARX", "YMDEHPRX", "YRXMDEYR",
"YHLTMDE", "YTXMDEYR", "YDOCMDE", "YPSY2MDE", "YPSY1MDE", "YCOUNMDE"
],
"3. Functional & Cognitive Impairment": [
"MDEIMPY", "LVLDIFMEM2"
],
"4. Suicidal Thoughts & Behaviors": [
"YUSUITHK", "YUSUITHKYR", "YUSUIPLNYR", "YUSUIPLN"
]
}
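# The four categories above hold 8 + 11 + 2 + 4 = 25 input features, matching
# the 25 dropdowns built in Section 7 and the argument list of predict() below.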
input_mapping = {
'YMDESUD5ANYO': {
"SUD only, no MDE": 1,
"MDE only, no SUD": 2,
"SUD and MDE": 3,
"Neither SUD or MDE": 4
},
'YMDELT': {"Yes": 1, "No": 2},
'YMDEYR': {"Yes": 1, "No": 2},
'YMDERSUD5ANY': {"Yes": 1, "No": 0},
'YMSUD5YANY': {"Yes": 1, "No": 0},
'YMIUD5YANY': {"Yes": 1, "No": 0},
'YMIMS5YANY': {"Yes": 1, "No": 0},
'YMIMI5YANY': {"Yes": 1, "No": 0},
'YMDEHPO': {"Yes": 1, "No": 0},
'YMDETXRX': {"Yes": 1, "No": 0},
'YMDEHARX': {"Yes": 1, "No": 0},
'YMDEHPRX': {"Yes": 1, "No": 0},
'YRXMDEYR': {"Yes": 1, "No": 0},
'YHLTMDE': {"Yes": 1, "No": 0},
'YTXMDEYR': {"Yes": 1, "No": 0},
'YDOCMDE': {"Yes": 1, "No": 0},
'YPSY2MDE': {"Yes": 1, "No": 0},
'YPSY1MDE': {"Yes": 1, "No": 0},
'YCOUNMDE': {"Yes": 1, "No": 0},
'MDEIMPY': {"Yes": 1, "No": 2},
'LVLDIFMEM2': {
"No Difficulty": 1,
"Some difficulty": 2,
"A lot of difficulty or cannot do at all": 3
},
'YUSUITHK': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
'YUSUITHKYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
'YUSUIPLNYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
'YUSUIPLN': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4}
}
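# The dropdowns in Section 7 draw their choices from the keys of input_mapping,
# and predict() converts the selected text back through the same dictionary, so
# the UI labels and the numeric codes stay in sync by construction.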
def validate_inputs(*args):
for arg in args:
if arg is None or arg == "":
return False
return True
######################################
# 4) NEAREST NEIGHBORS
######################################
def get_nearest_neighbors_info(user_input_df: pd.DataFrame, k=5):
user_cols = user_input_df.columns
if not all(col in df.columns for col in user_cols):
return "Cannot compute nearest neighbors. Some columns not found in df."
sub_df = df[list(user_cols)].copy()
diffs = sub_df - user_input_df.iloc[0]
dists = (diffs**2).sum(axis=1)**0.5
nn_indices = dists.nsmallest(k).index
neighbors = df.loc[nn_indices]
lines = [
f"**Nearest Neighbors (k={k})**",
f"Distances range: {dists[nn_indices].min():.2f} to {dists[nn_indices].max():.2f}",
""
]
# A) Show user input in numeric->text form
lines.append("**User Input (numeric -> text)**")
for col in user_cols:
val_numeric = user_input_df.iloc[0][col]
text_val = None
if col in input_mapping:
for txt_key, num_val in input_mapping[col].items():
if val_numeric == num_val:
text_val = txt_key
break
if not text_val:
text_val = f"{val_numeric} (no mapping found)"
lines.append(f"- {col} = {val_numeric} => '{text_val}'")
lines.append("")
# B) Show label columns among neighbors
label_cols = list(predictor.prediction_map.keys())
lines.append("**Label Distribution Among Neighbors**")
for lbl in label_cols:
if lbl not in neighbors.columns:
continue
val_counts = neighbors[lbl].value_counts().to_dict()
parts = []
for val_, count_ in val_counts.items():
if val_ in [0,1] and lbl in predictor.prediction_map:
label_text = predictor.prediction_map[lbl][val_]
parts.append(f"{count_} had '{label_text}'")
else:
parts.append(f"{count_} had numeric={val_}")
lines.append(f"- {lbl}: " + "; ".join(parts))
lines.append("")
return "\n".join(lines)
######################################
# 5) PREDICT FUNCTION
######################################
def predict(
# Category 1 (8):
YMDESUD5ANYO, YMDELT, YMDEYR, YMDERSUD5ANY,
YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
# Category 2 (11):
YMDEHPO, YMDETXRX, YMDEHARX, YMDEHPRX, YRXMDEYR,
YHLTMDE, YTXMDEYR, YDOCMDE, YPSY2MDE, YPSY1MDE, YCOUNMDE,
# Category 3 (2):
MDEIMPY, LVLDIFMEM2,
# Category 4 (4):
YUSUITHK, YUSUITHKYR, YUSUIPLNYR, YUSUIPLN
):
# 1) Validate
if not validate_inputs(
YMDESUD5ANYO, YMDELT, YMDEYR, YMDERSUD5ANY,
YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
YMDEHPO, YMDETXRX, YMDEHARX, YMDEHPRX, YRXMDEYR,
YHLTMDE, YTXMDEYR, YDOCMDE, YPSY2MDE, YPSY1MDE, YCOUNMDE,
MDEIMPY, LVLDIFMEM2,
YUSUITHK, YUSUITHKYR, YUSUIPLNYR, YUSUIPLN
):
return (
"Please select all required fields.", # 1) Prediction Results
"Validation Error", # 2) Severity
"No data", # 3) Total Count
"No nearest neighbors info", # 4) NN Summary
None, # 5) Bar chart (Input)
None # 6) Bar chart (Labels)
)
# 2) Convert text -> numeric
try:
user_input_dict = {
'YMDESUD5ANYO': input_mapping['YMDESUD5ANYO'][YMDESUD5ANYO],
'YMDELT': input_mapping['YMDELT'][YMDELT],
'YMDEYR': input_mapping['YMDEYR'][YMDEYR],
'YMDERSUD5ANY': input_mapping['YMDERSUD5ANY'][YMDERSUD5ANY],
'YMSUD5YANY': input_mapping['YMSUD5YANY'][YMSUD5YANY],
'YMIUD5YANY': input_mapping['YMIUD5YANY'][YMIUD5YANY],
'YMIMS5YANY': input_mapping['YMIMS5YANY'][YMIMS5YANY],
'YMIMI5YANY': input_mapping['YMIMI5YANY'][YMIMI5YANY],
'YMDEHPO': input_mapping['YMDEHPO'][YMDEHPO],
'YMDETXRX': input_mapping['YMDETXRX'][YMDETXRX],
'YMDEHARX': input_mapping['YMDEHARX'][YMDEHARX],
'YMDEHPRX': input_mapping['YMDEHPRX'][YMDEHPRX],
'YRXMDEYR': input_mapping['YRXMDEYR'][YRXMDEYR],
'YHLTMDE': input_mapping['YHLTMDE'][YHLTMDE],
'YTXMDEYR': input_mapping['YTXMDEYR'][YTXMDEYR],
'YDOCMDE': input_mapping['YDOCMDE'][YDOCMDE],
'YPSY2MDE': input_mapping['YPSY2MDE'][YPSY2MDE],
'YPSY1MDE': input_mapping['YPSY1MDE'][YPSY1MDE],
'YCOUNMDE': input_mapping['YCOUNMDE'][YCOUNMDE],
'MDEIMPY': input_mapping['MDEIMPY'][MDEIMPY],
'LVLDIFMEM2': input_mapping['LVLDIFMEM2'][LVLDIFMEM2],
'YUSUITHK': input_mapping['YUSUITHK'][YUSUITHK],
'YUSUITHKYR': input_mapping['YUSUITHKYR'][YUSUITHKYR],
'YUSUIPLNYR': input_mapping['YUSUIPLNYR'][YUSUIPLNYR],
'YUSUIPLN': input_mapping['YUSUIPLN'][YUSUIPLN]
}
except KeyError as e:
missing_key = e.args[0]
return (
f"Input mapping missing for key: {missing_key}. Please check your `input_mapping` dictionary.",
"Mapping Error",
"No data",
"No nearest neighbors info",
None,
None
)
user_df = pd.DataFrame(user_input_dict, index=[0])
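    # Assumption: each pickled model was trained on exactly these 25 columns
    # (same names, same order), so user_df can be passed straight to
    # predict() / predict_proba() without re-indexing.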
# 3) Make predictions
try:
preds, probs = predictor.make_predictions(user_df)
except Exception as e:
return (
f"Error during prediction: {e}",
"Prediction Error",
"No data",
"No nearest neighbors info",
None,
None
)
# Flatten predictions for severity count
all_preds = np.concatenate(preds)
count_ones = np.sum(all_preds == 1)
severity_msg = predictor.evaluate_severity(count_ones)
# 4) Summarize predictions (with probabilities)
# Build label -> (pred_value, prob_value)
label_prediction_info = {}
for i, fname in enumerate(model_filenames):
lbl_col = fname.split('.')[0]
pred_val = preds[i][0]
prob_val = probs[i][0]
label_prediction_info[lbl_col] = (pred_val, prob_val)
# Group them by domain
domain_groups = {
"Concentration and Decision Making": ["YOWRCONC", "YOWRDCSN"],
"Sleep and Energy Levels": ["YO_MDEA5", "YOWRELES"],
"Mood and Emotional State": [
"YOWRLSIN", "YOWRDEPR", "YODPDISC", "YOLOSEV", "YODPLSIN"
],
"Appetite and Weight Changes": ["YO_MDEA3", "YOWRELES"],
"Duration and Severity of Depression Symptoms": [
"YODPPROB", "YOWRPROB", "YODPR2WK", "YODSMMDE", "YOPB2WK"
]
}
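    # Note: YOWRELES appears in both "Sleep and Energy Levels" and
    # "Appetite and Weight Changes", so its prediction is shown under each group.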
final_str_parts = []
for gname, lbls in domain_groups.items():
group_lines = []
for lbl in lbls:
if lbl in label_prediction_info:
pred_val, prob_val = label_prediction_info[lbl]
if lbl in predictor.prediction_map and pred_val in [0,1]:
text_pred = predictor.prediction_map[lbl][pred_val]
else:
text_pred = f"Prediction={pred_val}"
if not np.isnan(prob_val):
text_prob = f"(Prob= {prob_val:.2f})"
else:
text_prob = "(No probability available)"
group_lines.append(f"{lbl} => {text_pred} {text_prob}")
if group_lines:
final_str_parts.append(f"**{gname}**")
final_str_parts.append("\n".join(group_lines))
final_str_parts.append("") # Add an empty line for spacing
if final_str_parts:
final_str = "\n".join(final_str_parts)
else:
final_str = "No predictions made or no matching group columns."
# 5) Additional info
total_count_md = f"We have **{len(df)}** patients in the dataset."
# 6) Nearest Neighbors
nn_md = get_nearest_neighbors_info(user_df, k=5)
# 7) Bar chart for input features
input_counts = {}
for col, val_ in user_input_dict.items():
matched = len(df[df[col] == val_])
input_counts[col] = matched
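    # Each count is per feature: the number of patients sharing that single
    # value, not the number matching the full 25-feature input profile.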
bar_in_df = pd.DataFrame({
"Feature": list(input_counts.keys()),
"Count": list(input_counts.values())
})
fig_in = px.bar(
bar_in_df, x="Feature", y="Count",
title="Number of Patients with the Same Input Feature Values"
)
fig_in.update_layout(width=1200, height=400)
# 8) Bar chart for predicted labels (UPDATED)
label_df_list = []
for lbl_col, (pred_val, _) in label_prediction_info.items():
if lbl_col in df.columns:
# Count how many patients in df have the predicted value
predicted_count = len(df[df[lbl_col] == pred_val])
# Determine the "other" class (0 ↔ 1)
other_val = 1 - pred_val
other_count = len(df[df[lbl_col] == other_val])
label_df_list.append({
"Label": lbl_col,
"Class": f"Predicted_{pred_val}",
"Count": predicted_count
})
label_df_list.append({
"Label": lbl_col,
"Class": f"Opposite_{other_val}",
"Count": other_count
})
if label_df_list:
bar_lbl_df = pd.DataFrame(label_df_list)
fig_lbl = px.bar(
bar_lbl_df,
x="Label",
y="Count",
color="Class",
barmode="group",
title="Number of Patients with the Predicted vs. Opposite Label"
)
fig_lbl.update_layout(width=1200, height=400)
else:
fig_lbl = px.bar(title="No valid predicted labels to display.")
fig_lbl.update_layout(width=1200, height=400)
return (
final_str, # 1) Prediction Results
severity_msg, # 2) Mental Health Severity
total_count_md, # 3) Total Patient Count
nn_md, # 4) Nearest Neighbors Summary
fig_in, # 5) Bar Chart (input features)
fig_lbl # 6) Bar Chart (labels)
)
######################################
# 6) UNIFIED DISTRIBUTION/CO-OCCURRENCE
######################################
def combined_plot(feature_list, label_col):
"""
If user picks 1 feature => distribution plot.
If user picks 2 features => co-occurrence plot.
Otherwise => show error or empty plot.
This function also maps numeric codes to text using 'input_mapping'
and 'predictor.prediction_map' so that the plots display more readable labels.
"""
if not label_col:
return px.bar(title="Please select a label column.")
# Make a copy of your dataset
df_copy = df.copy()
# A) Convert numeric codes -> text for each feature in `input_mapping`
for col, text_to_num_dict in input_mapping.items():
if col in df_copy.columns:
# Reverse mapping: "Yes"->1 becomes 1->"Yes"
num_to_text = {v: k for k, v in text_to_num_dict.items()}
df_copy[col] = df_copy[col].map(num_to_text).fillna(df_copy[col])
# B) Convert label 0/1 to text in df_copy if label_col is in predictor.prediction_map
if label_col in predictor.prediction_map and label_col in df_copy.columns:
zero_text, one_text = predictor.prediction_map[label_col]
label_map = {0: zero_text, 1: one_text}
df_copy[label_col] = df_copy[label_col].map(label_map).fillna(df_copy[label_col])
# Now proceed with the plotting
if len(feature_list) == 1:
f_ = feature_list[0]
if f_ not in df_copy.columns or label_col not in df_copy.columns:
return px.bar(title="Selected columns not found in the dataset.")
grouped = df_copy.groupby([f_, label_col]).size().reset_index(name="count")
fig = px.bar(
grouped,
x=f_,
y="count",
color=label_col,
title=f"Distribution of {f_} vs {label_col} (Text Mapped)"
)
fig.update_layout(width=1200, height=600)
return fig
elif len(feature_list) == 2:
f1, f2 = feature_list
if (f1 not in df_copy.columns) or (f2 not in df_copy.columns) or (label_col not in df_copy.columns):
return px.bar(title="Selected columns not found in the dataset.")
grouped = df_copy.groupby([f1, f2, label_col]).size().reset_index(name="count")
fig = px.bar(
grouped,
x=f1,
y="count",
color=label_col,
facet_col=f2,
title=f"Co-occurrence: {f1}, {f2} vs {label_col} (Text Mapped)"
)
fig.update_layout(width=1200, height=600)
return fig
else:
return px.bar(title="Please select exactly 1 or 2 features.")
######################################
# 7) BUILD GRADIO UI
######################################
with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
# ======== TAB 1: Prediction ========
with gr.Tab("Prediction"):
gr.Markdown("### Please provide inputs in each of the four categories below. All fields are required.")
# Category 1: Depression & Substance Use Diagnosis (8 features)
gr.Markdown("#### 1. Depression & Substance Use Diagnosis")
cat1_col_labels = [
("YMDESUD5ANYO", "YMDESUD5ANYO: ONLY MDE, ONLY SUD, BOTH, OR NEITHER"),
("YMDELT", "YMDELT: Had major depressive episode in lifetime"),
("YMDEYR", "YMDEYR: Past-year major depressive episode"),
("YMDERSUD5ANY", "YMDERSUD5ANY: MDE or SUD in past year?"),
("YMSUD5YANY", "YMSUD5YANY: Past-year MDE & substance use disorder"),
("YMIUD5YANY", "YMIUD5YANY: Past-year MDE & illicit drug use disorder"),
("YMIMS5YANY", "YMIMS5YANY: Past-year MDE + severe impairment + substance use"),
("YMIMI5YANY", "YMIMI5YANY: Past-year MDE w/ severe impairment & illicit drug use")
]
cat1_inputs = []
for col, label_text in cat1_col_labels:
cat1_inputs.append(
gr.Dropdown(
choices=list(input_mapping[col].keys()),
label=label_text
)
)
# Category 2: Mental Health Treatment & Professional Consultation (11 features)
gr.Markdown("#### 2. Mental Health Treatment & Professional Consultation")
cat2_col_labels = [
("YMDEHPO", "YMDEHPO: Saw health prof only for MDE"),
("YMDETXRX", "YMDETXRX: Received treatment/counseling if saw doc/prof for MDE"),
("YMDEHARX", "YMDEHARX: Saw health prof & medication for MDE"),
("YMDEHPRX", "YMDEHPRX: Saw health prof or med for MDE in past year?"),
("YRXMDEYR", "YRXMDEYR: Used medication for MDE in past years"),
("YHLTMDE", "YHLTMDE: Saw/talked to health prof about MDE"),
("YTXMDEYR", "YTXMDEYR: Saw/talked to doc/prof for MDE in past year"),
("YDOCMDE", "YDOCMDE: Saw/talked to general practitioner/family MD"),
("YPSY2MDE", "YPSY2MDE: Saw/talked to psychiatrist"),
("YPSY1MDE", "YPSY1MDE: Saw/talked to psychologist"),
("YCOUNMDE", "YCOUNMDE: Saw/talked to counselor")
]
cat2_inputs = []
for col, label_text in cat2_col_labels:
cat2_inputs.append(
gr.Dropdown(
choices=list(input_mapping[col].keys()),
label=label_text
)
)
# Category 3: Functional & Cognitive Impairment (2 features)
gr.Markdown("#### 3. Functional & Cognitive Impairment")
cat3_col_labels = [
("MDEIMPY", "MDEIMPY: MDE with severe role impairment?"),
("LVLDIFMEM2", "LVLDIFMEM2: Difficulty remembering/concentrating")
]
cat3_inputs = []
for col, label_text in cat3_col_labels:
cat3_inputs.append(
gr.Dropdown(
choices=list(input_mapping[col].keys()),
label=label_text
)
)
# Category 4: Suicidal Thoughts & Behaviors (4 features)
gr.Markdown("#### 4. Suicidal Thoughts & Behaviors")
cat4_col_labels = [
("YUSUITHK", "YUSUITHK: Thought of killing self (past 12 months)?"),
("YUSUITHKYR", "YUSUITHKYR: Seriously thought about killing self?"),
("YUSUIPLNYR", "YUSUIPLNYR: Made plans to kill self in past year?"),
("YUSUIPLN", "YUSUIPLN: Made plans to kill yourself in past 12 months?")
]
cat4_inputs = []
for col, label_text in cat4_col_labels:
cat4_inputs.append(
gr.Dropdown(
choices=list(input_mapping[col].keys()),
label=label_text
)
)
# Combine all inputs in the correct order
all_inputs = cat1_inputs + cat2_inputs + cat3_inputs + cat4_inputs
# Output components
predict_btn = gr.Button("Predict")
out_pred_res = gr.Textbox(label="Prediction Results (with Probability)", lines=8)
out_sev = gr.Textbox(label="Mental Health Severity", lines=2)
out_count = gr.Markdown(label="Total Patient Count")
out_nn = gr.Markdown(label="Nearest Neighbors Summary")
        out_bar_input = gr.Plot(label="Input Feature Counts")
        out_bar_label = gr.Plot(label="Predicted Label Counts")
# Connect the predict button to the predict function
predict_btn.click(
fn=predict,
inputs=all_inputs,
outputs=[
out_pred_res,
out_sev,
out_count,
out_nn,
out_bar_input,
out_bar_label
]
)
# ======== TAB 2: Unified Distribution/Co-occurrence ========
with gr.Tab("Distribution/Co-occurrence"):
gr.Markdown("### Select 1 or 2 features + 1 label to see a bar chart.")
# Show only your 25 input features
list_of_features = sorted(input_mapping.keys())
# Show all label columns from the predictor map
list_of_labels = sorted(predictor.prediction_map.keys())
selected_features = gr.CheckboxGroup(
choices=list_of_features,
label="Select 1 or 2 features"
)
label_dd = gr.Dropdown(
choices=list_of_labels,
label="Label Column (e.g., YOWRCONC, YOSEEDOC, etc.)"
)
generate_combined_btn = gr.Button("Generate Plot")
combined_output = gr.Plot()
generate_combined_btn.click(
fn=combined_plot,
inputs=[selected_features, label_dd],
outputs=combined_output
)
# Finally, launch the Gradio app
demo.launch()
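# A sketch of common alternatives (not used here): demo.launch(share=True) for a
# temporary public link, or demo.launch(server_name="0.0.0.0") when self-hosting
# and the app should listen on all interfaces.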