import pickle
import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
######################################
# 1) LOAD DATA & MODELS
######################################
# Load your dataset
df = pd.read_csv("X_train_test_combined_dataset_Filtered_dataset.csv")
# Ensure 'YMDESUD5ANYO' exists in your DataFrame
if 'YMDESUD5ANYO' not in df.columns:
raise ValueError("The column 'YMDESUD5ANYO' is missing from the dataset. Please check your CSV file.")
# List of model filenames
model_filenames = [
"YOWRCONC.pkl", "YOSEEDOC.pkl", "YO_MDEA5.pkl", "YOWRLSIN.pkl",
"YODPPROB.pkl", "YOWRPROB.pkl", "YODPR2WK.pkl", "YOWRDEPR.pkl",
"YODPDISC.pkl", "YOLOSEV.pkl", "YOWRDCSN.pkl", "YODSMMDE.pkl",
"YO_MDEA3.pkl", "YODPLSIN.pkl", "YOWRELES.pkl", "YOPB2WK.pkl"
]
model_path = "models/"
######################################
# 2) MODEL PREDICTOR
######################################
class ModelPredictor:
def __init__(self, model_path, model_filenames):
self.model_path = model_path
self.model_filenames = model_filenames
self.models = self.load_models()
# Mapping each label (column) to textual meaning for 0/1
self.prediction_map = {
"YOWRCONC": ["Did NOT have difficulty concentrating", "Had difficulty concentrating"],
"YOSEEDOC": ["Did NOT feel the need to see a doctor", "Felt the need to see a doctor"],
"YO_MDEA5": ["No restlessness/lethargy noticed", "Others noticed restlessness/lethargy"],
"YOWRLSIN": ["Did NOT feel bored/lose interest", "Felt bored/lost interest"],
"YODPPROB": ["No other problems for 2+ weeks", "Had other problems for 2+ weeks"],
"YOWRPROB": ["No 'worst time ever' feeling", "Had 'worst time ever' feeling"],
"YODPR2WK": ["No depressed feelings for 2+ wks", "Had depressed feelings for 2+ wks"],
"YOWRDEPR": ["Did NOT feel sad/depressed daily", "Felt sad/depressed mostly everyday"],
"YODPDISC": ["Overall mood not sad/depressed", "Overall mood was sad/depressed"],
"YOLOSEV": ["Did NOT lose interest in things", "Lost interest in enjoyable things"],
"YOWRDCSN": ["Was able to make decisions", "Was unable to make decisions"],
"YODSMMDE": ["No 2+ wks depression symptoms", "Had 2+ wks depression symptoms"],
"YO_MDEA3": ["No appetite/weight changes", "Had changes in appetite/weight"],
"YODPLSIN": ["Never lost interest/felt bored", "Lost interest/felt bored"],
"YOWRELES": ["Did NOT eat less than usual", "Ate less than usual"],
"YOPB2WK": ["No uneasy feelings 2+ weeks", "Uneasy feelings 2+ weeks"]
}
def load_models(self):
loaded = []
for fname in self.model_filenames:
try:
with open(self.model_path + fname, "rb") as f:
model = pickle.load(f)
loaded.append(model)
except FileNotFoundError:
raise FileNotFoundError(f"Model file '{fname}' not found in path '{self.model_path}'.")
except Exception as e:
raise Exception(f"Error loading model '{fname}': {e}")
return loaded
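    # Note: files are opened as `model_path + fname`, so model_path is assumed
    # to end with a slash; os.path.join(model_path, fname) would be a more
    # robust alternative.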
def make_predictions(self, user_input: pd.DataFrame):
"""
Return:
- A list of np.array [0/1], one for each model
- A list of np.array [prob_of_1], if predict_proba is available, else np.nan
"""
preds = []
probs = []
for model in self.models:
y_pred = model.predict(user_input)
preds.append(y_pred.flatten())
if hasattr(model, "predict_proba"):
y_prob = model.predict_proba(user_input)[:, 1] # Probability that label=1
probs.append(y_prob)
else:
probs.append(np.full(len(user_input), np.nan))
return preds, probs
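    # For a single-row user_input, each preds[i] is a length-1 array of 0/1 and
    # probs[i][0] is that model's estimated probability of class 1 (NaN when
    # the estimator exposes no predict_proba).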
def evaluate_severity(self, count_ones: int) -> str:
"""
Evaluate severity based on total # of '1' predictions across all labels.
"""
if count_ones >= 13:
return "Mental Health Severity: Severe"
elif count_ones >= 9:
return "Mental Health Severity: Moderate"
elif count_ones >= 5:
return "Mental Health Severity: Low"
else:
return "Mental Health Severity: Very Low"
predictor = ModelPredictor(model_path, model_filenames)
######################################
# 3) FEATURE CATEGORIES + MAPPING
######################################
categories_dict = {
"1. Depression & Substance Use Diagnosis": [
"YMDESUD5ANYO", "YMDELT", "YMDEYR", "YMDERSUD5ANY",
"YMSUD5YANY", "YMIUD5YANY", "YMIMS5YANY", "YMIMI5YANY"
],
"2. Mental Health Treatment & Prof Consultation": [
"YMDEHPO", "YMDETXRX", "YMDEHARX", "YMDEHPRX", "YRXMDEYR",
"YHLTMDE", "YTXMDEYR", "YDOCMDE", "YPSY2MDE", "YPSY1MDE", "YCOUNMDE"
],
"3. Functional & Cognitive Impairment": [
"MDEIMPY", "LVLDIFMEM2"
],
"4. Suicidal Thoughts & Behaviors": [
"YUSUITHK", "YUSUITHKYR", "YUSUIPLNYR", "YUSUIPLN"
]
}
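# The four categories above hold 8 + 11 + 2 + 4 = 25 input features, matching
# the 25 dropdowns built in Section 7 and the argument list of predict() below.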
input_mapping = {
'YMDESUD5ANYO': {
"SUD only, no MDE": 1,
"MDE only, no SUD": 2,
"SUD and MDE": 3,
"Neither SUD or MDE": 4
},
'YMDELT': {"Yes": 1, "No": 2},
'YMDEYR': {"Yes": 1, "No": 2},
'YMDERSUD5ANY': {"Yes": 1, "No": 0},
'YMSUD5YANY': {"Yes": 1, "No": 0},
'YMIUD5YANY': {"Yes": 1, "No": 0},
'YMIMS5YANY': {"Yes": 1, "No": 0},
'YMIMI5YANY': {"Yes": 1, "No": 0},
'YMDEHPO': {"Yes": 1, "No": 0},
'YMDETXRX': {"Yes": 1, "No": 0},
'YMDEHARX': {"Yes": 1, "No": 0},
'YMDEHPRX': {"Yes": 1, "No": 0},
'YRXMDEYR': {"Yes": 1, "No": 0},
'YHLTMDE': {"Yes": 1, "No": 0},
'YTXMDEYR': {"Yes": 1, "No": 0},
'YDOCMDE': {"Yes": 1, "No": 0},
'YPSY2MDE': {"Yes": 1, "No": 0},
'YPSY1MDE': {"Yes": 1, "No": 0},
'YCOUNMDE': {"Yes": 1, "No": 0},
'MDEIMPY': {"Yes": 1, "No": 2},
'LVLDIFMEM2': {
"No Difficulty": 1,
"Some difficulty": 2,
"A lot of difficulty or cannot do at all": 3
},
'YUSUITHK': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
'YUSUITHKYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
'YUSUIPLNYR': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4},
'YUSUIPLN': {"Yes": 1, "No": 2, "I'm not sure": 3, "I don't want to answer": 4}
}
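# The dropdowns in Section 7 draw their choices from the keys of input_mapping,
# and predict() converts the selected text back through the same dictionary, so
# the UI labels and the numeric codes stay in sync by construction.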
def validate_inputs(*args):
for arg in args:
if arg is None or arg == "":
return False
return True
######################################
# 4) NEAREST NEIGHBORS
######################################
def get_nearest_neighbors_info(user_input_df: pd.DataFrame, k=5):
user_cols = user_input_df.columns
if not all(col in df.columns for col in user_cols):
return "Cannot compute nearest neighbors. Some columns not found in df."
sub_df = df[list(user_cols)].copy()
diffs = sub_df - user_input_df.iloc[0]
dists = (diffs**2).sum(axis=1)**0.5
nn_indices = dists.nsmallest(k).index
neighbors = df.loc[nn_indices]
lines = [
f"**Nearest Neighbors (k={k})**",
f"Distances range: {dists[nn_indices].min():.2f} to {dists[nn_indices].max():.2f}",
""
]
# A) Show user input in numeric->text form
lines.append("**User Input (numeric -> text)**")
for col in user_cols:
val_numeric = user_input_df.iloc[0][col]
text_val = None
if col in input_mapping:
for txt_key, num_val in input_mapping[col].items():
if val_numeric == num_val:
text_val = txt_key
break
if not text_val:
text_val = f"{val_numeric} (no mapping found)"
lines.append(f"- {col} = {val_numeric} => '{text_val}'")
lines.append("")
# B) Show label columns among neighbors
label_cols = list(predictor.prediction_map.keys())
lines.append("**Label Distribution Among Neighbors**")
for lbl in label_cols:
if lbl not in neighbors.columns:
continue
val_counts = neighbors[lbl].value_counts().to_dict()
parts = []
for val_, count_ in val_counts.items():
if val_ in [0,1] and lbl in predictor.prediction_map:
label_text = predictor.prediction_map[lbl][val_]
parts.append(f"{count_} had '{label_text}'")
else:
parts.append(f"{count_} had numeric={val_}")
lines.append(f"- {lbl}: " + "; ".join(parts))
lines.append("")
return "\n".join(lines)
######################################
# 5) PREDICT FUNCTION
######################################
def predict(
# Category 1 (8):
YMDESUD5ANYO, YMDELT, YMDEYR, YMDERSUD5ANY,
YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
# Category 2 (11):
YMDEHPO, YMDETXRX, YMDEHARX, YMDEHPRX, YRXMDEYR,
YHLTMDE, YTXMDEYR, YDOCMDE, YPSY2MDE, YPSY1MDE, YCOUNMDE,
# Category 3 (2):
MDEIMPY, LVLDIFMEM2,
# Category 4 (4):
YUSUITHK, YUSUITHKYR, YUSUIPLNYR, YUSUIPLN
):
# 1) Validate
if not validate_inputs(
YMDESUD5ANYO, YMDELT, YMDEYR, YMDERSUD5ANY,
YMSUD5YANY, YMIUD5YANY, YMIMS5YANY, YMIMI5YANY,
YMDEHPO, YMDETXRX, YMDEHARX, YMDEHPRX, YRXMDEYR,
YHLTMDE, YTXMDEYR, YDOCMDE, YPSY2MDE, YPSY1MDE, YCOUNMDE,
MDEIMPY, LVLDIFMEM2,
YUSUITHK, YUSUITHKYR, YUSUIPLNYR, YUSUIPLN
):
return (
"Please select all required fields.", # 1) Prediction Results
"Validation Error", # 2) Severity
"No data", # 3) Total Count
"No nearest neighbors info", # 4) NN Summary
None, # 5) Bar chart (Input)
None # 6) Bar chart (Labels)
)
# 2) Convert text -> numeric
try:
user_input_dict = {
'YMDESUD5ANYO': input_mapping['YMDESUD5ANYO'][YMDESUD5ANYO],
'YMDELT': input_mapping['YMDELT'][YMDELT],
'YMDEYR': input_mapping['YMDEYR'][YMDEYR],
'YMDERSUD5ANY': input_mapping['YMDERSUD5ANY'][YMDERSUD5ANY],
'YMSUD5YANY': input_mapping['YMSUD5YANY'][YMSUD5YANY],
'YMIUD5YANY': input_mapping['YMIUD5YANY'][YMIUD5YANY],
'YMIMS5YANY': input_mapping['YMIMS5YANY'][YMIMS5YANY],
'YMIMI5YANY': input_mapping['YMIMI5YANY'][YMIMI5YANY],
'YMDEHPO': input_mapping['YMDEHPO'][YMDEHPO],
'YMDETXRX': input_mapping['YMDETXRX'][YMDETXRX],
'YMDEHARX': input_mapping['YMDEHARX'][YMDEHARX],
'YMDEHPRX': input_mapping['YMDEHPRX'][YMDEHPRX],
'YRXMDEYR': input_mapping['YRXMDEYR'][YRXMDEYR],
'YHLTMDE': input_mapping['YHLTMDE'][YHLTMDE],
'YTXMDEYR': input_mapping['YTXMDEYR'][YTXMDEYR],
'YDOCMDE': input_mapping['YDOCMDE'][YDOCMDE],
'YPSY2MDE': input_mapping['YPSY2MDE'][YPSY2MDE],
'YPSY1MDE': input_mapping['YPSY1MDE'][YPSY1MDE],
'YCOUNMDE': input_mapping['YCOUNMDE'][YCOUNMDE],
'MDEIMPY': input_mapping['MDEIMPY'][MDEIMPY],
'LVLDIFMEM2': input_mapping['LVLDIFMEM2'][LVLDIFMEM2],
'YUSUITHK': input_mapping['YUSUITHK'][YUSUITHK],
'YUSUITHKYR': input_mapping['YUSUITHKYR'][YUSUITHKYR],
'YUSUIPLNYR': input_mapping['YUSUIPLNYR'][YUSUIPLNYR],
'YUSUIPLN': input_mapping['YUSUIPLN'][YUSUIPLN]
}
except KeyError as e:
missing_key = e.args[0]
return (
f"Input mapping missing for key: {missing_key}. Please check your `input_mapping` dictionary.",
"Mapping Error",
"No data",
"No nearest neighbors info",
None,
None
)
user_df = pd.DataFrame(user_input_dict, index=[0])
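    # Assumption: each pickled model was trained on exactly these 25 columns
    # (same names, same order), so user_df can be passed straight to
    # predict() / predict_proba() without re-indexing.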
# 3) Make predictions
try:
preds, probs = predictor.make_predictions(user_df)
except Exception as e:
return (
f"Error during prediction: {e}",
"Prediction Error",
"No data",
"No nearest neighbors info",
None,
None
)
# Flatten predictions for severity count
all_preds = np.concatenate(preds)
count_ones = np.sum(all_preds == 1)
severity_msg = predictor.evaluate_severity(count_ones)
# 4) Summarize predictions (with probabilities)
# Build label -> (pred_value, prob_value)
label_prediction_info = {}
for i, fname in enumerate(model_filenames):
lbl_col = fname.split('.')[0]
pred_val = preds[i][0]
prob_val = probs[i][0]
label_prediction_info[lbl_col] = (pred_val, prob_val)
# Group them by domain
domain_groups = {
"Concentration and Decision Making": ["YOWRCONC", "YOWRDCSN"],
"Sleep and Energy Levels": ["YO_MDEA5", "YOWRELES"],
"Mood and Emotional State": [
"YOWRLSIN", "YOWRDEPR", "YODPDISC", "YOLOSEV", "YODPLSIN"
],
"Appetite and Weight Changes": ["YO_MDEA3", "YOWRELES"],
"Duration and Severity of Depression Symptoms": [
"YODPPROB", "YOWRPROB", "YODPR2WK", "YODSMMDE", "YOPB2WK"
]
}
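    # Note: YOWRELES appears in both "Sleep and Energy Levels" and
    # "Appetite and Weight Changes", so its prediction is shown under each group.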
final_str_parts = []
for gname, lbls in domain_groups.items():
group_lines = []
for lbl in lbls:
if lbl in label_prediction_info:
pred_val, prob_val = label_prediction_info[lbl]
if lbl in predictor.prediction_map and pred_val in [0,1]:
text_pred = predictor.prediction_map[lbl][pred_val]
else:
text_pred = f"Prediction={pred_val}"
if not np.isnan(prob_val):
text_prob = f"(Prob= {prob_val:.2f})"
else:
text_prob = "(No probability available)"
group_lines.append(f"{lbl} => {text_pred} {text_prob}")
if group_lines:
final_str_parts.append(f"**{gname}**")
final_str_parts.append("\n".join(group_lines))
final_str_parts.append("") # Add an empty line for spacing
if final_str_parts:
final_str = "\n".join(final_str_parts)
else:
final_str = "No predictions made or no matching group columns."
# 5) Additional info
total_count_md = f"We have **{len(df)}** patients in the dataset."
# 6) Nearest Neighbors
nn_md = get_nearest_neighbors_info(user_df, k=5)
# 7) Bar chart for input features
input_counts = {}
for col, val_ in user_input_dict.items():
matched = len(df[df[col] == val_])
input_counts[col] = matched
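    # Each count is per feature: the number of patients sharing that single
    # value, not the number matching the full 25-feature input profile.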
bar_in_df = pd.DataFrame({
"Feature": list(input_counts.keys()),
"Count": list(input_counts.values())
})
fig_in = px.bar(
bar_in_df, x="Feature", y="Count",
title="Number of Patients with the Same Input Feature Values"
)
fig_in.update_layout(width=1200, height=400)
# 8) Bar chart for predicted labels (UPDATED)
label_df_list = []
for lbl_col, (pred_val, _) in label_prediction_info.items():
if lbl_col in df.columns:
# Count how many patients in df have the predicted value
predicted_count = len(df[df[lbl_col] == pred_val])
# Determine the "other" class (0 ↔ 1)
other_val = 1 - pred_val
other_count = len(df[df[lbl_col] == other_val])
label_df_list.append({
"Label": lbl_col,
"Class": f"Predicted_{pred_val}",
"Count": predicted_count
})
label_df_list.append({
"Label": lbl_col,
"Class": f"Opposite_{other_val}",
"Count": other_count
})
if label_df_list:
bar_lbl_df = pd.DataFrame(label_df_list)
fig_lbl = px.bar(
bar_lbl_df,
x="Label",
y="Count",
color="Class",
barmode="group",
title="Number of Patients with the Predicted vs. Opposite Label"
)
fig_lbl.update_layout(width=1200, height=400)
else:
fig_lbl = px.bar(title="No valid predicted labels to display.")
fig_lbl.update_layout(width=1200, height=400)
return (
final_str, # 1) Prediction Results
severity_msg, # 2) Mental Health Severity
total_count_md, # 3) Total Patient Count
nn_md, # 4) Nearest Neighbors Summary
fig_in, # 5) Bar Chart (input features)
fig_lbl # 6) Bar Chart (labels)
)
######################################
# 6) UNIFIED DISTRIBUTION/CO-OCCURRENCE
######################################
def combined_plot(feature_list, label_col):
"""
If user picks 1 feature => distribution plot.
If user picks 2 features => co-occurrence plot.
Otherwise => show error or empty plot.
This function also maps numeric codes to text using 'input_mapping'
and 'predictor.prediction_map' so that the plots display more readable labels.
"""
if not label_col:
return px.bar(title="Please select a label column.")
# Make a copy of your dataset
df_copy = df.copy()
# A) Convert numeric codes -> text for each feature in `input_mapping`
for col, text_to_num_dict in input_mapping.items():
if col in df_copy.columns:
# Reverse mapping: "Yes"->1 becomes 1->"Yes"
num_to_text = {v: k for k, v in text_to_num_dict.items()}
df_copy[col] = df_copy[col].map(num_to_text).fillna(df_copy[col])
# B) Convert label 0/1 to text in df_copy if label_col is in predictor.prediction_map
if label_col in predictor.prediction_map and label_col in df_copy.columns:
zero_text, one_text = predictor.prediction_map[label_col]
label_map = {0: zero_text, 1: one_text}
df_copy[label_col] = df_copy[label_col].map(label_map).fillna(df_copy[label_col])
# Now proceed with the plotting
if len(feature_list) == 1:
f_ = feature_list[0]
if f_ not in df_copy.columns or label_col not in df_copy.columns:
return px.bar(title="Selected columns not found in the dataset.")
grouped = df_copy.groupby([f_, label_col]).size().reset_index(name="count")
fig = px.bar(
grouped,
x=f_,
y="count",
color=label_col,
title=f"Distribution of {f_} vs {label_col} (Text Mapped)"
)
fig.update_layout(width=1200, height=600)
return fig
elif len(feature_list) == 2:
f1, f2 = feature_list
if (f1 not in df_copy.columns) or (f2 not in df_copy.columns) or (label_col not in df_copy.columns):
return px.bar(title="Selected columns not found in the dataset.")
grouped = df_copy.groupby([f1, f2, label_col]).size().reset_index(name="count")
fig = px.bar(
grouped,
x=f1,
y="count",
color=label_col,
facet_col=f2,
title=f"Co-occurrence: {f1}, {f2} vs {label_col} (Text Mapped)"
)
fig.update_layout(width=1200, height=600)
return fig
else:
return px.bar(title="Please select exactly 1 or 2 features.")
######################################
# 7) BUILD GRADIO UI
######################################
with gr.Blocks(css=".gradio-container {max-width: 1200px;}") as demo:
# ======== TAB 1: Prediction ========
with gr.Tab("Prediction"):
gr.Markdown("### Please provide inputs in each of the four categories below. All fields are required.")
# Category 1: Depression & Substance Use Diagnosis (8 features)
gr.Markdown("#### 1. Depression & Substance Use Diagnosis")
cat1_col_labels = [
("YMDESUD5ANYO", "YMDESUD5ANYO: ONLY MDE, ONLY SUD, BOTH, OR NEITHER"),
("YMDELT", "YMDELT: Had major depressive episode in lifetime"),
("YMDEYR", "YMDEYR: Past-year major depressive episode"),
("YMDERSUD5ANY", "YMDERSUD5ANY: MDE or SUD in past year?"),
("YMSUD5YANY", "YMSUD5YANY: Past-year MDE & substance use disorder"),
("YMIUD5YANY", "YMIUD5YANY: Past-year MDE & illicit drug use disorder"),
("YMIMS5YANY", "YMIMS5YANY: Past-year MDE + severe impairment + substance use"),
("YMIMI5YANY", "YMIMI5YANY: Past-year MDE w/ severe impairment & illicit drug use")
]
cat1_inputs = []
for col, label_text in cat1_col_labels:
cat1_inputs.append(
gr.Dropdown(
choices=list(input_mapping[col].keys()),
label=label_text
)
)
# Category 2: Mental Health Treatment & Professional Consultation (11 features)
gr.Markdown("#### 2. Mental Health Treatment & Professional Consultation")
cat2_col_labels = [
("YMDEHPO", "YMDEHPO: Saw health prof only for MDE"),
("YMDETXRX", "YMDETXRX: Received treatment/counseling if saw doc/prof for MDE"),
("YMDEHARX", "YMDEHARX: Saw health prof & medication for MDE"),
("YMDEHPRX", "YMDEHPRX: Saw health prof or med for MDE in past year?"),
("YRXMDEYR", "YRXMDEYR: Used medication for MDE in past years"),
("YHLTMDE", "YHLTMDE: Saw/talked to health prof about MDE"),
("YTXMDEYR", "YTXMDEYR: Saw/talked to doc/prof for MDE in past year"),
("YDOCMDE", "YDOCMDE: Saw/talked to general practitioner/family MD"),
("YPSY2MDE", "YPSY2MDE: Saw/talked to psychiatrist"),
("YPSY1MDE", "YPSY1MDE: Saw/talked to psychologist"),
("YCOUNMDE", "YCOUNMDE: Saw/talked to counselor")
]
cat2_inputs = []
for col, label_text in cat2_col_labels:
cat2_inputs.append(
gr.Dropdown(
choices=list(input_mapping[col].keys()),
label=label_text
)
)
# Category 3: Functional & Cognitive Impairment (2 features)
gr.Markdown("#### 3. Functional & Cognitive Impairment")
cat3_col_labels = [
("MDEIMPY", "MDEIMPY: MDE with severe role impairment?"),
("LVLDIFMEM2", "LVLDIFMEM2: Difficulty remembering/concentrating")
]
cat3_inputs = []
for col, label_text in cat3_col_labels:
cat3_inputs.append(
gr.Dropdown(
choices=list(input_mapping[col].keys()),
label=label_text
)
)
# Category 4: Suicidal Thoughts & Behaviors (4 features)
gr.Markdown("#### 4. Suicidal Thoughts & Behaviors")
cat4_col_labels = [
("YUSUITHK", "YUSUITHK: Thought of killing self (past 12 months)?"),
("YUSUITHKYR", "YUSUITHKYR: Seriously thought about killing self?"),
("YUSUIPLNYR", "YUSUIPLNYR: Made plans to kill self in past year?"),
("YUSUIPLN", "YUSUIPLN: Made plans to kill yourself in past 12 months?")
]
cat4_inputs = []
for col, label_text in cat4_col_labels:
cat4_inputs.append(
gr.Dropdown(
choices=list(input_mapping[col].keys()),
label=label_text
)
)
# Combine all inputs in the correct order
all_inputs = cat1_inputs + cat2_inputs + cat3_inputs + cat4_inputs
# Output components
predict_btn = gr.Button("Predict")
out_pred_res = gr.Textbox(label="Prediction Results (with Probability)", lines=8)
out_sev = gr.Textbox(label="Mental Health Severity", lines=2)
out_count = gr.Markdown(label="Total Patient Count")
out_nn = gr.Markdown(label="Nearest Neighbors Summary")
        out_bar_input = gr.Plot(label="Input Feature Counts")
        out_bar_label = gr.Plot(label="Predicted Label Counts")
# Connect the predict button to the predict function
predict_btn.click(
fn=predict,
inputs=all_inputs,
outputs=[
out_pred_res,
out_sev,
out_count,
out_nn,
out_bar_input,
out_bar_label
]
)
# ======== TAB 2: Unified Distribution/Co-occurrence ========
with gr.Tab("Distribution/Co-occurrence"):
gr.Markdown("### Select 1 or 2 features + 1 label to see a bar chart.")
# Show only your 25 input features
list_of_features = sorted(input_mapping.keys())
# Show all label columns from the predictor map
list_of_labels = sorted(predictor.prediction_map.keys())
selected_features = gr.CheckboxGroup(
choices=list_of_features,
label="Select 1 or 2 features"
)
label_dd = gr.Dropdown(
choices=list_of_labels,
label="Label Column (e.g., YOWRCONC, YOSEEDOC, etc.)"
)
generate_combined_btn = gr.Button("Generate Plot")
combined_output = gr.Plot()
generate_combined_btn.click(
fn=combined_plot,
inputs=[selected_features, label_dd],
outputs=combined_output
)
# Finally, launch the Gradio app
demo.launch()
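# A sketch of common alternatives (not used here): demo.launch(share=True) for a
# temporary public link, or demo.launch(server_name="0.0.0.0") when self-hosting
# and the app should listen on all interfaces.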