# Hugging Face Spaces status banner (page-scrape residue): Running on CPU Upgrade
import gradio as gr | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from sklearn import datasets, linear_model | |
from sklearn.metrics import mean_squared_error, r2_score | |
import matplotlib | |
matplotlib.use('agg') | |
# Width and height, in inches, of the figure drawn for each regression.
FIGSIZE = (10, 10)

# Human-readable labels offered in the feature drop-down. The list has nine
# entries; the "sex" variable is intentionally left out.
feature_names = [
    "Age",
    "Body-Mass Index (BMI)",
    "Blood Pressure",
    "Total serum Cholesterol",
    "Low-Density Lipoproteins (LDL)",
    "High-Density Lipoproteins (HDL)",
    "Total cholesterol / HDL",
    "log(Serum Triglycerides Level) (possibly)",
    "Blood Sugar Level",
]
def create_dataset(feature_id=2):
    """Load the diabetes data and split one feature into train/test parts.

    Args:
        feature_id: Column index of the single feature to keep from the
            diabetes feature matrix.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The last 20 samples
        form the test part and everything before them the training part.
        The feature arrays keep a trailing axis of length one, so they are
        two-dimensional.
    """
    holdout = 20  # number of trailing samples reserved for testing

    features, targets = datasets.load_diabetes(return_X_y=True)

    # Keep a single column, re-inserting an axis so the result stays 2-D.
    column = features[:, np.newaxis, feature_id]

    x_train, x_test = column[:-holdout], column[-holdout:]
    y_train, y_test = targets[:-holdout], targets[-holdout:]
    return x_train, x_test, y_train, y_test
def train_model(input_data):
    """Fit a single-feature linear regression on the diabetes data and plot it.

    Args:
        input_data: Display name of the feature to regress on. It must be one
            of the entries of ``feature_names`` (the legacy lowercase
            ``'age'`` is still accepted).

    Returns:
        A tuple ``(fig, coef, mse, r2)``: the matplotlib figure showing the
        test samples and the fitted line, the fitted coefficient array
        (``regr.coef_``), and the mean squared error and R2 score, both
        computed on the test split.

    Raises:
        ValueError: If ``input_data`` is not a known feature name.
    """
    # ``feature_names`` leaves out the sex variable, so "Age" maps to column 0
    # and every later label is shifted by one relative to the dataset columns.
    # The comparison is case-insensitive: the drop-down supplies "Age", and a
    # case-sensitive check against 'age' silently selected column 1 instead.
    is_age = isinstance(input_data, str) and input_data.casefold() == "age"
    feature_id = 0 if is_age else feature_names.index(input_data) + 1

    diabetes_X_train, diabetes_X_test, diabetes_y_train, diabetes_y_test = (
        create_dataset(feature_id)
    )

    # Create the linear regression object and train it on the training split.
    regr = linear_model.LinearRegression()
    regr.fit(diabetes_X_train, diabetes_y_train)

    # Predict on the held-out samples and score the predictions.
    diabetes_y_pred = regr.predict(diabetes_X_test)
    mse = mean_squared_error(diabetes_y_test, diabetes_y_pred)
    r2 = r2_score(diabetes_y_test, diabetes_y_pred)

    # Draw on explicit axes instead of pyplot's implicit "current figure" so
    # the drawing calls always target the figure created here.
    fig, ax = plt.subplots(figsize=FIGSIZE)
    ax.scatter(diabetes_X_test, diabetes_y_test, color="black")
    ax.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)
    ax.set_xlabel(input_data, fontsize=18)
    ax.set_ylabel("Disease progression", fontsize=18)
    # Hide the tick marks on both axes.
    ax.set_xticks(())
    ax.set_yticks(())

    return fig, regr.coef_, mse, r2
# Heading and introductory text rendered as Markdown at the top of the demo.
# NOTE(review): the trailing "π" in the title looks like a mis-decoded emoji;
# confirm the intended glyph before changing it.
title = "Linear Regression Example π"
description = """The example shows how linear regression attempts to draw a straight line that will best minimize the residual sum of squares between the observed responses in the dataset.
The diabetes dataset contains baseline variables (features), age, sex, body mass index, average blood pressure, and six blood serum measurements that were obtained for 442 diabetes patients.
The predictive variable is a quantitative measure of the disease progression one year after the baseline.
When selecting a feature from the drop-down menu, a linear regression model is trained for the specific feature and the predictive variable.
The figure shows a scatter plot of the test set as well as the linear model (line).
The mean square error and R2 scores are calculated using the test set and they are printed, along with the regression coefficient of the model.
"""
# Build the demo page: heading and description on top, then a row holding the
# plot next to a column with the feature selector and the numeric results.
with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    with gr.Column():
        with gr.Row():
            plot = gr.Plot()
            with gr.Column():
                # The initial value must be one of ``choices``; the previous
                # default "Body-Mass Index" matched no entry of the list.
                input_data = gr.Dropdown(
                    choices=feature_names,
                    label="Feature",
                    value="Body-Mass Index (BMI)",
                )
                coef = gr.Textbox(label="Coefficients")
                mse = gr.Textbox(label="Mean Squared Error (MSE)")
                r2 = gr.Textbox(label="R2 score")

    # Retrain and redraw whenever a different feature is selected.
    input_data.change(
        fn=train_model,
        inputs=[input_data],
        outputs=[plot, coef, mse, r2],
        queue=False,
    )

# NOTE(review): ``enable_queue`` is reported as deprecated in newer Gradio
# releases in favour of ``demo.queue()``; confirm against the pinned Gradio
# version before upgrading.
demo.launch(enable_queue=True)