# Hosting-page residue: "Mishal23's picture" / "Update app.py" / commit 5544e11 (verified)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import SMOTE
from transformers import pipeline
import gradio as gr
# ---------------------------------------------------------------------------
# Data loading, preprocessing, training and evaluation.
# Everything here runs once at import time; the fitted `scaler` and `model`
# (and `feature_cols`) are then used by the Gradio prediction function below.
# ---------------------------------------------------------------------------

# Single seed shared by every randomized step so that repeated runs produce
# the same balanced sample, the same split and the same SMOTE output.
RANDOM_STATE = 42

# Path to the transactions CSV; it must be in the working directory.
file_path = 'creditcard.csv'

# Load the dataset
df = pd.read_csv(file_path)

# Display basic information
print("Columns in the dataset:", df.columns)
print(df.head())

# Preprocessing: every column except the label and the timestamp is a feature.
time_col = 'Time'
amount_col = 'Amount'
class_col = 'Class'
feature_cols = [col for col in df.columns if col not in [class_col, time_col]]

# Fill missing values with the per-column mean. `numeric_only=True` keeps this
# from raising if the CSV ever carries a non-numeric column.
df = df.fillna(df.mean(numeric_only=True))

# Downsample the majority class (label 0) to the size of the minority class
# (label 1) so both classes are equally represented.
df_majority = df[df[class_col] == 0]
df_minority = df[df[class_col] == 1]
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=len(df_minority),
    random_state=RANDOM_STATE,
)
df_balanced = pd.concat([df_majority_downsampled, df_minority])

X = df_balanced[feature_cols]
y = df_balanced[class_col]

# Split BEFORE scaling: fitting the scaler on all rows would leak test-set
# statistics into training. `stratify=y` keeps the class ratio identical in
# both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

# Feature scaling: learn mean/variance from the training rows only, then apply
# the same transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Kept so the module still exposes `X_scaled` (the whole balanced set, scaled).
X_scaled = scaler.transform(X)

# Even out any residual imbalance in the training partition using SMOTE.
smote = SMOTE(random_state=RANDOM_STATE)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Logistic Regression Model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predictions on the held-out partition
y_pred = model.predict(X_test)

# Model evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Initialize the retrieval pipeline with a lightweight model (if required)
# NOTE(review): nothing visible in this file uses `retrieval_pipeline`; loading
# it downloads and instantiates DistilBERT at startup. Confirm whether it is
# still needed before removing.
retrieval_pipeline = pipeline("feature-extraction", model="distilbert-base-uncased")
def retrieve_explanation(prediction) -> str:
    """Return the user-facing sentence that accompanies a model prediction.

    Args:
        prediction: Predicted class label; a value equal to ``1`` means fraud,
            anything else is treated as non-fraud.

    Returns:
        A fixed explanatory sentence for the fraudulent or non-fraudulent case.
    """
    if prediction == 1:
        return "The transaction is classified as fraudulent based on the provided features."
    return "The transaction is classified as non-fraudulent based on the provided features."
# Gradio prediction function
def fraud_detection_predictor(V1, V2, V3, Amount):
    """Classify one transaction from the four user-supplied features.

    Only V1, V2, V3 and Amount come from the UI; every other column in
    ``feature_cols`` is set to 0 before scaling.

    Args:
        V1: Value for the ``V1`` feature.
        V2: Value for the ``V2`` feature.
        V3: Value for the ``V3`` feature.
        Amount: Value for the ``Amount`` feature.

    Returns:
        A ``(fraud_status, explanation)`` tuple of strings, where
        ``fraud_status`` is ``"Fraudulent"`` or ``"Non-Fraudulent"``.

    Raises:
        gr.Error: If any of the four inputs is ``None`` (an empty number box).
        ValueError: If one of the four names is not in ``feature_cols``.
    """
    supplied = {'V1': V1, 'V2': V2, 'V3': V3, 'Amount': Amount}

    # An empty gr.Number field arrives as None; report it in the UI instead of
    # letting the scaler fail with an opaque conversion error.
    missing = [name for name, value in supplied.items() if value is None]
    if missing:
        raise gr.Error(f"Please provide a value for: {', '.join(missing)}")

    # Start from an all-zero row and overwrite the features the user entered.
    row = dict.fromkeys(feature_cols, 0.0)
    for name, value in supplied.items():
        if name not in row:
            raise ValueError(f"{name!r} is not one of the model's feature columns")
        row[name] = float(value)

    # Scale input data. A DataFrame carrying the training column names lets
    # the scaler match features by name instead of warning about a bare list.
    input_data = scaler.transform(pd.DataFrame([row], columns=feature_cols))

    # Make a prediction; cast the NumPy scalar to a plain int for comparison.
    prediction = int(model.predict(input_data)[0])
    fraud_status = "Fraudulent" if prediction == 1 else "Non-Fraudulent"
    explanation = retrieve_explanation(prediction)
    return fraud_status, explanation
# Assemble the web UI: four numeric fields feed the predictor, and its two
# returned strings are rendered in separate text boxes.
interface = gr.Interface(
    fn=fraud_detection_predictor,
    inputs=[gr.Number(label=field) for field in ("V1", "V2", "V3", "Amount")],
    outputs=[gr.Textbox(label=caption) for caption in ("Fraud Status", "Explanation")],
    title="Credit Card Fraud Detection",
    description="Enter transaction features (V1, V2, V3, Amount) to predict fraud status.",
)

# Start serving the app.
interface.launch()