Spaces:

Mishal23
/

CreditCard-fraud-detection

Running

App Files Files Community

CreditCard-fraud-detection / app.py

Mishal23

Update app.py

5544e11 verified 3 months ago

raw

history blame contribute delete

3.46 kB

	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.utils import resample
	from sklearn.metrics import accuracy_score, classification_report
	from sklearn.linear_model import LogisticRegression
	from imblearn.over_sampling import SMOTE
	from transformers import pipeline
	import gradio as gr

	# Load the creditcard.csv dataset from your local directory
	file_path = 'creditcard.csv' # Make sure this file is in the same directory as your script

	# Load the dataset
	df = pd.read_csv(file_path)

	# Display basic information
	print("Columns in the dataset:", df.columns)
	print(df.head())

	# Preprocessing: Selecting relevant columns
	time_col = 'Time'
	amount_col = 'Amount'
	class_col = 'Class'
	feature_cols = [col for col in df.columns if col not in [class_col, time_col]]

	# Handle missing values
	df = df.fillna(df.mean())

	# Downsample the majority class to handle class imbalance
	df_majority = df[df[class_col] == 0]
	df_minority = df[df[class_col] == 1]
	df_majority_downsampled = resample(df_majority, replace=False, n_samples=len(df_minority))
	df_balanced = pd.concat([df_majority_downsampled, df_minority])

	# Feature scaling
	X = df_balanced[feature_cols]
	y = df_balanced[class_col]
	scaler = StandardScaler()
	X_scaled = scaler.fit_transform(X)

	# Train-test split
	X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

	# Balancing the dataset using SMOTE
	smote = SMOTE()
	X_train, y_train = smote.fit_resample(X_train, y_train)

	# Logistic Regression Model
	model = LogisticRegression(max_iter=1000)
	model.fit(X_train, y_train)

	# Predictions
	y_pred = model.predict(X_test)

	# Model evaluation
	print("Accuracy:", accuracy_score(y_test, y_pred))
	print("Classification Report:\n", classification_report(y_test, y_pred))

	# Initialize the retrieval pipeline with a lightweight model (if required)
	retrieval_pipeline = pipeline("feature-extraction", model="distilbert-base-uncased")

	def retrieve_explanation(prediction):
	if prediction == 1:
	return "The transaction is classified as fraudulent based on the provided features."
	return "The transaction is classified as non-fraudulent based on the provided features."

	# Gradio prediction function
	def fraud_detection_predictor(V1, V2, V3, Amount):
	# Prepare input features
	input_features = [0] * len(feature_cols)
	input_features[feature_cols.index('V1')] = V1
	input_features[feature_cols.index('V2')] = V2
	input_features[feature_cols.index('V3')] = V3
	input_features[feature_cols.index('Amount')] = Amount

	# Scale input data
	input_data = scaler.transform([input_features])

	# Make a prediction
	prediction = model.predict(input_data)[0]
	fraud_status = "Fraudulent" if prediction == 1 else "Non-Fraudulent"
	explanation = retrieve_explanation(prediction)
	return fraud_status, explanation

	# Define Gradio Interface
	interface = gr.Interface(
	fn=fraud_detection_predictor,
	inputs=[
	gr.Number(label="V1"),
	gr.Number(label="V2"),
	gr.Number(label="V3"),
	gr.Number(label="Amount")
	],
	outputs=[
	gr.Textbox(label="Fraud Status"),
	gr.Textbox(label="Explanation")
	],
	title="Credit Card Fraud Detection",
	description="Enter transaction features (V1, V2, V3, Amount) to predict fraud status."
	)

	# Launch Gradio Interface
	interface.launch()