File size: 3,458 Bytes
2f3185c
 
 
 
 
 
 
 
 
 
 
5544e11
 
 
 
 
2f3185c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5544e11
 
2f3185c
5544e11
2f3185c
5544e11
2f3185c
5544e11
 
 
 
2f3185c
5544e11
2f3185c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5544e11
 
2f3185c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import SMOTE
from transformers import pipeline
import gradio as gr

# Load the creditcard.csv dataset from your local directory
file_path = 'creditcard.csv'  # Make sure this file is in the same directory as your script

# Single seed shared by every randomized step (downsampling, split, SMOTE)
# so that the reported metrics are reproducible from run to run.
RANDOM_STATE = 42

# Load the dataset
df = pd.read_csv(file_path)

# Display basic information
print("Columns in the dataset:", df.columns)
print(df.head())

# Preprocessing: Selecting relevant columns
time_col = 'Time'
amount_col = 'Amount'
class_col = 'Class'
# Every column except the label and the timestamp is used as a model feature.
feature_cols = [col for col in df.columns if col not in [class_col, time_col]]

# Handle missing values: fill each numeric column with its own mean.
# numeric_only=True keeps this working if the CSV carries a non-numeric column.
df = df.fillna(df.mean(numeric_only=True))

# Downsample the majority class to handle class imbalance
df_majority = df[df[class_col] == 0]
df_minority = df[df[class_col] == 1]
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=len(df_minority),
    random_state=RANDOM_STATE,
)
df_balanced = pd.concat([df_majority_downsampled, df_minority])

X = df_balanced[feature_cols]
y = df_balanced[class_col]

# Train-test split happens BEFORE scaling so that the held-out rows never
# influence the scaler's mean/variance (avoids train/test leakage).
# stratify=y keeps the class ratio identical in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

# Feature scaling: fit on the training partition only, then apply the same
# transformation to the test partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balancing the training set using SMOTE. The data was already balanced by
# downsampling above, so this only evens out whatever small residual
# imbalance the split left in the training partition.
smote = SMOTE(random_state=RANDOM_STATE)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Logistic Regression Model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Model evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Initialize the retrieval pipeline with a lightweight model (if required)
# NOTE(review): nothing in the visible part of this file references
# `retrieval_pipeline`; presumably constructing it loads the DistilBERT model
# at start-up — confirm whether it is still needed before keeping or removing.
retrieval_pipeline = pipeline("feature-extraction", model="distilbert-base-uncased")

def retrieve_explanation(prediction):
    """Return the human-readable sentence for a predicted class label.

    Args:
        prediction: Predicted label; a value equal to 1 means fraud, any
            other value is treated as non-fraud.

    Returns:
        The explanation string matching the label.
    """
    fraud_text = "The transaction is classified as fraudulent based on the provided features."
    legit_text = "The transaction is classified as non-fraudulent based on the provided features."
    return fraud_text if prediction == 1 else legit_text

# Gradio prediction function
def fraud_detection_predictor(V1, V2, V3, Amount):
    """Classify a single transaction from the four features exposed in the UI.

    Features that the form does not expose are set to a raw value of 0
    before scaling.

    Args:
        V1: Value for the ``V1`` feature column.
        V2: Value for the ``V2`` feature column.
        V3: Value for the ``V3`` feature column.
        Amount: Value for the ``Amount`` feature column.

    Returns:
        A ``(fraud_status, explanation)`` tuple of strings.

    Raises:
        gr.Error: If any of the four inputs is ``None`` (a blank form field).
        ValueError: If one of the four names is not present in ``feature_cols``.
    """
    provided = {'V1': V1, 'V2': V2, 'V3': V3, 'Amount': Amount}

    # A blank gr.Number field arrives as None; without this check it would be
    # coerced to NaN and fail deep inside the estimator with an opaque error.
    missing = [name for name, value in provided.items() if value is None]
    if missing:
        raise gr.Error("Please provide a value for: " + ", ".join(missing))

    # Prepare input features
    input_features = [0] * len(feature_cols)
    for name, value in provided.items():
        input_features[feature_cols.index(name)] = value

    # Scale input data. The scaler was fitted on a DataFrame, so pass a
    # DataFrame with the same column names to avoid sklearn's feature-name
    # mismatch warning on every request.
    input_frame = pd.DataFrame([input_features], columns=feature_cols)
    input_data = scaler.transform(input_frame)

    # Make a prediction
    prediction = model.predict(input_data)[0]
    fraud_status = "Fraudulent" if prediction == 1 else "Non-Fraudulent"
    explanation = retrieve_explanation(prediction)
    return fraud_status, explanation

# Define Gradio Interface
# The four numeric inputs are passed positionally to fraud_detection_predictor
# in the order listed; the two text outputs receive its
# (fraud_status, explanation) return tuple.
interface = gr.Interface(
    fn=fraud_detection_predictor,
    inputs=[
        gr.Number(label="V1"),
        gr.Number(label="V2"),
        gr.Number(label="V3"),
        gr.Number(label="Amount")
    ],
    outputs=[
        gr.Textbox(label="Fraud Status"),
        gr.Textbox(label="Explanation")
    ],
    title="Credit Card Fraud Detection",
    description="Enter transaction features (V1, V2, V3, Amount) to predict fraud status."
)

# Launch Gradio Interface only when this file is executed as a script, so that
# importing the module (e.g. to reuse the predictor) does not start a
# blocking web server.
if __name__ == "__main__":
    interface.launch()