{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"V28","authorship_tag":"ABX9TyNFsavRsbMSlQB8V41bPfJ2"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"TPU"},"cells":[{"cell_type":"code","source":["!pip install seaborn matplotlib scikit-learn\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QS4Zv5o3S8kO","executionInfo":{"status":"ok","timestamp":1725105479506,"user_tz":-60,"elapsed":12643,"user":{"displayName":"Mail Cloud","userId":"01708480096028966588"}},"outputId":"0f4396e7-bcbe-427a-a194-9170820261eb"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: seaborn in /usr/local/lib/python3.10/dist-packages (0.13.1)\n","Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.7.1)\n","Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.3.2)\n","Requirement already satisfied: numpy!=1.24.0,>=1.20 in /usr/local/lib/python3.10/dist-packages (from seaborn) (1.26.4)\n","Requirement already satisfied: pandas>=1.2 in /usr/local/lib/python3.10/dist-packages (from seaborn) (2.1.4)\n","Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.2.1)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (0.12.1)\n","Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (4.53.1)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.4.5)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (24.1)\n","Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (9.4.0)\n","Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (3.1.4)\n","Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (2.8.2)\n","Requirement already satisfied: scipy>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.13.1)\n","Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2)\n","Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.2->seaborn) (2024.1)\n","Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.2->seaborn) (2024.1)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n"]}]},{"cell_type":"code","source":["# Import necessary libraries\n","import pandas as pd\n","from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV\n","from sklearn.impute import SimpleImputer\n","from sklearn.svm import SVC\n","from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","from sklearn.preprocessing import LabelBinarizer\n","\n","# Load the dataset\n","filename = \"dataset.csv\"\n","df = pd.read_csv(filename)\n","\n","# Separate features and labels\n","X = df.drop(columns=[\"Label\"]) # Features (all columns except the label)\n","y = df[\"Label\"] # Labels\n","\n","# Handle missing values by filling them with the mean of each column\n","imputer = SimpleImputer(strategy=\"mean\")\n","X = imputer.fit_transform(X)\n","\n","# Split the data into training and testing sets\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n","\n","print(\"Training features shape:\", X_train.shape)\n","\n","# Initialize and train the SVM model\n","svm_model = SVC(kernel='linear', probability=True)\n","svm_model.fit(X_train, y_train)\n","\n","# Make predictions and evaluate the model\n","y_pred = svm_model.predict(X_test)\n","accuracy = accuracy_score(y_test, y_pred)\n","print(f\"Accuracy: {accuracy:.2f}\")\n","print(\"Classification Report:\\n\", classification_report(y_test, y_pred))\n","\n","# Confusion Matrix\n","conf_matrix = confusion_matrix(y_test, y_pred)\n","sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')\n","plt.title('Confusion Matrix')\n","plt.xlabel('Predicted Label')\n","plt.ylabel('True Label')\n","plt.show()\n","\n","# Cross-Validation\n","cv_scores = cross_val_score(svm_model, X, y, cv=5)\n","print(f\"Cross-Validation Accuracy: {cv_scores.mean():.2f} ± {cv_scores.std():.2f}\")\n","\n","# ROC Curve and AUC (for binary classification only)\n","if len(set(y)) == 2:\n"," lb = LabelBinarizer()\n"," y_test_binarized = lb.fit_transform(y_test)\n"," y_pred_prob = svm_model.predict_proba(X_test)[:, 1]\n"," fpr, tpr, thresholds = roc_curve(y_test_binarized, y_pred_prob)\n"," roc_auc = auc(fpr, tpr)\n","\n"," plt.figure()\n"," plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')\n"," plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')\n"," plt.xlim([0.0, 1.0])\n"," plt.ylim([0.0, 1.05])\n"," plt.xlabel('False Positive Rate')\n"," plt.ylabel('True Positive Rate')\n"," plt.title('Receiver Operating Characteristic (ROC)')\n"," plt.legend(loc=\"lower right\")\n"," plt.show()\n","\n","# Hyperparameter Tuning using Grid Search\n","param_grid = {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'rbf']}\n","grid_search = GridSearchCV(SVC(probability=True), param_grid, cv=5)\n","grid_search.fit(X_train, y_train)\n","print(f\"Best parameters: {grid_search.best_params_}\")\n","print(f\"Best cross-validation score: {grid_search.best_score_:.2f}\")\n"],"metadata":{"id":"Tn0Yi-ckCJpu","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1725116993957,"user_tz":-60,"elapsed":11448407,"user":{"displayName":"Mail Cloud","userId":"01708480096028966588"}},"outputId":"9247f6cb-5e01-4b39-f1c2-931e8f018047"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["Training features shape: (1864, 2)\n","Accuracy: 0.97\n","Classification Report:\n"," precision recall f1-score support\n","\n"," 0 0.97 1.00 0.99 408\n"," 1 1.00 0.80 0.89 59\n","\n"," accuracy 0.97 467\n"," macro avg 0.99 0.90 0.94 467\n","weighted avg 0.98 0.97 0.97 467\n","\n"]},{"output_type":"display_data","data":{"text/plain":["
"],"image/png":"\n"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Cross-Validation Accuracy: 0.97 ± 0.02\n"]},{"output_type":"display_data","data":{"text/plain":["
"],"image/png":"\n"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Best parameters: {'C': 0.1, 'kernel': 'linear'}\n","Best cross-validation score: 0.98\n"]}]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"18Cjv_dT8hBl","executionInfo":{"status":"ok","timestamp":1724882440219,"user_tz":-60,"elapsed":242718,"user":{"displayName":"Mail Cloud","userId":"01708480096028966588"}},"outputId":"d8a450f3-8e65-4300-fc30-3f050dfaf01c"},"outputs":[{"output_type":"stream","name":"stdout","text":["Training features shape: (1864, 5)\n","Accuracy: 0.94\n","Classification Report:\n"," precision recall f1-score support\n","\n"," 0 0.94 1.00 0.97 434\n"," 1 1.00 0.15 0.26 33\n","\n"," accuracy 0.94 467\n"," macro avg 0.97 0.58 0.62 467\n","weighted avg 0.94 0.94 0.92 467\n","\n"]}],"source":["import pandas as pd\n","from sklearn.model_selection import train_test_split\n","from sklearn.impute import SimpleImputer\n","from sklearn.svm import SVC\n","from sklearn.metrics import accuracy_score, classification_report\n","\n","# Load the dataset\n","filename = \"dataset.csv\"\n","df = pd.read_csv(filename)\n","\n","# Separate features and labels\n","X = df.drop(columns=[\"Label\"]) # Features (all columns except the label)\n","y = df[\"Label\"] # Labels\n","\n","# Handle missing values by filling them with the mean of each column\n","imputer = SimpleImputer(strategy=\"mean\")\n","X = imputer.fit_transform(X)\n","\n","# Split the data into training and testing sets\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n","\n","print(\"Training features shape:\", X_train.shape)\n","\n","# Initialize and train the SVM model\n","svm_model = SVC(kernel='linear', probability=True)\n","svm_model.fit(X_train, y_train)\n","\n","# # Make predictions and evaluate the model\n","y_pred = svm_model.predict(X_test)\n","accuracy = accuracy_score(y_test, y_pred)\n","print(f\"Accuracy: {accuracy:.2f}\")\n","print(\"Classification Report:\\n\", classification_report(y_test, y_pred))\n"]},{"cell_type":"code","source":["import joblib\n","\n","# Save the model to a file\n","joblib.dump(svm_model, \"ais_modelv4.pkl\")\n","\n","# Load the model from the file\n","loaded_model = joblib.load(\"ais_modelv4.pkl\")\n"],"metadata":{"id":"6EcngT3N_Zh5"},"execution_count":null,"outputs":[]}]}