# Load the LogisticRegression classifier # Note, use CV for cross-validation as requested in the question from sklearn.ensemble import RandomForestClassifier # Load some other sklearn functions from sklearn.model_selection import train_test_split from sklearn.metrics import classification_report from sklearn import metrics # Import other libraries import pandas as pd, numpy as np import os import yaml import sys # $1 is the train data file name # $2 is the train label file name # $3 is the test data file name # $4 is the test data file name X_train = pd.read_csv(f'{sys.argv[1]}', index_col=0) y_train = pd.read_csv(f'{sys.argv[2]}', index_col=0) y_train = y_train['score'] y_train = y_train.astype(int) X_test = pd.read_csv(f'{sys.argv[3]}', index_col=0) y_test = pd.read_csv(f'{sys.argv[4]}', index_col=0) y_test = y_test['score'] y_test = y_test.astype(int) gp_classifier = RandomForestClassifier(random_state=0).fit(X_train, y_train) # draw fpr tpr fpr, tpr, thresholds = metrics.roc_curve(y_test.astype(float), gp_classifier.predict_proba(X_test)[:,1], pos_label=1) print("AUC={:.9f}".format(metrics.auc(fpr, tpr)))