# Spaces: Sleeping
import os
from glob import glob
from statistics import harmonic_mean

import pandas as pd
from sklearn import metrics
# Standardize every curve CSV found in ``output_original/``, compute
# area-under-curve summaries for each one, and merge all of them into
# ``merged_outputs.csv``.
#
# Each input CSV must have exactly three columns, which are renamed (by
# position) to ``tpr``, ``new_tweets`` and ``threshold``.

# Sorted so the row order of the merged output does not depend on the
# order in which the filesystem happens to list the directory.
files = sorted(glob('output_original/*.csv'))

# Denominator of the ``fpr2`` column.
# NOTE(review): presumably the theoretical maximum of ``new_tweets`` - confirm.
theoretical = 1357228

# Upper bound on ``fpr2`` used for the capped AUC (``roc95``) and for the
# rescaled ``fpr3`` column.
# NOTE(review): the origin of 0.016 is not documented in this file - confirm.
fpr_cap = 0.016


def _capped_auc(curve, cap, label):
    """Return the AUC of ``curve`` restricted to ``fpr2 <= cap``.

    The retained ``fpr2`` values are rescaled by ``1 / cap`` and one closing
    point is appended at x = 1 so the area is always taken up to x = 1.

    Args:
        curve: DataFrame with ``fpr2`` and ``tpr`` columns.
        cap: Largest ``fpr2`` value kept in the window.
        label: Name used in error messages (the source file).

    Returns:
        The value of ``sklearn.metrics.auc`` over the rescaled window plus
        the closing point.

    Raises:
        ValueError: If no row satisfies ``fpr2 <= cap``; ``metrics.auc``
            also raises ``ValueError`` if the x values are not monotonic.
    """
    window = curve.loc[curve['fpr2'] <= cap]
    if window.empty:
        raise ValueError(
            f"{label}: no points with fpr2 <= {cap}; cannot compute capped AUC"
        )

    scaled_fpr = window['fpr2'] * (1 / cap)
    tpr_max = window['tpr'].max()

    if tpr_max < 1:
        # Extend the curve to x = 1 by scaling the best TPR with the inverse
        # of the largest rescaled FPR inside the window.
        # NOTE(review): this value can exceed 1 - confirm that is intended.
        multiplier = 1 / scaled_fpr.max()
        closing_tpr = tpr_max * multiplier
    else:
        # The window already reaches the maximum TPR, so hold it flat up to
        # x = 1. Previously this case reused undefined or stale values.
        closing_tpr = tpr_max

    # Build fresh lists instead of enlarging the column Series in place,
    # which mutated objects tied to the filtered frame.
    x_points = scaled_fpr.tolist() + [1.0]
    y_points = window['tpr'].tolist() + [closing_tpr]
    return metrics.auc(x_points, y_points)


if not files:
    # pd.concat([]) would fail with an opaque "No objects to concatenate".
    raise ValueError("no input files matched 'output_original/*.csv'")

# DataFrame.to_csv does not create missing parent directories.
os.makedirs('output_standardized', exist_ok=True)

dfs = []
for file in files:
    # os.path handles both '/' and '\\' separators returned by glob.
    filename = os.path.basename(file)

    df = pd.read_csv(file)
    df.columns = ['tpr', 'new_tweets', 'threshold']
    df['fpr'] = df['new_tweets'] / df['new_tweets'].max()
    df['fpr2'] = df['new_tweets'] / theoretical

    # Keep one row per TPR value: the one with the fewest new_tweets.
    df = df.sort_values(by=['tpr', 'new_tweets'])
    df = df.drop_duplicates(subset=['tpr'], keep='first')
    df.to_csv(os.path.join('output_standardized', filename), index=False)

    df['metric'] = os.path.splitext(filename)[0]

    roc1 = metrics.auc(df['fpr'], df['tpr'])
    roc2 = metrics.auc(df['fpr2'], df['tpr'])
    df['roc1'] = roc1
    df['roc2'] = roc2

    roc95 = _capped_auc(df, fpr_cap, file)
    df['roc95'] = roc95
    df['fpr3'] = df['fpr2'] * (1 / fpr_cap)
    df['harmonic'] = harmonic_mean([roc95, roc1])

    dfs.append(df)

df = pd.concat(dfs)
df.to_csv('merged_outputs.csv', index=False)