lm1-misc-pile/221m60b60b/evaluation/rankeval/lm1-221m-60b-results_lm-eval_global_step115203_2023-01-22-18-55-49_0shots.csv
task,metric,value,err,version
anli_r1,acc,0.338,0.01496596071022448,0
anli_r2,acc,0.349,0.015080663991563102,0
anli_r3,acc,0.3616666666666667,0.013876131663123872,0
arc_challenge,acc,0.19197952218430034,0.0115095989065981,0
arc_challenge,acc_norm,0.23037542662116042,0.01230492841874761,0
arc_easy,acc,0.4541245791245791,0.010216507710244096,0
arc_easy,acc_norm,0.4095117845117845,0.010090368160990055,0
boolq,acc,0.5217125382262997,0.008736805647519953,1
cb,acc,0.5178571428571429,0.06737697508644647,1
cb,f1,0.3738576238576239,,1
copa,acc,0.66,0.04760952285695237,0
hellaswag,acc,0.28978291177056364,0.004527343651130804,0
hellaswag,acc_norm,0.3158733320055766,0.004639126951051422,0
piqa,acc,0.6311207834602829,0.011257546676908804,0
piqa,acc_norm,0.6305767138193689,0.01126098862857233,0
rte,acc,0.5379061371841155,0.03000984891252912,0
sciq,acc,0.785,0.012997843819031825,0
sciq,acc_norm,0.7,0.014498627873361425,0
storycloze_2016,acc,0.5964724746125066,0.011345169332729754,0
winogrande,acc,0.5074980268350434,0.014050905521228573,0