{
"results": {
"anli_r1": {
"acc": 0.309,
"acc_stderr": 0.014619600977206493
},
"anli_r2": {
"acc": 0.347,
"acc_stderr": 0.015060472031706622
},
"anli_r3": {
"acc": 0.3541666666666667,
"acc_stderr": 0.013811933499570963
},
"cb": {
"acc": 0.5178571428571429,
"acc_stderr": 0.06737697508644648,
"f1": 0.45082590365609226
},
"copa": {
"acc": 0.73,
"acc_stderr": 0.0446196043338474
},
"hellaswag": {
"acc": 0.36666002788289187,
"acc_stderr": 0.0048090772053434976,
"acc_norm": 0.4627564230233021,
"acc_norm_stderr": 0.004975919665116536
},
"rte": {
"acc": 0.5487364620938628,
"acc_stderr": 0.029953149241808943
},
"winogrande": {
"acc": 0.5382794001578532,
"acc_stderr": 0.014011242594964115
},
"storycloze_2016": {
"acc": 0.6648850881881346,
"acc_stderr": 0.010915644164980044
},
"boolq": {
"acc": 0.5779816513761468,
"acc_stderr": 0.008638040428462952
},
"arc_easy": {
"acc": 0.5812289562289562,
"acc_stderr": 0.010123487160167812,
"acc_norm": 0.57996632996633,
"acc_norm_stderr": 0.010127718838529315
},
"arc_challenge": {
"acc": 0.257679180887372,
"acc_stderr": 0.012780770562768402,
"acc_norm": 0.29180887372013653,
"acc_norm_stderr": 0.013284525292403506
},
"sciq": {
"acc": 0.919,
"acc_stderr": 0.008632121032139992,
"acc_norm": 0.92,
"acc_norm_stderr": 0.008583336977753653
},
"piqa": {
"acc": 0.6931447225244831,
"acc_stderr": 0.010760295070580366,
"acc_norm": 0.7013057671381937,
"acc_norm_stderr": 0.010678556398149233
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}