lm1-misc-pile
/
3b92b62b6
/evaluation
/lm1-3b9-26b-results_lm-eval_global_step24424_2023-01-24-13-53-41_2shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.322, | |
"acc_stderr": 0.014782913600996666 | |
}, | |
"anli_r2": { | |
"acc": 0.332, | |
"acc_stderr": 0.014899597242811471 | |
}, | |
"anli_r3": { | |
"acc": 0.31583333333333335, | |
"acc_stderr": 0.013424568830356457 | |
}, | |
"cb": { | |
"acc": 0.44642857142857145, | |
"acc_stderr": 0.06703189227942398, | |
"f1": 0.3234429296979417 | |
}, | |
"copa": { | |
"acc": 0.65, | |
"acc_stderr": 0.0479372485441102 | |
}, | |
"hellaswag": { | |
"acc": 0.33339972117108146, | |
"acc_stderr": 0.004704645214506539, | |
"acc_norm": 0.39583748257319257, | |
"acc_norm_stderr": 0.004880303863138502 | |
}, | |
"rte": { | |
"acc": 0.516245487364621, | |
"acc_stderr": 0.030080573208738064 | |
}, | |
"winogrande": { | |
"acc": 0.5067087608524072, | |
"acc_stderr": 0.014051220692330349 | |
}, | |
"storycloze_2016": { | |
"acc": 0.6215927311598076, | |
"acc_stderr": 0.011215325833205824 | |
}, | |
"boolq": { | |
"acc": 0.5204892966360857, | |
"acc_stderr": 0.008737709345935946 | |
}, | |
"arc_easy": { | |
"acc": 0.5420875420875421, | |
"acc_stderr": 0.010223371342195902, | |
"acc_norm": 0.5214646464646465, | |
"acc_norm_stderr": 0.010250325159456652 | |
}, | |
"arc_challenge": { | |
"acc": 0.23720136518771331, | |
"acc_stderr": 0.012430399829260846, | |
"acc_norm": 0.2551194539249147, | |
"acc_norm_stderr": 0.012739038695202098 | |
}, | |
"sciq": { | |
"acc": 0.882, | |
"acc_stderr": 0.010206869264381791, | |
"acc_norm": 0.874, | |
"acc_norm_stderr": 0.010499249222408033 | |
}, | |
"piqa": { | |
"acc": 0.6708378672470077, | |
"acc_stderr": 0.010963750414134703, | |
"acc_norm": 0.6648531011969532, | |
"acc_norm_stderr": 0.011013513128643931 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |