|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.39967845659163986, |
|
"eval_steps": 500, |
|
"global_step": 4972, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.753351206434316e-08, |
|
"loss": 4.9531, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.042895442359249e-08, |
|
"loss": 4.9011, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.2198391420911528e-07, |
|
"loss": 4.472, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.648793565683646e-07, |
|
"loss": 3.9678, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0777479892761392e-07, |
|
"loss": 3.3412, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.5067024128686325e-07, |
|
"loss": 2.5657, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9356568364611256e-07, |
|
"loss": 1.9677, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.364611260053619e-07, |
|
"loss": 1.6646, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.7935656836461123e-07, |
|
"loss": 1.5032, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.222520107238606e-07, |
|
"loss": 1.3443, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.651474530831099e-07, |
|
"loss": 1.2277, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.080428954423593e-07, |
|
"loss": 1.0104, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.509383378016086e-07, |
|
"loss": 0.7317, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.938337801608579e-07, |
|
"loss": 0.3644, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.367292225201072e-07, |
|
"loss": 0.388, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.796246648793566e-07, |
|
"loss": 0.264, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.225201072386059e-07, |
|
"loss": 0.2479, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.654155495978551e-07, |
|
"loss": 0.403, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.083109919571045e-07, |
|
"loss": 0.3062, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.512064343163538e-07, |
|
"loss": 0.3016, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.941018766756032e-07, |
|
"loss": 0.3445, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.369973190348524e-07, |
|
"loss": 0.2554, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.798927613941018e-07, |
|
"loss": 0.2762, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.985462630408755e-07, |
|
"loss": 0.2265, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.958098170001709e-07, |
|
"loss": 0.2158, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.930733709594662e-07, |
|
"loss": 0.3106, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.903369249187618e-07, |
|
"loss": 0.3085, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.876004788780571e-07, |
|
"loss": 0.2633, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.848640328373525e-07, |
|
"loss": 0.2145, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.821275867966478e-07, |
|
"loss": 0.2594, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.793911407559432e-07, |
|
"loss": 0.2264, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.766546947152385e-07, |
|
"loss": 0.2512, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.739182486745339e-07, |
|
"loss": 0.3154, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.711818026338292e-07, |
|
"loss": 0.2672, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.684453565931246e-07, |
|
"loss": 0.2502, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.6570891055242e-07, |
|
"loss": 0.2702, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.629724645117153e-07, |
|
"loss": 0.2919, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.602360184710108e-07, |
|
"loss": 0.3925, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.574995724303062e-07, |
|
"loss": 0.285, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.547631263896015e-07, |
|
"loss": 0.3084, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.520266803488969e-07, |
|
"loss": 0.2275, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.492902343081922e-07, |
|
"loss": 0.245, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.465537882674875e-07, |
|
"loss": 0.233, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.438173422267829e-07, |
|
"loss": 0.2825, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.410808961860783e-07, |
|
"loss": 0.231, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.383444501453737e-07, |
|
"loss": 0.2449, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.35608004104669e-07, |
|
"loss": 0.2732, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.328715580639644e-07, |
|
"loss": 0.2031, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.301351120232597e-07, |
|
"loss": 0.1749, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.273986659825551e-07, |
|
"loss": 0.1722, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.246622199418504e-07, |
|
"loss": 0.2743, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.219257739011459e-07, |
|
"loss": 0.2907, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.192748417992132e-07, |
|
"loss": 0.2664, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.165383957585086e-07, |
|
"loss": 0.2085, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.13801949717804e-07, |
|
"loss": 0.1839, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.110655036770994e-07, |
|
"loss": 0.2667, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.083290576363947e-07, |
|
"loss": 0.1994, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.0559261159569e-07, |
|
"loss": 0.2568, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.028561655549855e-07, |
|
"loss": 0.2909, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.001197195142807e-07, |
|
"loss": 0.2697, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.973832734735761e-07, |
|
"loss": 0.3379, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.946468274328715e-07, |
|
"loss": 0.2866, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.919103813921669e-07, |
|
"loss": 0.2634, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.891739353514622e-07, |
|
"loss": 0.2234, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.864374893107576e-07, |
|
"loss": 0.2541, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.83701043270053e-07, |
|
"loss": 0.2341, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.809645972293484e-07, |
|
"loss": 0.2602, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.782281511886437e-07, |
|
"loss": 0.2602, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.75491705147939e-07, |
|
"loss": 0.2036, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.727552591072344e-07, |
|
"loss": 0.2342, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.700188130665298e-07, |
|
"loss": 0.2361, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.672823670258251e-07, |
|
"loss": 0.3299, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.645459209851206e-07, |
|
"loss": 0.3221, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.618094749444159e-07, |
|
"loss": 0.2119, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.590730289037113e-07, |
|
"loss": 0.1908, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.563365828630066e-07, |
|
"loss": 0.2736, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.536001368223021e-07, |
|
"loss": 0.1713, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.508636907815974e-07, |
|
"loss": 0.2658, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.481272447408928e-07, |
|
"loss": 0.2235, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.453907987001881e-07, |
|
"loss": 0.1858, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.426543526594834e-07, |
|
"loss": 0.2935, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.399179066187788e-07, |
|
"loss": 0.1996, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.371814605780741e-07, |
|
"loss": 0.2209, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.344450145373696e-07, |
|
"loss": 0.1611, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.317085684966649e-07, |
|
"loss": 0.28, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.289721224559603e-07, |
|
"loss": 0.2486, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.262356764152556e-07, |
|
"loss": 0.1978, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.234992303745511e-07, |
|
"loss": 0.2535, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.207627843338464e-07, |
|
"loss": 0.2666, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.180263382931417e-07, |
|
"loss": 0.1769, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.152898922524371e-07, |
|
"loss": 0.2803, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.125534462117325e-07, |
|
"loss": 0.2129, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.098170001710278e-07, |
|
"loss": 0.2255, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.070805541303232e-07, |
|
"loss": 0.1739, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.043441080896186e-07, |
|
"loss": 0.2321, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.01607662048914e-07, |
|
"loss": 0.2761, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.988712160082093e-07, |
|
"loss": 0.2867, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.961347699675047e-07, |
|
"loss": 0.1763, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.933983239268001e-07, |
|
"loss": 0.325, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.906618778860953e-07, |
|
"loss": 0.2515, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.879254318453907e-07, |
|
"loss": 0.1741, |
|
"step": 3232 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.851889858046861e-07, |
|
"loss": 0.1999, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.824525397639815e-07, |
|
"loss": 0.2393, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.797160937232768e-07, |
|
"loss": 0.2242, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.769796476825722e-07, |
|
"loss": 0.1877, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.742432016418676e-07, |
|
"loss": 0.194, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.71506755601163e-07, |
|
"loss": 0.2499, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.687703095604583e-07, |
|
"loss": 0.2496, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.660338635197537e-07, |
|
"loss": 0.1899, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.63297417479049e-07, |
|
"loss": 0.1866, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.605609714383444e-07, |
|
"loss": 0.1843, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.578245253976397e-07, |
|
"loss": 0.1991, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.550880793569352e-07, |
|
"loss": 0.2122, |
|
"step": 3616 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.523516333162305e-07, |
|
"loss": 0.2423, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.496151872755259e-07, |
|
"loss": 0.2568, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.468787412348212e-07, |
|
"loss": 0.2727, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.441422951941167e-07, |
|
"loss": 0.1825, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.41405849153412e-07, |
|
"loss": 0.1573, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.386694031127074e-07, |
|
"loss": 0.2034, |
|
"step": 3808 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.359329570720028e-07, |
|
"loss": 0.1514, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.33196511031298e-07, |
|
"loss": 0.2618, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.304600649905934e-07, |
|
"loss": 0.244, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.277236189498887e-07, |
|
"loss": 0.1753, |
|
"step": 3936 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.249871729091842e-07, |
|
"loss": 0.2044, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.222507268684795e-07, |
|
"loss": 0.1882, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.195142808277749e-07, |
|
"loss": 0.2397, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.167778347870702e-07, |
|
"loss": 0.2084, |
|
"step": 4064 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.140413887463657e-07, |
|
"loss": 0.2635, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.11304942705661e-07, |
|
"loss": 0.2512, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.085684966649563e-07, |
|
"loss": 0.2411, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.058320506242517e-07, |
|
"loss": 0.1846, |
|
"step": 4192 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.030956045835471e-07, |
|
"loss": 0.1447, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.003591585428424e-07, |
|
"loss": 0.2373, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.976227125021378e-07, |
|
"loss": 0.2097, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.948862664614332e-07, |
|
"loss": 0.2756, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.922353343595006e-07, |
|
"loss": 0.191, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.894988883187959e-07, |
|
"loss": 0.2076, |
|
"step": 4384 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.867624422780914e-07, |
|
"loss": 0.2787, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.840259962373867e-07, |
|
"loss": 0.1894, |
|
"step": 4448 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.81289550196682e-07, |
|
"loss": 0.1423, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.785531041559774e-07, |
|
"loss": 0.1738, |
|
"step": 4512 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.758166581152727e-07, |
|
"loss": 0.2598, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.730802120745681e-07, |
|
"loss": 0.2753, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.703437660338634e-07, |
|
"loss": 0.2922, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.676073199931589e-07, |
|
"loss": 0.172, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.648708739524542e-07, |
|
"loss": 0.2269, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.621344279117496e-07, |
|
"loss": 0.2662, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.59397981871045e-07, |
|
"loss": 0.2674, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.566615358303404e-07, |
|
"loss": 0.2803, |
|
"step": 4768 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.539250897896357e-07, |
|
"loss": 0.2253, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.51188643748931e-07, |
|
"loss": 0.2816, |
|
"step": 4832 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.484521977082264e-07, |
|
"loss": 0.1596, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.457157516675218e-07, |
|
"loss": 0.2419, |
|
"step": 4896 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.429793056268171e-07, |
|
"loss": 0.2344, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.402428595861125e-07, |
|
"loss": 0.2008, |
|
"step": 4960 |
|
} |
|
], |
|
"logging_steps": 32, |
|
"max_steps": 12440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1243, |
|
"total_flos": 2.11128126308352e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|