aimonbc24's picture
Upload folder using huggingface_hub
a3bc393 verified
raw
history blame
19.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.39967845659163986,
"eval_steps": 500,
"global_step": 4972,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.753351206434316e-08,
"loss": 4.9531,
"step": 32
},
{
"epoch": 0.01,
"learning_rate": 8.042895442359249e-08,
"loss": 4.9011,
"step": 64
},
{
"epoch": 0.01,
"learning_rate": 1.2198391420911528e-07,
"loss": 4.472,
"step": 96
},
{
"epoch": 0.01,
"learning_rate": 1.648793565683646e-07,
"loss": 3.9678,
"step": 128
},
{
"epoch": 0.01,
"learning_rate": 2.0777479892761392e-07,
"loss": 3.3412,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 2.5067024128686325e-07,
"loss": 2.5657,
"step": 192
},
{
"epoch": 0.02,
"learning_rate": 2.9356568364611256e-07,
"loss": 1.9677,
"step": 224
},
{
"epoch": 0.02,
"learning_rate": 3.364611260053619e-07,
"loss": 1.6646,
"step": 256
},
{
"epoch": 0.02,
"learning_rate": 3.7935656836461123e-07,
"loss": 1.5032,
"step": 288
},
{
"epoch": 0.03,
"learning_rate": 4.222520107238606e-07,
"loss": 1.3443,
"step": 320
},
{
"epoch": 0.03,
"learning_rate": 4.651474530831099e-07,
"loss": 1.2277,
"step": 352
},
{
"epoch": 0.03,
"learning_rate": 5.080428954423593e-07,
"loss": 1.0104,
"step": 384
},
{
"epoch": 0.03,
"learning_rate": 5.509383378016086e-07,
"loss": 0.7317,
"step": 416
},
{
"epoch": 0.04,
"learning_rate": 5.938337801608579e-07,
"loss": 0.3644,
"step": 448
},
{
"epoch": 0.04,
"learning_rate": 6.367292225201072e-07,
"loss": 0.388,
"step": 480
},
{
"epoch": 0.04,
"learning_rate": 6.796246648793566e-07,
"loss": 0.264,
"step": 512
},
{
"epoch": 0.04,
"learning_rate": 7.225201072386059e-07,
"loss": 0.2479,
"step": 544
},
{
"epoch": 0.05,
"learning_rate": 7.654155495978551e-07,
"loss": 0.403,
"step": 576
},
{
"epoch": 0.05,
"learning_rate": 8.083109919571045e-07,
"loss": 0.3062,
"step": 608
},
{
"epoch": 0.05,
"learning_rate": 8.512064343163538e-07,
"loss": 0.3016,
"step": 640
},
{
"epoch": 0.05,
"learning_rate": 8.941018766756032e-07,
"loss": 0.3445,
"step": 672
},
{
"epoch": 0.06,
"learning_rate": 9.369973190348524e-07,
"loss": 0.2554,
"step": 704
},
{
"epoch": 0.06,
"learning_rate": 9.798927613941018e-07,
"loss": 0.2762,
"step": 736
},
{
"epoch": 0.06,
"learning_rate": 9.985462630408755e-07,
"loss": 0.2265,
"step": 768
},
{
"epoch": 0.06,
"learning_rate": 9.958098170001709e-07,
"loss": 0.2158,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 9.930733709594662e-07,
"loss": 0.3106,
"step": 832
},
{
"epoch": 0.07,
"learning_rate": 9.903369249187618e-07,
"loss": 0.3085,
"step": 864
},
{
"epoch": 0.07,
"learning_rate": 9.876004788780571e-07,
"loss": 0.2633,
"step": 896
},
{
"epoch": 0.07,
"learning_rate": 9.848640328373525e-07,
"loss": 0.2145,
"step": 928
},
{
"epoch": 0.08,
"learning_rate": 9.821275867966478e-07,
"loss": 0.2594,
"step": 960
},
{
"epoch": 0.08,
"learning_rate": 9.793911407559432e-07,
"loss": 0.2264,
"step": 992
},
{
"epoch": 0.08,
"learning_rate": 9.766546947152385e-07,
"loss": 0.2512,
"step": 1024
},
{
"epoch": 0.08,
"learning_rate": 9.739182486745339e-07,
"loss": 0.3154,
"step": 1056
},
{
"epoch": 0.09,
"learning_rate": 9.711818026338292e-07,
"loss": 0.2672,
"step": 1088
},
{
"epoch": 0.09,
"learning_rate": 9.684453565931246e-07,
"loss": 0.2502,
"step": 1120
},
{
"epoch": 0.09,
"learning_rate": 9.6570891055242e-07,
"loss": 0.2702,
"step": 1152
},
{
"epoch": 0.1,
"learning_rate": 9.629724645117153e-07,
"loss": 0.2919,
"step": 1184
},
{
"epoch": 0.1,
"learning_rate": 9.602360184710108e-07,
"loss": 0.3925,
"step": 1216
},
{
"epoch": 0.1,
"learning_rate": 9.574995724303062e-07,
"loss": 0.285,
"step": 1248
},
{
"epoch": 0.1,
"learning_rate": 9.547631263896015e-07,
"loss": 0.3084,
"step": 1280
},
{
"epoch": 0.11,
"learning_rate": 9.520266803488969e-07,
"loss": 0.2275,
"step": 1312
},
{
"epoch": 0.11,
"learning_rate": 9.492902343081922e-07,
"loss": 0.245,
"step": 1344
},
{
"epoch": 0.11,
"learning_rate": 9.465537882674875e-07,
"loss": 0.233,
"step": 1376
},
{
"epoch": 0.11,
"learning_rate": 9.438173422267829e-07,
"loss": 0.2825,
"step": 1408
},
{
"epoch": 0.12,
"learning_rate": 9.410808961860783e-07,
"loss": 0.231,
"step": 1440
},
{
"epoch": 0.12,
"learning_rate": 9.383444501453737e-07,
"loss": 0.2449,
"step": 1472
},
{
"epoch": 0.12,
"learning_rate": 9.35608004104669e-07,
"loss": 0.2732,
"step": 1504
},
{
"epoch": 0.12,
"learning_rate": 9.328715580639644e-07,
"loss": 0.2031,
"step": 1536
},
{
"epoch": 0.13,
"learning_rate": 9.301351120232597e-07,
"loss": 0.1749,
"step": 1568
},
{
"epoch": 0.13,
"learning_rate": 9.273986659825551e-07,
"loss": 0.1722,
"step": 1600
},
{
"epoch": 0.13,
"learning_rate": 9.246622199418504e-07,
"loss": 0.2743,
"step": 1632
},
{
"epoch": 0.13,
"learning_rate": 9.219257739011459e-07,
"loss": 0.2907,
"step": 1664
},
{
"epoch": 0.14,
"learning_rate": 9.192748417992132e-07,
"loss": 0.2664,
"step": 1696
},
{
"epoch": 0.14,
"learning_rate": 9.165383957585086e-07,
"loss": 0.2085,
"step": 1728
},
{
"epoch": 0.14,
"learning_rate": 9.13801949717804e-07,
"loss": 0.1839,
"step": 1760
},
{
"epoch": 0.14,
"learning_rate": 9.110655036770994e-07,
"loss": 0.2667,
"step": 1792
},
{
"epoch": 0.15,
"learning_rate": 9.083290576363947e-07,
"loss": 0.1994,
"step": 1824
},
{
"epoch": 0.15,
"learning_rate": 9.0559261159569e-07,
"loss": 0.2568,
"step": 1856
},
{
"epoch": 0.15,
"learning_rate": 9.028561655549855e-07,
"loss": 0.2909,
"step": 1888
},
{
"epoch": 0.15,
"learning_rate": 9.001197195142807e-07,
"loss": 0.2697,
"step": 1920
},
{
"epoch": 0.16,
"learning_rate": 8.973832734735761e-07,
"loss": 0.3379,
"step": 1952
},
{
"epoch": 0.16,
"learning_rate": 8.946468274328715e-07,
"loss": 0.2866,
"step": 1984
},
{
"epoch": 0.16,
"learning_rate": 8.919103813921669e-07,
"loss": 0.2634,
"step": 2016
},
{
"epoch": 0.16,
"learning_rate": 8.891739353514622e-07,
"loss": 0.2234,
"step": 2048
},
{
"epoch": 0.17,
"learning_rate": 8.864374893107576e-07,
"loss": 0.2541,
"step": 2080
},
{
"epoch": 0.17,
"learning_rate": 8.83701043270053e-07,
"loss": 0.2341,
"step": 2112
},
{
"epoch": 0.17,
"learning_rate": 8.809645972293484e-07,
"loss": 0.2602,
"step": 2144
},
{
"epoch": 0.17,
"learning_rate": 8.782281511886437e-07,
"loss": 0.2602,
"step": 2176
},
{
"epoch": 0.18,
"learning_rate": 8.75491705147939e-07,
"loss": 0.2036,
"step": 2208
},
{
"epoch": 0.18,
"learning_rate": 8.727552591072344e-07,
"loss": 0.2342,
"step": 2240
},
{
"epoch": 0.18,
"learning_rate": 8.700188130665298e-07,
"loss": 0.2361,
"step": 2272
},
{
"epoch": 0.19,
"learning_rate": 8.672823670258251e-07,
"loss": 0.3299,
"step": 2304
},
{
"epoch": 0.19,
"learning_rate": 8.645459209851206e-07,
"loss": 0.3221,
"step": 2336
},
{
"epoch": 0.19,
"learning_rate": 8.618094749444159e-07,
"loss": 0.2119,
"step": 2368
},
{
"epoch": 0.19,
"learning_rate": 8.590730289037113e-07,
"loss": 0.1908,
"step": 2400
},
{
"epoch": 0.2,
"learning_rate": 8.563365828630066e-07,
"loss": 0.2736,
"step": 2432
},
{
"epoch": 0.2,
"learning_rate": 8.536001368223021e-07,
"loss": 0.1713,
"step": 2464
},
{
"epoch": 0.2,
"learning_rate": 8.508636907815974e-07,
"loss": 0.2658,
"step": 2496
},
{
"epoch": 0.2,
"learning_rate": 8.481272447408928e-07,
"loss": 0.2235,
"step": 2528
},
{
"epoch": 0.21,
"learning_rate": 8.453907987001881e-07,
"loss": 0.1858,
"step": 2560
},
{
"epoch": 0.21,
"learning_rate": 8.426543526594834e-07,
"loss": 0.2935,
"step": 2592
},
{
"epoch": 0.21,
"learning_rate": 8.399179066187788e-07,
"loss": 0.1996,
"step": 2624
},
{
"epoch": 0.21,
"learning_rate": 8.371814605780741e-07,
"loss": 0.2209,
"step": 2656
},
{
"epoch": 0.22,
"learning_rate": 8.344450145373696e-07,
"loss": 0.1611,
"step": 2688
},
{
"epoch": 0.22,
"learning_rate": 8.317085684966649e-07,
"loss": 0.28,
"step": 2720
},
{
"epoch": 0.22,
"learning_rate": 8.289721224559603e-07,
"loss": 0.2486,
"step": 2752
},
{
"epoch": 0.22,
"learning_rate": 8.262356764152556e-07,
"loss": 0.1978,
"step": 2784
},
{
"epoch": 0.23,
"learning_rate": 8.234992303745511e-07,
"loss": 0.2535,
"step": 2816
},
{
"epoch": 0.23,
"learning_rate": 8.207627843338464e-07,
"loss": 0.2666,
"step": 2848
},
{
"epoch": 0.23,
"learning_rate": 8.180263382931417e-07,
"loss": 0.1769,
"step": 2880
},
{
"epoch": 0.23,
"learning_rate": 8.152898922524371e-07,
"loss": 0.2803,
"step": 2912
},
{
"epoch": 0.24,
"learning_rate": 8.125534462117325e-07,
"loss": 0.2129,
"step": 2944
},
{
"epoch": 0.24,
"learning_rate": 8.098170001710278e-07,
"loss": 0.2255,
"step": 2976
},
{
"epoch": 0.24,
"learning_rate": 8.070805541303232e-07,
"loss": 0.1739,
"step": 3008
},
{
"epoch": 0.24,
"learning_rate": 8.043441080896186e-07,
"loss": 0.2321,
"step": 3040
},
{
"epoch": 0.25,
"learning_rate": 8.01607662048914e-07,
"loss": 0.2761,
"step": 3072
},
{
"epoch": 0.25,
"learning_rate": 7.988712160082093e-07,
"loss": 0.2867,
"step": 3104
},
{
"epoch": 0.25,
"learning_rate": 7.961347699675047e-07,
"loss": 0.1763,
"step": 3136
},
{
"epoch": 0.25,
"learning_rate": 7.933983239268001e-07,
"loss": 0.325,
"step": 3168
},
{
"epoch": 0.26,
"learning_rate": 7.906618778860953e-07,
"loss": 0.2515,
"step": 3200
},
{
"epoch": 0.26,
"learning_rate": 7.879254318453907e-07,
"loss": 0.1741,
"step": 3232
},
{
"epoch": 0.26,
"learning_rate": 7.851889858046861e-07,
"loss": 0.1999,
"step": 3264
},
{
"epoch": 0.26,
"learning_rate": 7.824525397639815e-07,
"loss": 0.2393,
"step": 3296
},
{
"epoch": 0.27,
"learning_rate": 7.797160937232768e-07,
"loss": 0.2242,
"step": 3328
},
{
"epoch": 0.27,
"learning_rate": 7.769796476825722e-07,
"loss": 0.1877,
"step": 3360
},
{
"epoch": 0.27,
"learning_rate": 7.742432016418676e-07,
"loss": 0.194,
"step": 3392
},
{
"epoch": 0.28,
"learning_rate": 7.71506755601163e-07,
"loss": 0.2499,
"step": 3424
},
{
"epoch": 0.28,
"learning_rate": 7.687703095604583e-07,
"loss": 0.2496,
"step": 3456
},
{
"epoch": 0.28,
"learning_rate": 7.660338635197537e-07,
"loss": 0.1899,
"step": 3488
},
{
"epoch": 0.28,
"learning_rate": 7.63297417479049e-07,
"loss": 0.1866,
"step": 3520
},
{
"epoch": 0.29,
"learning_rate": 7.605609714383444e-07,
"loss": 0.1843,
"step": 3552
},
{
"epoch": 0.29,
"learning_rate": 7.578245253976397e-07,
"loss": 0.1991,
"step": 3584
},
{
"epoch": 0.29,
"learning_rate": 7.550880793569352e-07,
"loss": 0.2122,
"step": 3616
},
{
"epoch": 0.29,
"learning_rate": 7.523516333162305e-07,
"loss": 0.2423,
"step": 3648
},
{
"epoch": 0.3,
"learning_rate": 7.496151872755259e-07,
"loss": 0.2568,
"step": 3680
},
{
"epoch": 0.3,
"learning_rate": 7.468787412348212e-07,
"loss": 0.2727,
"step": 3712
},
{
"epoch": 0.3,
"learning_rate": 7.441422951941167e-07,
"loss": 0.1825,
"step": 3744
},
{
"epoch": 0.3,
"learning_rate": 7.41405849153412e-07,
"loss": 0.1573,
"step": 3776
},
{
"epoch": 0.31,
"learning_rate": 7.386694031127074e-07,
"loss": 0.2034,
"step": 3808
},
{
"epoch": 0.31,
"learning_rate": 7.359329570720028e-07,
"loss": 0.1514,
"step": 3840
},
{
"epoch": 0.31,
"learning_rate": 7.33196511031298e-07,
"loss": 0.2618,
"step": 3872
},
{
"epoch": 0.31,
"learning_rate": 7.304600649905934e-07,
"loss": 0.244,
"step": 3904
},
{
"epoch": 0.32,
"learning_rate": 7.277236189498887e-07,
"loss": 0.1753,
"step": 3936
},
{
"epoch": 0.32,
"learning_rate": 7.249871729091842e-07,
"loss": 0.2044,
"step": 3968
},
{
"epoch": 0.32,
"learning_rate": 7.222507268684795e-07,
"loss": 0.1882,
"step": 4000
},
{
"epoch": 0.32,
"learning_rate": 7.195142808277749e-07,
"loss": 0.2397,
"step": 4032
},
{
"epoch": 0.33,
"learning_rate": 7.167778347870702e-07,
"loss": 0.2084,
"step": 4064
},
{
"epoch": 0.33,
"learning_rate": 7.140413887463657e-07,
"loss": 0.2635,
"step": 4096
},
{
"epoch": 0.33,
"learning_rate": 7.11304942705661e-07,
"loss": 0.2512,
"step": 4128
},
{
"epoch": 0.33,
"learning_rate": 7.085684966649563e-07,
"loss": 0.2411,
"step": 4160
},
{
"epoch": 0.34,
"learning_rate": 7.058320506242517e-07,
"loss": 0.1846,
"step": 4192
},
{
"epoch": 0.34,
"learning_rate": 7.030956045835471e-07,
"loss": 0.1447,
"step": 4224
},
{
"epoch": 0.34,
"learning_rate": 7.003591585428424e-07,
"loss": 0.2373,
"step": 4256
},
{
"epoch": 0.34,
"learning_rate": 6.976227125021378e-07,
"loss": 0.2097,
"step": 4288
},
{
"epoch": 0.35,
"learning_rate": 6.948862664614332e-07,
"loss": 0.2756,
"step": 4320
},
{
"epoch": 0.35,
"learning_rate": 6.922353343595006e-07,
"loss": 0.191,
"step": 4352
},
{
"epoch": 0.35,
"learning_rate": 6.894988883187959e-07,
"loss": 0.2076,
"step": 4384
},
{
"epoch": 0.35,
"learning_rate": 6.867624422780914e-07,
"loss": 0.2787,
"step": 4416
},
{
"epoch": 0.36,
"learning_rate": 6.840259962373867e-07,
"loss": 0.1894,
"step": 4448
},
{
"epoch": 0.36,
"learning_rate": 6.81289550196682e-07,
"loss": 0.1423,
"step": 4480
},
{
"epoch": 0.36,
"learning_rate": 6.785531041559774e-07,
"loss": 0.1738,
"step": 4512
},
{
"epoch": 0.37,
"learning_rate": 6.758166581152727e-07,
"loss": 0.2598,
"step": 4544
},
{
"epoch": 0.37,
"learning_rate": 6.730802120745681e-07,
"loss": 0.2753,
"step": 4576
},
{
"epoch": 0.37,
"learning_rate": 6.703437660338634e-07,
"loss": 0.2922,
"step": 4608
},
{
"epoch": 0.37,
"learning_rate": 6.676073199931589e-07,
"loss": 0.172,
"step": 4640
},
{
"epoch": 0.38,
"learning_rate": 6.648708739524542e-07,
"loss": 0.2269,
"step": 4672
},
{
"epoch": 0.38,
"learning_rate": 6.621344279117496e-07,
"loss": 0.2662,
"step": 4704
},
{
"epoch": 0.38,
"learning_rate": 6.59397981871045e-07,
"loss": 0.2674,
"step": 4736
},
{
"epoch": 0.38,
"learning_rate": 6.566615358303404e-07,
"loss": 0.2803,
"step": 4768
},
{
"epoch": 0.39,
"learning_rate": 6.539250897896357e-07,
"loss": 0.2253,
"step": 4800
},
{
"epoch": 0.39,
"learning_rate": 6.51188643748931e-07,
"loss": 0.2816,
"step": 4832
},
{
"epoch": 0.39,
"learning_rate": 6.484521977082264e-07,
"loss": 0.1596,
"step": 4864
},
{
"epoch": 0.39,
"learning_rate": 6.457157516675218e-07,
"loss": 0.2419,
"step": 4896
},
{
"epoch": 0.4,
"learning_rate": 6.429793056268171e-07,
"loss": 0.2344,
"step": 4928
},
{
"epoch": 0.4,
"learning_rate": 6.402428595861125e-07,
"loss": 0.2008,
"step": 4960
}
],
"logging_steps": 32,
"max_steps": 12440,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1243,
"total_flos": 2.11128126308352e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}