|
{ |
|
"best_metric": 1.4909777641296387, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.1718213058419244, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000859106529209622, |
|
"grad_norm": 5.928728103637695, |
|
"learning_rate": 1.001e-05, |
|
"loss": 2.3539, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.000859106529209622, |
|
"eval_loss": 2.5483806133270264, |
|
"eval_runtime": 26.3739, |
|
"eval_samples_per_second": 18.579, |
|
"eval_steps_per_second": 4.664, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001718213058419244, |
|
"grad_norm": 4.801278114318848, |
|
"learning_rate": 2.002e-05, |
|
"loss": 2.0539, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.002577319587628866, |
|
"grad_norm": 4.651398181915283, |
|
"learning_rate": 3.0029999999999995e-05, |
|
"loss": 2.2714, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003436426116838488, |
|
"grad_norm": 4.354385852813721, |
|
"learning_rate": 4.004e-05, |
|
"loss": 2.1376, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00429553264604811, |
|
"grad_norm": 3.075941324234009, |
|
"learning_rate": 5.005e-05, |
|
"loss": 2.0429, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005154639175257732, |
|
"grad_norm": 1.7578625679016113, |
|
"learning_rate": 6.005999999999999e-05, |
|
"loss": 1.9386, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.006013745704467354, |
|
"grad_norm": 1.0474644899368286, |
|
"learning_rate": 7.006999999999998e-05, |
|
"loss": 1.7412, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.006872852233676976, |
|
"grad_norm": 0.8305192589759827, |
|
"learning_rate": 8.008e-05, |
|
"loss": 1.8618, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.007731958762886598, |
|
"grad_norm": 0.6909606456756592, |
|
"learning_rate": 9.009e-05, |
|
"loss": 1.5999, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00859106529209622, |
|
"grad_norm": 0.7085854411125183, |
|
"learning_rate": 0.0001001, |
|
"loss": 1.6194, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.009450171821305841, |
|
"grad_norm": 1.100886344909668, |
|
"learning_rate": 9.957315789473684e-05, |
|
"loss": 1.932, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.010309278350515464, |
|
"grad_norm": 0.9694746732711792, |
|
"learning_rate": 9.904631578947367e-05, |
|
"loss": 1.5086, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.011168384879725086, |
|
"grad_norm": 0.8996667861938477, |
|
"learning_rate": 9.851947368421052e-05, |
|
"loss": 1.7042, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.012027491408934709, |
|
"grad_norm": 0.7877088785171509, |
|
"learning_rate": 9.799263157894736e-05, |
|
"loss": 1.6645, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01288659793814433, |
|
"grad_norm": 0.8245639801025391, |
|
"learning_rate": 9.746578947368421e-05, |
|
"loss": 2.0033, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.013745704467353952, |
|
"grad_norm": 0.6540593504905701, |
|
"learning_rate": 9.693894736842104e-05, |
|
"loss": 1.6979, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.014604810996563574, |
|
"grad_norm": 0.5481818318367004, |
|
"learning_rate": 9.641210526315789e-05, |
|
"loss": 1.4844, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.015463917525773196, |
|
"grad_norm": 0.5299813151359558, |
|
"learning_rate": 9.588526315789473e-05, |
|
"loss": 1.6551, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01632302405498282, |
|
"grad_norm": 0.5546098947525024, |
|
"learning_rate": 9.535842105263157e-05, |
|
"loss": 1.7145, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01718213058419244, |
|
"grad_norm": 0.5181926488876343, |
|
"learning_rate": 9.483157894736841e-05, |
|
"loss": 1.5542, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01804123711340206, |
|
"grad_norm": 0.464667409658432, |
|
"learning_rate": 9.430473684210526e-05, |
|
"loss": 1.4155, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.018900343642611683, |
|
"grad_norm": 0.5198472738265991, |
|
"learning_rate": 9.37778947368421e-05, |
|
"loss": 1.5655, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.019759450171821305, |
|
"grad_norm": 0.5255956053733826, |
|
"learning_rate": 9.325105263157894e-05, |
|
"loss": 1.5633, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.020618556701030927, |
|
"grad_norm": 0.44441619515419006, |
|
"learning_rate": 9.272421052631578e-05, |
|
"loss": 1.5421, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02147766323024055, |
|
"grad_norm": 0.4783642888069153, |
|
"learning_rate": 9.219736842105263e-05, |
|
"loss": 1.4316, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.022336769759450172, |
|
"grad_norm": 0.4691917896270752, |
|
"learning_rate": 9.167052631578946e-05, |
|
"loss": 1.5855, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.023195876288659795, |
|
"grad_norm": 0.4796159863471985, |
|
"learning_rate": 9.114368421052632e-05, |
|
"loss": 1.5567, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.024054982817869417, |
|
"grad_norm": 0.5819175243377686, |
|
"learning_rate": 9.061684210526315e-05, |
|
"loss": 1.7546, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.02491408934707904, |
|
"grad_norm": 0.4925194978713989, |
|
"learning_rate": 9.009e-05, |
|
"loss": 1.7332, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02577319587628866, |
|
"grad_norm": 0.500887393951416, |
|
"learning_rate": 8.956315789473683e-05, |
|
"loss": 1.5155, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02663230240549828, |
|
"grad_norm": 0.48832571506500244, |
|
"learning_rate": 8.903631578947368e-05, |
|
"loss": 1.4386, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.027491408934707903, |
|
"grad_norm": 0.45185863971710205, |
|
"learning_rate": 8.850947368421052e-05, |
|
"loss": 1.5028, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.028350515463917526, |
|
"grad_norm": 0.49141839146614075, |
|
"learning_rate": 8.798263157894736e-05, |
|
"loss": 1.642, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.029209621993127148, |
|
"grad_norm": 0.44430410861968994, |
|
"learning_rate": 8.745578947368422e-05, |
|
"loss": 1.3989, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03006872852233677, |
|
"grad_norm": 0.43278783559799194, |
|
"learning_rate": 8.692894736842105e-05, |
|
"loss": 1.3636, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.030927835051546393, |
|
"grad_norm": 0.46601980924606323, |
|
"learning_rate": 8.64021052631579e-05, |
|
"loss": 1.6403, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03178694158075601, |
|
"grad_norm": 0.46934980154037476, |
|
"learning_rate": 8.587526315789473e-05, |
|
"loss": 1.6551, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.03264604810996564, |
|
"grad_norm": 0.44036510586738586, |
|
"learning_rate": 8.534842105263157e-05, |
|
"loss": 1.5409, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03350515463917526, |
|
"grad_norm": 0.43330177664756775, |
|
"learning_rate": 8.482157894736842e-05, |
|
"loss": 1.4959, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03436426116838488, |
|
"grad_norm": 0.6163046956062317, |
|
"learning_rate": 8.429473684210525e-05, |
|
"loss": 1.6538, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0352233676975945, |
|
"grad_norm": 0.4682713747024536, |
|
"learning_rate": 8.376789473684211e-05, |
|
"loss": 1.3941, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.03608247422680412, |
|
"grad_norm": 0.5424863696098328, |
|
"learning_rate": 8.324105263157894e-05, |
|
"loss": 1.541, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.036941580756013746, |
|
"grad_norm": 0.5334774851799011, |
|
"learning_rate": 8.271421052631579e-05, |
|
"loss": 1.5979, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.037800687285223365, |
|
"grad_norm": 0.6095847487449646, |
|
"learning_rate": 8.218736842105262e-05, |
|
"loss": 1.562, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03865979381443299, |
|
"grad_norm": 0.6060227155685425, |
|
"learning_rate": 8.166052631578947e-05, |
|
"loss": 1.4832, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03951890034364261, |
|
"grad_norm": 0.5880366563796997, |
|
"learning_rate": 8.113368421052631e-05, |
|
"loss": 1.7103, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.040378006872852236, |
|
"grad_norm": 0.5826221108436584, |
|
"learning_rate": 8.060684210526315e-05, |
|
"loss": 1.6033, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.041237113402061855, |
|
"grad_norm": 0.6990689635276794, |
|
"learning_rate": 8.008e-05, |
|
"loss": 1.7349, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04209621993127148, |
|
"grad_norm": 0.80436772108078, |
|
"learning_rate": 7.955315789473684e-05, |
|
"loss": 1.7074, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0429553264604811, |
|
"grad_norm": 0.8325445652008057, |
|
"learning_rate": 7.902631578947368e-05, |
|
"loss": 1.891, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0429553264604811, |
|
"eval_loss": 1.5498371124267578, |
|
"eval_runtime": 24.9498, |
|
"eval_samples_per_second": 19.639, |
|
"eval_steps_per_second": 4.93, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04381443298969072, |
|
"grad_norm": 0.44505757093429565, |
|
"learning_rate": 7.849947368421052e-05, |
|
"loss": 1.4996, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.044673539518900345, |
|
"grad_norm": 0.4838855266571045, |
|
"learning_rate": 7.797263157894736e-05, |
|
"loss": 1.5216, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.04553264604810996, |
|
"grad_norm": 0.4800682067871094, |
|
"learning_rate": 7.744578947368421e-05, |
|
"loss": 1.5385, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.04639175257731959, |
|
"grad_norm": 0.44465377926826477, |
|
"learning_rate": 7.691894736842104e-05, |
|
"loss": 1.4122, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.04725085910652921, |
|
"grad_norm": 0.4586678743362427, |
|
"learning_rate": 7.63921052631579e-05, |
|
"loss": 1.6508, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.048109965635738834, |
|
"grad_norm": 0.42472296953201294, |
|
"learning_rate": 7.586526315789473e-05, |
|
"loss": 1.4994, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04896907216494845, |
|
"grad_norm": 0.4265405237674713, |
|
"learning_rate": 7.533842105263158e-05, |
|
"loss": 1.3472, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.04982817869415808, |
|
"grad_norm": 0.4571130573749542, |
|
"learning_rate": 7.481157894736841e-05, |
|
"loss": 1.4216, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0506872852233677, |
|
"grad_norm": 0.43544867634773254, |
|
"learning_rate": 7.428473684210526e-05, |
|
"loss": 1.5216, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.05154639175257732, |
|
"grad_norm": 0.4616435766220093, |
|
"learning_rate": 7.375789473684209e-05, |
|
"loss": 1.5369, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05240549828178694, |
|
"grad_norm": 0.44542810320854187, |
|
"learning_rate": 7.323105263157895e-05, |
|
"loss": 1.4106, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.05326460481099656, |
|
"grad_norm": 0.48202329874038696, |
|
"learning_rate": 7.270421052631578e-05, |
|
"loss": 1.548, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.05412371134020619, |
|
"grad_norm": 0.4340774416923523, |
|
"learning_rate": 7.217736842105263e-05, |
|
"loss": 1.4627, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.054982817869415807, |
|
"grad_norm": 0.47386494278907776, |
|
"learning_rate": 7.165052631578947e-05, |
|
"loss": 1.5337, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.05584192439862543, |
|
"grad_norm": 0.5032212138175964, |
|
"learning_rate": 7.11236842105263e-05, |
|
"loss": 1.9408, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05670103092783505, |
|
"grad_norm": 0.4906267821788788, |
|
"learning_rate": 7.059684210526315e-05, |
|
"loss": 1.7887, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05756013745704467, |
|
"grad_norm": 0.4755704998970032, |
|
"learning_rate": 7.006999999999998e-05, |
|
"loss": 1.6393, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.058419243986254296, |
|
"grad_norm": 0.42872437834739685, |
|
"learning_rate": 6.954315789473684e-05, |
|
"loss": 1.3954, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.059278350515463915, |
|
"grad_norm": 0.42157912254333496, |
|
"learning_rate": 6.901631578947368e-05, |
|
"loss": 1.4267, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.06013745704467354, |
|
"grad_norm": 0.48660025000572205, |
|
"learning_rate": 6.848947368421052e-05, |
|
"loss": 1.8621, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06099656357388316, |
|
"grad_norm": 0.4611688256263733, |
|
"learning_rate": 6.796263157894737e-05, |
|
"loss": 1.5259, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.061855670103092786, |
|
"grad_norm": 0.4612456262111664, |
|
"learning_rate": 6.74357894736842e-05, |
|
"loss": 1.6302, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0627147766323024, |
|
"grad_norm": 0.4128530025482178, |
|
"learning_rate": 6.690894736842105e-05, |
|
"loss": 1.3599, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.06357388316151202, |
|
"grad_norm": 0.46616268157958984, |
|
"learning_rate": 6.638210526315788e-05, |
|
"loss": 1.8427, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.06443298969072164, |
|
"grad_norm": 0.4137905240058899, |
|
"learning_rate": 6.585526315789474e-05, |
|
"loss": 1.3743, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06529209621993128, |
|
"grad_norm": 0.41269126534461975, |
|
"learning_rate": 6.532842105263157e-05, |
|
"loss": 1.4494, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0661512027491409, |
|
"grad_norm": 0.4382306933403015, |
|
"learning_rate": 6.480157894736842e-05, |
|
"loss": 1.5754, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.06701030927835051, |
|
"grad_norm": 0.43855199217796326, |
|
"learning_rate": 6.427473684210526e-05, |
|
"loss": 1.5385, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.06786941580756013, |
|
"grad_norm": 0.43043795228004456, |
|
"learning_rate": 6.37478947368421e-05, |
|
"loss": 1.4418, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.06872852233676977, |
|
"grad_norm": 0.41860294342041016, |
|
"learning_rate": 6.322105263157894e-05, |
|
"loss": 1.4541, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06958762886597938, |
|
"grad_norm": 0.4255295395851135, |
|
"learning_rate": 6.269421052631577e-05, |
|
"loss": 1.5295, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.070446735395189, |
|
"grad_norm": 0.41935423016548157, |
|
"learning_rate": 6.216736842105263e-05, |
|
"loss": 1.4867, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.07130584192439862, |
|
"grad_norm": 0.4140067398548126, |
|
"learning_rate": 6.164052631578947e-05, |
|
"loss": 1.5371, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.07216494845360824, |
|
"grad_norm": 0.42022502422332764, |
|
"learning_rate": 6.111368421052631e-05, |
|
"loss": 1.3979, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.07302405498281787, |
|
"grad_norm": 0.4209689795970917, |
|
"learning_rate": 6.058684210526315e-05, |
|
"loss": 1.5414, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.07388316151202749, |
|
"grad_norm": 0.4335344135761261, |
|
"learning_rate": 6.005999999999999e-05, |
|
"loss": 1.3725, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.07474226804123711, |
|
"grad_norm": 0.4103253483772278, |
|
"learning_rate": 5.953315789473684e-05, |
|
"loss": 1.3898, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.07560137457044673, |
|
"grad_norm": 0.4868164360523224, |
|
"learning_rate": 5.9006315789473676e-05, |
|
"loss": 1.5759, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.07646048109965636, |
|
"grad_norm": 0.4783855080604553, |
|
"learning_rate": 5.847947368421053e-05, |
|
"loss": 1.4645, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.07731958762886598, |
|
"grad_norm": 0.5467380881309509, |
|
"learning_rate": 5.795263157894737e-05, |
|
"loss": 1.642, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0781786941580756, |
|
"grad_norm": 0.5185117721557617, |
|
"learning_rate": 5.742578947368421e-05, |
|
"loss": 1.6723, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.07903780068728522, |
|
"grad_norm": 0.535789966583252, |
|
"learning_rate": 5.6898947368421046e-05, |
|
"loss": 1.619, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.07989690721649484, |
|
"grad_norm": 0.5576356649398804, |
|
"learning_rate": 5.6372105263157886e-05, |
|
"loss": 1.6906, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.08075601374570447, |
|
"grad_norm": 0.5344556570053101, |
|
"learning_rate": 5.584526315789473e-05, |
|
"loss": 1.6378, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.08161512027491409, |
|
"grad_norm": 0.5366571545600891, |
|
"learning_rate": 5.531842105263158e-05, |
|
"loss": 1.3028, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.08247422680412371, |
|
"grad_norm": 0.5787448883056641, |
|
"learning_rate": 5.4791578947368424e-05, |
|
"loss": 1.6638, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.08333333333333333, |
|
"grad_norm": 0.5679211616516113, |
|
"learning_rate": 5.426473684210526e-05, |
|
"loss": 1.4092, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.08419243986254296, |
|
"grad_norm": 0.6531189680099487, |
|
"learning_rate": 5.37378947368421e-05, |
|
"loss": 1.4579, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.08505154639175258, |
|
"grad_norm": 0.8260589838027954, |
|
"learning_rate": 5.321105263157894e-05, |
|
"loss": 1.5204, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0859106529209622, |
|
"grad_norm": 0.7999553084373474, |
|
"learning_rate": 5.268421052631578e-05, |
|
"loss": 1.6669, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0859106529209622, |
|
"eval_loss": 1.5160658359527588, |
|
"eval_runtime": 24.1171, |
|
"eval_samples_per_second": 20.318, |
|
"eval_steps_per_second": 5.1, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08676975945017182, |
|
"grad_norm": 0.48756492137908936, |
|
"learning_rate": 5.2157368421052626e-05, |
|
"loss": 1.4015, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.08762886597938144, |
|
"grad_norm": 0.42037105560302734, |
|
"learning_rate": 5.163052631578947e-05, |
|
"loss": 1.4429, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.08848797250859107, |
|
"grad_norm": 0.4923235774040222, |
|
"learning_rate": 5.110368421052632e-05, |
|
"loss": 1.4581, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.08934707903780069, |
|
"grad_norm": 0.41760286688804626, |
|
"learning_rate": 5.057684210526316e-05, |
|
"loss": 1.1997, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.09020618556701031, |
|
"grad_norm": 0.4583994746208191, |
|
"learning_rate": 5.005e-05, |
|
"loss": 1.4086, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.09106529209621993, |
|
"grad_norm": 0.458413302898407, |
|
"learning_rate": 4.9523157894736836e-05, |
|
"loss": 1.5256, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.09192439862542956, |
|
"grad_norm": 0.4747663140296936, |
|
"learning_rate": 4.899631578947368e-05, |
|
"loss": 1.3945, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.09278350515463918, |
|
"grad_norm": 0.4569973647594452, |
|
"learning_rate": 4.846947368421052e-05, |
|
"loss": 1.4371, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0936426116838488, |
|
"grad_norm": 0.4393562972545624, |
|
"learning_rate": 4.794263157894737e-05, |
|
"loss": 1.4525, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.09450171821305842, |
|
"grad_norm": 0.4615718424320221, |
|
"learning_rate": 4.7415789473684206e-05, |
|
"loss": 1.578, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09536082474226804, |
|
"grad_norm": 0.44367459416389465, |
|
"learning_rate": 4.688894736842105e-05, |
|
"loss": 1.4314, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.09621993127147767, |
|
"grad_norm": 0.4110322594642639, |
|
"learning_rate": 4.636210526315789e-05, |
|
"loss": 1.4254, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.09707903780068729, |
|
"grad_norm": 0.4521294832229614, |
|
"learning_rate": 4.583526315789473e-05, |
|
"loss": 1.5936, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0979381443298969, |
|
"grad_norm": 0.4364003837108612, |
|
"learning_rate": 4.530842105263158e-05, |
|
"loss": 1.5987, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.09879725085910653, |
|
"grad_norm": 0.4320337772369385, |
|
"learning_rate": 4.4781578947368416e-05, |
|
"loss": 1.5928, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.09965635738831616, |
|
"grad_norm": 0.47091159224510193, |
|
"learning_rate": 4.425473684210526e-05, |
|
"loss": 1.5915, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.10051546391752578, |
|
"grad_norm": 0.4349591135978699, |
|
"learning_rate": 4.372789473684211e-05, |
|
"loss": 1.4902, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.1013745704467354, |
|
"grad_norm": 0.4385061264038086, |
|
"learning_rate": 4.320105263157895e-05, |
|
"loss": 1.6705, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.10223367697594501, |
|
"grad_norm": 0.4168137311935425, |
|
"learning_rate": 4.2674210526315786e-05, |
|
"loss": 1.5591, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.10309278350515463, |
|
"grad_norm": 0.42430955171585083, |
|
"learning_rate": 4.2147368421052626e-05, |
|
"loss": 1.4457, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.10395189003436427, |
|
"grad_norm": 0.4482705295085907, |
|
"learning_rate": 4.162052631578947e-05, |
|
"loss": 1.6385, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.10481099656357389, |
|
"grad_norm": 0.4236373007297516, |
|
"learning_rate": 4.109368421052631e-05, |
|
"loss": 1.5653, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.1056701030927835, |
|
"grad_norm": 0.4247753620147705, |
|
"learning_rate": 4.056684210526316e-05, |
|
"loss": 1.4705, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.10652920962199312, |
|
"grad_norm": 0.4102475345134735, |
|
"learning_rate": 4.004e-05, |
|
"loss": 1.387, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.10738831615120274, |
|
"grad_norm": 0.4251536428928375, |
|
"learning_rate": 3.951315789473684e-05, |
|
"loss": 1.5281, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.10824742268041238, |
|
"grad_norm": 0.41173189878463745, |
|
"learning_rate": 3.898631578947368e-05, |
|
"loss": 1.5701, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.109106529209622, |
|
"grad_norm": 0.4324643015861511, |
|
"learning_rate": 3.845947368421052e-05, |
|
"loss": 1.4983, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.10996563573883161, |
|
"grad_norm": 0.3974796235561371, |
|
"learning_rate": 3.7932631578947367e-05, |
|
"loss": 1.4384, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.11082474226804123, |
|
"grad_norm": 0.43069207668304443, |
|
"learning_rate": 3.7405789473684206e-05, |
|
"loss": 1.5198, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.11168384879725086, |
|
"grad_norm": 0.4067608416080475, |
|
"learning_rate": 3.6878947368421045e-05, |
|
"loss": 1.4449, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11254295532646048, |
|
"grad_norm": 0.38891565799713135, |
|
"learning_rate": 3.635210526315789e-05, |
|
"loss": 1.3167, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.1134020618556701, |
|
"grad_norm": 0.41567280888557434, |
|
"learning_rate": 3.582526315789474e-05, |
|
"loss": 1.4248, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.11426116838487972, |
|
"grad_norm": 0.44299566745758057, |
|
"learning_rate": 3.5298421052631576e-05, |
|
"loss": 1.5865, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.11512027491408934, |
|
"grad_norm": 0.3968340754508972, |
|
"learning_rate": 3.477157894736842e-05, |
|
"loss": 1.5147, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.11597938144329897, |
|
"grad_norm": 0.40769246220588684, |
|
"learning_rate": 3.424473684210526e-05, |
|
"loss": 1.5635, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.11683848797250859, |
|
"grad_norm": 0.4221096336841583, |
|
"learning_rate": 3.37178947368421e-05, |
|
"loss": 1.447, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.11769759450171821, |
|
"grad_norm": 0.38482198119163513, |
|
"learning_rate": 3.319105263157894e-05, |
|
"loss": 1.3877, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.11855670103092783, |
|
"grad_norm": 0.4175458550453186, |
|
"learning_rate": 3.2664210526315786e-05, |
|
"loss": 1.5137, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.11941580756013746, |
|
"grad_norm": 0.4658557176589966, |
|
"learning_rate": 3.213736842105263e-05, |
|
"loss": 1.366, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.12027491408934708, |
|
"grad_norm": 0.4771043658256531, |
|
"learning_rate": 3.161052631578947e-05, |
|
"loss": 1.4614, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1211340206185567, |
|
"grad_norm": 0.42562994360923767, |
|
"learning_rate": 3.108368421052632e-05, |
|
"loss": 1.1766, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.12199312714776632, |
|
"grad_norm": 0.45292311906814575, |
|
"learning_rate": 3.0556842105263156e-05, |
|
"loss": 1.3532, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.12285223367697594, |
|
"grad_norm": 0.5051841735839844, |
|
"learning_rate": 3.0029999999999995e-05, |
|
"loss": 1.501, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.12371134020618557, |
|
"grad_norm": 0.525606095790863, |
|
"learning_rate": 2.9503157894736838e-05, |
|
"loss": 1.4933, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.12457044673539519, |
|
"grad_norm": 0.500237762928009, |
|
"learning_rate": 2.8976315789473684e-05, |
|
"loss": 1.2868, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1254295532646048, |
|
"grad_norm": 0.5657179355621338, |
|
"learning_rate": 2.8449473684210523e-05, |
|
"loss": 1.5876, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.12628865979381443, |
|
"grad_norm": 0.5619215369224548, |
|
"learning_rate": 2.7922631578947366e-05, |
|
"loss": 1.4058, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.12714776632302405, |
|
"grad_norm": 0.5765167474746704, |
|
"learning_rate": 2.7395789473684212e-05, |
|
"loss": 1.6314, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.12800687285223367, |
|
"grad_norm": 0.77131587266922, |
|
"learning_rate": 2.686894736842105e-05, |
|
"loss": 1.9087, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.12886597938144329, |
|
"grad_norm": 0.7527775168418884, |
|
"learning_rate": 2.634210526315789e-05, |
|
"loss": 1.4388, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12886597938144329, |
|
"eval_loss": 1.4959875345230103, |
|
"eval_runtime": 24.189, |
|
"eval_samples_per_second": 20.257, |
|
"eval_steps_per_second": 5.085, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12972508591065293, |
|
"grad_norm": 0.4515070617198944, |
|
"learning_rate": 2.5815263157894736e-05, |
|
"loss": 1.4754, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.13058419243986255, |
|
"grad_norm": 0.3967503011226654, |
|
"learning_rate": 2.528842105263158e-05, |
|
"loss": 1.163, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.13144329896907217, |
|
"grad_norm": 0.4591720998287201, |
|
"learning_rate": 2.4761578947368418e-05, |
|
"loss": 1.3497, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.1323024054982818, |
|
"grad_norm": 0.47611069679260254, |
|
"learning_rate": 2.423473684210526e-05, |
|
"loss": 1.2411, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.1331615120274914, |
|
"grad_norm": 0.445859432220459, |
|
"learning_rate": 2.3707894736842103e-05, |
|
"loss": 1.3745, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.13402061855670103, |
|
"grad_norm": 0.4517122507095337, |
|
"learning_rate": 2.3181052631578946e-05, |
|
"loss": 1.4243, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.13487972508591065, |
|
"grad_norm": 0.48769956827163696, |
|
"learning_rate": 2.265421052631579e-05, |
|
"loss": 1.4979, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.13573883161512026, |
|
"grad_norm": 0.3892180323600769, |
|
"learning_rate": 2.212736842105263e-05, |
|
"loss": 1.368, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.13659793814432988, |
|
"grad_norm": 0.4427521228790283, |
|
"learning_rate": 2.1600526315789474e-05, |
|
"loss": 1.6416, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.13745704467353953, |
|
"grad_norm": 0.41149961948394775, |
|
"learning_rate": 2.1073684210526313e-05, |
|
"loss": 1.3097, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13831615120274915, |
|
"grad_norm": 0.44827204942703247, |
|
"learning_rate": 2.0546842105263155e-05, |
|
"loss": 1.488, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.13917525773195877, |
|
"grad_norm": 0.4299844801425934, |
|
"learning_rate": 2.002e-05, |
|
"loss": 1.4034, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.1400343642611684, |
|
"grad_norm": 0.4039885997772217, |
|
"learning_rate": 1.949315789473684e-05, |
|
"loss": 1.3447, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.140893470790378, |
|
"grad_norm": 0.43283891677856445, |
|
"learning_rate": 1.8966315789473683e-05, |
|
"loss": 1.3755, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.14175257731958762, |
|
"grad_norm": 0.4442145526409149, |
|
"learning_rate": 1.8439473684210522e-05, |
|
"loss": 1.6471, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.14261168384879724, |
|
"grad_norm": 0.42544904351234436, |
|
"learning_rate": 1.791263157894737e-05, |
|
"loss": 1.5591, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.14347079037800686, |
|
"grad_norm": 0.4591159522533417, |
|
"learning_rate": 1.738578947368421e-05, |
|
"loss": 1.4695, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.14432989690721648, |
|
"grad_norm": 0.4475899636745453, |
|
"learning_rate": 1.685894736842105e-05, |
|
"loss": 1.7011, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.14518900343642613, |
|
"grad_norm": 0.3987146019935608, |
|
"learning_rate": 1.6332105263157893e-05, |
|
"loss": 1.3711, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.14604810996563575, |
|
"grad_norm": 0.4292147159576416, |
|
"learning_rate": 1.5805263157894735e-05, |
|
"loss": 1.5364, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14690721649484537, |
|
"grad_norm": 0.4621577262878418, |
|
"learning_rate": 1.5278421052631578e-05, |
|
"loss": 1.5781, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.14776632302405499, |
|
"grad_norm": 0.4481081962585449, |
|
"learning_rate": 1.4751578947368419e-05, |
|
"loss": 1.5406, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1486254295532646, |
|
"grad_norm": 0.4390103220939636, |
|
"learning_rate": 1.4224736842105262e-05, |
|
"loss": 1.549, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.14948453608247422, |
|
"grad_norm": 0.41087478399276733, |
|
"learning_rate": 1.3697894736842106e-05, |
|
"loss": 1.4282, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.15034364261168384, |
|
"grad_norm": 0.4323715269565582, |
|
"learning_rate": 1.3171052631578945e-05, |
|
"loss": 1.421, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.15120274914089346, |
|
"grad_norm": 0.4265320897102356, |
|
"learning_rate": 1.264421052631579e-05, |
|
"loss": 1.4488, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.15206185567010308, |
|
"grad_norm": 0.4511622190475464, |
|
"learning_rate": 1.211736842105263e-05, |
|
"loss": 1.5736, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.15292096219931273, |
|
"grad_norm": 0.417791485786438, |
|
"learning_rate": 1.1590526315789473e-05, |
|
"loss": 1.4141, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.15378006872852235, |
|
"grad_norm": 0.4172487258911133, |
|
"learning_rate": 1.1063684210526316e-05, |
|
"loss": 1.3984, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.15463917525773196, |
|
"grad_norm": 0.42654547095298767, |
|
"learning_rate": 1.0536842105263156e-05, |
|
"loss": 1.4239, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15549828178694158, |
|
"grad_norm": 0.4155035614967346, |
|
"learning_rate": 1.001e-05, |
|
"loss": 1.51, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1563573883161512, |
|
"grad_norm": 0.39767947793006897, |
|
"learning_rate": 9.483157894736842e-06, |
|
"loss": 1.4256, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.15721649484536082, |
|
"grad_norm": 0.4175090789794922, |
|
"learning_rate": 8.956315789473684e-06, |
|
"loss": 1.3716, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.15807560137457044, |
|
"grad_norm": 0.44680315256118774, |
|
"learning_rate": 8.429473684210525e-06, |
|
"loss": 1.5732, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.15893470790378006, |
|
"grad_norm": 0.4153236150741577, |
|
"learning_rate": 7.902631578947368e-06, |
|
"loss": 1.5334, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.15979381443298968, |
|
"grad_norm": 0.4097844660282135, |
|
"learning_rate": 7.3757894736842095e-06, |
|
"loss": 1.4617, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.16065292096219932, |
|
"grad_norm": 0.44263848662376404, |
|
"learning_rate": 6.848947368421053e-06, |
|
"loss": 1.5138, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.16151202749140894, |
|
"grad_norm": 0.3853647708892822, |
|
"learning_rate": 6.322105263157895e-06, |
|
"loss": 1.3983, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.16237113402061856, |
|
"grad_norm": 0.4370990991592407, |
|
"learning_rate": 5.7952631578947365e-06, |
|
"loss": 1.4268, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.16323024054982818, |
|
"grad_norm": 0.437597393989563, |
|
"learning_rate": 5.268421052631578e-06, |
|
"loss": 1.3218, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1640893470790378, |
|
"grad_norm": 0.49196720123291016, |
|
"learning_rate": 4.741578947368421e-06, |
|
"loss": 1.5127, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.16494845360824742, |
|
"grad_norm": 0.450184166431427, |
|
"learning_rate": 4.2147368421052626e-06, |
|
"loss": 1.3564, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.16580756013745704, |
|
"grad_norm": 0.5065258145332336, |
|
"learning_rate": 3.6878947368421047e-06, |
|
"loss": 1.6407, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 0.5404831767082214, |
|
"learning_rate": 3.1610526315789474e-06, |
|
"loss": 1.6031, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.16752577319587628, |
|
"grad_norm": 0.5111208558082581, |
|
"learning_rate": 2.634210526315789e-06, |
|
"loss": 1.4117, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.16838487972508592, |
|
"grad_norm": 0.628701388835907, |
|
"learning_rate": 2.1073684210526313e-06, |
|
"loss": 1.563, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.16924398625429554, |
|
"grad_norm": 0.591088056564331, |
|
"learning_rate": 1.5805263157894737e-06, |
|
"loss": 1.4572, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.17010309278350516, |
|
"grad_norm": 0.6137686371803284, |
|
"learning_rate": 1.0536842105263156e-06, |
|
"loss": 1.473, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.17096219931271478, |
|
"grad_norm": 0.787673830986023, |
|
"learning_rate": 5.268421052631578e-07, |
|
"loss": 1.5638, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.1718213058419244, |
|
"grad_norm": 0.8384268283843994, |
|
"learning_rate": 0.0, |
|
"loss": 1.466, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1718213058419244, |
|
"eval_loss": 1.4909777641296387, |
|
"eval_runtime": 24.2168, |
|
"eval_samples_per_second": 20.234, |
|
"eval_steps_per_second": 5.079, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.9886658387968e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|