{
  "best_metric": 0.3262763023376465,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 2.3112391930835736,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.011527377521613832,
      "grad_norm": 15.795230865478516,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 4.073,
      "step": 1
    },
    {
      "epoch": 0.011527377521613832,
      "eval_loss": 1.3264315128326416,
      "eval_runtime": 17.2268,
      "eval_samples_per_second": 67.685,
      "eval_steps_per_second": 2.148,
      "step": 1
    },
    {
      "epoch": 0.023054755043227664,
      "grad_norm": 16.31910514831543,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 4.1133,
      "step": 2
    },
    {
      "epoch": 0.0345821325648415,
      "grad_norm": 9.573389053344727,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 3.7994,
      "step": 3
    },
    {
      "epoch": 0.04610951008645533,
      "grad_norm": 6.555534839630127,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 3.7398,
      "step": 4
    },
    {
      "epoch": 0.05763688760806916,
      "grad_norm": 10.255160331726074,
      "learning_rate": 5.05e-05,
      "loss": 4.3368,
      "step": 5
    },
    {
      "epoch": 0.069164265129683,
      "grad_norm": 15.695327758789062,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 5.9102,
      "step": 6
    },
    {
      "epoch": 0.08069164265129683,
      "grad_norm": 10.010425567626953,
      "learning_rate": 7.07e-05,
      "loss": 4.749,
      "step": 7
    },
    {
      "epoch": 0.09221902017291066,
      "grad_norm": 5.381749153137207,
      "learning_rate": 8.080000000000001e-05,
      "loss": 3.3209,
      "step": 8
    },
    {
      "epoch": 0.1037463976945245,
      "grad_norm": 2.711754083633423,
      "learning_rate": 9.09e-05,
      "loss": 3.2273,
      "step": 9
    },
    {
      "epoch": 0.11527377521613832,
      "grad_norm": 3.078639268875122,
      "learning_rate": 0.000101,
      "loss": 3.1344,
      "step": 10
    },
    {
      "epoch": 0.12680115273775217,
      "grad_norm": 3.792743682861328,
      "learning_rate": 0.00010046842105263158,
      "loss": 3.5957,
      "step": 11
    },
    {
      "epoch": 0.138328530259366,
      "grad_norm": 5.194520473480225,
      "learning_rate": 9.993684210526315e-05,
      "loss": 4.1843,
      "step": 12
    },
    {
      "epoch": 0.14985590778097982,
      "grad_norm": 8.25356674194336,
      "learning_rate": 9.940526315789473e-05,
      "loss": 4.6054,
      "step": 13
    },
    {
      "epoch": 0.16138328530259366,
      "grad_norm": 3.3276405334472656,
      "learning_rate": 9.887368421052632e-05,
      "loss": 2.9768,
      "step": 14
    },
    {
      "epoch": 0.1729106628242075,
      "grad_norm": 2.0909225940704346,
      "learning_rate": 9.83421052631579e-05,
      "loss": 2.8321,
      "step": 15
    },
    {
      "epoch": 0.1844380403458213,
      "grad_norm": 1.5769151449203491,
      "learning_rate": 9.781052631578948e-05,
      "loss": 2.6877,
      "step": 16
    },
    {
      "epoch": 0.19596541786743515,
      "grad_norm": 3.1385788917541504,
      "learning_rate": 9.727894736842106e-05,
      "loss": 2.9761,
      "step": 17
    },
    {
      "epoch": 0.207492795389049,
      "grad_norm": 3.5247676372528076,
      "learning_rate": 9.674736842105263e-05,
      "loss": 3.5156,
      "step": 18
    },
    {
      "epoch": 0.21902017291066284,
      "grad_norm": 6.310729503631592,
      "learning_rate": 9.621578947368421e-05,
      "loss": 4.3218,
      "step": 19
    },
    {
      "epoch": 0.23054755043227665,
      "grad_norm": 3.2671117782592773,
      "learning_rate": 9.568421052631578e-05,
      "loss": 2.677,
      "step": 20
    },
    {
      "epoch": 0.2420749279538905,
      "grad_norm": 2.4811301231384277,
      "learning_rate": 9.515263157894737e-05,
      "loss": 2.6409,
      "step": 21
    },
    {
      "epoch": 0.25360230547550433,
      "grad_norm": 1.5807781219482422,
      "learning_rate": 9.462105263157895e-05,
      "loss": 2.4935,
      "step": 22
    },
    {
      "epoch": 0.26512968299711814,
      "grad_norm": 2.4525790214538574,
      "learning_rate": 9.408947368421054e-05,
      "loss": 2.6027,
      "step": 23
    },
    {
      "epoch": 0.276657060518732,
      "grad_norm": 2.5126707553863525,
      "learning_rate": 9.355789473684211e-05,
      "loss": 2.9244,
      "step": 24
    },
    {
      "epoch": 0.2881844380403458,
      "grad_norm": 5.332083702087402,
      "learning_rate": 9.302631578947369e-05,
      "loss": 3.8721,
      "step": 25
    },
    {
      "epoch": 0.29971181556195964,
      "grad_norm": 2.8693103790283203,
      "learning_rate": 9.249473684210526e-05,
      "loss": 2.4373,
      "step": 26
    },
    {
      "epoch": 0.3112391930835735,
      "grad_norm": 1.3940588235855103,
      "learning_rate": 9.196315789473685e-05,
      "loss": 2.3714,
      "step": 27
    },
    {
      "epoch": 0.3227665706051873,
      "grad_norm": 1.8282368183135986,
      "learning_rate": 9.143157894736843e-05,
      "loss": 2.272,
      "step": 28
    },
    {
      "epoch": 0.33429394812680113,
      "grad_norm": 1.570576786994934,
      "learning_rate": 9.09e-05,
      "loss": 2.3689,
      "step": 29
    },
    {
      "epoch": 0.345821325648415,
      "grad_norm": 1.7571359872817993,
      "learning_rate": 9.036842105263158e-05,
      "loss": 2.5752,
      "step": 30
    },
    {
      "epoch": 0.3573487031700288,
      "grad_norm": 3.287325143814087,
      "learning_rate": 8.983684210526316e-05,
      "loss": 2.9271,
      "step": 31
    },
    {
      "epoch": 0.3688760806916426,
      "grad_norm": 3.290728807449341,
      "learning_rate": 8.930526315789474e-05,
      "loss": 2.8079,
      "step": 32
    },
    {
      "epoch": 0.3804034582132565,
      "grad_norm": 1.4297772645950317,
      "learning_rate": 8.877368421052632e-05,
      "loss": 2.2268,
      "step": 33
    },
    {
      "epoch": 0.3919308357348703,
      "grad_norm": 1.2625175714492798,
      "learning_rate": 8.82421052631579e-05,
      "loss": 2.1646,
      "step": 34
    },
    {
      "epoch": 0.4034582132564842,
      "grad_norm": 1.2002922296524048,
      "learning_rate": 8.771052631578948e-05,
      "loss": 2.0982,
      "step": 35
    },
    {
      "epoch": 0.414985590778098,
      "grad_norm": 1.4599192142486572,
      "learning_rate": 8.717894736842105e-05,
      "loss": 2.3861,
      "step": 36
    },
    {
      "epoch": 0.4265129682997118,
      "grad_norm": 2.3603785037994385,
      "learning_rate": 8.664736842105263e-05,
      "loss": 2.7989,
      "step": 37
    },
    {
      "epoch": 0.43804034582132567,
      "grad_norm": 3.2342662811279297,
      "learning_rate": 8.61157894736842e-05,
      "loss": 2.6078,
      "step": 38
    },
    {
      "epoch": 0.4495677233429395,
      "grad_norm": 1.4671645164489746,
      "learning_rate": 8.55842105263158e-05,
      "loss": 2.1885,
      "step": 39
    },
    {
      "epoch": 0.4610951008645533,
      "grad_norm": 1.2487242221832275,
      "learning_rate": 8.505263157894737e-05,
      "loss": 1.9964,
      "step": 40
    },
    {
      "epoch": 0.47262247838616717,
      "grad_norm": 1.3007112741470337,
      "learning_rate": 8.452105263157896e-05,
      "loss": 1.9985,
      "step": 41
    },
    {
      "epoch": 0.484149855907781,
      "grad_norm": 1.5477087497711182,
      "learning_rate": 8.398947368421053e-05,
      "loss": 2.1297,
      "step": 42
    },
    {
      "epoch": 0.4956772334293948,
      "grad_norm": 2.269618272781372,
      "learning_rate": 8.345789473684211e-05,
      "loss": 2.4867,
      "step": 43
    },
    {
      "epoch": 0.5072046109510087,
      "grad_norm": 3.860551595687866,
      "learning_rate": 8.292631578947368e-05,
      "loss": 3.0229,
      "step": 44
    },
    {
      "epoch": 0.5187319884726225,
      "grad_norm": 1.6341471672058105,
      "learning_rate": 8.239473684210526e-05,
      "loss": 1.9718,
      "step": 45
    },
    {
      "epoch": 0.5302593659942363,
      "grad_norm": 1.2065647840499878,
      "learning_rate": 8.186315789473683e-05,
      "loss": 2.0059,
      "step": 46
    },
    {
      "epoch": 0.5417867435158501,
      "grad_norm": 1.6778544187545776,
      "learning_rate": 8.133157894736842e-05,
      "loss": 1.9092,
      "step": 47
    },
    {
      "epoch": 0.553314121037464,
      "grad_norm": 1.4027705192565918,
      "learning_rate": 8.080000000000001e-05,
      "loss": 2.0142,
      "step": 48
    },
    {
      "epoch": 0.5648414985590778,
      "grad_norm": 1.6712775230407715,
      "learning_rate": 8.026842105263159e-05,
      "loss": 2.289,
      "step": 49
    },
    {
      "epoch": 0.5763688760806917,
      "grad_norm": 3.5697531700134277,
      "learning_rate": 7.973684210526316e-05,
      "loss": 2.8535,
      "step": 50
    },
    {
      "epoch": 0.5763688760806917,
      "eval_loss": 0.522826075553894,
      "eval_runtime": 16.9076,
      "eval_samples_per_second": 68.963,
      "eval_steps_per_second": 2.188,
      "step": 50
    },
    {
      "epoch": 0.5878962536023055,
      "grad_norm": 3.06416392326355,
      "learning_rate": 7.920526315789474e-05,
      "loss": 1.8642,
      "step": 51
    },
    {
      "epoch": 0.5994236311239193,
      "grad_norm": 1.5283445119857788,
      "learning_rate": 7.867368421052631e-05,
      "loss": 1.8985,
      "step": 52
    },
    {
      "epoch": 0.6109510086455331,
      "grad_norm": 1.7631272077560425,
      "learning_rate": 7.814210526315789e-05,
      "loss": 1.7763,
      "step": 53
    },
    {
      "epoch": 0.622478386167147,
      "grad_norm": 1.3397003412246704,
      "learning_rate": 7.761052631578946e-05,
      "loss": 1.9477,
      "step": 54
    },
    {
      "epoch": 0.6340057636887608,
      "grad_norm": 1.6053922176361084,
      "learning_rate": 7.707894736842105e-05,
      "loss": 2.18,
      "step": 55
    },
    {
      "epoch": 0.6455331412103746,
      "grad_norm": 2.7249755859375,
      "learning_rate": 7.654736842105264e-05,
      "loss": 2.3534,
      "step": 56
    },
    {
      "epoch": 0.6570605187319885,
      "grad_norm": 2.97141432762146,
      "learning_rate": 7.601578947368422e-05,
      "loss": 2.4034,
      "step": 57
    },
    {
      "epoch": 0.6685878962536023,
      "grad_norm": 1.9900548458099365,
      "learning_rate": 7.548421052631579e-05,
      "loss": 1.8997,
      "step": 58
    },
    {
      "epoch": 0.6801152737752162,
      "grad_norm": 1.2884211540222168,
      "learning_rate": 7.495263157894737e-05,
      "loss": 1.7887,
      "step": 59
    },
    {
      "epoch": 0.69164265129683,
      "grad_norm": 1.2060939073562622,
      "learning_rate": 7.442105263157894e-05,
      "loss": 1.7919,
      "step": 60
    },
    {
      "epoch": 0.7031700288184438,
      "grad_norm": 1.6086294651031494,
      "learning_rate": 7.388947368421053e-05,
      "loss": 1.947,
      "step": 61
    },
    {
      "epoch": 0.7146974063400576,
      "grad_norm": 2.0594823360443115,
      "learning_rate": 7.335789473684211e-05,
      "loss": 2.2118,
      "step": 62
    },
    {
      "epoch": 0.7262247838616714,
      "grad_norm": 3.474539041519165,
      "learning_rate": 7.282631578947368e-05,
      "loss": 2.4448,
      "step": 63
    },
    {
      "epoch": 0.7377521613832853,
      "grad_norm": 2.4555892944335938,
      "learning_rate": 7.229473684210527e-05,
      "loss": 1.6301,
      "step": 64
    },
    {
      "epoch": 0.7492795389048992,
      "grad_norm": 1.3167078495025635,
      "learning_rate": 7.176315789473685e-05,
      "loss": 1.8001,
      "step": 65
    },
    {
      "epoch": 0.760806916426513,
      "grad_norm": 1.3120259046554565,
      "learning_rate": 7.123157894736842e-05,
      "loss": 1.6406,
      "step": 66
    },
    {
      "epoch": 0.7723342939481268,
      "grad_norm": 1.2128572463989258,
      "learning_rate": 7.07e-05,
      "loss": 1.9768,
      "step": 67
    },
    {
      "epoch": 0.7838616714697406,
      "grad_norm": 1.7433110475540161,
      "learning_rate": 7.016842105263159e-05,
      "loss": 2.0889,
      "step": 68
    },
    {
      "epoch": 0.7953890489913544,
      "grad_norm": 2.9662296772003174,
      "learning_rate": 6.963684210526316e-05,
      "loss": 2.4274,
      "step": 69
    },
    {
      "epoch": 0.8069164265129684,
      "grad_norm": 1.7265340089797974,
      "learning_rate": 6.910526315789474e-05,
      "loss": 1.6667,
      "step": 70
    },
    {
      "epoch": 0.8184438040345822,
      "grad_norm": 1.2966630458831787,
      "learning_rate": 6.857368421052631e-05,
      "loss": 1.5911,
      "step": 71
    },
    {
      "epoch": 0.829971181556196,
      "grad_norm": 1.1538748741149902,
      "learning_rate": 6.80421052631579e-05,
      "loss": 1.623,
      "step": 72
    },
    {
      "epoch": 0.8414985590778098,
      "grad_norm": 1.2530492544174194,
      "learning_rate": 6.751052631578948e-05,
      "loss": 1.6128,
      "step": 73
    },
    {
      "epoch": 0.8530259365994236,
      "grad_norm": 1.6641517877578735,
      "learning_rate": 6.697894736842105e-05,
      "loss": 2.1622,
      "step": 74
    },
    {
      "epoch": 0.8645533141210374,
      "grad_norm": 2.932410955429077,
      "learning_rate": 6.644736842105264e-05,
      "loss": 2.34,
      "step": 75
    },
    {
      "epoch": 0.8760806916426513,
      "grad_norm": 1.564931869506836,
      "learning_rate": 6.591578947368422e-05,
      "loss": 1.5343,
      "step": 76
    },
    {
      "epoch": 0.8876080691642652,
      "grad_norm": 1.22451913356781,
      "learning_rate": 6.538421052631579e-05,
      "loss": 1.5855,
      "step": 77
    },
    {
      "epoch": 0.899135446685879,
      "grad_norm": 1.0155211687088013,
      "learning_rate": 6.485263157894737e-05,
      "loss": 1.4917,
      "step": 78
    },
    {
      "epoch": 0.9106628242074928,
      "grad_norm": 1.1343241930007935,
      "learning_rate": 6.432105263157894e-05,
      "loss": 1.4143,
      "step": 79
    },
    {
      "epoch": 0.9221902017291066,
      "grad_norm": 1.3770869970321655,
      "learning_rate": 6.378947368421053e-05,
      "loss": 1.9376,
      "step": 80
    },
    {
      "epoch": 0.9337175792507204,
      "grad_norm": 2.184307813644409,
      "learning_rate": 6.32578947368421e-05,
      "loss": 2.1743,
      "step": 81
    },
    {
      "epoch": 0.9452449567723343,
      "grad_norm": 2.2855091094970703,
      "learning_rate": 6.27263157894737e-05,
      "loss": 2.0407,
      "step": 82
    },
    {
      "epoch": 0.9567723342939481,
      "grad_norm": 1.2995476722717285,
      "learning_rate": 6.219473684210527e-05,
      "loss": 1.5107,
      "step": 83
    },
    {
      "epoch": 0.968299711815562,
      "grad_norm": 1.1009643077850342,
      "learning_rate": 6.166315789473685e-05,
      "loss": 1.4408,
      "step": 84
    },
    {
      "epoch": 0.9798270893371758,
      "grad_norm": 1.1911717653274536,
      "learning_rate": 6.113157894736842e-05,
      "loss": 1.7328,
      "step": 85
    },
    {
      "epoch": 0.9913544668587896,
      "grad_norm": 1.749642014503479,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 1.8929,
      "step": 86
    },
    {
      "epoch": 1.005763688760807,
      "grad_norm": 2.45554256439209,
      "learning_rate": 6.006842105263158e-05,
      "loss": 1.955,
      "step": 87
    },
    {
      "epoch": 1.0172910662824208,
      "grad_norm": 1.4383282661437988,
      "learning_rate": 5.953684210526315e-05,
      "loss": 1.2303,
      "step": 88
    },
    {
      "epoch": 1.0288184438040346,
      "grad_norm": 1.0873348712921143,
      "learning_rate": 5.900526315789474e-05,
      "loss": 1.2904,
      "step": 89
    },
    {
      "epoch": 1.0403458213256485,
      "grad_norm": 1.093670129776001,
      "learning_rate": 5.847368421052632e-05,
      "loss": 1.342,
      "step": 90
    },
    {
      "epoch": 1.0518731988472623,
      "grad_norm": 1.1975847482681274,
      "learning_rate": 5.79421052631579e-05,
      "loss": 1.5195,
      "step": 91
    },
    {
      "epoch": 1.063400576368876,
      "grad_norm": 1.6481062173843384,
      "learning_rate": 5.7410526315789475e-05,
      "loss": 1.7187,
      "step": 92
    },
    {
      "epoch": 1.07492795389049,
      "grad_norm": 2.8624935150146484,
      "learning_rate": 5.687894736842105e-05,
      "loss": 1.8387,
      "step": 93
    },
    {
      "epoch": 1.0864553314121037,
      "grad_norm": 1.3205828666687012,
      "learning_rate": 5.6347368421052625e-05,
      "loss": 1.3021,
      "step": 94
    },
    {
      "epoch": 1.0979827089337175,
      "grad_norm": 1.1411229372024536,
      "learning_rate": 5.5815789473684214e-05,
      "loss": 1.3437,
      "step": 95
    },
    {
      "epoch": 1.1095100864553313,
      "grad_norm": 1.0338324308395386,
      "learning_rate": 5.5284210526315796e-05,
      "loss": 1.3011,
      "step": 96
    },
    {
      "epoch": 1.1210374639769451,
      "grad_norm": 1.0531305074691772,
      "learning_rate": 5.475263157894737e-05,
      "loss": 1.3966,
      "step": 97
    },
    {
      "epoch": 1.1325648414985592,
      "grad_norm": 1.6607333421707153,
      "learning_rate": 5.422105263157895e-05,
      "loss": 1.7677,
      "step": 98
    },
    {
      "epoch": 1.144092219020173,
      "grad_norm": 2.978102684020996,
      "learning_rate": 5.368947368421053e-05,
      "loss": 2.0096,
      "step": 99
    },
    {
      "epoch": 1.1556195965417868,
      "grad_norm": 1.1680306196212769,
      "learning_rate": 5.3157894736842104e-05,
      "loss": 1.2335,
      "step": 100
    },
    {
      "epoch": 1.1556195965417868,
      "eval_loss": 0.3946053087711334,
      "eval_runtime": 17.2978,
      "eval_samples_per_second": 67.407,
      "eval_steps_per_second": 2.139,
      "step": 100
    },
    {
      "epoch": 1.1671469740634006,
      "grad_norm": 1.1594160795211792,
      "learning_rate": 5.262631578947368e-05,
      "loss": 1.1509,
      "step": 101
    },
    {
      "epoch": 1.1786743515850144,
      "grad_norm": 1.061645269393921,
      "learning_rate": 5.209473684210527e-05,
      "loss": 1.1868,
      "step": 102
    },
    {
      "epoch": 1.1902017291066282,
      "grad_norm": 1.021350622177124,
      "learning_rate": 5.1563157894736844e-05,
      "loss": 1.2714,
      "step": 103
    },
    {
      "epoch": 1.201729106628242,
      "grad_norm": 1.3108984231948853,
      "learning_rate": 5.1031578947368426e-05,
      "loss": 1.5794,
      "step": 104
    },
    {
      "epoch": 1.2132564841498559,
      "grad_norm": 2.1817190647125244,
      "learning_rate": 5.05e-05,
      "loss": 1.6861,
      "step": 105
    },
    {
      "epoch": 1.2247838616714697,
      "grad_norm": 2.2876627445220947,
      "learning_rate": 4.9968421052631576e-05,
      "loss": 1.647,
      "step": 106
    },
    {
      "epoch": 1.2363112391930835,
      "grad_norm": 1.1767092943191528,
      "learning_rate": 4.943684210526316e-05,
      "loss": 1.1436,
      "step": 107
    },
    {
      "epoch": 1.2478386167146973,
      "grad_norm": 1.122977614402771,
      "learning_rate": 4.890526315789474e-05,
      "loss": 1.2584,
      "step": 108
    },
    {
      "epoch": 1.2593659942363113,
      "grad_norm": 1.0112011432647705,
      "learning_rate": 4.8373684210526316e-05,
      "loss": 1.2698,
      "step": 109
    },
    {
      "epoch": 1.270893371757925,
      "grad_norm": 1.2474466562271118,
      "learning_rate": 4.784210526315789e-05,
      "loss": 1.4298,
      "step": 110
    },
    {
      "epoch": 1.282420749279539,
      "grad_norm": 2.0961225032806396,
      "learning_rate": 4.731052631578947e-05,
      "loss": 1.5249,
      "step": 111
    },
    {
      "epoch": 1.2939481268011528,
      "grad_norm": 2.892947196960449,
      "learning_rate": 4.6778947368421055e-05,
      "loss": 1.7823,
      "step": 112
    },
    {
      "epoch": 1.3054755043227666,
      "grad_norm": 1.2904059886932373,
      "learning_rate": 4.624736842105263e-05,
      "loss": 1.0597,
      "step": 113
    },
    {
      "epoch": 1.3170028818443804,
      "grad_norm": 1.0860971212387085,
      "learning_rate": 4.571578947368421e-05,
      "loss": 1.3479,
      "step": 114
    },
    {
      "epoch": 1.3285302593659942,
      "grad_norm": 1.0123194456100464,
      "learning_rate": 4.518421052631579e-05,
      "loss": 1.1235,
      "step": 115
    },
    {
      "epoch": 1.340057636887608,
      "grad_norm": 1.1925913095474243,
      "learning_rate": 4.465263157894737e-05,
      "loss": 1.3878,
      "step": 116
    },
    {
      "epoch": 1.3515850144092219,
      "grad_norm": 1.8660753965377808,
      "learning_rate": 4.412105263157895e-05,
      "loss": 1.6422,
      "step": 117
    },
    {
      "epoch": 1.3631123919308357,
      "grad_norm": 2.7606379985809326,
      "learning_rate": 4.358947368421053e-05,
      "loss": 1.6949,
      "step": 118
    },
    {
      "epoch": 1.3746397694524495,
      "grad_norm": 1.5100210905075073,
      "learning_rate": 4.30578947368421e-05,
      "loss": 1.0997,
      "step": 119
    },
    {
      "epoch": 1.3861671469740635,
      "grad_norm": 1.165024995803833,
      "learning_rate": 4.2526315789473685e-05,
      "loss": 1.1644,
      "step": 120
    },
    {
      "epoch": 1.397694524495677,
      "grad_norm": 0.9900702834129333,
      "learning_rate": 4.199473684210527e-05,
      "loss": 1.2457,
      "step": 121
    },
    {
      "epoch": 1.4092219020172911,
      "grad_norm": 1.1262096166610718,
      "learning_rate": 4.146315789473684e-05,
      "loss": 1.341,
      "step": 122
    },
    {
      "epoch": 1.420749279538905,
      "grad_norm": 1.4549776315689087,
      "learning_rate": 4.093157894736842e-05,
      "loss": 1.4951,
      "step": 123
    },
    {
      "epoch": 1.4322766570605188,
      "grad_norm": 2.960393190383911,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 1.9611,
      "step": 124
    },
    {
      "epoch": 1.4438040345821326,
      "grad_norm": 1.1258149147033691,
      "learning_rate": 3.986842105263158e-05,
      "loss": 1.2408,
      "step": 125
    },
    {
      "epoch": 1.4553314121037464,
      "grad_norm": 1.0389220714569092,
      "learning_rate": 3.933684210526316e-05,
      "loss": 1.1756,
      "step": 126
    },
    {
      "epoch": 1.4668587896253602,
      "grad_norm": 1.1349718570709229,
      "learning_rate": 3.880526315789473e-05,
      "loss": 1.2792,
      "step": 127
    },
    {
      "epoch": 1.478386167146974,
      "grad_norm": 1.1235551834106445,
      "learning_rate": 3.827368421052632e-05,
      "loss": 1.3884,
      "step": 128
    },
    {
      "epoch": 1.4899135446685878,
      "grad_norm": 1.4590579271316528,
      "learning_rate": 3.7742105263157896e-05,
      "loss": 1.547,
      "step": 129
    },
    {
      "epoch": 1.5014409221902016,
      "grad_norm": 2.1345937252044678,
      "learning_rate": 3.721052631578947e-05,
      "loss": 1.699,
      "step": 130
    },
    {
      "epoch": 1.5129682997118157,
      "grad_norm": 2.1057217121124268,
      "learning_rate": 3.6678947368421054e-05,
      "loss": 1.3336,
      "step": 131
    },
    {
      "epoch": 1.5244956772334293,
      "grad_norm": 1.0975521802902222,
      "learning_rate": 3.6147368421052636e-05,
      "loss": 1.1837,
      "step": 132
    },
    {
      "epoch": 1.5360230547550433,
      "grad_norm": 1.0290075540542603,
      "learning_rate": 3.561578947368421e-05,
      "loss": 1.0588,
      "step": 133
    },
    {
      "epoch": 1.547550432276657,
      "grad_norm": 1.109420895576477,
      "learning_rate": 3.508421052631579e-05,
      "loss": 1.2782,
      "step": 134
    },
    {
      "epoch": 1.559077809798271,
      "grad_norm": 1.3788710832595825,
      "learning_rate": 3.455263157894737e-05,
      "loss": 1.4358,
      "step": 135
    },
    {
      "epoch": 1.5706051873198847,
      "grad_norm": 1.9282630681991577,
      "learning_rate": 3.402105263157895e-05,
      "loss": 1.5525,
      "step": 136
    },
    {
      "epoch": 1.5821325648414986,
      "grad_norm": 3.0819172859191895,
      "learning_rate": 3.3489473684210526e-05,
      "loss": 1.6032,
      "step": 137
    },
    {
      "epoch": 1.5936599423631124,
      "grad_norm": 1.0416362285614014,
      "learning_rate": 3.295789473684211e-05,
      "loss": 1.0864,
      "step": 138
    },
    {
      "epoch": 1.6051873198847262,
      "grad_norm": 1.0696144104003906,
      "learning_rate": 3.242631578947368e-05,
      "loss": 1.0025,
      "step": 139
    },
    {
      "epoch": 1.6167146974063402,
      "grad_norm": 0.9461542963981628,
      "learning_rate": 3.1894736842105265e-05,
      "loss": 0.982,
      "step": 140
    },
    {
      "epoch": 1.6282420749279538,
      "grad_norm": 1.1103463172912598,
      "learning_rate": 3.136315789473685e-05,
      "loss": 1.2823,
      "step": 141
    },
    {
      "epoch": 1.6397694524495678,
      "grad_norm": 1.6639349460601807,
      "learning_rate": 3.083157894736842e-05,
      "loss": 1.486,
      "step": 142
    },
    {
      "epoch": 1.6512968299711814,
      "grad_norm": 2.9342904090881348,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 1.8242,
      "step": 143
    },
    {
      "epoch": 1.6628242074927955,
      "grad_norm": 1.3234608173370361,
      "learning_rate": 2.9768421052631577e-05,
      "loss": 1.1624,
      "step": 144
    },
    {
      "epoch": 1.674351585014409,
      "grad_norm": 1.2971738576889038,
      "learning_rate": 2.923684210526316e-05,
      "loss": 1.1139,
      "step": 145
    },
    {
      "epoch": 1.685878962536023,
      "grad_norm": 1.0851243734359741,
      "learning_rate": 2.8705263157894737e-05,
      "loss": 1.0419,
      "step": 146
    },
    {
      "epoch": 1.697406340057637,
      "grad_norm": 1.0544915199279785,
      "learning_rate": 2.8173684210526313e-05,
      "loss": 1.2045,
      "step": 147
    },
    {
      "epoch": 1.7089337175792507,
      "grad_norm": 1.4829477071762085,
      "learning_rate": 2.7642105263157898e-05,
      "loss": 1.3821,
      "step": 148
    },
    {
      "epoch": 1.7204610951008645,
      "grad_norm": 2.9280033111572266,
      "learning_rate": 2.7110526315789473e-05,
      "loss": 1.8452,
      "step": 149
    },
    {
      "epoch": 1.7319884726224783,
      "grad_norm": 1.1372859477996826,
      "learning_rate": 2.6578947368421052e-05,
      "loss": 1.0575,
      "step": 150
    },
    {
      "epoch": 1.7319884726224783,
      "eval_loss": 0.3432846665382385,
      "eval_runtime": 18.7516,
      "eval_samples_per_second": 62.181,
      "eval_steps_per_second": 1.973,
      "step": 150
    },
    {
      "epoch": 1.7435158501440924,
      "grad_norm": 1.023056983947754,
      "learning_rate": 2.6047368421052634e-05,
      "loss": 1.0717,
      "step": 151
    },
    {
      "epoch": 1.755043227665706,
      "grad_norm": 0.9638779759407043,
      "learning_rate": 2.5515789473684213e-05,
      "loss": 0.9971,
      "step": 152
    },
    {
      "epoch": 1.76657060518732,
      "grad_norm": 1.0617165565490723,
      "learning_rate": 2.4984210526315788e-05,
      "loss": 1.1202,
      "step": 153
    },
    {
      "epoch": 1.7780979827089336,
      "grad_norm": 1.5653163194656372,
      "learning_rate": 2.445263157894737e-05,
      "loss": 1.4338,
      "step": 154
    },
    {
      "epoch": 1.7896253602305476,
      "grad_norm": 2.3075835704803467,
      "learning_rate": 2.3921052631578946e-05,
      "loss": 1.5135,
      "step": 155
    },
    {
      "epoch": 1.8011527377521612,
      "grad_norm": 2.41831111907959,
      "learning_rate": 2.3389473684210528e-05,
      "loss": 1.4573,
      "step": 156
    },
    {
      "epoch": 1.8126801152737753,
      "grad_norm": 1.1299927234649658,
      "learning_rate": 2.2857894736842106e-05,
      "loss": 0.9739,
      "step": 157
    },
    {
      "epoch": 1.824207492795389,
      "grad_norm": 0.9729629755020142,
      "learning_rate": 2.2326315789473685e-05,
      "loss": 1.1128,
      "step": 158
    },
    {
      "epoch": 1.8357348703170029,
      "grad_norm": 0.9762557744979858,
      "learning_rate": 2.1794736842105264e-05,
      "loss": 1.0709,
      "step": 159
    },
    {
      "epoch": 1.8472622478386167,
      "grad_norm": 1.2971409559249878,
      "learning_rate": 2.1263157894736842e-05,
      "loss": 1.3502,
      "step": 160
    },
    {
      "epoch": 1.8587896253602305,
      "grad_norm": 1.9666305780410767,
      "learning_rate": 2.073157894736842e-05,
      "loss": 1.6911,
      "step": 161
    },
    {
      "epoch": 1.8703170028818443,
      "grad_norm": 2.376969575881958,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 1.433,
      "step": 162
    },
    {
      "epoch": 1.8818443804034581,
      "grad_norm": 0.978244423866272,
      "learning_rate": 1.966842105263158e-05,
      "loss": 1.0352,
      "step": 163
    },
    {
      "epoch": 1.8933717579250722,
      "grad_norm": 0.9526923298835754,
      "learning_rate": 1.913684210526316e-05,
      "loss": 0.9,
      "step": 164
    },
    {
      "epoch": 1.9048991354466858,
      "grad_norm": 0.9895343780517578,
      "learning_rate": 1.8605263157894736e-05,
      "loss": 0.981,
      "step": 165
    },
    {
      "epoch": 1.9164265129682998,
      "grad_norm": 1.156259536743164,
      "learning_rate": 1.8073684210526318e-05,
      "loss": 1.1636,
      "step": 166
    },
    {
      "epoch": 1.9279538904899134,
      "grad_norm": 1.878818154335022,
      "learning_rate": 1.7542105263157897e-05,
      "loss": 1.4938,
      "step": 167
    },
    {
      "epoch": 1.9394812680115274,
      "grad_norm": 2.605971097946167,
      "learning_rate": 1.7010526315789475e-05,
      "loss": 1.4686,
      "step": 168
    },
    {
      "epoch": 1.9510086455331412,
      "grad_norm": 0.9951879978179932,
      "learning_rate": 1.6478947368421054e-05,
      "loss": 1.0558,
      "step": 169
    },
    {
      "epoch": 1.962536023054755,
      "grad_norm": 0.976740300655365,
      "learning_rate": 1.5947368421052633e-05,
      "loss": 1.0558,
      "step": 170
    },
    {
      "epoch": 1.9740634005763689,
      "grad_norm": 0.9469358325004578,
      "learning_rate": 1.541578947368421e-05,
      "loss": 1.0353,
      "step": 171
    },
    {
      "epoch": 1.9855907780979827,
      "grad_norm": 1.4167894124984741,
      "learning_rate": 1.4884210526315788e-05,
      "loss": 1.2964,
      "step": 172
    },
    {
      "epoch": 1.9971181556195965,
      "grad_norm": 2.729344129562378,
      "learning_rate": 1.4352631578947369e-05,
      "loss": 1.7346,
      "step": 173
    },
    {
      "epoch": 2.011527377521614,
      "grad_norm": 0.9175971746444702,
      "learning_rate": 1.3821052631578949e-05,
      "loss": 0.8195,
      "step": 174
    },
    {
      "epoch": 2.0230547550432276,
      "grad_norm": 0.883823037147522,
      "learning_rate": 1.3289473684210526e-05,
      "loss": 1.0166,
      "step": 175
    },
    {
      "epoch": 2.0345821325648417,
      "grad_norm": 0.8910732865333557,
      "learning_rate": 1.2757894736842106e-05,
      "loss": 0.9309,
      "step": 176
    },
    {
      "epoch": 2.0461095100864553,
      "grad_norm": 0.9672825932502747,
      "learning_rate": 1.2226315789473685e-05,
      "loss": 0.9933,
      "step": 177
    },
    {
      "epoch": 2.0576368876080693,
      "grad_norm": 1.295758605003357,
      "learning_rate": 1.1694736842105264e-05,
      "loss": 1.1036,
      "step": 178
    },
    {
      "epoch": 2.069164265129683,
      "grad_norm": 2.083310127258301,
      "learning_rate": 1.1163157894736842e-05,
      "loss": 1.1551,
      "step": 179
    },
    {
      "epoch": 2.080691642651297,
      "grad_norm": 2.122234344482422,
      "learning_rate": 1.0631578947368421e-05,
      "loss": 1.114,
      "step": 180
    },
    {
      "epoch": 2.0922190201729105,
      "grad_norm": 1.4002490043640137,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 0.8692,
      "step": 181
    },
    {
      "epoch": 2.1037463976945245,
      "grad_norm": 1.2972763776779175,
      "learning_rate": 9.56842105263158e-06,
      "loss": 0.8139,
      "step": 182
    },
    {
      "epoch": 2.115273775216138,
      "grad_norm": 1.2130416631698608,
      "learning_rate": 9.036842105263159e-06,
      "loss": 0.9519,
      "step": 183
    },
    {
      "epoch": 2.126801152737752,
      "grad_norm": 1.3538533449172974,
      "learning_rate": 8.505263157894738e-06,
      "loss": 1.1313,
      "step": 184
    },
    {
      "epoch": 2.138328530259366,
      "grad_norm": 1.8970357179641724,
      "learning_rate": 7.973684210526316e-06,
      "loss": 1.1983,
      "step": 185
    },
    {
      "epoch": 2.14985590778098,
      "grad_norm": 2.499178886413574,
      "learning_rate": 7.442105263157894e-06,
      "loss": 1.1611,
      "step": 186
    },
    {
      "epoch": 2.161383285302594,
      "grad_norm": 0.9207624197006226,
      "learning_rate": 6.9105263157894745e-06,
      "loss": 0.7852,
      "step": 187
    },
    {
      "epoch": 2.1729106628242074,
      "grad_norm": 0.9183100461959839,
      "learning_rate": 6.378947368421053e-06,
      "loss": 0.8561,
      "step": 188
    },
    {
      "epoch": 2.1844380403458215,
      "grad_norm": 0.886722207069397,
      "learning_rate": 5.847368421052632e-06,
      "loss": 0.8833,
      "step": 189
    },
    {
      "epoch": 2.195965417867435,
      "grad_norm": 1.110753059387207,
      "learning_rate": 5.315789473684211e-06,
      "loss": 1.1024,
      "step": 190
    },
    {
      "epoch": 2.207492795389049,
      "grad_norm": 1.6218575239181519,
      "learning_rate": 4.78421052631579e-06,
      "loss": 1.0674,
      "step": 191
    },
    {
      "epoch": 2.2190201729106627,
      "grad_norm": 2.869983673095703,
      "learning_rate": 4.252631578947369e-06,
      "loss": 1.2698,
      "step": 192
    },
    {
      "epoch": 2.2305475504322767,
      "grad_norm": 0.937856912612915,
      "learning_rate": 3.721052631578947e-06,
      "loss": 0.916,
      "step": 193
    },
    {
      "epoch": 2.2420749279538903,
      "grad_norm": 0.8958096504211426,
      "learning_rate": 3.1894736842105266e-06,
      "loss": 0.9195,
      "step": 194
    },
    {
      "epoch": 2.2536023054755043,
      "grad_norm": 0.9468475580215454,
      "learning_rate": 2.6578947368421053e-06,
      "loss": 0.9558,
      "step": 195
    },
    {
      "epoch": 2.2651296829971184,
      "grad_norm": 1.0763096809387207,
      "learning_rate": 2.1263157894736844e-06,
      "loss": 1.0005,
      "step": 196
    },
    {
      "epoch": 2.276657060518732,
      "grad_norm": 1.6214865446090698,
      "learning_rate": 1.5947368421052633e-06,
      "loss": 1.2395,
      "step": 197
    },
    {
      "epoch": 2.288184438040346,
      "grad_norm": 2.93674898147583,
      "learning_rate": 1.0631578947368422e-06,
      "loss": 1.3516,
      "step": 198
    },
    {
      "epoch": 2.2997118155619596,
      "grad_norm": 0.9342450499534607,
      "learning_rate": 5.315789473684211e-07,
      "loss": 0.7604,
      "step": 199
    },
    {
      "epoch": 2.3112391930835736,
      "grad_norm": 0.9478535652160645,
      "learning_rate": 0.0,
      "loss": 0.7978,
      "step": 200
    },
    {
      "epoch": 2.3112391930835736,
      "eval_loss": 0.3262763023376465,
      "eval_runtime": 17.7432,
      "eval_samples_per_second": 65.715,
      "eval_steps_per_second": 2.085,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.5438705567323914e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
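
The state above follows the schema the Hugging Face `Trainer` writes to `trainer_state.json` inside each checkpoint directory. A minimal sketch of how such a file can be inspected with only the standard library, assuming it is saved locally as `trainer_state.json` (the path is an assumption; adjust it to the checkpoint directory, e.g. `miner_id_24/checkpoint-200/trainer_state.json`):

```python
import json

# Load the trainer state written by the Hugging Face Trainer.
# NOTE: the filename/path here is an assumption for illustration.
with open("trainer_state.json") as f:
    state = json.load(f)

# Training-step entries carry a "loss" key; evaluation entries carry
# "eval_loss" instead, so the two streams can be separated by key.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

print(f"best_metric (eval_loss): {state['best_metric']:.4f}")
print(f"best checkpoint:         {state['best_model_checkpoint']}")
print(f"final train loss @ step {train_log[-1]['step']}: {train_log[-1]['loss']:.4f}")
for e in eval_log:
    print(f"step {e['step']:>4}  eval_loss {e['eval_loss']:.4f}")
```

Run against this state, the eval-loss sequence (1.3264 → 0.5228 → 0.3946 → 0.3433 → 0.3263 at steps 1, 50, 100, 150, 200) shows why `best_model_checkpoint` points at `checkpoint-200` and why the `EarlyStoppingCallback` patience counter remained at 0: the metric improved at every evaluation.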