|
{ |
|
"best_metric": 0.10675784200429916, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.5089058524173028, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002544529262086514, |
|
"grad_norm": 2.0976202487945557, |
|
"learning_rate": 1.0017e-05, |
|
"loss": 0.4494, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002544529262086514, |
|
"eval_loss": 0.6885223388671875, |
|
"eval_runtime": 10.1676, |
|
"eval_samples_per_second": 16.326, |
|
"eval_steps_per_second": 4.131, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005089058524173028, |
|
"grad_norm": 2.4291188716888428, |
|
"learning_rate": 2.0034e-05, |
|
"loss": 0.4714, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007633587786259542, |
|
"grad_norm": 2.6177380084991455, |
|
"learning_rate": 3.0050999999999997e-05, |
|
"loss": 0.4572, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.010178117048346057, |
|
"grad_norm": 2.227078676223755, |
|
"learning_rate": 4.0068e-05, |
|
"loss": 0.3751, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01272264631043257, |
|
"grad_norm": 1.521238923072815, |
|
"learning_rate": 5.0085e-05, |
|
"loss": 0.3044, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.015267175572519083, |
|
"grad_norm": 1.3237571716308594, |
|
"learning_rate": 6.0101999999999995e-05, |
|
"loss": 0.2047, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.017811704834605598, |
|
"grad_norm": 1.6091803312301636, |
|
"learning_rate": 7.0119e-05, |
|
"loss": 0.1256, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.020356234096692113, |
|
"grad_norm": 1.4948714971542358, |
|
"learning_rate": 8.0136e-05, |
|
"loss": 0.1606, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.022900763358778626, |
|
"grad_norm": 1.0482193231582642, |
|
"learning_rate": 9.0153e-05, |
|
"loss": 0.1225, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02544529262086514, |
|
"grad_norm": 1.3892583847045898, |
|
"learning_rate": 0.00010017, |
|
"loss": 0.1892, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.027989821882951654, |
|
"grad_norm": 1.202789306640625, |
|
"learning_rate": 9.964278947368421e-05, |
|
"loss": 0.1744, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.030534351145038167, |
|
"grad_norm": 1.1245602369308472, |
|
"learning_rate": 9.911557894736841e-05, |
|
"loss": 0.0761, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03307888040712468, |
|
"grad_norm": 1.1420965194702148, |
|
"learning_rate": 9.858836842105263e-05, |
|
"loss": 0.0954, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.035623409669211195, |
|
"grad_norm": 0.8061597943305969, |
|
"learning_rate": 9.806115789473684e-05, |
|
"loss": 0.1012, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03816793893129771, |
|
"grad_norm": 1.2784687280654907, |
|
"learning_rate": 9.753394736842106e-05, |
|
"loss": 0.1408, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04071246819338423, |
|
"grad_norm": 1.0279990434646606, |
|
"learning_rate": 9.700673684210526e-05, |
|
"loss": 0.1174, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.043256997455470736, |
|
"grad_norm": 1.0930790901184082, |
|
"learning_rate": 9.647952631578948e-05, |
|
"loss": 0.0984, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04580152671755725, |
|
"grad_norm": 0.4375361502170563, |
|
"learning_rate": 9.595231578947368e-05, |
|
"loss": 0.0476, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04834605597964377, |
|
"grad_norm": 0.5908359289169312, |
|
"learning_rate": 9.542510526315789e-05, |
|
"loss": 0.0612, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05089058524173028, |
|
"grad_norm": 0.4914228916168213, |
|
"learning_rate": 9.48978947368421e-05, |
|
"loss": 0.0473, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05343511450381679, |
|
"grad_norm": 1.2252209186553955, |
|
"learning_rate": 9.437068421052632e-05, |
|
"loss": 0.1821, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05597964376590331, |
|
"grad_norm": 0.7145554423332214, |
|
"learning_rate": 9.384347368421052e-05, |
|
"loss": 0.0709, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.058524173027989825, |
|
"grad_norm": 0.2932605445384979, |
|
"learning_rate": 9.331626315789474e-05, |
|
"loss": 0.021, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.061068702290076333, |
|
"grad_norm": 0.628063440322876, |
|
"learning_rate": 9.278905263157894e-05, |
|
"loss": 0.0734, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06361323155216285, |
|
"grad_norm": 0.48500677943229675, |
|
"learning_rate": 9.226184210526316e-05, |
|
"loss": 0.0503, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06615776081424936, |
|
"grad_norm": 0.5622182488441467, |
|
"learning_rate": 9.173463157894736e-05, |
|
"loss": 0.0558, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06870229007633588, |
|
"grad_norm": 0.5120857357978821, |
|
"learning_rate": 9.120742105263159e-05, |
|
"loss": 0.045, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07124681933842239, |
|
"grad_norm": 0.12333207577466965, |
|
"learning_rate": 9.068021052631579e-05, |
|
"loss": 0.0056, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0737913486005089, |
|
"grad_norm": 0.8002417087554932, |
|
"learning_rate": 9.0153e-05, |
|
"loss": 0.1115, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07633587786259542, |
|
"grad_norm": 3.1628360748291016, |
|
"learning_rate": 8.96257894736842e-05, |
|
"loss": 0.661, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07888040712468193, |
|
"grad_norm": 1.9972189664840698, |
|
"learning_rate": 8.909857894736842e-05, |
|
"loss": 0.3344, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08142493638676845, |
|
"grad_norm": 1.6704767942428589, |
|
"learning_rate": 8.857136842105263e-05, |
|
"loss": 0.3315, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08396946564885496, |
|
"grad_norm": 1.8845316171646118, |
|
"learning_rate": 8.804415789473684e-05, |
|
"loss": 0.3487, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08651399491094147, |
|
"grad_norm": 2.4319205284118652, |
|
"learning_rate": 8.751694736842105e-05, |
|
"loss": 0.3297, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.089058524173028, |
|
"grad_norm": 2.7092981338500977, |
|
"learning_rate": 8.698973684210527e-05, |
|
"loss": 0.5175, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0916030534351145, |
|
"grad_norm": 1.65862238407135, |
|
"learning_rate": 8.646252631578948e-05, |
|
"loss": 0.3395, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09414758269720101, |
|
"grad_norm": 1.9453610181808472, |
|
"learning_rate": 8.593531578947368e-05, |
|
"loss": 0.3279, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09669211195928754, |
|
"grad_norm": 1.8823449611663818, |
|
"learning_rate": 8.54081052631579e-05, |
|
"loss": 0.3059, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.09923664122137404, |
|
"grad_norm": 1.427621603012085, |
|
"learning_rate": 8.48808947368421e-05, |
|
"loss": 0.2756, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.10178117048346055, |
|
"grad_norm": 1.9383624792099, |
|
"learning_rate": 8.435368421052631e-05, |
|
"loss": 0.3652, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10432569974554708, |
|
"grad_norm": 0.5723309516906738, |
|
"learning_rate": 8.382647368421053e-05, |
|
"loss": 0.0681, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.10687022900763359, |
|
"grad_norm": 1.692962884902954, |
|
"learning_rate": 8.329926315789474e-05, |
|
"loss": 0.2737, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.10941475826972011, |
|
"grad_norm": 5.488102436065674, |
|
"learning_rate": 8.277205263157894e-05, |
|
"loss": 0.3806, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.11195928753180662, |
|
"grad_norm": 2.793001890182495, |
|
"learning_rate": 8.224484210526316e-05, |
|
"loss": 0.3236, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11450381679389313, |
|
"grad_norm": 0.8564298152923584, |
|
"learning_rate": 8.171763157894736e-05, |
|
"loss": 0.0697, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11704834605597965, |
|
"grad_norm": 1.465958595275879, |
|
"learning_rate": 8.119042105263158e-05, |
|
"loss": 0.1509, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11959287531806616, |
|
"grad_norm": 1.0620710849761963, |
|
"learning_rate": 8.066321052631578e-05, |
|
"loss": 0.0998, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12213740458015267, |
|
"grad_norm": 0.551638126373291, |
|
"learning_rate": 8.0136e-05, |
|
"loss": 0.0336, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12468193384223919, |
|
"grad_norm": 1.185890555381775, |
|
"learning_rate": 7.960878947368421e-05, |
|
"loss": 0.0431, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.1272264631043257, |
|
"grad_norm": 0.4987131953239441, |
|
"learning_rate": 7.908157894736842e-05, |
|
"loss": 0.017, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1272264631043257, |
|
"eval_loss": 0.20289373397827148, |
|
"eval_runtime": 10.1481, |
|
"eval_samples_per_second": 16.358, |
|
"eval_steps_per_second": 4.139, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1297709923664122, |
|
"grad_norm": 1.5103840827941895, |
|
"learning_rate": 7.855436842105262e-05, |
|
"loss": 0.4364, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.13231552162849872, |
|
"grad_norm": 1.3223518133163452, |
|
"learning_rate": 7.802715789473684e-05, |
|
"loss": 0.3782, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.13486005089058525, |
|
"grad_norm": 1.2647677659988403, |
|
"learning_rate": 7.749994736842104e-05, |
|
"loss": 0.401, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.13740458015267176, |
|
"grad_norm": 0.7807660102844238, |
|
"learning_rate": 7.697273684210526e-05, |
|
"loss": 0.1596, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13994910941475827, |
|
"grad_norm": 0.8354874849319458, |
|
"learning_rate": 7.644552631578947e-05, |
|
"loss": 0.165, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.14249363867684478, |
|
"grad_norm": 0.6091985702514648, |
|
"learning_rate": 7.591831578947369e-05, |
|
"loss": 0.0962, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1450381679389313, |
|
"grad_norm": 0.7430208325386047, |
|
"learning_rate": 7.539110526315789e-05, |
|
"loss": 0.1676, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1475826972010178, |
|
"grad_norm": 0.6381292343139648, |
|
"learning_rate": 7.48638947368421e-05, |
|
"loss": 0.1042, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.15012722646310434, |
|
"grad_norm": 0.8650558590888977, |
|
"learning_rate": 7.433668421052632e-05, |
|
"loss": 0.1513, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.15267175572519084, |
|
"grad_norm": 0.7318075895309448, |
|
"learning_rate": 7.380947368421052e-05, |
|
"loss": 0.1286, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15521628498727735, |
|
"grad_norm": 0.5076261758804321, |
|
"learning_rate": 7.328226315789473e-05, |
|
"loss": 0.0677, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.15776081424936386, |
|
"grad_norm": 0.5992767214775085, |
|
"learning_rate": 7.275505263157895e-05, |
|
"loss": 0.0658, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.16030534351145037, |
|
"grad_norm": 1.0940337181091309, |
|
"learning_rate": 7.222784210526316e-05, |
|
"loss": 0.1328, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.1628498727735369, |
|
"grad_norm": 0.41800355911254883, |
|
"learning_rate": 7.170063157894737e-05, |
|
"loss": 0.0529, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.16539440203562342, |
|
"grad_norm": 0.410457968711853, |
|
"learning_rate": 7.117342105263158e-05, |
|
"loss": 0.0427, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.16793893129770993, |
|
"grad_norm": 0.928383469581604, |
|
"learning_rate": 7.064621052631578e-05, |
|
"loss": 0.0959, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.17048346055979643, |
|
"grad_norm": 0.39881715178489685, |
|
"learning_rate": 7.0119e-05, |
|
"loss": 0.0456, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.17302798982188294, |
|
"grad_norm": 0.6098618507385254, |
|
"learning_rate": 6.959178947368421e-05, |
|
"loss": 0.0535, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.17557251908396945, |
|
"grad_norm": 0.6409094333648682, |
|
"learning_rate": 6.906457894736843e-05, |
|
"loss": 0.0848, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.178117048346056, |
|
"grad_norm": 0.3413279950618744, |
|
"learning_rate": 6.853736842105263e-05, |
|
"loss": 0.0257, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1806615776081425, |
|
"grad_norm": 0.41074299812316895, |
|
"learning_rate": 6.801015789473684e-05, |
|
"loss": 0.0237, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.183206106870229, |
|
"grad_norm": 0.6176720857620239, |
|
"learning_rate": 6.748294736842105e-05, |
|
"loss": 0.0628, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.18575063613231552, |
|
"grad_norm": 0.24736110866069794, |
|
"learning_rate": 6.695573684210526e-05, |
|
"loss": 0.0158, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.18829516539440203, |
|
"grad_norm": 0.8030320405960083, |
|
"learning_rate": 6.642852631578946e-05, |
|
"loss": 0.0547, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.19083969465648856, |
|
"grad_norm": 0.46492913365364075, |
|
"learning_rate": 6.590131578947369e-05, |
|
"loss": 0.0397, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19338422391857507, |
|
"grad_norm": 0.570913553237915, |
|
"learning_rate": 6.537410526315789e-05, |
|
"loss": 0.0731, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.19592875318066158, |
|
"grad_norm": 0.8975678086280823, |
|
"learning_rate": 6.484689473684211e-05, |
|
"loss": 0.1668, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1984732824427481, |
|
"grad_norm": 0.10023163259029388, |
|
"learning_rate": 6.431968421052631e-05, |
|
"loss": 0.0064, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.2010178117048346, |
|
"grad_norm": 1.0112429857254028, |
|
"learning_rate": 6.379247368421052e-05, |
|
"loss": 0.1961, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.2035623409669211, |
|
"grad_norm": 1.7674541473388672, |
|
"learning_rate": 6.326526315789474e-05, |
|
"loss": 0.4854, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20610687022900764, |
|
"grad_norm": 2.2243638038635254, |
|
"learning_rate": 6.273805263157894e-05, |
|
"loss": 0.515, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.20865139949109415, |
|
"grad_norm": 1.8203999996185303, |
|
"learning_rate": 6.221084210526315e-05, |
|
"loss": 0.4717, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.21119592875318066, |
|
"grad_norm": 1.2786808013916016, |
|
"learning_rate": 6.168363157894737e-05, |
|
"loss": 0.2411, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.21374045801526717, |
|
"grad_norm": 1.3873907327651978, |
|
"learning_rate": 6.115642105263159e-05, |
|
"loss": 0.2586, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.21628498727735368, |
|
"grad_norm": 1.169811725616455, |
|
"learning_rate": 6.0629210526315787e-05, |
|
"loss": 0.2859, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.21882951653944022, |
|
"grad_norm": 1.7699614763259888, |
|
"learning_rate": 6.0101999999999995e-05, |
|
"loss": 0.3636, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.22137404580152673, |
|
"grad_norm": 1.0457572937011719, |
|
"learning_rate": 5.95747894736842e-05, |
|
"loss": 0.2371, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.22391857506361323, |
|
"grad_norm": 1.2649692296981812, |
|
"learning_rate": 5.904757894736841e-05, |
|
"loss": 0.2149, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.22646310432569974, |
|
"grad_norm": 0.8703845143318176, |
|
"learning_rate": 5.852036842105263e-05, |
|
"loss": 0.1582, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.22900763358778625, |
|
"grad_norm": 1.6556470394134521, |
|
"learning_rate": 5.799315789473684e-05, |
|
"loss": 0.2751, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23155216284987276, |
|
"grad_norm": 1.0715538263320923, |
|
"learning_rate": 5.746594736842105e-05, |
|
"loss": 0.2189, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2340966921119593, |
|
"grad_norm": 1.5337820053100586, |
|
"learning_rate": 5.693873684210526e-05, |
|
"loss": 0.3324, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2366412213740458, |
|
"grad_norm": 0.6989188194274902, |
|
"learning_rate": 5.641152631578947e-05, |
|
"loss": 0.0784, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.23918575063613232, |
|
"grad_norm": 1.108068585395813, |
|
"learning_rate": 5.588431578947368e-05, |
|
"loss": 0.1732, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.24173027989821882, |
|
"grad_norm": 0.7206950783729553, |
|
"learning_rate": 5.5357105263157896e-05, |
|
"loss": 0.08, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.24427480916030533, |
|
"grad_norm": 1.3309029340744019, |
|
"learning_rate": 5.482989473684211e-05, |
|
"loss": 0.1309, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.24681933842239187, |
|
"grad_norm": 1.4102177619934082, |
|
"learning_rate": 5.430268421052632e-05, |
|
"loss": 0.1174, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.24936386768447838, |
|
"grad_norm": 0.6907632350921631, |
|
"learning_rate": 5.377547368421053e-05, |
|
"loss": 0.0753, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.25190839694656486, |
|
"grad_norm": 0.5914320945739746, |
|
"learning_rate": 5.3248263157894736e-05, |
|
"loss": 0.0487, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2544529262086514, |
|
"grad_norm": 0.410552054643631, |
|
"learning_rate": 5.2721052631578944e-05, |
|
"loss": 0.028, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2544529262086514, |
|
"eval_loss": 0.1450011283159256, |
|
"eval_runtime": 10.1692, |
|
"eval_samples_per_second": 16.324, |
|
"eval_steps_per_second": 4.13, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25699745547073793, |
|
"grad_norm": 0.9659016728401184, |
|
"learning_rate": 5.219384210526315e-05, |
|
"loss": 0.2955, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2595419847328244, |
|
"grad_norm": 0.5893524885177612, |
|
"learning_rate": 5.1666631578947374e-05, |
|
"loss": 0.158, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.26208651399491095, |
|
"grad_norm": 0.561215877532959, |
|
"learning_rate": 5.113942105263158e-05, |
|
"loss": 0.1543, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.26463104325699743, |
|
"grad_norm": 0.5634675621986389, |
|
"learning_rate": 5.061221052631579e-05, |
|
"loss": 0.1578, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.26717557251908397, |
|
"grad_norm": 0.7371407747268677, |
|
"learning_rate": 5.0085e-05, |
|
"loss": 0.1396, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2697201017811705, |
|
"grad_norm": 0.62689608335495, |
|
"learning_rate": 4.955778947368421e-05, |
|
"loss": 0.1542, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.272264631043257, |
|
"grad_norm": 0.5553103685379028, |
|
"learning_rate": 4.903057894736842e-05, |
|
"loss": 0.0909, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2748091603053435, |
|
"grad_norm": 0.5198187828063965, |
|
"learning_rate": 4.850336842105263e-05, |
|
"loss": 0.0785, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.27735368956743, |
|
"grad_norm": 0.7179524898529053, |
|
"learning_rate": 4.797615789473684e-05, |
|
"loss": 0.1036, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.27989821882951654, |
|
"grad_norm": 0.44508594274520874, |
|
"learning_rate": 4.744894736842105e-05, |
|
"loss": 0.1104, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2824427480916031, |
|
"grad_norm": 0.7336511015892029, |
|
"learning_rate": 4.692173684210526e-05, |
|
"loss": 0.1067, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.28498727735368956, |
|
"grad_norm": 0.9355735778808594, |
|
"learning_rate": 4.639452631578947e-05, |
|
"loss": 0.1675, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2875318066157761, |
|
"grad_norm": 0.46843382716178894, |
|
"learning_rate": 4.586731578947368e-05, |
|
"loss": 0.0723, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2900763358778626, |
|
"grad_norm": 0.5565648078918457, |
|
"learning_rate": 4.5340105263157894e-05, |
|
"loss": 0.0579, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2926208651399491, |
|
"grad_norm": 0.35323649644851685, |
|
"learning_rate": 4.48128947368421e-05, |
|
"loss": 0.0532, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2951653944020356, |
|
"grad_norm": 0.41509339213371277, |
|
"learning_rate": 4.428568421052632e-05, |
|
"loss": 0.0788, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.29770992366412213, |
|
"grad_norm": 0.4781738817691803, |
|
"learning_rate": 4.3758473684210525e-05, |
|
"loss": 0.0939, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.30025445292620867, |
|
"grad_norm": 0.5751485824584961, |
|
"learning_rate": 4.323126315789474e-05, |
|
"loss": 0.0883, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.30279898218829515, |
|
"grad_norm": 0.2596683204174042, |
|
"learning_rate": 4.270405263157895e-05, |
|
"loss": 0.0365, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3053435114503817, |
|
"grad_norm": 0.572528600692749, |
|
"learning_rate": 4.217684210526316e-05, |
|
"loss": 0.0871, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.30788804071246817, |
|
"grad_norm": 0.5119253396987915, |
|
"learning_rate": 4.164963157894737e-05, |
|
"loss": 0.0973, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.3104325699745547, |
|
"grad_norm": 0.5054477453231812, |
|
"learning_rate": 4.112242105263158e-05, |
|
"loss": 0.0494, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.31297709923664124, |
|
"grad_norm": 0.3897090256214142, |
|
"learning_rate": 4.059521052631579e-05, |
|
"loss": 0.0333, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3155216284987277, |
|
"grad_norm": 0.2573760747909546, |
|
"learning_rate": 4.0068e-05, |
|
"loss": 0.0229, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.31806615776081426, |
|
"grad_norm": 0.28332197666168213, |
|
"learning_rate": 3.954078947368421e-05, |
|
"loss": 0.0257, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.32061068702290074, |
|
"grad_norm": 0.446418434381485, |
|
"learning_rate": 3.901357894736842e-05, |
|
"loss": 0.0825, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3231552162849873, |
|
"grad_norm": 0.29756420850753784, |
|
"learning_rate": 3.848636842105263e-05, |
|
"loss": 0.0242, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.3256997455470738, |
|
"grad_norm": 0.5935866236686707, |
|
"learning_rate": 3.795915789473684e-05, |
|
"loss": 0.0925, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3282442748091603, |
|
"grad_norm": 0.2986157536506653, |
|
"learning_rate": 3.743194736842105e-05, |
|
"loss": 0.0154, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.33078880407124683, |
|
"grad_norm": 0.03564433753490448, |
|
"learning_rate": 3.690473684210526e-05, |
|
"loss": 0.0024, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.021472949534654617, |
|
"learning_rate": 3.6377526315789475e-05, |
|
"loss": 0.0011, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.33587786259541985, |
|
"grad_norm": 1.2343424558639526, |
|
"learning_rate": 3.585031578947368e-05, |
|
"loss": 0.3226, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3384223918575064, |
|
"grad_norm": 1.3323383331298828, |
|
"learning_rate": 3.532310526315789e-05, |
|
"loss": 0.2967, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.34096692111959287, |
|
"grad_norm": 1.58578360080719, |
|
"learning_rate": 3.4795894736842106e-05, |
|
"loss": 0.2687, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3435114503816794, |
|
"grad_norm": 1.3783105611801147, |
|
"learning_rate": 3.4268684210526314e-05, |
|
"loss": 0.2798, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.3460559796437659, |
|
"grad_norm": 1.470922827720642, |
|
"learning_rate": 3.374147368421052e-05, |
|
"loss": 0.3177, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.3486005089058524, |
|
"grad_norm": 1.449453592300415, |
|
"learning_rate": 3.321426315789473e-05, |
|
"loss": 0.1875, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.3511450381679389, |
|
"grad_norm": 1.273271083831787, |
|
"learning_rate": 3.2687052631578946e-05, |
|
"loss": 0.2517, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.35368956743002544, |
|
"grad_norm": 1.2989132404327393, |
|
"learning_rate": 3.2159842105263154e-05, |
|
"loss": 0.1741, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.356234096692112, |
|
"grad_norm": 1.1349838972091675, |
|
"learning_rate": 3.163263157894737e-05, |
|
"loss": 0.2073, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.35877862595419846, |
|
"grad_norm": 1.2873899936676025, |
|
"learning_rate": 3.110542105263158e-05, |
|
"loss": 0.1692, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.361323155216285, |
|
"grad_norm": 1.297892689704895, |
|
"learning_rate": 3.057821052631579e-05, |
|
"loss": 0.1529, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3638676844783715, |
|
"grad_norm": 1.0262969732284546, |
|
"learning_rate": 3.0050999999999997e-05, |
|
"loss": 0.1269, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.366412213740458, |
|
"grad_norm": 1.489499807357788, |
|
"learning_rate": 2.9523789473684206e-05, |
|
"loss": 0.2182, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.36895674300254455, |
|
"grad_norm": 2.6656413078308105, |
|
"learning_rate": 2.899657894736842e-05, |
|
"loss": 0.1811, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.37150127226463103, |
|
"grad_norm": 1.5800155401229858, |
|
"learning_rate": 2.846936842105263e-05, |
|
"loss": 0.1364, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.37404580152671757, |
|
"grad_norm": 0.6563022136688232, |
|
"learning_rate": 2.794215789473684e-05, |
|
"loss": 0.0965, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.37659033078880405, |
|
"grad_norm": 1.1012194156646729, |
|
"learning_rate": 2.7414947368421056e-05, |
|
"loss": 0.097, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3791348600508906, |
|
"grad_norm": 1.3474540710449219, |
|
"learning_rate": 2.6887736842105264e-05, |
|
"loss": 0.1278, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3816793893129771, |
|
"grad_norm": 1.2162439823150635, |
|
"learning_rate": 2.6360526315789472e-05, |
|
"loss": 0.1464, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3816793893129771, |
|
"eval_loss": 0.1360974758863449, |
|
"eval_runtime": 10.1762, |
|
"eval_samples_per_second": 16.313, |
|
"eval_steps_per_second": 4.127, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3842239185750636, |
|
"grad_norm": 0.6443613767623901, |
|
"learning_rate": 2.5833315789473687e-05, |
|
"loss": 0.2032, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.38676844783715014, |
|
"grad_norm": 0.7321302890777588, |
|
"learning_rate": 2.5306105263157895e-05, |
|
"loss": 0.1961, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3893129770992366, |
|
"grad_norm": 0.7189200520515442, |
|
"learning_rate": 2.4778894736842104e-05, |
|
"loss": 0.1966, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.39185750636132316, |
|
"grad_norm": 0.6960674524307251, |
|
"learning_rate": 2.4251684210526315e-05, |
|
"loss": 0.2118, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3944020356234097, |
|
"grad_norm": 0.7753060460090637, |
|
"learning_rate": 2.3724473684210524e-05, |
|
"loss": 0.2101, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3969465648854962, |
|
"grad_norm": 0.5562716126441956, |
|
"learning_rate": 2.3197263157894735e-05, |
|
"loss": 0.1283, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3994910941475827, |
|
"grad_norm": 0.37449532747268677, |
|
"learning_rate": 2.2670052631578947e-05, |
|
"loss": 0.1007, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4020356234096692, |
|
"grad_norm": 0.5224964022636414, |
|
"learning_rate": 2.214284210526316e-05, |
|
"loss": 0.1294, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.40458015267175573, |
|
"grad_norm": 0.3669807016849518, |
|
"learning_rate": 2.161563157894737e-05, |
|
"loss": 0.0846, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4071246819338422, |
|
"grad_norm": 0.5417796969413757, |
|
"learning_rate": 2.108842105263158e-05, |
|
"loss": 0.12, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.40966921119592875, |
|
"grad_norm": 0.506889820098877, |
|
"learning_rate": 2.056121052631579e-05, |
|
"loss": 0.0786, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.4122137404580153, |
|
"grad_norm": 0.49759092926979065, |
|
"learning_rate": 2.0034e-05, |
|
"loss": 0.093, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.41475826972010177, |
|
"grad_norm": 0.29034364223480225, |
|
"learning_rate": 1.950678947368421e-05, |
|
"loss": 0.0639, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4173027989821883, |
|
"grad_norm": 0.6314502358436584, |
|
"learning_rate": 1.897957894736842e-05, |
|
"loss": 0.1211, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4198473282442748, |
|
"grad_norm": 0.23841609060764313, |
|
"learning_rate": 1.845236842105263e-05, |
|
"loss": 0.0461, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.4223918575063613, |
|
"grad_norm": 0.35829946398735046, |
|
"learning_rate": 1.792515789473684e-05, |
|
"loss": 0.0773, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.42493638676844786, |
|
"grad_norm": 0.43481776118278503, |
|
"learning_rate": 1.7397947368421053e-05, |
|
"loss": 0.0921, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.42748091603053434, |
|
"grad_norm": 0.35226166248321533, |
|
"learning_rate": 1.687073684210526e-05, |
|
"loss": 0.0594, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4300254452926209, |
|
"grad_norm": 0.5202860832214355, |
|
"learning_rate": 1.6343526315789473e-05, |
|
"loss": 0.0986, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.43256997455470736, |
|
"grad_norm": 0.23757660388946533, |
|
"learning_rate": 1.5816315789473685e-05, |
|
"loss": 0.032, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4351145038167939, |
|
"grad_norm": 0.27789339423179626, |
|
"learning_rate": 1.5289105263157896e-05, |
|
"loss": 0.0438, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.43765903307888043, |
|
"grad_norm": 0.41914746165275574, |
|
"learning_rate": 1.4761894736842103e-05, |
|
"loss": 0.0462, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4402035623409669, |
|
"grad_norm": 0.2738276720046997, |
|
"learning_rate": 1.4234684210526314e-05, |
|
"loss": 0.0293, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.44274809160305345, |
|
"grad_norm": 0.4610910713672638, |
|
"learning_rate": 1.3707473684210528e-05, |
|
"loss": 0.0522, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.44529262086513993, |
|
"grad_norm": 0.18284475803375244, |
|
"learning_rate": 1.3180263157894736e-05, |
|
"loss": 0.0267, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.44783715012722647, |
|
"grad_norm": 0.2613477110862732, |
|
"learning_rate": 1.2653052631578948e-05, |
|
"loss": 0.0191, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.45038167938931295, |
|
"grad_norm": 0.21745839715003967, |
|
"learning_rate": 1.2125842105263158e-05, |
|
"loss": 0.0267, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4529262086513995, |
|
"grad_norm": 0.6353086829185486, |
|
"learning_rate": 1.1598631578947368e-05, |
|
"loss": 0.0828, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.455470737913486, |
|
"grad_norm": 1.0890721082687378, |
|
"learning_rate": 1.107142105263158e-05, |
|
"loss": 0.3399, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4580152671755725, |
|
"grad_norm": 0.9117040038108826, |
|
"learning_rate": 1.054421052631579e-05, |
|
"loss": 0.2736, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.46055979643765904, |
|
"grad_norm": 1.2923952341079712, |
|
"learning_rate": 1.0017e-05, |
|
"loss": 0.267, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.4631043256997455, |
|
"grad_norm": 0.9573558568954468, |
|
"learning_rate": 9.48978947368421e-06, |
|
"loss": 0.2016, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.46564885496183206, |
|
"grad_norm": 0.787228524684906, |
|
"learning_rate": 8.96257894736842e-06, |
|
"loss": 0.1423, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4681933842239186, |
|
"grad_norm": 1.1528656482696533, |
|
"learning_rate": 8.43536842105263e-06, |
|
"loss": 0.2141, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.4707379134860051, |
|
"grad_norm": 2.192894220352173, |
|
"learning_rate": 7.908157894736842e-06, |
|
"loss": 0.3117, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4732824427480916, |
|
"grad_norm": 1.0140397548675537, |
|
"learning_rate": 7.380947368421051e-06, |
|
"loss": 0.1539, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.4758269720101781, |
|
"grad_norm": 1.4267032146453857, |
|
"learning_rate": 6.853736842105264e-06, |
|
"loss": 0.2976, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.47837150127226463, |
|
"grad_norm": 1.1065343618392944, |
|
"learning_rate": 6.326526315789474e-06, |
|
"loss": 0.2073, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.48091603053435117, |
|
"grad_norm": 1.1713448762893677, |
|
"learning_rate": 5.799315789473684e-06, |
|
"loss": 0.1977, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.48346055979643765, |
|
"grad_norm": 0.6917346119880676, |
|
"learning_rate": 5.272105263157895e-06, |
|
"loss": 0.0885, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4860050890585242, |
|
"grad_norm": 1.0129892826080322, |
|
"learning_rate": 4.744894736842105e-06, |
|
"loss": 0.1472, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.48854961832061067, |
|
"grad_norm": 1.470230221748352, |
|
"learning_rate": 4.217684210526315e-06, |
|
"loss": 0.22, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.4910941475826972, |
|
"grad_norm": 0.406305730342865, |
|
"learning_rate": 3.6904736842105257e-06, |
|
"loss": 0.0418, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.49363867684478374, |
|
"grad_norm": 1.7621744871139526, |
|
"learning_rate": 3.163263157894737e-06, |
|
"loss": 0.2434, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4961832061068702, |
|
"grad_norm": 1.2661513090133667, |
|
"learning_rate": 2.6360526315789473e-06, |
|
"loss": 0.1978, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.49872773536895676, |
|
"grad_norm": 1.6431432962417603, |
|
"learning_rate": 2.1088421052631577e-06, |
|
"loss": 0.2335, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5012722646310432, |
|
"grad_norm": 1.3212987184524536, |
|
"learning_rate": 1.5816315789473685e-06, |
|
"loss": 0.0987, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5038167938931297, |
|
"grad_norm": 0.8376440405845642, |
|
"learning_rate": 1.0544210526315788e-06, |
|
"loss": 0.0621, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5063613231552163, |
|
"grad_norm": 0.5887414216995239, |
|
"learning_rate": 5.272105263157894e-07, |
|
"loss": 0.0602, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5089058524173028, |
|
"grad_norm": 0.6141554117202759, |
|
"learning_rate": 0.0, |
|
"loss": 0.0582, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5089058524173028, |
|
"eval_loss": 0.10675784200429916, |
|
"eval_runtime": 10.1526, |
|
"eval_samples_per_second": 16.35, |
|
"eval_steps_per_second": 4.137, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.3676760973312e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|