{
  "best_metric": 0.770029604434967,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.12168410805548795,
  "eval_steps": 25,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006084205402774398,
      "grad_norm": 3.7732460498809814,
      "learning_rate": 2.173913043478261e-06,
      "loss": 3.2093,
      "step": 1
    },
    {
      "epoch": 0.0006084205402774398,
      "eval_loss": 1.9844963550567627,
      "eval_runtime": 0.0679,
      "eval_samples_per_second": 736.566,
      "eval_steps_per_second": 44.194,
      "step": 1
    },
    {
      "epoch": 0.0012168410805548796,
      "grad_norm": 4.146840572357178,
      "learning_rate": 4.347826086956522e-06,
      "loss": 2.2685,
      "step": 2
    },
    {
      "epoch": 0.0018252616208323193,
      "grad_norm": 3.805527448654175,
      "learning_rate": 6.521739130434783e-06,
      "loss": 2.0894,
      "step": 3
    },
    {
      "epoch": 0.002433682161109759,
      "grad_norm": 3.5490667819976807,
      "learning_rate": 8.695652173913044e-06,
      "loss": 1.9026,
      "step": 4
    },
    {
      "epoch": 0.003042102701387199,
      "grad_norm": 3.1814687252044678,
      "learning_rate": 1.0869565217391305e-05,
      "loss": 1.9351,
      "step": 5
    },
    {
      "epoch": 0.0036505232416646385,
      "grad_norm": 3.0306947231292725,
      "learning_rate": 1.3043478260869566e-05,
      "loss": 1.9283,
      "step": 6
    },
    {
      "epoch": 0.004258943781942078,
      "grad_norm": 3.2333295345306396,
      "learning_rate": 1.5217391304347828e-05,
      "loss": 1.9199,
      "step": 7
    },
    {
      "epoch": 0.004867364322219518,
      "grad_norm": 2.786118745803833,
      "learning_rate": 1.739130434782609e-05,
      "loss": 1.9464,
      "step": 8
    },
    {
      "epoch": 0.005475784862496958,
      "grad_norm": 3.1137781143188477,
      "learning_rate": 1.956521739130435e-05,
      "loss": 1.8952,
      "step": 9
    },
    {
      "epoch": 0.006084205402774398,
      "grad_norm": 3.2737996578216553,
      "learning_rate": 2.173913043478261e-05,
      "loss": 1.8745,
      "step": 10
    },
    {
      "epoch": 0.006692625943051838,
      "grad_norm": 3.218707799911499,
      "learning_rate": 2.391304347826087e-05,
      "loss": 1.9421,
      "step": 11
    },
    {
      "epoch": 0.007301046483329277,
      "grad_norm": 3.402679204940796,
      "learning_rate": 2.608695652173913e-05,
      "loss": 2.0349,
      "step": 12
    },
    {
      "epoch": 0.007909467023606717,
      "grad_norm": 2.8429970741271973,
      "learning_rate": 2.826086956521739e-05,
      "loss": 4.0381,
      "step": 13
    },
    {
      "epoch": 0.008517887563884156,
      "grad_norm": 4.92161750793457,
      "learning_rate": 3.0434782608695656e-05,
      "loss": 2.315,
      "step": 14
    },
    {
      "epoch": 0.009126308104161596,
      "grad_norm": 4.043981075286865,
      "learning_rate": 3.260869565217392e-05,
      "loss": 2.028,
      "step": 15
    },
    {
      "epoch": 0.009734728644439037,
      "grad_norm": 3.5905041694641113,
      "learning_rate": 3.478260869565218e-05,
      "loss": 2.034,
      "step": 16
    },
    {
      "epoch": 0.010343149184716476,
      "grad_norm": 3.121873378753662,
      "learning_rate": 3.695652173913043e-05,
      "loss": 1.9274,
      "step": 17
    },
    {
      "epoch": 0.010951569724993915,
      "grad_norm": 3.123685836791992,
      "learning_rate": 3.91304347826087e-05,
      "loss": 1.9217,
      "step": 18
    },
    {
      "epoch": 0.011559990265271356,
      "grad_norm": 2.93074107170105,
      "learning_rate": 4.130434782608696e-05,
      "loss": 1.6898,
      "step": 19
    },
    {
      "epoch": 0.012168410805548795,
      "grad_norm": 3.0180726051330566,
      "learning_rate": 4.347826086956522e-05,
      "loss": 1.6951,
      "step": 20
    },
    {
      "epoch": 0.012776831345826235,
      "grad_norm": 2.9282548427581787,
      "learning_rate": 4.565217391304348e-05,
      "loss": 1.7592,
      "step": 21
    },
    {
      "epoch": 0.013385251886103676,
      "grad_norm": 2.5757176876068115,
      "learning_rate": 4.782608695652174e-05,
      "loss": 1.753,
      "step": 22
    },
    {
      "epoch": 0.013993672426381115,
      "grad_norm": 2.7244386672973633,
      "learning_rate": 5e-05,
      "loss": 1.7478,
      "step": 23
    },
    {
      "epoch": 0.014602092966658554,
      "grad_norm": 2.9556708335876465,
      "learning_rate": 5.217391304347826e-05,
      "loss": 1.7354,
      "step": 24
    },
    {
      "epoch": 0.015210513506935993,
      "grad_norm": 2.826549530029297,
      "learning_rate": 5.4347826086956524e-05,
      "loss": 1.8359,
      "step": 25
    },
    {
      "epoch": 0.015210513506935993,
      "eval_loss": 1.7007235288619995,
      "eval_runtime": 0.0651,
      "eval_samples_per_second": 767.794,
      "eval_steps_per_second": 46.068,
      "step": 25
    },
    {
      "epoch": 0.015818934047213434,
      "grad_norm": 4.59504508972168,
      "learning_rate": 5.652173913043478e-05,
      "loss": 2.9356,
      "step": 26
    },
    {
      "epoch": 0.016427354587490874,
      "grad_norm": 4.431028842926025,
      "learning_rate": 5.869565217391305e-05,
      "loss": 1.7887,
      "step": 27
    },
    {
      "epoch": 0.017035775127768313,
      "grad_norm": 3.4355099201202393,
      "learning_rate": 6.086956521739131e-05,
      "loss": 1.6548,
      "step": 28
    },
    {
      "epoch": 0.017644195668045752,
      "grad_norm": 2.5302505493164062,
      "learning_rate": 6.304347826086957e-05,
      "loss": 1.6289,
      "step": 29
    },
    {
      "epoch": 0.01825261620832319,
      "grad_norm": 1.6945921182632446,
      "learning_rate": 6.521739130434783e-05,
      "loss": 1.4231,
      "step": 30
    },
    {
      "epoch": 0.018861036748600634,
      "grad_norm": 1.6188069581985474,
      "learning_rate": 6.73913043478261e-05,
      "loss": 1.4109,
      "step": 31
    },
    {
      "epoch": 0.019469457288878073,
      "grad_norm": 1.2194888591766357,
      "learning_rate": 6.956521739130436e-05,
      "loss": 1.4766,
      "step": 32
    },
    {
      "epoch": 0.020077877829155513,
      "grad_norm": 1.0816359519958496,
      "learning_rate": 7.17391304347826e-05,
      "loss": 1.5166,
      "step": 33
    },
    {
      "epoch": 0.020686298369432952,
      "grad_norm": 1.0028973817825317,
      "learning_rate": 7.391304347826086e-05,
      "loss": 1.5929,
      "step": 34
    },
    {
      "epoch": 0.02129471890971039,
      "grad_norm": 0.9615126848220825,
      "learning_rate": 7.608695652173914e-05,
      "loss": 1.3961,
      "step": 35
    },
    {
      "epoch": 0.02190313944998783,
      "grad_norm": 1.0823183059692383,
      "learning_rate": 7.82608695652174e-05,
      "loss": 1.4874,
      "step": 36
    },
    {
      "epoch": 0.022511559990265273,
      "grad_norm": 1.1442559957504272,
      "learning_rate": 8.043478260869566e-05,
      "loss": 1.5171,
      "step": 37
    },
    {
      "epoch": 0.023119980530542712,
      "grad_norm": 3.612239360809326,
      "learning_rate": 8.260869565217392e-05,
      "loss": 3.3537,
      "step": 38
    },
    {
      "epoch": 0.02372840107082015,
      "grad_norm": 3.0585246086120605,
      "learning_rate": 8.478260869565218e-05,
      "loss": 1.571,
      "step": 39
    },
    {
      "epoch": 0.02433682161109759,
      "grad_norm": 2.00331711769104,
      "learning_rate": 8.695652173913044e-05,
      "loss": 1.5134,
      "step": 40
    },
    {
      "epoch": 0.02494524215137503,
      "grad_norm": 1.4210355281829834,
      "learning_rate": 8.91304347826087e-05,
      "loss": 1.2967,
      "step": 41
    },
    {
      "epoch": 0.02555366269165247,
      "grad_norm": 1.2288633584976196,
      "learning_rate": 9.130434782608696e-05,
      "loss": 1.4348,
      "step": 42
    },
    {
      "epoch": 0.02616208323192991,
      "grad_norm": 1.157564640045166,
      "learning_rate": 9.347826086956522e-05,
      "loss": 1.3196,
      "step": 43
    },
    {
      "epoch": 0.02677050377220735,
      "grad_norm": 1.2825106382369995,
      "learning_rate": 9.565217391304348e-05,
      "loss": 1.349,
      "step": 44
    },
    {
      "epoch": 0.02737892431248479,
      "grad_norm": 1.2434223890304565,
      "learning_rate": 9.782608695652174e-05,
      "loss": 1.3314,
      "step": 45
    },
    {
      "epoch": 0.02798734485276223,
      "grad_norm": 1.429315447807312,
      "learning_rate": 0.0001,
      "loss": 1.3863,
      "step": 46
    },
    {
      "epoch": 0.02859576539303967,
      "grad_norm": 0.9383629560470581,
      "learning_rate": 9.999972027814122e-05,
      "loss": 1.3054,
      "step": 47
    },
    {
      "epoch": 0.029204185933317108,
      "grad_norm": 0.7935044765472412,
      "learning_rate": 9.999888111604245e-05,
      "loss": 1.298,
      "step": 48
    },
    {
      "epoch": 0.029812606473594547,
      "grad_norm": 0.859842836856842,
      "learning_rate": 9.999748252413618e-05,
      "loss": 1.3098,
      "step": 49
    },
    {
      "epoch": 0.030421027013871987,
      "grad_norm": 1.2138994932174683,
      "learning_rate": 9.999552451980984e-05,
      "loss": 1.3987,
      "step": 50
    },
    {
      "epoch": 0.030421027013871987,
      "eval_loss": 1.2663160562515259,
      "eval_runtime": 0.064,
      "eval_samples_per_second": 781.103,
      "eval_steps_per_second": 46.866,
      "step": 50
    },
    {
      "epoch": 0.03102944755414943,
      "grad_norm": 3.1896111965179443,
      "learning_rate": 9.999300712740551e-05,
      "loss": 2.1177,
      "step": 51
    },
    {
      "epoch": 0.03163786809442687,
      "grad_norm": 1.8031097650527954,
      "learning_rate": 9.998993037821958e-05,
      "loss": 1.281,
      "step": 52
    },
    {
      "epoch": 0.03224628863470431,
      "grad_norm": 1.3828022480010986,
      "learning_rate": 9.998629431050251e-05,
      "loss": 1.286,
      "step": 53
    },
    {
      "epoch": 0.03285470917498175,
      "grad_norm": 1.2959613800048828,
      "learning_rate": 9.998209896945815e-05,
      "loss": 1.3098,
      "step": 54
    },
    {
      "epoch": 0.033463129715259186,
      "grad_norm": 0.9320961236953735,
      "learning_rate": 9.997734440724333e-05,
      "loss": 1.2237,
      "step": 55
    },
    {
      "epoch": 0.034071550255536626,
      "grad_norm": 0.7311733961105347,
      "learning_rate": 9.997203068296719e-05,
      "loss": 1.1421,
      "step": 56
    },
    {
      "epoch": 0.034679970795814065,
      "grad_norm": 0.7125067710876465,
      "learning_rate": 9.996615786269035e-05,
      "loss": 1.2081,
      "step": 57
    },
    {
      "epoch": 0.035288391336091504,
      "grad_norm": 0.6830151677131653,
      "learning_rate": 9.995972601942424e-05,
      "loss": 1.2123,
      "step": 58
    },
    {
      "epoch": 0.03589681187636894,
      "grad_norm": 0.6575762629508972,
      "learning_rate": 9.995273523313003e-05,
      "loss": 1.1829,
      "step": 59
    },
    {
      "epoch": 0.03650523241664638,
      "grad_norm": 0.8578693866729736,
      "learning_rate": 9.994518559071775e-05,
      "loss": 1.1763,
      "step": 60
    },
    {
      "epoch": 0.03711365295692383,
      "grad_norm": 0.8111445307731628,
      "learning_rate": 9.993707718604522e-05,
      "loss": 1.1614,
      "step": 61
    },
    {
      "epoch": 0.03772207349720127,
      "grad_norm": 0.9371657967567444,
      "learning_rate": 9.992841011991676e-05,
      "loss": 1.2245,
      "step": 62
    },
    {
      "epoch": 0.03833049403747871,
      "grad_norm": 2.6930816173553467,
      "learning_rate": 9.991918450008206e-05,
      "loss": 2.285,
      "step": 63
    },
    {
      "epoch": 0.03893891457775615,
      "grad_norm": 1.746653437614441,
      "learning_rate": 9.99094004412348e-05,
      "loss": 1.301,
      "step": 64
    },
    {
      "epoch": 0.039547335118033586,
      "grad_norm": 1.1019763946533203,
      "learning_rate": 9.98990580650112e-05,
      "loss": 1.1406,
      "step": 65
    },
    {
      "epoch": 0.040155755658311025,
      "grad_norm": 1.0221314430236816,
      "learning_rate": 9.988815749998852e-05,
      "loss": 1.0963,
      "step": 66
    },
    {
      "epoch": 0.040764176198588464,
      "grad_norm": 0.7163240313529968,
      "learning_rate": 9.987669888168351e-05,
      "loss": 1.0621,
      "step": 67
    },
    {
      "epoch": 0.041372596738865904,
      "grad_norm": 1.0129799842834473,
      "learning_rate": 9.986468235255065e-05,
      "loss": 1.0655,
      "step": 68
    },
    {
      "epoch": 0.04198101727914334,
      "grad_norm": 1.0162873268127441,
      "learning_rate": 9.985210806198042e-05,
      "loss": 1.1478,
      "step": 69
    },
    {
      "epoch": 0.04258943781942078,
      "grad_norm": 0.6798834204673767,
      "learning_rate": 9.983897616629744e-05,
      "loss": 1.0584,
      "step": 70
    },
    {
      "epoch": 0.04319785835969822,
      "grad_norm": 0.82222980260849,
      "learning_rate": 9.982528682875851e-05,
      "loss": 1.0776,
      "step": 71
    },
    {
      "epoch": 0.04380627889997566,
      "grad_norm": 0.8663562536239624,
      "learning_rate": 9.981104021955063e-05,
      "loss": 1.073,
      "step": 72
    },
    {
      "epoch": 0.0444146994402531,
      "grad_norm": 1.122699499130249,
      "learning_rate": 9.979623651578881e-05,
      "loss": 1.0509,
      "step": 73
    },
    {
      "epoch": 0.045023119980530546,
      "grad_norm": 1.1595953702926636,
      "learning_rate": 9.978087590151393e-05,
      "loss": 1.1094,
      "step": 74
    },
    {
      "epoch": 0.045631540520807985,
      "grad_norm": 1.62815523147583,
      "learning_rate": 9.976495856769038e-05,
      "loss": 1.1211,
      "step": 75
    },
    {
      "epoch": 0.045631540520807985,
      "eval_loss": 1.0243722200393677,
      "eval_runtime": 0.0649,
      "eval_samples_per_second": 770.624,
      "eval_steps_per_second": 46.237,
      "step": 75
    },
    {
      "epoch": 0.046239961061085424,
      "grad_norm": 2.726969003677368,
      "learning_rate": 9.97484847122038e-05,
      "loss": 2.205,
      "step": 76
    },
    {
      "epoch": 0.046848381601362864,
      "grad_norm": 2.2901804447174072,
      "learning_rate": 9.973145453985854e-05,
      "loss": 1.1102,
      "step": 77
    },
    {
      "epoch": 0.0474568021416403,
      "grad_norm": 2.2033584117889404,
      "learning_rate": 9.971386826237507e-05,
      "loss": 1.0353,
      "step": 78
    },
    {
      "epoch": 0.04806522268191774,
      "grad_norm": 2.2385048866271973,
      "learning_rate": 9.969572609838744e-05,
      "loss": 1.0281,
      "step": 79
    },
    {
      "epoch": 0.04867364322219518,
      "grad_norm": 1.700537919998169,
      "learning_rate": 9.967702827344057e-05,
      "loss": 1.0513,
      "step": 80
    },
    {
      "epoch": 0.04928206376247262,
      "grad_norm": 1.1010593175888062,
      "learning_rate": 9.965777501998734e-05,
      "loss": 1.0073,
      "step": 81
    },
    {
      "epoch": 0.04989048430275006,
      "grad_norm": 0.7716583609580994,
      "learning_rate": 9.963796657738579e-05,
      "loss": 1.0205,
      "step": 82
    },
    {
      "epoch": 0.0504989048430275,
      "grad_norm": 0.7150983810424805,
      "learning_rate": 9.961760319189612e-05,
      "loss": 0.924,
      "step": 83
    },
    {
      "epoch": 0.05110732538330494,
      "grad_norm": 0.9970099925994873,
      "learning_rate": 9.959668511667762e-05,
      "loss": 1.0786,
      "step": 84
    },
    {
      "epoch": 0.05171574592358238,
      "grad_norm": 1.10440194606781,
      "learning_rate": 9.957521261178554e-05,
      "loss": 1.0916,
      "step": 85
    },
    {
      "epoch": 0.05232416646385982,
      "grad_norm": 1.2334967851638794,
      "learning_rate": 9.95531859441678e-05,
      "loss": 1.0281,
      "step": 86
    },
    {
      "epoch": 0.05293258700413726,
      "grad_norm": 1.5450046062469482,
      "learning_rate": 9.953060538766178e-05,
      "loss": 0.9459,
      "step": 87
    },
    {
      "epoch": 0.0535410075444147,
      "grad_norm": 2.800281524658203,
      "learning_rate": 9.950747122299084e-05,
      "loss": 2.3543,
      "step": 88
    },
    {
      "epoch": 0.05414942808469214,
      "grad_norm": 1.7520790100097656,
      "learning_rate": 9.948378373776079e-05,
      "loss": 1.0488,
      "step": 89
    },
    {
      "epoch": 0.05475784862496958,
      "grad_norm": 1.148888111114502,
      "learning_rate": 9.945954322645642e-05,
      "loss": 0.8649,
      "step": 90
    },
    {
      "epoch": 0.05536626916524702,
      "grad_norm": 0.883175253868103,
      "learning_rate": 9.943474999043775e-05,
      "loss": 0.9496,
      "step": 91
    },
    {
      "epoch": 0.05597468970552446,
      "grad_norm": 1.041022539138794,
      "learning_rate": 9.940940433793637e-05,
      "loss": 0.9575,
      "step": 92
    },
    {
      "epoch": 0.0565831102458019,
      "grad_norm": 0.8950981497764587,
      "learning_rate": 9.938350658405152e-05,
      "loss": 0.8507,
      "step": 93
    },
    {
      "epoch": 0.05719153078607934,
      "grad_norm": 0.8415868282318115,
      "learning_rate": 9.935705705074617e-05,
      "loss": 0.9419,
      "step": 94
    },
    {
      "epoch": 0.05779995132635678,
      "grad_norm": 0.8708381652832031,
      "learning_rate": 9.933005606684317e-05,
      "loss": 1.0413,
      "step": 95
    },
    {
      "epoch": 0.058408371866634216,
      "grad_norm": 1.0290625095367432,
      "learning_rate": 9.930250396802094e-05,
      "loss": 0.9954,
      "step": 96
    },
    {
      "epoch": 0.059016792406911656,
      "grad_norm": 0.8220187425613403,
      "learning_rate": 9.927440109680946e-05,
      "loss": 0.987,
      "step": 97
    },
    {
      "epoch": 0.059625212947189095,
      "grad_norm": 0.8632601499557495,
      "learning_rate": 9.924574780258596e-05,
      "loss": 0.9248,
      "step": 98
    },
    {
      "epoch": 0.060233633487466534,
      "grad_norm": 1.202643632888794,
      "learning_rate": 9.921654444157054e-05,
      "loss": 0.9264,
      "step": 99
    },
    {
      "epoch": 0.06084205402774397,
      "grad_norm": 1.6064764261245728,
      "learning_rate": 9.91867913768218e-05,
      "loss": 1.032,
      "step": 100
    },
    {
      "epoch": 0.06084205402774397,
      "eval_loss": 0.901063084602356,
      "eval_runtime": 0.0644,
      "eval_samples_per_second": 776.142,
      "eval_steps_per_second": 46.569,
      "step": 100
    },
    {
      "epoch": 0.06145047456802142,
      "grad_norm": 2.1135504245758057,
      "learning_rate": 9.915648897823232e-05,
      "loss": 1.8061,
      "step": 101
    },
    {
      "epoch": 0.06205889510829886,
      "grad_norm": 1.5115822553634644,
      "learning_rate": 9.912563762252399e-05,
      "loss": 0.9178,
      "step": 102
    },
    {
      "epoch": 0.06266731564857629,
      "grad_norm": 1.3775817155838013,
      "learning_rate": 9.909423769324343e-05,
      "loss": 0.8464,
      "step": 103
    },
    {
      "epoch": 0.06327573618885374,
      "grad_norm": 1.1930534839630127,
      "learning_rate": 9.906228958075717e-05,
      "loss": 0.9817,
      "step": 104
    },
    {
      "epoch": 0.06388415672913117,
      "grad_norm": 1.158815622329712,
      "learning_rate": 9.902979368224675e-05,
      "loss": 0.9052,
      "step": 105
    },
    {
      "epoch": 0.06449257726940862,
      "grad_norm": 0.8110672831535339,
      "learning_rate": 9.899675040170388e-05,
      "loss": 0.8699,
      "step": 106
    },
    {
      "epoch": 0.06510099780968605,
      "grad_norm": 0.6599923372268677,
      "learning_rate": 9.896316014992537e-05,
      "loss": 0.8528,
      "step": 107
    },
    {
      "epoch": 0.0657094183499635,
      "grad_norm": 0.7083361744880676,
      "learning_rate": 9.892902334450798e-05,
      "loss": 0.9154,
      "step": 108
    },
    {
      "epoch": 0.06631783889024094,
      "grad_norm": 1.0131536722183228,
      "learning_rate": 9.889434040984332e-05,
      "loss": 0.9124,
      "step": 109
    },
    {
      "epoch": 0.06692625943051837,
      "grad_norm": 1.1790049076080322,
      "learning_rate": 9.885911177711248e-05,
      "loss": 0.8958,
      "step": 110
    },
    {
      "epoch": 0.06753467997079582,
      "grad_norm": 1.2120789289474487,
      "learning_rate": 9.882333788428073e-05,
      "loss": 0.8547,
      "step": 111
    },
    {
      "epoch": 0.06814310051107325,
      "grad_norm": 1.194826364517212,
      "learning_rate": 9.878701917609207e-05,
      "loss": 0.9218,
      "step": 112
    },
    {
      "epoch": 0.0687515210513507,
      "grad_norm": 2.966965913772583,
      "learning_rate": 9.875015610406369e-05,
      "loss": 2.5956,
      "step": 113
    },
    {
      "epoch": 0.06935994159162813,
      "grad_norm": 1.3608638048171997,
      "learning_rate": 9.871274912648033e-05,
      "loss": 0.9144,
      "step": 114
    },
    {
      "epoch": 0.06996836213190558,
      "grad_norm": 1.4627468585968018,
      "learning_rate": 9.867479870838862e-05,
      "loss": 0.9815,
      "step": 115
    },
    {
      "epoch": 0.07057678267218301,
      "grad_norm": 1.1806144714355469,
      "learning_rate": 9.863630532159132e-05,
      "loss": 0.7937,
      "step": 116
    },
    {
      "epoch": 0.07118520321246045,
      "grad_norm": 1.0851095914840698,
      "learning_rate": 9.859726944464137e-05,
      "loss": 0.8003,
      "step": 117
    },
    {
      "epoch": 0.07179362375273789,
      "grad_norm": 1.126287579536438,
      "learning_rate": 9.855769156283603e-05,
      "loss": 0.9241,
      "step": 118
    },
    {
      "epoch": 0.07240204429301533,
      "grad_norm": 0.6791806817054749,
      "learning_rate": 9.851757216821079e-05,
      "loss": 0.9201,
      "step": 119
    },
    {
      "epoch": 0.07301046483329277,
      "grad_norm": 0.7268375158309937,
      "learning_rate": 9.847691175953328e-05,
      "loss": 0.8126,
      "step": 120
    },
    {
      "epoch": 0.07361888537357021,
      "grad_norm": 0.8680149912834167,
      "learning_rate": 9.843571084229707e-05,
      "loss": 0.8912,
      "step": 121
    },
    {
      "epoch": 0.07422730591384766,
      "grad_norm": 1.042919397354126,
      "learning_rate": 9.839396992871535e-05,
      "loss": 0.9064,
      "step": 122
    },
    {
      "epoch": 0.07483572645412509,
      "grad_norm": 1.2081691026687622,
      "learning_rate": 9.835168953771461e-05,
      "loss": 0.9376,
      "step": 123
    },
    {
      "epoch": 0.07544414699440254,
      "grad_norm": 1.1437644958496094,
      "learning_rate": 9.830887019492818e-05,
      "loss": 0.9418,
      "step": 124
    },
    {
      "epoch": 0.07605256753467997,
      "grad_norm": 1.2367491722106934,
      "learning_rate": 9.826551243268966e-05,
      "loss": 1.015,
      "step": 125
    },
    {
      "epoch": 0.07605256753467997,
      "eval_loss": 0.8607701063156128,
      "eval_runtime": 0.0636,
      "eval_samples_per_second": 785.848,
      "eval_steps_per_second": 47.151,
      "step": 125
    },
    {
      "epoch": 0.07666098807495741,
      "grad_norm": 2.4859538078308105,
      "learning_rate": 9.82216167900263e-05,
      "loss": 1.8291,
      "step": 126
    },
    {
      "epoch": 0.07726940861523485,
      "grad_norm": 1.4214491844177246,
      "learning_rate": 9.81771838126524e-05,
      "loss": 1.0467,
      "step": 127
    },
    {
      "epoch": 0.0778778291555123,
      "grad_norm": 1.3560972213745117,
      "learning_rate": 9.813221405296237e-05,
      "loss": 1.0159,
      "step": 128
    },
    {
      "epoch": 0.07848624969578973,
      "grad_norm": 1.501007080078125,
      "learning_rate": 9.8086708070024e-05,
      "loss": 0.8479,
      "step": 129
    },
    {
      "epoch": 0.07909467023606717,
      "grad_norm": 1.5460386276245117,
      "learning_rate": 9.804066642957143e-05,
      "loss": 0.9617,
      "step": 130
    },
    {
      "epoch": 0.0797030907763446,
      "grad_norm": 1.2963954210281372,
      "learning_rate": 9.799408970399813e-05,
      "loss": 0.8346,
      "step": 131
    },
    {
      "epoch": 0.08031151131662205,
      "grad_norm": 0.8653491139411926,
      "learning_rate": 9.79469784723498e-05,
      "loss": 0.8009,
      "step": 132
    },
    {
      "epoch": 0.08091993185689948,
      "grad_norm": 1.0787755250930786,
      "learning_rate": 9.789933332031717e-05,
      "loss": 0.9501,
      "step": 133
    },
    {
      "epoch": 0.08152835239717693,
      "grad_norm": 1.0019638538360596,
      "learning_rate": 9.78511548402287e-05,
      "loss": 0.841,
      "step": 134
    },
    {
      "epoch": 0.08213677293745437,
      "grad_norm": 1.1226329803466797,
      "learning_rate": 9.780244363104323e-05,
      "loss": 0.8492,
      "step": 135
    },
    {
      "epoch": 0.08274519347773181,
      "grad_norm": 1.269909381866455,
      "learning_rate": 9.775320029834254e-05,
      "loss": 0.854,
      "step": 136
    },
    {
      "epoch": 0.08335361401800925,
      "grad_norm": 1.76808762550354,
      "learning_rate": 9.770342545432383e-05,
      "loss": 0.8746,
      "step": 137
    },
    {
      "epoch": 0.08396203455828669,
      "grad_norm": 3.7219080924987793,
      "learning_rate": 9.765311971779204e-05,
      "loss": 2.2368,
      "step": 138
    },
    {
      "epoch": 0.08457045509856413,
      "grad_norm": 1.7717487812042236,
      "learning_rate": 9.760228371415227e-05,
      "loss": 1.0042,
      "step": 139
    },
    {
      "epoch": 0.08517887563884156,
      "grad_norm": 1.4473094940185547,
      "learning_rate": 9.75509180754019e-05,
      "loss": 0.8678,
      "step": 140
    },
    {
      "epoch": 0.08578729617911901,
      "grad_norm": 1.3099236488342285,
      "learning_rate": 9.749902344012279e-05,
      "loss": 0.7265,
      "step": 141
    },
    {
      "epoch": 0.08639571671939644,
      "grad_norm": 1.1434625387191772,
      "learning_rate": 9.744660045347336e-05,
      "loss": 0.7999,
      "step": 142
    },
    {
      "epoch": 0.08700413725967389,
      "grad_norm": 1.1469634771347046,
      "learning_rate": 9.73936497671805e-05,
      "loss": 0.8769,
      "step": 143
    },
    {
      "epoch": 0.08761255779995132,
      "grad_norm": 1.1170365810394287,
      "learning_rate": 9.734017203953149e-05,
      "loss": 0.8353,
      "step": 144
    },
    {
      "epoch": 0.08822097834022877,
      "grad_norm": 0.7087867259979248,
      "learning_rate": 9.728616793536588e-05,
      "loss": 0.8558,
      "step": 145
    },
    {
      "epoch": 0.0888293988805062,
      "grad_norm": 0.7099660634994507,
      "learning_rate": 9.723163812606716e-05,
      "loss": 0.7829,
      "step": 146
    },
    {
      "epoch": 0.08943781942078365,
      "grad_norm": 1.0997332334518433,
      "learning_rate": 9.71765832895544e-05,
      "loss": 0.8654,
      "step": 147
    },
    {
      "epoch": 0.09004623996106109,
      "grad_norm": 1.0360599756240845,
      "learning_rate": 9.712100411027391e-05,
      "loss": 0.8817,
      "step": 148
    },
    {
      "epoch": 0.09065466050133852,
      "grad_norm": 1.460658311843872,
      "learning_rate": 9.70649012791906e-05,
      "loss": 0.8495,
      "step": 149
    },
    {
      "epoch": 0.09126308104161597,
      "grad_norm": 1.6194958686828613,
      "learning_rate": 9.700827549377955e-05,
      "loss": 0.929,
      "step": 150
    },
    {
      "epoch": 0.09126308104161597,
      "eval_loss": 0.8000746369361877,
      "eval_runtime": 0.0648,
      "eval_samples_per_second": 771.193,
      "eval_steps_per_second": 46.272,
      "step": 150
    },
    {
      "epoch": 0.0918715015818934,
      "grad_norm": 2.0881123542785645,
      "learning_rate": 9.695112745801716e-05,
      "loss": 1.903,
      "step": 151
    },
    {
      "epoch": 0.09247992212217085,
      "grad_norm": 1.1570873260498047,
      "learning_rate": 9.689345788237256e-05,
      "loss": 0.9427,
      "step": 152
    },
    {
      "epoch": 0.09308834266244828,
      "grad_norm": 0.8610666990280151,
      "learning_rate": 9.683526748379864e-05,
      "loss": 0.8573,
      "step": 153
    },
    {
      "epoch": 0.09369676320272573,
      "grad_norm": 0.8190277218818665,
      "learning_rate": 9.677655698572326e-05,
      "loss": 0.6991,
      "step": 154
    },
    {
      "epoch": 0.09430518374300316,
      "grad_norm": 0.8921626210212708,
      "learning_rate": 9.671732711804022e-05,
      "loss": 0.7823,
      "step": 155
    },
    {
      "epoch": 0.0949136042832806,
      "grad_norm": 0.6367238759994507,
      "learning_rate": 9.665757861710008e-05,
      "loss": 0.8343,
      "step": 156
    },
    {
      "epoch": 0.09552202482355804,
      "grad_norm": 0.533492922782898,
      "learning_rate": 9.659731222570114e-05,
      "loss": 0.8161,
      "step": 157
    },
    {
      "epoch": 0.09613044536383548,
      "grad_norm": 0.7728853821754456,
      "learning_rate": 9.653652869308019e-05,
      "loss": 0.925,
      "step": 158
    },
    {
      "epoch": 0.09673886590411292,
      "grad_norm": 0.7567634582519531,
      "learning_rate": 9.647522877490312e-05,
      "loss": 0.8247,
      "step": 159
    },
    {
      "epoch": 0.09734728644439036,
      "grad_norm": 0.6781755685806274,
      "learning_rate": 9.641341323325554e-05,
      "loss": 0.8119,
      "step": 160
    },
    {
      "epoch": 0.09795570698466781,
      "grad_norm": 0.9459850788116455,
      "learning_rate": 9.635108283663343e-05,
      "loss": 0.7861,
      "step": 161
    },
    {
      "epoch": 0.09856412752494524,
      "grad_norm": 0.8214953541755676,
      "learning_rate": 9.628823835993338e-05,
      "loss": 0.8647,
      "step": 162
    },
    {
      "epoch": 0.09917254806522269,
      "grad_norm": 2.5658833980560303,
      "learning_rate": 9.622488058444314e-05,
      "loss": 2.3544,
      "step": 163
    },
    {
      "epoch": 0.09978096860550012,
      "grad_norm": 1.3964160680770874,
      "learning_rate": 9.61610102978318e-05,
      "loss": 0.8933,
      "step": 164
    },
    {
      "epoch": 0.10038938914577757,
      "grad_norm": 1.2962576150894165,
      "learning_rate": 9.609662829414005e-05,
      "loss": 0.7469,
      "step": 165
    },
    {
      "epoch": 0.100997809686055,
      "grad_norm": 1.0267356634140015,
      "learning_rate": 9.603173537377026e-05,
      "loss": 0.8354,
      "step": 166
    },
    {
      "epoch": 0.10160623022633244,
      "grad_norm": 0.7708994150161743,
      "learning_rate": 9.59663323434766e-05,
      "loss": 0.9245,
      "step": 167
    },
    {
      "epoch": 0.10221465076660988,
      "grad_norm": 0.5819109678268433,
      "learning_rate": 9.590042001635495e-05,
      "loss": 0.8,
      "step": 168
    },
    {
      "epoch": 0.10282307130688732,
      "grad_norm": 0.5333255529403687,
      "learning_rate": 9.583399921183276e-05,
      "loss": 0.798,
      "step": 169
    },
    {
      "epoch": 0.10343149184716476,
      "grad_norm": 0.9702023267745972,
      "learning_rate": 9.5767070755659e-05,
      "loss": 0.9434,
      "step": 170
    },
    {
      "epoch": 0.1040399123874422,
      "grad_norm": 0.9722750186920166,
      "learning_rate": 9.569963547989376e-05,
      "loss": 0.8818,
      "step": 171
    },
    {
      "epoch": 0.10464833292771963,
      "grad_norm": 1.0027716159820557,
      "learning_rate": 9.563169422289797e-05,
      "loss": 0.7531,
      "step": 172
    },
    {
      "epoch": 0.10525675346799708,
      "grad_norm": 0.7870794534683228,
      "learning_rate": 9.556324782932292e-05,
      "loss": 0.8253,
      "step": 173
    },
    {
      "epoch": 0.10586517400827453,
      "grad_norm": 0.7732038497924805,
      "learning_rate": 9.549429715009983e-05,
      "loss": 0.8425,
      "step": 174
    },
    {
      "epoch": 0.10647359454855196,
      "grad_norm": 1.3445314168930054,
      "learning_rate": 9.542484304242927e-05,
      "loss": 0.8793,
      "step": 175
    },
    {
      "epoch": 0.10647359454855196,
      "eval_loss": 0.7928541302680969,
      "eval_runtime": 0.0638,
      "eval_samples_per_second": 783.396,
      "eval_steps_per_second": 47.004,
      "step": 175
    },
    {
      "epoch": 0.1070820150888294,
      "grad_norm": 3.020885705947876,
      "learning_rate": 9.535488636977042e-05,
      "loss": 1.6845,
      "step": 176
    },
    {
      "epoch": 0.10769043562910684,
      "grad_norm": 2.2240676879882812,
      "learning_rate": 9.528442800183043e-05,
      "loss": 0.8827,
      "step": 177
    },
    {
      "epoch": 0.10829885616938428,
      "grad_norm": 1.7883273363113403,
      "learning_rate": 9.521346881455356e-05,
      "loss": 0.7705,
      "step": 178
    },
    {
      "epoch": 0.10890727670966172,
      "grad_norm": 1.2515283823013306,
      "learning_rate": 9.514200969011025e-05,
      "loss": 0.7441,
      "step": 179
    },
    {
      "epoch": 0.10951569724993916,
      "grad_norm": 0.8652520179748535,
      "learning_rate": 9.507005151688627e-05,
      "loss": 0.725,
      "step": 180
    },
    {
      "epoch": 0.1101241177902166,
      "grad_norm": 0.6205310225486755,
      "learning_rate": 9.499759518947156e-05,
      "loss": 0.7823,
      "step": 181
    },
    {
      "epoch": 0.11073253833049404,
      "grad_norm": 0.6425133943557739,
      "learning_rate": 9.492464160864917e-05,
      "loss": 0.7767,
      "step": 182
    },
    {
      "epoch": 0.11134095887077147,
      "grad_norm": 1.1534785032272339,
      "learning_rate": 9.485119168138405e-05,
      "loss": 0.8115,
      "step": 183
    },
    {
      "epoch": 0.11194937941104892,
      "grad_norm": 1.0528301000595093,
      "learning_rate": 9.477724632081176e-05,
      "loss": 0.7672,
      "step": 184
    },
    {
      "epoch": 0.11255779995132635,
      "grad_norm": 1.1875839233398438,
      "learning_rate": 9.470280644622713e-05,
      "loss": 0.8329,
      "step": 185
    },
    {
      "epoch": 0.1131662204916038,
      "grad_norm": 1.0731632709503174,
      "learning_rate": 9.462787298307285e-05,
      "loss": 0.8891,
      "step": 186
    },
    {
      "epoch": 0.11377464103188123,
      "grad_norm": 1.1598612070083618,
      "learning_rate": 9.455244686292789e-05,
      "loss": 0.7696,
      "step": 187
    },
    {
      "epoch": 0.11438306157215868,
      "grad_norm": 3.4111616611480713,
      "learning_rate": 9.447652902349602e-05,
      "loss": 1.4939,
      "step": 188
    },
    {
      "epoch": 0.11499148211243612,
      "grad_norm": 1.8949023485183716,
      "learning_rate": 9.44001204085941e-05,
      "loss": 0.8453,
      "step": 189
    },
    {
      "epoch": 0.11559990265271355,
      "grad_norm": 1.656935214996338,
      "learning_rate": 9.432322196814033e-05,
      "loss": 0.7722,
      "step": 190
    },
    {
      "epoch": 0.116208323192991,
      "grad_norm": 1.8966083526611328,
      "learning_rate": 9.424583465814249e-05,
      "loss": 0.7714,
      "step": 191
    },
    {
      "epoch": 0.11681674373326843,
      "grad_norm": 1.4729326963424683,
      "learning_rate": 9.4167959440686e-05,
      "loss": 0.8103,
      "step": 192
    },
    {
      "epoch": 0.11742516427354588,
      "grad_norm": 1.2855483293533325,
      "learning_rate": 9.408959728392199e-05,
      "loss": 0.751,
      "step": 193
    },
    {
      "epoch": 0.11803358481382331,
      "grad_norm": 0.6022900938987732,
      "learning_rate": 9.401074916205528e-05,
      "loss": 0.7532,
      "step": 194
    },
    {
      "epoch": 0.11864200535410076,
      "grad_norm": 0.6037607789039612,
      "learning_rate": 9.393141605533224e-05,
      "loss": 0.683,
      "step": 195
    },
    {
      "epoch": 0.11925042589437819,
      "grad_norm": 1.0040525197982788,
      "learning_rate": 9.385159895002859e-05,
      "loss": 0.7185,
      "step": 196
    },
    {
      "epoch": 0.11985884643465564,
      "grad_norm": 1.1619277000427246,
      "learning_rate": 9.37712988384372e-05,
      "loss": 0.934,
      "step": 197
    },
    {
      "epoch": 0.12046726697493307,
      "grad_norm": 1.2666449546813965,
      "learning_rate": 9.369051671885566e-05,
      "loss": 0.8437,
      "step": 198
    },
    {
      "epoch": 0.12107568751521051,
      "grad_norm": 1.3021451234817505,
      "learning_rate": 9.360925359557397e-05,
      "loss": 0.7939,
      "step": 199
    },
    {
      "epoch": 0.12168410805548795,
      "grad_norm": 1.3136470317840576,
      "learning_rate": 9.3527510478862e-05,
      "loss": 0.8785,
      "step": 200
    },
    {
      "epoch": 0.12168410805548795,
      "eval_loss": 0.770029604434967,
      "eval_runtime": 0.0645,
      "eval_samples_per_second": 775.029,
      "eval_steps_per_second": 46.502,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 937,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3040284166258688e+16,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}