|
{ |
|
"best_metric": 2.7491917610168457, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.06791171477079797, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00033955857385398983, |
|
"grad_norm": 21.100221633911133, |
|
"learning_rate": 1.0100000000000002e-05, |
|
"loss": 5.7028, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00033955857385398983, |
|
"eval_loss": 3.8351783752441406, |
|
"eval_runtime": 134.0344, |
|
"eval_samples_per_second": 9.251, |
|
"eval_steps_per_second": 2.313, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006791171477079797, |
|
"grad_norm": 32.934295654296875, |
|
"learning_rate": 2.0200000000000003e-05, |
|
"loss": 6.2743, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0010186757215619694, |
|
"grad_norm": 11.293846130371094, |
|
"learning_rate": 3.0299999999999998e-05, |
|
"loss": 5.5001, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0013582342954159593, |
|
"grad_norm": 12.974593162536621, |
|
"learning_rate": 4.0400000000000006e-05, |
|
"loss": 5.5615, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.001697792869269949, |
|
"grad_norm": 11.986504554748535, |
|
"learning_rate": 5.05e-05, |
|
"loss": 6.0771, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0020373514431239388, |
|
"grad_norm": 11.836446762084961, |
|
"learning_rate": 6.0599999999999996e-05, |
|
"loss": 5.833, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0023769100169779285, |
|
"grad_norm": 12.108397483825684, |
|
"learning_rate": 7.07e-05, |
|
"loss": 6.3409, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0027164685908319186, |
|
"grad_norm": 13.400388717651367, |
|
"learning_rate": 8.080000000000001e-05, |
|
"loss": 6.262, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0030560271646859084, |
|
"grad_norm": 12.49435806274414, |
|
"learning_rate": 9.09e-05, |
|
"loss": 5.619, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.003395585738539898, |
|
"grad_norm": 12.52596378326416, |
|
"learning_rate": 0.000101, |
|
"loss": 5.1372, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.003735144312393888, |
|
"grad_norm": 12.6868314743042, |
|
"learning_rate": 0.00010046842105263158, |
|
"loss": 5.2604, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0040747028862478775, |
|
"grad_norm": 14.099748611450195, |
|
"learning_rate": 9.993684210526315e-05, |
|
"loss": 5.58, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.004414261460101867, |
|
"grad_norm": 12.148128509521484, |
|
"learning_rate": 9.940526315789473e-05, |
|
"loss": 5.5032, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.004753820033955857, |
|
"grad_norm": 14.858428001403809, |
|
"learning_rate": 9.887368421052632e-05, |
|
"loss": 6.4015, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0050933786078098476, |
|
"grad_norm": 14.074804306030273, |
|
"learning_rate": 9.83421052631579e-05, |
|
"loss": 5.938, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.005432937181663837, |
|
"grad_norm": 13.298508644104004, |
|
"learning_rate": 9.781052631578948e-05, |
|
"loss": 5.9138, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.005772495755517827, |
|
"grad_norm": 13.888445854187012, |
|
"learning_rate": 9.727894736842106e-05, |
|
"loss": 5.5768, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.006112054329371817, |
|
"grad_norm": 12.229842185974121, |
|
"learning_rate": 9.674736842105263e-05, |
|
"loss": 5.4015, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0064516129032258064, |
|
"grad_norm": 17.1883487701416, |
|
"learning_rate": 9.621578947368421e-05, |
|
"loss": 6.27, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.006791171477079796, |
|
"grad_norm": 13.889158248901367, |
|
"learning_rate": 9.568421052631578e-05, |
|
"loss": 5.3761, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007130730050933786, |
|
"grad_norm": 18.03790283203125, |
|
"learning_rate": 9.515263157894737e-05, |
|
"loss": 5.8293, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.007470288624787776, |
|
"grad_norm": 15.712692260742188, |
|
"learning_rate": 9.462105263157895e-05, |
|
"loss": 6.1728, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.007809847198641765, |
|
"grad_norm": 15.400139808654785, |
|
"learning_rate": 9.408947368421054e-05, |
|
"loss": 6.7802, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.008149405772495755, |
|
"grad_norm": 14.763883590698242, |
|
"learning_rate": 9.355789473684211e-05, |
|
"loss": 5.7079, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.008488964346349746, |
|
"grad_norm": 12.111189842224121, |
|
"learning_rate": 9.302631578947369e-05, |
|
"loss": 5.5747, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.008828522920203734, |
|
"grad_norm": 14.711851119995117, |
|
"learning_rate": 9.249473684210526e-05, |
|
"loss": 6.0876, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.009168081494057725, |
|
"grad_norm": 15.358607292175293, |
|
"learning_rate": 9.196315789473685e-05, |
|
"loss": 5.6319, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.009507640067911714, |
|
"grad_norm": 13.506231307983398, |
|
"learning_rate": 9.143157894736843e-05, |
|
"loss": 5.8136, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.009847198641765705, |
|
"grad_norm": 14.769192695617676, |
|
"learning_rate": 9.09e-05, |
|
"loss": 5.8328, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.010186757215619695, |
|
"grad_norm": 14.589241027832031, |
|
"learning_rate": 9.036842105263158e-05, |
|
"loss": 5.5436, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010526315789473684, |
|
"grad_norm": 16.822694778442383, |
|
"learning_rate": 8.983684210526316e-05, |
|
"loss": 6.7274, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.010865874363327675, |
|
"grad_norm": 17.69041633605957, |
|
"learning_rate": 8.930526315789474e-05, |
|
"loss": 6.3115, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.011205432937181663, |
|
"grad_norm": 16.131786346435547, |
|
"learning_rate": 8.877368421052632e-05, |
|
"loss": 5.8964, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.011544991511035654, |
|
"grad_norm": 17.694215774536133, |
|
"learning_rate": 8.82421052631579e-05, |
|
"loss": 6.6347, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.011884550084889643, |
|
"grad_norm": 16.214025497436523, |
|
"learning_rate": 8.771052631578948e-05, |
|
"loss": 5.3229, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.012224108658743633, |
|
"grad_norm": 17.1286678314209, |
|
"learning_rate": 8.717894736842105e-05, |
|
"loss": 6.4127, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.012563667232597622, |
|
"grad_norm": 17.29891014099121, |
|
"learning_rate": 8.664736842105263e-05, |
|
"loss": 5.6328, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.012903225806451613, |
|
"grad_norm": 22.239364624023438, |
|
"learning_rate": 8.61157894736842e-05, |
|
"loss": 6.787, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.013242784380305603, |
|
"grad_norm": 17.34881591796875, |
|
"learning_rate": 8.55842105263158e-05, |
|
"loss": 5.9971, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.013582342954159592, |
|
"grad_norm": 31.443096160888672, |
|
"learning_rate": 8.505263157894737e-05, |
|
"loss": 7.1717, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.013921901528013583, |
|
"grad_norm": 20.705217361450195, |
|
"learning_rate": 8.452105263157896e-05, |
|
"loss": 6.672, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.014261460101867572, |
|
"grad_norm": 22.882652282714844, |
|
"learning_rate": 8.398947368421053e-05, |
|
"loss": 6.7516, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.014601018675721562, |
|
"grad_norm": 25.97607421875, |
|
"learning_rate": 8.345789473684211e-05, |
|
"loss": 7.0149, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.014940577249575551, |
|
"grad_norm": 29.19485855102539, |
|
"learning_rate": 8.292631578947368e-05, |
|
"loss": 7.1149, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.015280135823429542, |
|
"grad_norm": 26.053762435913086, |
|
"learning_rate": 8.239473684210526e-05, |
|
"loss": 6.5975, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01561969439728353, |
|
"grad_norm": 28.610328674316406, |
|
"learning_rate": 8.186315789473683e-05, |
|
"loss": 6.2714, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.01595925297113752, |
|
"grad_norm": 35.02290344238281, |
|
"learning_rate": 8.133157894736842e-05, |
|
"loss": 7.4225, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.01629881154499151, |
|
"grad_norm": 51.69056701660156, |
|
"learning_rate": 8.080000000000001e-05, |
|
"loss": 6.8981, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.016638370118845502, |
|
"grad_norm": 50.08887481689453, |
|
"learning_rate": 8.026842105263159e-05, |
|
"loss": 7.0015, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.01697792869269949, |
|
"grad_norm": 40.95683288574219, |
|
"learning_rate": 7.973684210526316e-05, |
|
"loss": 6.7858, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01697792869269949, |
|
"eval_loss": 3.2727584838867188, |
|
"eval_runtime": 136.3007, |
|
"eval_samples_per_second": 9.098, |
|
"eval_steps_per_second": 2.274, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01731748726655348, |
|
"grad_norm": 8.816231727600098, |
|
"learning_rate": 7.920526315789474e-05, |
|
"loss": 6.4331, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.01765704584040747, |
|
"grad_norm": 7.692835807800293, |
|
"learning_rate": 7.867368421052631e-05, |
|
"loss": 6.1655, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.01799660441426146, |
|
"grad_norm": 6.719763278961182, |
|
"learning_rate": 7.814210526315789e-05, |
|
"loss": 5.9821, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.01833616298811545, |
|
"grad_norm": 6.741839408874512, |
|
"learning_rate": 7.761052631578946e-05, |
|
"loss": 5.5779, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.01867572156196944, |
|
"grad_norm": 7.938393592834473, |
|
"learning_rate": 7.707894736842105e-05, |
|
"loss": 6.1207, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.019015280135823428, |
|
"grad_norm": 7.241247177124023, |
|
"learning_rate": 7.654736842105264e-05, |
|
"loss": 5.5408, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.01935483870967742, |
|
"grad_norm": 7.55157470703125, |
|
"learning_rate": 7.601578947368422e-05, |
|
"loss": 5.5802, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.01969439728353141, |
|
"grad_norm": 7.256725311279297, |
|
"learning_rate": 7.548421052631579e-05, |
|
"loss": 5.7427, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.020033955857385398, |
|
"grad_norm": 8.38663101196289, |
|
"learning_rate": 7.495263157894737e-05, |
|
"loss": 5.8768, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.02037351443123939, |
|
"grad_norm": 7.507662296295166, |
|
"learning_rate": 7.442105263157894e-05, |
|
"loss": 5.8298, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02071307300509338, |
|
"grad_norm": 8.123747825622559, |
|
"learning_rate": 7.388947368421053e-05, |
|
"loss": 5.6901, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.021052631578947368, |
|
"grad_norm": 7.707481384277344, |
|
"learning_rate": 7.335789473684211e-05, |
|
"loss": 5.4775, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.021392190152801357, |
|
"grad_norm": 8.684199333190918, |
|
"learning_rate": 7.282631578947368e-05, |
|
"loss": 4.7097, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.02173174872665535, |
|
"grad_norm": 9.479657173156738, |
|
"learning_rate": 7.229473684210527e-05, |
|
"loss": 6.1553, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.022071307300509338, |
|
"grad_norm": 7.97694206237793, |
|
"learning_rate": 7.176315789473685e-05, |
|
"loss": 5.1793, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.022410865874363327, |
|
"grad_norm": 8.905004501342773, |
|
"learning_rate": 7.123157894736842e-05, |
|
"loss": 5.1063, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.02275042444821732, |
|
"grad_norm": 9.290450096130371, |
|
"learning_rate": 7.07e-05, |
|
"loss": 5.6181, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.023089983022071308, |
|
"grad_norm": 9.563346862792969, |
|
"learning_rate": 7.016842105263159e-05, |
|
"loss": 5.3551, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.023429541595925297, |
|
"grad_norm": 9.26034927368164, |
|
"learning_rate": 6.963684210526316e-05, |
|
"loss": 5.5433, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.023769100169779286, |
|
"grad_norm": 9.813597679138184, |
|
"learning_rate": 6.910526315789474e-05, |
|
"loss": 5.3692, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.024108658743633278, |
|
"grad_norm": 9.946206092834473, |
|
"learning_rate": 6.857368421052631e-05, |
|
"loss": 5.4733, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.024448217317487267, |
|
"grad_norm": 11.333394050598145, |
|
"learning_rate": 6.80421052631579e-05, |
|
"loss": 6.3697, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.024787775891341256, |
|
"grad_norm": 10.807990074157715, |
|
"learning_rate": 6.751052631578948e-05, |
|
"loss": 5.7684, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.025127334465195245, |
|
"grad_norm": 10.34673023223877, |
|
"learning_rate": 6.697894736842105e-05, |
|
"loss": 5.5579, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.025466893039049237, |
|
"grad_norm": 10.379812240600586, |
|
"learning_rate": 6.644736842105264e-05, |
|
"loss": 6.0875, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.025806451612903226, |
|
"grad_norm": 10.365467071533203, |
|
"learning_rate": 6.591578947368422e-05, |
|
"loss": 5.4606, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.026146010186757215, |
|
"grad_norm": 12.18764877319336, |
|
"learning_rate": 6.538421052631579e-05, |
|
"loss": 5.7223, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.026485568760611207, |
|
"grad_norm": 10.989704132080078, |
|
"learning_rate": 6.485263157894737e-05, |
|
"loss": 5.3804, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.026825127334465196, |
|
"grad_norm": 11.694632530212402, |
|
"learning_rate": 6.432105263157894e-05, |
|
"loss": 5.822, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.027164685908319185, |
|
"grad_norm": 12.42897891998291, |
|
"learning_rate": 6.378947368421053e-05, |
|
"loss": 5.6253, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.027504244482173174, |
|
"grad_norm": 12.49673080444336, |
|
"learning_rate": 6.32578947368421e-05, |
|
"loss": 5.885, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.027843803056027166, |
|
"grad_norm": 12.554586410522461, |
|
"learning_rate": 6.27263157894737e-05, |
|
"loss": 5.4057, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.028183361629881155, |
|
"grad_norm": 12.602128028869629, |
|
"learning_rate": 6.219473684210527e-05, |
|
"loss": 5.8701, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.028522920203735144, |
|
"grad_norm": 14.500311851501465, |
|
"learning_rate": 6.166315789473685e-05, |
|
"loss": 5.7579, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.028862478777589132, |
|
"grad_norm": 12.415670394897461, |
|
"learning_rate": 6.113157894736842e-05, |
|
"loss": 5.589, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.029202037351443125, |
|
"grad_norm": 12.579917907714844, |
|
"learning_rate": 6.0599999999999996e-05, |
|
"loss": 5.7132, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.029541595925297114, |
|
"grad_norm": 14.4943208694458, |
|
"learning_rate": 6.006842105263158e-05, |
|
"loss": 6.1524, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.029881154499151102, |
|
"grad_norm": 13.979001998901367, |
|
"learning_rate": 5.953684210526315e-05, |
|
"loss": 5.4524, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.030220713073005095, |
|
"grad_norm": 12.837852478027344, |
|
"learning_rate": 5.900526315789474e-05, |
|
"loss": 5.0063, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.030560271646859084, |
|
"grad_norm": 15.69062614440918, |
|
"learning_rate": 5.847368421052632e-05, |
|
"loss": 5.7173, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.030899830220713072, |
|
"grad_norm": 18.907155990600586, |
|
"learning_rate": 5.79421052631579e-05, |
|
"loss": 5.5478, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.03123938879456706, |
|
"grad_norm": 13.907947540283203, |
|
"learning_rate": 5.7410526315789475e-05, |
|
"loss": 5.1368, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.031578947368421054, |
|
"grad_norm": 21.56955337524414, |
|
"learning_rate": 5.687894736842105e-05, |
|
"loss": 5.4158, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.03191850594227504, |
|
"grad_norm": 19.460166931152344, |
|
"learning_rate": 5.6347368421052625e-05, |
|
"loss": 6.2592, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.03225806451612903, |
|
"grad_norm": 22.000574111938477, |
|
"learning_rate": 5.5815789473684214e-05, |
|
"loss": 5.9925, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.03259762308998302, |
|
"grad_norm": 21.29176139831543, |
|
"learning_rate": 5.5284210526315796e-05, |
|
"loss": 6.2012, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.03293718166383701, |
|
"grad_norm": 24.593799591064453, |
|
"learning_rate": 5.475263157894737e-05, |
|
"loss": 6.3382, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.033276740237691005, |
|
"grad_norm": 25.268535614013672, |
|
"learning_rate": 5.422105263157895e-05, |
|
"loss": 6.17, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.033616298811544994, |
|
"grad_norm": 37.28253173828125, |
|
"learning_rate": 5.368947368421053e-05, |
|
"loss": 7.309, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.03395585738539898, |
|
"grad_norm": 61.91118240356445, |
|
"learning_rate": 5.3157894736842104e-05, |
|
"loss": 9.4064, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03395585738539898, |
|
"eval_loss": 3.108811855316162, |
|
"eval_runtime": 133.8842, |
|
"eval_samples_per_second": 9.262, |
|
"eval_steps_per_second": 2.315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03429541595925297, |
|
"grad_norm": 8.512744903564453, |
|
"learning_rate": 5.262631578947368e-05, |
|
"loss": 6.0228, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.03463497453310696, |
|
"grad_norm": 7.949807643890381, |
|
"learning_rate": 5.209473684210527e-05, |
|
"loss": 5.8142, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.03497453310696095, |
|
"grad_norm": 6.752256870269775, |
|
"learning_rate": 5.1563157894736844e-05, |
|
"loss": 5.552, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.03531409168081494, |
|
"grad_norm": 6.490177631378174, |
|
"learning_rate": 5.1031578947368426e-05, |
|
"loss": 5.8519, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.035653650254668934, |
|
"grad_norm": 5.849376678466797, |
|
"learning_rate": 5.05e-05, |
|
"loss": 5.6065, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03599320882852292, |
|
"grad_norm": 6.028791427612305, |
|
"learning_rate": 4.9968421052631576e-05, |
|
"loss": 5.6156, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.03633276740237691, |
|
"grad_norm": 5.619626522064209, |
|
"learning_rate": 4.943684210526316e-05, |
|
"loss": 5.2623, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.0366723259762309, |
|
"grad_norm": 5.889388084411621, |
|
"learning_rate": 4.890526315789474e-05, |
|
"loss": 5.3881, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.03701188455008489, |
|
"grad_norm": 6.1949615478515625, |
|
"learning_rate": 4.8373684210526316e-05, |
|
"loss": 5.1539, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.03735144312393888, |
|
"grad_norm": 6.2401442527771, |
|
"learning_rate": 4.784210526315789e-05, |
|
"loss": 5.1559, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03769100169779287, |
|
"grad_norm": 6.51352071762085, |
|
"learning_rate": 4.731052631578947e-05, |
|
"loss": 4.9546, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.038030560271646856, |
|
"grad_norm": 7.465339660644531, |
|
"learning_rate": 4.6778947368421055e-05, |
|
"loss": 5.389, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.03837011884550085, |
|
"grad_norm": 8.441889762878418, |
|
"learning_rate": 4.624736842105263e-05, |
|
"loss": 5.6321, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.03870967741935484, |
|
"grad_norm": 8.055974006652832, |
|
"learning_rate": 4.571578947368421e-05, |
|
"loss": 5.5059, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.03904923599320883, |
|
"grad_norm": 7.581737041473389, |
|
"learning_rate": 4.518421052631579e-05, |
|
"loss": 5.1159, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03938879456706282, |
|
"grad_norm": 8.991089820861816, |
|
"learning_rate": 4.465263157894737e-05, |
|
"loss": 5.9822, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.03972835314091681, |
|
"grad_norm": 8.726984024047852, |
|
"learning_rate": 4.412105263157895e-05, |
|
"loss": 5.4402, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.040067911714770796, |
|
"grad_norm": 8.529667854309082, |
|
"learning_rate": 4.358947368421053e-05, |
|
"loss": 5.2337, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.040407470288624785, |
|
"grad_norm": 8.97454833984375, |
|
"learning_rate": 4.30578947368421e-05, |
|
"loss": 5.6533, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.04074702886247878, |
|
"grad_norm": 8.54892349243164, |
|
"learning_rate": 4.2526315789473685e-05, |
|
"loss": 5.4748, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04108658743633277, |
|
"grad_norm": 8.859085083007812, |
|
"learning_rate": 4.199473684210527e-05, |
|
"loss": 5.733, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.04142614601018676, |
|
"grad_norm": 8.932308197021484, |
|
"learning_rate": 4.146315789473684e-05, |
|
"loss": 5.3355, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.04176570458404075, |
|
"grad_norm": 9.009238243103027, |
|
"learning_rate": 4.093157894736842e-05, |
|
"loss": 5.2937, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.042105263157894736, |
|
"grad_norm": 8.809886932373047, |
|
"learning_rate": 4.0400000000000006e-05, |
|
"loss": 5.6176, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.042444821731748725, |
|
"grad_norm": 10.109439849853516, |
|
"learning_rate": 3.986842105263158e-05, |
|
"loss": 5.5681, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.042784380305602714, |
|
"grad_norm": 9.18508243560791, |
|
"learning_rate": 3.933684210526316e-05, |
|
"loss": 5.263, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.04312393887945671, |
|
"grad_norm": 10.614432334899902, |
|
"learning_rate": 3.880526315789473e-05, |
|
"loss": 5.6346, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0434634974533107, |
|
"grad_norm": 11.10940933227539, |
|
"learning_rate": 3.827368421052632e-05, |
|
"loss": 5.6721, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.04380305602716469, |
|
"grad_norm": 9.783493041992188, |
|
"learning_rate": 3.7742105263157896e-05, |
|
"loss": 5.2759, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.044142614601018676, |
|
"grad_norm": 10.094010353088379, |
|
"learning_rate": 3.721052631578947e-05, |
|
"loss": 5.0748, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.044482173174872665, |
|
"grad_norm": 11.673230171203613, |
|
"learning_rate": 3.6678947368421054e-05, |
|
"loss": 6.0693, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.044821731748726654, |
|
"grad_norm": 13.237796783447266, |
|
"learning_rate": 3.6147368421052636e-05, |
|
"loss": 5.8695, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.04516129032258064, |
|
"grad_norm": 11.816963195800781, |
|
"learning_rate": 3.561578947368421e-05, |
|
"loss": 4.9874, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.04550084889643464, |
|
"grad_norm": 11.55286979675293, |
|
"learning_rate": 3.508421052631579e-05, |
|
"loss": 5.6631, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.04584040747028863, |
|
"grad_norm": 14.232548713684082, |
|
"learning_rate": 3.455263157894737e-05, |
|
"loss": 5.5924, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.046179966044142616, |
|
"grad_norm": 14.204998970031738, |
|
"learning_rate": 3.402105263157895e-05, |
|
"loss": 6.4456, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.046519524617996605, |
|
"grad_norm": 16.168073654174805, |
|
"learning_rate": 3.3489473684210526e-05, |
|
"loss": 6.0166, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.046859083191850594, |
|
"grad_norm": 13.623854637145996, |
|
"learning_rate": 3.295789473684211e-05, |
|
"loss": 4.8727, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.04719864176570458, |
|
"grad_norm": 14.182967185974121, |
|
"learning_rate": 3.242631578947368e-05, |
|
"loss": 5.2488, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.04753820033955857, |
|
"grad_norm": 15.692301750183105, |
|
"learning_rate": 3.1894736842105265e-05, |
|
"loss": 5.7354, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04787775891341256, |
|
"grad_norm": 13.835912704467773, |
|
"learning_rate": 3.136315789473685e-05, |
|
"loss": 4.7608, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.048217317487266556, |
|
"grad_norm": 23.79447364807129, |
|
"learning_rate": 3.083157894736842e-05, |
|
"loss": 5.9428, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.048556876061120545, |
|
"grad_norm": 20.368270874023438, |
|
"learning_rate": 3.0299999999999998e-05, |
|
"loss": 5.8896, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.048896434634974534, |
|
"grad_norm": 26.974061965942383, |
|
"learning_rate": 2.9768421052631577e-05, |
|
"loss": 6.8198, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.04923599320882852, |
|
"grad_norm": 21.44305419921875, |
|
"learning_rate": 2.923684210526316e-05, |
|
"loss": 6.8624, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04957555178268251, |
|
"grad_norm": 22.52785301208496, |
|
"learning_rate": 2.8705263157894737e-05, |
|
"loss": 5.6798, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.0499151103565365, |
|
"grad_norm": 27.52121353149414, |
|
"learning_rate": 2.8173684210526313e-05, |
|
"loss": 6.4979, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.05025466893039049, |
|
"grad_norm": 34.942691802978516, |
|
"learning_rate": 2.7642105263157898e-05, |
|
"loss": 7.2382, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.050594227504244485, |
|
"grad_norm": 44.1684684753418, |
|
"learning_rate": 2.7110526315789473e-05, |
|
"loss": 7.3026, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.050933786078098474, |
|
"grad_norm": 29.29156494140625, |
|
"learning_rate": 2.6578947368421052e-05, |
|
"loss": 5.9047, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.050933786078098474, |
|
"eval_loss": 2.8410627841949463, |
|
"eval_runtime": 134.1501, |
|
"eval_samples_per_second": 9.243, |
|
"eval_steps_per_second": 2.311, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05127334465195246, |
|
"grad_norm": 4.4673285484313965, |
|
"learning_rate": 2.6047368421052634e-05, |
|
"loss": 5.1138, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.05161290322580645, |
|
"grad_norm": 5.355312347412109, |
|
"learning_rate": 2.5515789473684213e-05, |
|
"loss": 5.4617, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.05195246179966044, |
|
"grad_norm": 5.351836681365967, |
|
"learning_rate": 2.4984210526315788e-05, |
|
"loss": 5.3663, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.05229202037351443, |
|
"grad_norm": 6.066408634185791, |
|
"learning_rate": 2.445263157894737e-05, |
|
"loss": 5.3583, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.05263157894736842, |
|
"grad_norm": 5.661759376525879, |
|
"learning_rate": 2.3921052631578946e-05, |
|
"loss": 5.3968, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.052971137521222414, |
|
"grad_norm": 5.76517391204834, |
|
"learning_rate": 2.3389473684210528e-05, |
|
"loss": 5.0868, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.0533106960950764, |
|
"grad_norm": 6.425754070281982, |
|
"learning_rate": 2.2857894736842106e-05, |
|
"loss": 5.7214, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.05365025466893039, |
|
"grad_norm": 6.3002753257751465, |
|
"learning_rate": 2.2326315789473685e-05, |
|
"loss": 4.8879, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.05398981324278438, |
|
"grad_norm": 5.956075191497803, |
|
"learning_rate": 2.1794736842105264e-05, |
|
"loss": 5.3174, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.05432937181663837, |
|
"grad_norm": 6.668689727783203, |
|
"learning_rate": 2.1263157894736842e-05, |
|
"loss": 5.1679, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05466893039049236, |
|
"grad_norm": 6.492646217346191, |
|
"learning_rate": 2.073157894736842e-05, |
|
"loss": 5.1203, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.05500848896434635, |
|
"grad_norm": 6.742474555969238, |
|
"learning_rate": 2.0200000000000003e-05, |
|
"loss": 5.3391, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.05534804753820034, |
|
"grad_norm": 7.066228866577148, |
|
"learning_rate": 1.966842105263158e-05, |
|
"loss": 5.2756, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.05568760611205433, |
|
"grad_norm": 7.766740798950195, |
|
"learning_rate": 1.913684210526316e-05, |
|
"loss": 5.0874, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.05602716468590832, |
|
"grad_norm": 7.978595733642578, |
|
"learning_rate": 1.8605263157894736e-05, |
|
"loss": 5.2179, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.05636672325976231, |
|
"grad_norm": 7.731940269470215, |
|
"learning_rate": 1.8073684210526318e-05, |
|
"loss": 5.0202, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.0567062818336163, |
|
"grad_norm": 7.676203727722168, |
|
"learning_rate": 1.7542105263157897e-05, |
|
"loss": 5.3613, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.05704584040747029, |
|
"grad_norm": 8.548066139221191, |
|
"learning_rate": 1.7010526315789475e-05, |
|
"loss": 5.5421, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.057385398981324276, |
|
"grad_norm": 7.955386638641357, |
|
"learning_rate": 1.6478947368421054e-05, |
|
"loss": 5.2485, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.057724957555178265, |
|
"grad_norm": 8.47363567352295, |
|
"learning_rate": 1.5947368421052633e-05, |
|
"loss": 5.5402, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05806451612903226, |
|
"grad_norm": 8.166704177856445, |
|
"learning_rate": 1.541578947368421e-05, |
|
"loss": 4.6554, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.05840407470288625, |
|
"grad_norm": 9.387300491333008, |
|
"learning_rate": 1.4884210526315788e-05, |
|
"loss": 5.4705, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.05874363327674024, |
|
"grad_norm": 11.64377212524414, |
|
"learning_rate": 1.4352631578947369e-05, |
|
"loss": 5.0644, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.05908319185059423, |
|
"grad_norm": 9.732513427734375, |
|
"learning_rate": 1.3821052631578949e-05, |
|
"loss": 5.5656, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.059422750424448216, |
|
"grad_norm": 9.858539581298828, |
|
"learning_rate": 1.3289473684210526e-05, |
|
"loss": 5.3905, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.059762308998302205, |
|
"grad_norm": 11.403061866760254, |
|
"learning_rate": 1.2757894736842106e-05, |
|
"loss": 5.5977, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.060101867572156194, |
|
"grad_norm": 9.382144927978516, |
|
"learning_rate": 1.2226315789473685e-05, |
|
"loss": 5.2198, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.06044142614601019, |
|
"grad_norm": 12.708952903747559, |
|
"learning_rate": 1.1694736842105264e-05, |
|
"loss": 5.279, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.06078098471986418, |
|
"grad_norm": 11.602399826049805, |
|
"learning_rate": 1.1163157894736842e-05, |
|
"loss": 5.6348, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.06112054329371817, |
|
"grad_norm": 11.256779670715332, |
|
"learning_rate": 1.0631578947368421e-05, |
|
"loss": 5.3102, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.061460101867572156, |
|
"grad_norm": 13.216877937316895, |
|
"learning_rate": 1.0100000000000002e-05, |
|
"loss": 5.7058, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.061799660441426145, |
|
"grad_norm": 11.540813446044922, |
|
"learning_rate": 9.56842105263158e-06, |
|
"loss": 5.7871, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.062139219015280134, |
|
"grad_norm": 11.003501892089844, |
|
"learning_rate": 9.036842105263159e-06, |
|
"loss": 4.9942, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.06247877758913412, |
|
"grad_norm": 12.439997673034668, |
|
"learning_rate": 8.505263157894738e-06, |
|
"loss": 5.6214, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.06281833616298811, |
|
"grad_norm": 13.413476943969727, |
|
"learning_rate": 7.973684210526316e-06, |
|
"loss": 5.2558, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.06315789473684211, |
|
"grad_norm": 12.21358585357666, |
|
"learning_rate": 7.442105263157894e-06, |
|
"loss": 5.3534, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.06349745331069609, |
|
"grad_norm": 17.38026237487793, |
|
"learning_rate": 6.9105263157894745e-06, |
|
"loss": 5.9743, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.06383701188455009, |
|
"grad_norm": 14.174423217773438, |
|
"learning_rate": 6.378947368421053e-06, |
|
"loss": 6.3298, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.06417657045840408, |
|
"grad_norm": 15.029065132141113, |
|
"learning_rate": 5.847368421052632e-06, |
|
"loss": 6.079, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.06451612903225806, |
|
"grad_norm": 18.191574096679688, |
|
"learning_rate": 5.315789473684211e-06, |
|
"loss": 6.2973, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06485568760611206, |
|
"grad_norm": 20.552350997924805, |
|
"learning_rate": 4.78421052631579e-06, |
|
"loss": 5.2437, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.06519524617996604, |
|
"grad_norm": 15.131223678588867, |
|
"learning_rate": 4.252631578947369e-06, |
|
"loss": 4.9799, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.06553480475382004, |
|
"grad_norm": 19.161144256591797, |
|
"learning_rate": 3.721052631578947e-06, |
|
"loss": 5.4162, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.06587436332767402, |
|
"grad_norm": 17.99496841430664, |
|
"learning_rate": 3.1894736842105266e-06, |
|
"loss": 6.2559, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.06621392190152801, |
|
"grad_norm": 21.827606201171875, |
|
"learning_rate": 2.6578947368421053e-06, |
|
"loss": 6.9087, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.06655348047538201, |
|
"grad_norm": 22.512189865112305, |
|
"learning_rate": 2.1263157894736844e-06, |
|
"loss": 5.9861, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.06689303904923599, |
|
"grad_norm": 20.787433624267578, |
|
"learning_rate": 1.5947368421052633e-06, |
|
"loss": 6.6073, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.06723259762308999, |
|
"grad_norm": 34.3043098449707, |
|
"learning_rate": 1.0631578947368422e-06, |
|
"loss": 6.9233, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.06757215619694397, |
|
"grad_norm": 39.098968505859375, |
|
"learning_rate": 5.315789473684211e-07, |
|
"loss": 7.7742, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.06791171477079797, |
|
"grad_norm": 41.64736557006836, |
|
"learning_rate": 0.0, |
|
"loss": 7.8041, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06791171477079797, |
|
"eval_loss": 2.7491917610168457, |
|
"eval_runtime": 134.4966, |
|
"eval_samples_per_second": 9.22, |
|
"eval_steps_per_second": 2.305, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.229663245605274e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|