|
{ |
|
"best_metric": 1.6180227994918823, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-100", |
|
"epoch": 0.6269592476489029, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006269592476489028, |
|
"grad_norm": 0.1956695169210434, |
|
"learning_rate": 1.0100000000000002e-05, |
|
"loss": 1.4797, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006269592476489028, |
|
"eval_loss": 2.014873743057251, |
|
"eval_runtime": 21.7647, |
|
"eval_samples_per_second": 98.738, |
|
"eval_steps_per_second": 3.124, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012539184952978056, |
|
"grad_norm": 0.21951091289520264, |
|
"learning_rate": 2.0200000000000003e-05, |
|
"loss": 1.8897, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.018808777429467086, |
|
"grad_norm": 0.2182653844356537, |
|
"learning_rate": 3.0299999999999998e-05, |
|
"loss": 1.8755, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.025078369905956112, |
|
"grad_norm": 0.3972433805465698, |
|
"learning_rate": 4.0400000000000006e-05, |
|
"loss": 2.0184, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03134796238244514, |
|
"grad_norm": 0.7481153607368469, |
|
"learning_rate": 5.05e-05, |
|
"loss": 2.1242, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03761755485893417, |
|
"grad_norm": 0.6163584589958191, |
|
"learning_rate": 6.0599999999999996e-05, |
|
"loss": 2.254, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0438871473354232, |
|
"grad_norm": 0.17153511941432953, |
|
"learning_rate": 7.07e-05, |
|
"loss": 1.4166, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.050156739811912224, |
|
"grad_norm": 0.16584382951259613, |
|
"learning_rate": 8.080000000000001e-05, |
|
"loss": 1.7842, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05642633228840126, |
|
"grad_norm": 0.18644414842128754, |
|
"learning_rate": 9.09e-05, |
|
"loss": 1.8403, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06269592476489028, |
|
"grad_norm": 0.21256040036678314, |
|
"learning_rate": 0.000101, |
|
"loss": 1.9229, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06896551724137931, |
|
"grad_norm": 0.452114462852478, |
|
"learning_rate": 0.00010046842105263158, |
|
"loss": 1.9909, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07523510971786834, |
|
"grad_norm": 0.3007873296737671, |
|
"learning_rate": 9.993684210526315e-05, |
|
"loss": 1.995, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08150470219435736, |
|
"grad_norm": 0.22591519355773926, |
|
"learning_rate": 9.940526315789473e-05, |
|
"loss": 1.3466, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0877742946708464, |
|
"grad_norm": 0.22962772846221924, |
|
"learning_rate": 9.887368421052632e-05, |
|
"loss": 1.5942, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.09404388714733543, |
|
"grad_norm": 0.20066531002521515, |
|
"learning_rate": 9.83421052631579e-05, |
|
"loss": 1.759, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.10031347962382445, |
|
"grad_norm": 0.24357616901397705, |
|
"learning_rate": 9.781052631578948e-05, |
|
"loss": 1.7682, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.10658307210031348, |
|
"grad_norm": 0.42487001419067383, |
|
"learning_rate": 9.727894736842106e-05, |
|
"loss": 1.8977, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.11285266457680251, |
|
"grad_norm": 0.5519868731498718, |
|
"learning_rate": 9.674736842105263e-05, |
|
"loss": 1.8652, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.11912225705329153, |
|
"grad_norm": 0.1837894469499588, |
|
"learning_rate": 9.621578947368421e-05, |
|
"loss": 1.2621, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.12539184952978055, |
|
"grad_norm": 0.16041411459445953, |
|
"learning_rate": 9.568421052631578e-05, |
|
"loss": 1.4973, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13166144200626959, |
|
"grad_norm": 0.1466640830039978, |
|
"learning_rate": 9.515263157894737e-05, |
|
"loss": 1.6563, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.13793103448275862, |
|
"grad_norm": 0.15411381423473358, |
|
"learning_rate": 9.462105263157895e-05, |
|
"loss": 1.8432, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.14420062695924765, |
|
"grad_norm": 0.20786531269550323, |
|
"learning_rate": 9.408947368421054e-05, |
|
"loss": 1.866, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.15047021943573669, |
|
"grad_norm": 0.27923616766929626, |
|
"learning_rate": 9.355789473684211e-05, |
|
"loss": 1.8369, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.15673981191222572, |
|
"grad_norm": 0.45824792981147766, |
|
"learning_rate": 9.302631578947369e-05, |
|
"loss": 1.9946, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.16300940438871472, |
|
"grad_norm": 0.11721207201480865, |
|
"learning_rate": 9.249473684210526e-05, |
|
"loss": 1.3217, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.16927899686520376, |
|
"grad_norm": 0.15526717901229858, |
|
"learning_rate": 9.196315789473685e-05, |
|
"loss": 1.7139, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1755485893416928, |
|
"grad_norm": 0.15869063138961792, |
|
"learning_rate": 9.143157894736843e-05, |
|
"loss": 1.7312, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 0.1636732816696167, |
|
"learning_rate": 9.09e-05, |
|
"loss": 1.7714, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.18808777429467086, |
|
"grad_norm": 0.20718851685523987, |
|
"learning_rate": 9.036842105263158e-05, |
|
"loss": 1.828, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.19435736677115986, |
|
"grad_norm": 0.30376043915748596, |
|
"learning_rate": 8.983684210526316e-05, |
|
"loss": 1.8697, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.2006269592476489, |
|
"grad_norm": 0.11045503616333008, |
|
"learning_rate": 8.930526315789474e-05, |
|
"loss": 1.3708, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.20689655172413793, |
|
"grad_norm": 0.14193548262119293, |
|
"learning_rate": 8.877368421052632e-05, |
|
"loss": 1.6054, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.21316614420062696, |
|
"grad_norm": 0.15867547690868378, |
|
"learning_rate": 8.82421052631579e-05, |
|
"loss": 1.6872, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.219435736677116, |
|
"grad_norm": 0.16795365512371063, |
|
"learning_rate": 8.771052631578948e-05, |
|
"loss": 1.8028, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.22570532915360503, |
|
"grad_norm": 0.19065167009830475, |
|
"learning_rate": 8.717894736842105e-05, |
|
"loss": 1.8083, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.23197492163009403, |
|
"grad_norm": 0.23978182673454285, |
|
"learning_rate": 8.664736842105263e-05, |
|
"loss": 1.8856, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.23824451410658307, |
|
"grad_norm": 0.13266095519065857, |
|
"learning_rate": 8.61157894736842e-05, |
|
"loss": 1.239, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2445141065830721, |
|
"grad_norm": 0.1396464705467224, |
|
"learning_rate": 8.55842105263158e-05, |
|
"loss": 1.528, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2507836990595611, |
|
"grad_norm": 0.15594753623008728, |
|
"learning_rate": 8.505263157894737e-05, |
|
"loss": 1.6795, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.25705329153605017, |
|
"grad_norm": 0.16817249357700348, |
|
"learning_rate": 8.452105263157896e-05, |
|
"loss": 1.7164, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.26332288401253917, |
|
"grad_norm": 0.18988773226737976, |
|
"learning_rate": 8.398947368421053e-05, |
|
"loss": 1.7163, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.26959247648902823, |
|
"grad_norm": 0.20697522163391113, |
|
"learning_rate": 8.345789473684211e-05, |
|
"loss": 1.7748, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.27586206896551724, |
|
"grad_norm": 0.14124108850955963, |
|
"learning_rate": 8.292631578947368e-05, |
|
"loss": 1.3559, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.28213166144200624, |
|
"grad_norm": 0.10386940836906433, |
|
"learning_rate": 8.239473684210526e-05, |
|
"loss": 1.4066, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2884012539184953, |
|
"grad_norm": 0.15532410144805908, |
|
"learning_rate": 8.186315789473683e-05, |
|
"loss": 1.6759, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2946708463949843, |
|
"grad_norm": 0.16163372993469238, |
|
"learning_rate": 8.133157894736842e-05, |
|
"loss": 1.6765, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.30094043887147337, |
|
"grad_norm": 0.18687112629413605, |
|
"learning_rate": 8.080000000000001e-05, |
|
"loss": 1.7736, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3072100313479624, |
|
"grad_norm": 0.19891808927059174, |
|
"learning_rate": 8.026842105263159e-05, |
|
"loss": 1.7138, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.31347962382445144, |
|
"grad_norm": 0.546370267868042, |
|
"learning_rate": 7.973684210526316e-05, |
|
"loss": 1.8809, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.31347962382445144, |
|
"eval_loss": 1.6940747499465942, |
|
"eval_runtime": 22.5071, |
|
"eval_samples_per_second": 95.481, |
|
"eval_steps_per_second": 3.021, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.31974921630094044, |
|
"grad_norm": 0.11734400689601898, |
|
"learning_rate": 7.920526315789474e-05, |
|
"loss": 1.4185, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.32601880877742945, |
|
"grad_norm": 0.24825617671012878, |
|
"learning_rate": 7.867368421052631e-05, |
|
"loss": 1.6477, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3322884012539185, |
|
"grad_norm": 0.2786071002483368, |
|
"learning_rate": 7.814210526315789e-05, |
|
"loss": 1.6508, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.3385579937304075, |
|
"grad_norm": 0.22523944079875946, |
|
"learning_rate": 7.761052631578946e-05, |
|
"loss": 1.7142, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 0.20191609859466553, |
|
"learning_rate": 7.707894736842105e-05, |
|
"loss": 1.7316, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.3510971786833856, |
|
"grad_norm": 0.2791290283203125, |
|
"learning_rate": 7.654736842105264e-05, |
|
"loss": 1.754, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.3573667711598746, |
|
"grad_norm": 0.10366496443748474, |
|
"learning_rate": 7.601578947368422e-05, |
|
"loss": 1.2027, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 0.15324437618255615, |
|
"learning_rate": 7.548421052631579e-05, |
|
"loss": 1.6548, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.36990595611285265, |
|
"grad_norm": 0.1608223021030426, |
|
"learning_rate": 7.495263157894737e-05, |
|
"loss": 1.6525, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.3761755485893417, |
|
"grad_norm": 0.20208290219306946, |
|
"learning_rate": 7.442105263157894e-05, |
|
"loss": 1.6865, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3824451410658307, |
|
"grad_norm": 0.23002368211746216, |
|
"learning_rate": 7.388947368421053e-05, |
|
"loss": 1.7319, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.3887147335423197, |
|
"grad_norm": 0.25719794631004333, |
|
"learning_rate": 7.335789473684211e-05, |
|
"loss": 1.7345, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.3949843260188088, |
|
"grad_norm": 0.11874490976333618, |
|
"learning_rate": 7.282631578947368e-05, |
|
"loss": 1.1791, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.4012539184952978, |
|
"grad_norm": 0.13194750249385834, |
|
"learning_rate": 7.229473684210527e-05, |
|
"loss": 1.5006, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.40752351097178685, |
|
"grad_norm": 0.16368307173252106, |
|
"learning_rate": 7.176315789473685e-05, |
|
"loss": 1.653, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.41379310344827586, |
|
"grad_norm": 0.18413026630878448, |
|
"learning_rate": 7.123157894736842e-05, |
|
"loss": 1.6872, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4200626959247649, |
|
"grad_norm": 0.23217302560806274, |
|
"learning_rate": 7.07e-05, |
|
"loss": 1.7738, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.4263322884012539, |
|
"grad_norm": 0.21129940450191498, |
|
"learning_rate": 7.016842105263159e-05, |
|
"loss": 1.6499, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.43260188087774293, |
|
"grad_norm": 0.14536724984645844, |
|
"learning_rate": 6.963684210526316e-05, |
|
"loss": 1.0403, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.438871473354232, |
|
"grad_norm": 0.12869617342948914, |
|
"learning_rate": 6.910526315789474e-05, |
|
"loss": 1.4696, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.445141065830721, |
|
"grad_norm": 0.1599850058555603, |
|
"learning_rate": 6.857368421052631e-05, |
|
"loss": 1.6434, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.45141065830721006, |
|
"grad_norm": 0.17733348906040192, |
|
"learning_rate": 6.80421052631579e-05, |
|
"loss": 1.6627, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.45768025078369906, |
|
"grad_norm": 0.1968277245759964, |
|
"learning_rate": 6.751052631578948e-05, |
|
"loss": 1.641, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.46394984326018807, |
|
"grad_norm": 0.2661347985267639, |
|
"learning_rate": 6.697894736842105e-05, |
|
"loss": 1.6584, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.4702194357366771, |
|
"grad_norm": 0.574530303478241, |
|
"learning_rate": 6.644736842105264e-05, |
|
"loss": 1.8574, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.47648902821316613, |
|
"grad_norm": 0.11632688343524933, |
|
"learning_rate": 6.591578947368422e-05, |
|
"loss": 1.4023, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.4827586206896552, |
|
"grad_norm": 0.2113131582736969, |
|
"learning_rate": 6.538421052631579e-05, |
|
"loss": 1.6341, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.4890282131661442, |
|
"grad_norm": 0.2565387189388275, |
|
"learning_rate": 6.485263157894737e-05, |
|
"loss": 1.6345, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.4952978056426332, |
|
"grad_norm": 0.2454068958759308, |
|
"learning_rate": 6.432105263157894e-05, |
|
"loss": 1.6885, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.5015673981191222, |
|
"grad_norm": 0.25979533791542053, |
|
"learning_rate": 6.378947368421053e-05, |
|
"loss": 1.7208, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5078369905956113, |
|
"grad_norm": 0.30401724576950073, |
|
"learning_rate": 6.32578947368421e-05, |
|
"loss": 1.7026, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.5141065830721003, |
|
"grad_norm": 0.11303433030843735, |
|
"learning_rate": 6.27263157894737e-05, |
|
"loss": 1.1272, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5203761755485894, |
|
"grad_norm": 0.21448008716106415, |
|
"learning_rate": 6.219473684210527e-05, |
|
"loss": 1.6056, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.5266457680250783, |
|
"grad_norm": 0.18290068209171295, |
|
"learning_rate": 6.166315789473685e-05, |
|
"loss": 1.6274, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5329153605015674, |
|
"grad_norm": 0.21106426417827606, |
|
"learning_rate": 6.113157894736842e-05, |
|
"loss": 1.6398, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5391849529780565, |
|
"grad_norm": 0.24094125628471375, |
|
"learning_rate": 6.0599999999999996e-05, |
|
"loss": 1.7705, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 0.2784807085990906, |
|
"learning_rate": 6.006842105263158e-05, |
|
"loss": 1.6553, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5517241379310345, |
|
"grad_norm": 0.135740265250206, |
|
"learning_rate": 5.953684210526315e-05, |
|
"loss": 1.0888, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5579937304075235, |
|
"grad_norm": 0.13163161277770996, |
|
"learning_rate": 5.900526315789474e-05, |
|
"loss": 1.5075, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.5642633228840125, |
|
"grad_norm": 0.1819342076778412, |
|
"learning_rate": 5.847368421052632e-05, |
|
"loss": 1.6406, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5705329153605015, |
|
"grad_norm": 0.20158928632736206, |
|
"learning_rate": 5.79421052631579e-05, |
|
"loss": 1.6991, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5768025078369906, |
|
"grad_norm": 0.218618705868721, |
|
"learning_rate": 5.7410526315789475e-05, |
|
"loss": 1.6872, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5830721003134797, |
|
"grad_norm": 0.2431667596101761, |
|
"learning_rate": 5.687894736842105e-05, |
|
"loss": 1.6567, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5893416927899686, |
|
"grad_norm": 0.15539388358592987, |
|
"learning_rate": 5.6347368421052625e-05, |
|
"loss": 1.1379, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5956112852664577, |
|
"grad_norm": 0.14918771386146545, |
|
"learning_rate": 5.5815789473684214e-05, |
|
"loss": 1.4123, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.6018808777429467, |
|
"grad_norm": 0.19607971608638763, |
|
"learning_rate": 5.5284210526315796e-05, |
|
"loss": 1.6222, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6081504702194357, |
|
"grad_norm": 0.21519030630588531, |
|
"learning_rate": 5.475263157894737e-05, |
|
"loss": 1.6053, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.6144200626959248, |
|
"grad_norm": 0.2148403525352478, |
|
"learning_rate": 5.422105263157895e-05, |
|
"loss": 1.7118, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6206896551724138, |
|
"grad_norm": 0.24617306888103485, |
|
"learning_rate": 5.368947368421053e-05, |
|
"loss": 1.6654, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.6269592476489029, |
|
"grad_norm": 0.5618337392807007, |
|
"learning_rate": 5.3157894736842104e-05, |
|
"loss": 1.7828, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6269592476489029, |
|
"eval_loss": 1.6180227994918823, |
|
"eval_runtime": 21.2838, |
|
"eval_samples_per_second": 100.969, |
|
"eval_steps_per_second": 3.195, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0800404085719695e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|