|
{
  "best_metric": 1.5667701959609985,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 0.9404388714733543,
  "eval_steps": 50,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006269592476489028,
      "grad_norm": 0.1956695169210434,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 1.4797,
      "step": 1
    },
    {
      "epoch": 0.006269592476489028,
      "eval_loss": 2.014873743057251,
      "eval_runtime": 21.7647,
      "eval_samples_per_second": 98.738,
      "eval_steps_per_second": 3.124,
      "step": 1
    },
    {
      "epoch": 0.012539184952978056,
      "grad_norm": 0.21951091289520264,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 1.8897,
      "step": 2
    },
    {
      "epoch": 0.018808777429467086,
      "grad_norm": 0.2182653844356537,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 1.8755,
      "step": 3
    },
    {
      "epoch": 0.025078369905956112,
      "grad_norm": 0.3972433805465698,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 2.0184,
      "step": 4
    },
    {
      "epoch": 0.03134796238244514,
      "grad_norm": 0.7481153607368469,
      "learning_rate": 5.05e-05,
      "loss": 2.1242,
      "step": 5
    },
    {
      "epoch": 0.03761755485893417,
      "grad_norm": 0.6163584589958191,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 2.254,
      "step": 6
    },
    {
      "epoch": 0.0438871473354232,
      "grad_norm": 0.17153511941432953,
      "learning_rate": 7.07e-05,
      "loss": 1.4166,
      "step": 7
    },
    {
      "epoch": 0.050156739811912224,
      "grad_norm": 0.16584382951259613,
      "learning_rate": 8.080000000000001e-05,
      "loss": 1.7842,
      "step": 8
    },
    {
      "epoch": 0.05642633228840126,
      "grad_norm": 0.18644414842128754,
      "learning_rate": 9.09e-05,
      "loss": 1.8403,
      "step": 9
    },
    {
      "epoch": 0.06269592476489028,
      "grad_norm": 0.21256040036678314,
      "learning_rate": 0.000101,
      "loss": 1.9229,
      "step": 10
    },
    {
      "epoch": 0.06896551724137931,
      "grad_norm": 0.452114462852478,
      "learning_rate": 0.00010046842105263158,
      "loss": 1.9909,
      "step": 11
    },
    {
      "epoch": 0.07523510971786834,
      "grad_norm": 0.3007873296737671,
      "learning_rate": 9.993684210526315e-05,
      "loss": 1.995,
      "step": 12
    },
    {
      "epoch": 0.08150470219435736,
      "grad_norm": 0.22591519355773926,
      "learning_rate": 9.940526315789473e-05,
      "loss": 1.3466,
      "step": 13
    },
    {
      "epoch": 0.0877742946708464,
      "grad_norm": 0.22962772846221924,
      "learning_rate": 9.887368421052632e-05,
      "loss": 1.5942,
      "step": 14
    },
    {
      "epoch": 0.09404388714733543,
      "grad_norm": 0.20066531002521515,
      "learning_rate": 9.83421052631579e-05,
      "loss": 1.759,
      "step": 15
    },
    {
      "epoch": 0.10031347962382445,
      "grad_norm": 0.24357616901397705,
      "learning_rate": 9.781052631578948e-05,
      "loss": 1.7682,
      "step": 16
    },
    {
      "epoch": 0.10658307210031348,
      "grad_norm": 0.42487001419067383,
      "learning_rate": 9.727894736842106e-05,
      "loss": 1.8977,
      "step": 17
    },
    {
      "epoch": 0.11285266457680251,
      "grad_norm": 0.5519868731498718,
      "learning_rate": 9.674736842105263e-05,
      "loss": 1.8652,
      "step": 18
    },
    {
      "epoch": 0.11912225705329153,
      "grad_norm": 0.1837894469499588,
      "learning_rate": 9.621578947368421e-05,
      "loss": 1.2621,
      "step": 19
    },
    {
      "epoch": 0.12539184952978055,
      "grad_norm": 0.16041411459445953,
      "learning_rate": 9.568421052631578e-05,
      "loss": 1.4973,
      "step": 20
    },
    {
      "epoch": 0.13166144200626959,
      "grad_norm": 0.1466640830039978,
      "learning_rate": 9.515263157894737e-05,
      "loss": 1.6563,
      "step": 21
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 0.15411381423473358,
      "learning_rate": 9.462105263157895e-05,
      "loss": 1.8432,
      "step": 22
    },
    {
      "epoch": 0.14420062695924765,
      "grad_norm": 0.20786531269550323,
      "learning_rate": 9.408947368421054e-05,
      "loss": 1.866,
      "step": 23
    },
    {
      "epoch": 0.15047021943573669,
      "grad_norm": 0.27923616766929626,
      "learning_rate": 9.355789473684211e-05,
      "loss": 1.8369,
      "step": 24
    },
    {
      "epoch": 0.15673981191222572,
      "grad_norm": 0.45824792981147766,
      "learning_rate": 9.302631578947369e-05,
      "loss": 1.9946,
      "step": 25
    },
    {
      "epoch": 0.16300940438871472,
      "grad_norm": 0.11721207201480865,
      "learning_rate": 9.249473684210526e-05,
      "loss": 1.3217,
      "step": 26
    },
    {
      "epoch": 0.16927899686520376,
      "grad_norm": 0.15526717901229858,
      "learning_rate": 9.196315789473685e-05,
      "loss": 1.7139,
      "step": 27
    },
    {
      "epoch": 0.1755485893416928,
      "grad_norm": 0.15869063138961792,
      "learning_rate": 9.143157894736843e-05,
      "loss": 1.7312,
      "step": 28
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 0.1636732816696167,
      "learning_rate": 9.09e-05,
      "loss": 1.7714,
      "step": 29
    },
    {
      "epoch": 0.18808777429467086,
      "grad_norm": 0.20718851685523987,
      "learning_rate": 9.036842105263158e-05,
      "loss": 1.828,
      "step": 30
    },
    {
      "epoch": 0.19435736677115986,
      "grad_norm": 0.30376043915748596,
      "learning_rate": 8.983684210526316e-05,
      "loss": 1.8697,
      "step": 31
    },
    {
      "epoch": 0.2006269592476489,
      "grad_norm": 0.11045503616333008,
      "learning_rate": 8.930526315789474e-05,
      "loss": 1.3708,
      "step": 32
    },
    {
      "epoch": 0.20689655172413793,
      "grad_norm": 0.14193548262119293,
      "learning_rate": 8.877368421052632e-05,
      "loss": 1.6054,
      "step": 33
    },
    {
      "epoch": 0.21316614420062696,
      "grad_norm": 0.15867547690868378,
      "learning_rate": 8.82421052631579e-05,
      "loss": 1.6872,
      "step": 34
    },
    {
      "epoch": 0.219435736677116,
      "grad_norm": 0.16795365512371063,
      "learning_rate": 8.771052631578948e-05,
      "loss": 1.8028,
      "step": 35
    },
    {
      "epoch": 0.22570532915360503,
      "grad_norm": 0.19065167009830475,
      "learning_rate": 8.717894736842105e-05,
      "loss": 1.8083,
      "step": 36
    },
    {
      "epoch": 0.23197492163009403,
      "grad_norm": 0.23978182673454285,
      "learning_rate": 8.664736842105263e-05,
      "loss": 1.8856,
      "step": 37
    },
    {
      "epoch": 0.23824451410658307,
      "grad_norm": 0.13266095519065857,
      "learning_rate": 8.61157894736842e-05,
      "loss": 1.239,
      "step": 38
    },
    {
      "epoch": 0.2445141065830721,
      "grad_norm": 0.1396464705467224,
      "learning_rate": 8.55842105263158e-05,
      "loss": 1.528,
      "step": 39
    },
    {
      "epoch": 0.2507836990595611,
      "grad_norm": 0.15594753623008728,
      "learning_rate": 8.505263157894737e-05,
      "loss": 1.6795,
      "step": 40
    },
    {
      "epoch": 0.25705329153605017,
      "grad_norm": 0.16817249357700348,
      "learning_rate": 8.452105263157896e-05,
      "loss": 1.7164,
      "step": 41
    },
    {
      "epoch": 0.26332288401253917,
      "grad_norm": 0.18988773226737976,
      "learning_rate": 8.398947368421053e-05,
      "loss": 1.7163,
      "step": 42
    },
    {
      "epoch": 0.26959247648902823,
      "grad_norm": 0.20697522163391113,
      "learning_rate": 8.345789473684211e-05,
      "loss": 1.7748,
      "step": 43
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 0.14124108850955963,
      "learning_rate": 8.292631578947368e-05,
      "loss": 1.3559,
      "step": 44
    },
    {
      "epoch": 0.28213166144200624,
      "grad_norm": 0.10386940836906433,
      "learning_rate": 8.239473684210526e-05,
      "loss": 1.4066,
      "step": 45
    },
    {
      "epoch": 0.2884012539184953,
      "grad_norm": 0.15532410144805908,
      "learning_rate": 8.186315789473683e-05,
      "loss": 1.6759,
      "step": 46
    },
    {
      "epoch": 0.2946708463949843,
      "grad_norm": 0.16163372993469238,
      "learning_rate": 8.133157894736842e-05,
      "loss": 1.6765,
      "step": 47
    },
    {
      "epoch": 0.30094043887147337,
      "grad_norm": 0.18687112629413605,
      "learning_rate": 8.080000000000001e-05,
      "loss": 1.7736,
      "step": 48
    },
    {
      "epoch": 0.3072100313479624,
      "grad_norm": 0.19891808927059174,
      "learning_rate": 8.026842105263159e-05,
      "loss": 1.7138,
      "step": 49
    },
    {
      "epoch": 0.31347962382445144,
      "grad_norm": 0.546370267868042,
      "learning_rate": 7.973684210526316e-05,
      "loss": 1.8809,
      "step": 50
    },
    {
      "epoch": 0.31347962382445144,
      "eval_loss": 1.6940747499465942,
      "eval_runtime": 22.5071,
      "eval_samples_per_second": 95.481,
      "eval_steps_per_second": 3.021,
      "step": 50
    },
    {
      "epoch": 0.31974921630094044,
      "grad_norm": 0.11734400689601898,
      "learning_rate": 7.920526315789474e-05,
      "loss": 1.4185,
      "step": 51
    },
    {
      "epoch": 0.32601880877742945,
      "grad_norm": 0.24825617671012878,
      "learning_rate": 7.867368421052631e-05,
      "loss": 1.6477,
      "step": 52
    },
    {
      "epoch": 0.3322884012539185,
      "grad_norm": 0.2786071002483368,
      "learning_rate": 7.814210526315789e-05,
      "loss": 1.6508,
      "step": 53
    },
    {
      "epoch": 0.3385579937304075,
      "grad_norm": 0.22523944079875946,
      "learning_rate": 7.761052631578946e-05,
      "loss": 1.7142,
      "step": 54
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 0.20191609859466553,
      "learning_rate": 7.707894736842105e-05,
      "loss": 1.7316,
      "step": 55
    },
    {
      "epoch": 0.3510971786833856,
      "grad_norm": 0.2791290283203125,
      "learning_rate": 7.654736842105264e-05,
      "loss": 1.754,
      "step": 56
    },
    {
      "epoch": 0.3573667711598746,
      "grad_norm": 0.10366496443748474,
      "learning_rate": 7.601578947368422e-05,
      "loss": 1.2027,
      "step": 57
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.15324437618255615,
      "learning_rate": 7.548421052631579e-05,
      "loss": 1.6548,
      "step": 58
    },
    {
      "epoch": 0.36990595611285265,
      "grad_norm": 0.1608223021030426,
      "learning_rate": 7.495263157894737e-05,
      "loss": 1.6525,
      "step": 59
    },
    {
      "epoch": 0.3761755485893417,
      "grad_norm": 0.20208290219306946,
      "learning_rate": 7.442105263157894e-05,
      "loss": 1.6865,
      "step": 60
    },
    {
      "epoch": 0.3824451410658307,
      "grad_norm": 0.23002368211746216,
      "learning_rate": 7.388947368421053e-05,
      "loss": 1.7319,
      "step": 61
    },
    {
      "epoch": 0.3887147335423197,
      "grad_norm": 0.25719794631004333,
      "learning_rate": 7.335789473684211e-05,
      "loss": 1.7345,
      "step": 62
    },
    {
      "epoch": 0.3949843260188088,
      "grad_norm": 0.11874490976333618,
      "learning_rate": 7.282631578947368e-05,
      "loss": 1.1791,
      "step": 63
    },
    {
      "epoch": 0.4012539184952978,
      "grad_norm": 0.13194750249385834,
      "learning_rate": 7.229473684210527e-05,
      "loss": 1.5006,
      "step": 64
    },
    {
      "epoch": 0.40752351097178685,
      "grad_norm": 0.16368307173252106,
      "learning_rate": 7.176315789473685e-05,
      "loss": 1.653,
      "step": 65
    },
    {
      "epoch": 0.41379310344827586,
      "grad_norm": 0.18413026630878448,
      "learning_rate": 7.123157894736842e-05,
      "loss": 1.6872,
      "step": 66
    },
    {
      "epoch": 0.4200626959247649,
      "grad_norm": 0.23217302560806274,
      "learning_rate": 7.07e-05,
      "loss": 1.7738,
      "step": 67
    },
    {
      "epoch": 0.4263322884012539,
      "grad_norm": 0.21129940450191498,
      "learning_rate": 7.016842105263159e-05,
      "loss": 1.6499,
      "step": 68
    },
    {
      "epoch": 0.43260188087774293,
      "grad_norm": 0.14536724984645844,
      "learning_rate": 6.963684210526316e-05,
      "loss": 1.0403,
      "step": 69
    },
    {
      "epoch": 0.438871473354232,
      "grad_norm": 0.12869617342948914,
      "learning_rate": 6.910526315789474e-05,
      "loss": 1.4696,
      "step": 70
    },
    {
      "epoch": 0.445141065830721,
      "grad_norm": 0.1599850058555603,
      "learning_rate": 6.857368421052631e-05,
      "loss": 1.6434,
      "step": 71
    },
    {
      "epoch": 0.45141065830721006,
      "grad_norm": 0.17733348906040192,
      "learning_rate": 6.80421052631579e-05,
      "loss": 1.6627,
      "step": 72
    },
    {
      "epoch": 0.45768025078369906,
      "grad_norm": 0.1968277245759964,
      "learning_rate": 6.751052631578948e-05,
      "loss": 1.641,
      "step": 73
    },
    {
      "epoch": 0.46394984326018807,
      "grad_norm": 0.2661347985267639,
      "learning_rate": 6.697894736842105e-05,
      "loss": 1.6584,
      "step": 74
    },
    {
      "epoch": 0.4702194357366771,
      "grad_norm": 0.574530303478241,
      "learning_rate": 6.644736842105264e-05,
      "loss": 1.8574,
      "step": 75
    },
    {
      "epoch": 0.47648902821316613,
      "grad_norm": 0.11632688343524933,
      "learning_rate": 6.591578947368422e-05,
      "loss": 1.4023,
      "step": 76
    },
    {
      "epoch": 0.4827586206896552,
      "grad_norm": 0.2113131582736969,
      "learning_rate": 6.538421052631579e-05,
      "loss": 1.6341,
      "step": 77
    },
    {
      "epoch": 0.4890282131661442,
      "grad_norm": 0.2565387189388275,
      "learning_rate": 6.485263157894737e-05,
      "loss": 1.6345,
      "step": 78
    },
    {
      "epoch": 0.4952978056426332,
      "grad_norm": 0.2454068958759308,
      "learning_rate": 6.432105263157894e-05,
      "loss": 1.6885,
      "step": 79
    },
    {
      "epoch": 0.5015673981191222,
      "grad_norm": 0.25979533791542053,
      "learning_rate": 6.378947368421053e-05,
      "loss": 1.7208,
      "step": 80
    },
    {
      "epoch": 0.5078369905956113,
      "grad_norm": 0.30401724576950073,
      "learning_rate": 6.32578947368421e-05,
      "loss": 1.7026,
      "step": 81
    },
    {
      "epoch": 0.5141065830721003,
      "grad_norm": 0.11303433030843735,
      "learning_rate": 6.27263157894737e-05,
      "loss": 1.1272,
      "step": 82
    },
    {
      "epoch": 0.5203761755485894,
      "grad_norm": 0.21448008716106415,
      "learning_rate": 6.219473684210527e-05,
      "loss": 1.6056,
      "step": 83
    },
    {
      "epoch": 0.5266457680250783,
      "grad_norm": 0.18290068209171295,
      "learning_rate": 6.166315789473685e-05,
      "loss": 1.6274,
      "step": 84
    },
    {
      "epoch": 0.5329153605015674,
      "grad_norm": 0.21106426417827606,
      "learning_rate": 6.113157894736842e-05,
      "loss": 1.6398,
      "step": 85
    },
    {
      "epoch": 0.5391849529780565,
      "grad_norm": 0.24094125628471375,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 1.7705,
      "step": 86
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 0.2784807085990906,
      "learning_rate": 6.006842105263158e-05,
      "loss": 1.6553,
      "step": 87
    },
    {
      "epoch": 0.5517241379310345,
      "grad_norm": 0.135740265250206,
      "learning_rate": 5.953684210526315e-05,
      "loss": 1.0888,
      "step": 88
    },
    {
      "epoch": 0.5579937304075235,
      "grad_norm": 0.13163161277770996,
      "learning_rate": 5.900526315789474e-05,
      "loss": 1.5075,
      "step": 89
    },
    {
      "epoch": 0.5642633228840125,
      "grad_norm": 0.1819342076778412,
      "learning_rate": 5.847368421052632e-05,
      "loss": 1.6406,
      "step": 90
    },
    {
      "epoch": 0.5705329153605015,
      "grad_norm": 0.20158928632736206,
      "learning_rate": 5.79421052631579e-05,
      "loss": 1.6991,
      "step": 91
    },
    {
      "epoch": 0.5768025078369906,
      "grad_norm": 0.218618705868721,
      "learning_rate": 5.7410526315789475e-05,
      "loss": 1.6872,
      "step": 92
    },
    {
      "epoch": 0.5830721003134797,
      "grad_norm": 0.2431667596101761,
      "learning_rate": 5.687894736842105e-05,
      "loss": 1.6567,
      "step": 93
    },
    {
      "epoch": 0.5893416927899686,
      "grad_norm": 0.15539388358592987,
      "learning_rate": 5.6347368421052625e-05,
      "loss": 1.1379,
      "step": 94
    },
    {
      "epoch": 0.5956112852664577,
      "grad_norm": 0.14918771386146545,
      "learning_rate": 5.5815789473684214e-05,
      "loss": 1.4123,
      "step": 95
    },
    {
      "epoch": 0.6018808777429467,
      "grad_norm": 0.19607971608638763,
      "learning_rate": 5.5284210526315796e-05,
      "loss": 1.6222,
      "step": 96
    },
    {
      "epoch": 0.6081504702194357,
      "grad_norm": 0.21519030630588531,
      "learning_rate": 5.475263157894737e-05,
      "loss": 1.6053,
      "step": 97
    },
    {
      "epoch": 0.6144200626959248,
      "grad_norm": 0.2148403525352478,
      "learning_rate": 5.422105263157895e-05,
      "loss": 1.7118,
      "step": 98
    },
    {
      "epoch": 0.6206896551724138,
      "grad_norm": 0.24617306888103485,
      "learning_rate": 5.368947368421053e-05,
      "loss": 1.6654,
      "step": 99
    },
    {
      "epoch": 0.6269592476489029,
      "grad_norm": 0.5618337392807007,
      "learning_rate": 5.3157894736842104e-05,
      "loss": 1.7828,
      "step": 100
    },
    {
      "epoch": 0.6269592476489029,
      "eval_loss": 1.6180227994918823,
      "eval_runtime": 21.2838,
      "eval_samples_per_second": 100.969,
      "eval_steps_per_second": 3.195,
      "step": 100
    },
    {
      "epoch": 0.6332288401253918,
      "grad_norm": 0.12550345063209534,
      "learning_rate": 5.262631578947368e-05,
      "loss": 1.1544,
      "step": 101
    },
    {
      "epoch": 0.6394984326018809,
      "grad_norm": 0.20995737612247467,
      "learning_rate": 5.209473684210527e-05,
      "loss": 1.5736,
      "step": 102
    },
    {
      "epoch": 0.64576802507837,
      "grad_norm": 0.25661516189575195,
      "learning_rate": 5.1563157894736844e-05,
      "loss": 1.6052,
      "step": 103
    },
    {
      "epoch": 0.6520376175548589,
      "grad_norm": 0.2354477196931839,
      "learning_rate": 5.1031578947368426e-05,
      "loss": 1.6452,
      "step": 104
    },
    {
      "epoch": 0.658307210031348,
      "grad_norm": 0.2458198219537735,
      "learning_rate": 5.05e-05,
      "loss": 1.7015,
      "step": 105
    },
    {
      "epoch": 0.664576802507837,
      "grad_norm": 0.33329248428344727,
      "learning_rate": 4.9968421052631576e-05,
      "loss": 1.5996,
      "step": 106
    },
    {
      "epoch": 0.670846394984326,
      "grad_norm": 0.1337110549211502,
      "learning_rate": 4.943684210526316e-05,
      "loss": 1.3261,
      "step": 107
    },
    {
      "epoch": 0.677115987460815,
      "grad_norm": 0.17378275096416473,
      "learning_rate": 4.890526315789474e-05,
      "loss": 1.5064,
      "step": 108
    },
    {
      "epoch": 0.6833855799373041,
      "grad_norm": 0.20934435725212097,
      "learning_rate": 4.8373684210526316e-05,
      "loss": 1.6143,
      "step": 109
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.24600297212600708,
      "learning_rate": 4.784210526315789e-05,
      "loss": 1.6473,
      "step": 110
    },
    {
      "epoch": 0.6959247648902821,
      "grad_norm": 0.2352122813463211,
      "learning_rate": 4.731052631578947e-05,
      "loss": 1.59,
      "step": 111
    },
    {
      "epoch": 0.7021943573667712,
      "grad_norm": 0.3226903975009918,
      "learning_rate": 4.6778947368421055e-05,
      "loss": 1.5846,
      "step": 112
    },
    {
      "epoch": 0.7084639498432602,
      "grad_norm": 0.14687702059745789,
      "learning_rate": 4.624736842105263e-05,
      "loss": 1.1129,
      "step": 113
    },
    {
      "epoch": 0.7147335423197492,
      "grad_norm": 0.15256668627262115,
      "learning_rate": 4.571578947368421e-05,
      "loss": 1.4691,
      "step": 114
    },
    {
      "epoch": 0.7210031347962382,
      "grad_norm": 0.20408159494400024,
      "learning_rate": 4.518421052631579e-05,
      "loss": 1.6611,
      "step": 115
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 0.22884103655815125,
      "learning_rate": 4.465263157894737e-05,
      "loss": 1.6256,
      "step": 116
    },
    {
      "epoch": 0.7335423197492164,
      "grad_norm": 0.23959754407405853,
      "learning_rate": 4.412105263157895e-05,
      "loss": 1.6332,
      "step": 117
    },
    {
      "epoch": 0.7398119122257053,
      "grad_norm": 0.2993139624595642,
      "learning_rate": 4.358947368421053e-05,
      "loss": 1.6246,
      "step": 118
    },
    {
      "epoch": 0.7460815047021944,
      "grad_norm": 0.19114616513252258,
      "learning_rate": 4.30578947368421e-05,
      "loss": 1.0553,
      "step": 119
    },
    {
      "epoch": 0.7523510971786834,
      "grad_norm": 0.1459835320711136,
      "learning_rate": 4.2526315789473685e-05,
      "loss": 1.432,
      "step": 120
    },
    {
      "epoch": 0.7586206896551724,
      "grad_norm": 0.19810743629932404,
      "learning_rate": 4.199473684210527e-05,
      "loss": 1.5597,
      "step": 121
    },
    {
      "epoch": 0.7648902821316614,
      "grad_norm": 0.22155524790287018,
      "learning_rate": 4.146315789473684e-05,
      "loss": 1.5973,
      "step": 122
    },
    {
      "epoch": 0.7711598746081505,
      "grad_norm": 0.252210795879364,
      "learning_rate": 4.093157894736842e-05,
      "loss": 1.7093,
      "step": 123
    },
    {
      "epoch": 0.7774294670846394,
      "grad_norm": 0.2699624300003052,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 1.6218,
      "step": 124
    },
    {
      "epoch": 0.7836990595611285,
      "grad_norm": 0.5296167731285095,
      "learning_rate": 3.986842105263158e-05,
      "loss": 1.6447,
      "step": 125
    },
    {
      "epoch": 0.7899686520376176,
      "grad_norm": 0.1305689811706543,
      "learning_rate": 3.933684210526316e-05,
      "loss": 1.2415,
      "step": 126
    },
    {
      "epoch": 0.7962382445141066,
      "grad_norm": 0.19951651990413666,
      "learning_rate": 3.880526315789473e-05,
      "loss": 1.5846,
      "step": 127
    },
    {
      "epoch": 0.8025078369905956,
      "grad_norm": 0.24844390153884888,
      "learning_rate": 3.827368421052632e-05,
      "loss": 1.5013,
      "step": 128
    },
    {
      "epoch": 0.8087774294670846,
      "grad_norm": 0.26770254969596863,
      "learning_rate": 3.7742105263157896e-05,
      "loss": 1.6404,
      "step": 129
    },
    {
      "epoch": 0.8150470219435737,
      "grad_norm": 0.268388956785202,
      "learning_rate": 3.721052631578947e-05,
      "loss": 1.6831,
      "step": 130
    },
    {
      "epoch": 0.8213166144200627,
      "grad_norm": 0.3877102732658386,
      "learning_rate": 3.6678947368421054e-05,
      "loss": 1.5713,
      "step": 131
    },
    {
      "epoch": 0.8275862068965517,
      "grad_norm": 0.14182406663894653,
      "learning_rate": 3.6147368421052636e-05,
      "loss": 1.1147,
      "step": 132
    },
    {
      "epoch": 0.8338557993730408,
      "grad_norm": 0.1862500160932541,
      "learning_rate": 3.561578947368421e-05,
      "loss": 1.4998,
      "step": 133
    },
    {
      "epoch": 0.8401253918495298,
      "grad_norm": 0.22701646387577057,
      "learning_rate": 3.508421052631579e-05,
      "loss": 1.5137,
      "step": 134
    },
    {
      "epoch": 0.8463949843260188,
      "grad_norm": 0.2560668885707855,
      "learning_rate": 3.455263157894737e-05,
      "loss": 1.6129,
      "step": 135
    },
    {
      "epoch": 0.8526645768025078,
      "grad_norm": 0.2551365792751312,
      "learning_rate": 3.402105263157895e-05,
      "loss": 1.6454,
      "step": 136
    },
    {
      "epoch": 0.8589341692789969,
      "grad_norm": 0.331463485956192,
      "learning_rate": 3.3489473684210526e-05,
      "loss": 1.5863,
      "step": 137
    },
    {
      "epoch": 0.8652037617554859,
      "grad_norm": 0.17028988897800446,
      "learning_rate": 3.295789473684211e-05,
      "loss": 1.1377,
      "step": 138
    },
    {
      "epoch": 0.8714733542319749,
      "grad_norm": 0.1787910908460617,
      "learning_rate": 3.242631578947368e-05,
      "loss": 1.4605,
      "step": 139
    },
    {
      "epoch": 0.877742946708464,
      "grad_norm": 0.21535782516002655,
      "learning_rate": 3.1894736842105265e-05,
      "loss": 1.5651,
      "step": 140
    },
    {
      "epoch": 0.8840125391849529,
      "grad_norm": 0.2516005337238312,
      "learning_rate": 3.136315789473685e-05,
      "loss": 1.6457,
      "step": 141
    },
    {
      "epoch": 0.890282131661442,
      "grad_norm": 0.26242345571517944,
      "learning_rate": 3.083157894736842e-05,
      "loss": 1.6603,
      "step": 142
    },
    {
      "epoch": 0.896551724137931,
      "grad_norm": 0.31754815578460693,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 1.6406,
      "step": 143
    },
    {
      "epoch": 0.9028213166144201,
      "grad_norm": 0.19499145448207855,
      "learning_rate": 2.9768421052631577e-05,
      "loss": 1.0787,
      "step": 144
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.16461507976055145,
      "learning_rate": 2.923684210526316e-05,
      "loss": 1.3539,
      "step": 145
    },
    {
      "epoch": 0.9153605015673981,
      "grad_norm": 0.231450155377388,
      "learning_rate": 2.8705263157894737e-05,
      "loss": 1.6046,
      "step": 146
    },
    {
      "epoch": 0.9216300940438872,
      "grad_norm": 0.25709661841392517,
      "learning_rate": 2.8173684210526313e-05,
      "loss": 1.5514,
      "step": 147
    },
    {
      "epoch": 0.9278996865203761,
      "grad_norm": 0.26337385177612305,
      "learning_rate": 2.7642105263157898e-05,
      "loss": 1.6737,
      "step": 148
    },
    {
      "epoch": 0.9341692789968652,
      "grad_norm": 0.29333168268203735,
      "learning_rate": 2.7110526315789473e-05,
      "loss": 1.5403,
      "step": 149
    },
    {
      "epoch": 0.9404388714733543,
      "grad_norm": 0.6478084325790405,
      "learning_rate": 2.6578947368421052e-05,
      "loss": 1.7032,
      "step": 150
    },
    {
      "epoch": 0.9404388714733543,
      "eval_loss": 1.5667701959609985,
      "eval_runtime": 22.4342,
      "eval_samples_per_second": 95.791,
      "eval_steps_per_second": 3.031,
      "step": 150
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6208921818772275e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|