{
  "best_metric": 10.709113121032715,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 0.055699962866691426,
  "eval_steps": 50,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003713330857779428,
      "grad_norm": 0.4906452000141144,
      "learning_rate": 1.007e-05,
      "loss": 10.835,
      "step": 1
    },
    {
      "epoch": 0.0003713330857779428,
      "eval_loss": 10.832501411437988,
      "eval_runtime": 8.608,
      "eval_samples_per_second": 131.738,
      "eval_steps_per_second": 32.993,
      "step": 1
    },
    {
      "epoch": 0.0007426661715558856,
      "grad_norm": 0.4328306317329407,
      "learning_rate": 2.014e-05,
      "loss": 10.8341,
      "step": 2
    },
    {
      "epoch": 0.0011139992573338284,
      "grad_norm": 0.37087035179138184,
      "learning_rate": 3.0209999999999997e-05,
      "loss": 10.8293,
      "step": 3
    },
    {
      "epoch": 0.0014853323431117712,
      "grad_norm": 0.39447692036628723,
      "learning_rate": 4.028e-05,
      "loss": 10.8318,
      "step": 4
    },
    {
      "epoch": 0.001856665428889714,
      "grad_norm": 0.4673115611076355,
      "learning_rate": 5.035e-05,
      "loss": 10.8282,
      "step": 5
    },
    {
      "epoch": 0.0022279985146676567,
      "grad_norm": 0.4667337238788605,
      "learning_rate": 6.0419999999999994e-05,
      "loss": 10.8325,
      "step": 6
    },
    {
      "epoch": 0.0025993316004456,
      "grad_norm": 0.37647679448127747,
      "learning_rate": 7.049e-05,
      "loss": 10.8326,
      "step": 7
    },
    {
      "epoch": 0.0029706646862235424,
      "grad_norm": 0.4516909420490265,
      "learning_rate": 8.056e-05,
      "loss": 10.8271,
      "step": 8
    },
    {
      "epoch": 0.0033419977720014855,
      "grad_norm": 0.5389593839645386,
      "learning_rate": 9.062999999999999e-05,
      "loss": 10.8291,
      "step": 9
    },
    {
      "epoch": 0.003713330857779428,
      "grad_norm": 0.40566104650497437,
      "learning_rate": 0.0001007,
      "loss": 10.8375,
      "step": 10
    },
    {
      "epoch": 0.004084663943557371,
      "grad_norm": 0.48166805505752563,
      "learning_rate": 0.00010017,
      "loss": 10.8247,
      "step": 11
    },
    {
      "epoch": 0.004455997029335313,
      "grad_norm": 0.4881480038166046,
      "learning_rate": 9.963999999999999e-05,
      "loss": 10.8119,
      "step": 12
    },
    {
      "epoch": 0.004827330115113257,
      "grad_norm": 0.4888553321361542,
      "learning_rate": 9.910999999999999e-05,
      "loss": 10.8045,
      "step": 13
    },
    {
      "epoch": 0.0051986632008912,
      "grad_norm": 0.5155038237571716,
      "learning_rate": 9.858e-05,
      "loss": 10.8187,
      "step": 14
    },
    {
      "epoch": 0.005569996286669142,
      "grad_norm": 0.5028515458106995,
      "learning_rate": 9.805e-05,
      "loss": 10.8058,
      "step": 15
    },
    {
      "epoch": 0.005941329372447085,
      "grad_norm": 0.5659723877906799,
      "learning_rate": 9.752e-05,
      "loss": 10.8044,
      "step": 16
    },
    {
      "epoch": 0.0063126624582250275,
      "grad_norm": 0.552946150302887,
      "learning_rate": 9.698999999999999e-05,
      "loss": 10.8163,
      "step": 17
    },
    {
      "epoch": 0.006683995544002971,
      "grad_norm": 0.575535774230957,
      "learning_rate": 9.646e-05,
      "loss": 10.7946,
      "step": 18
    },
    {
      "epoch": 0.007055328629780914,
      "grad_norm": 0.5085407495498657,
      "learning_rate": 9.593e-05,
      "loss": 10.7978,
      "step": 19
    },
    {
      "epoch": 0.007426661715558856,
      "grad_norm": 0.48418793082237244,
      "learning_rate": 9.539999999999999e-05,
      "loss": 10.8028,
      "step": 20
    },
    {
      "epoch": 0.007797994801336799,
      "grad_norm": 0.5124921202659607,
      "learning_rate": 9.487e-05,
      "loss": 10.8111,
      "step": 21
    },
    {
      "epoch": 0.008169327887114742,
      "grad_norm": 0.629709780216217,
      "learning_rate": 9.434e-05,
      "loss": 10.7865,
      "step": 22
    },
    {
      "epoch": 0.008540660972892685,
      "grad_norm": 0.5611943006515503,
      "learning_rate": 9.381e-05,
      "loss": 10.7937,
      "step": 23
    },
    {
      "epoch": 0.008911994058670627,
      "grad_norm": 0.5063443183898926,
      "learning_rate": 9.327999999999999e-05,
      "loss": 10.7651,
      "step": 24
    },
    {
      "epoch": 0.00928332714444857,
      "grad_norm": 0.6020315885543823,
      "learning_rate": 9.274999999999999e-05,
      "loss": 10.7925,
      "step": 25
    },
    {
      "epoch": 0.009654660230226514,
      "grad_norm": 0.5812113285064697,
      "learning_rate": 9.222e-05,
      "loss": 10.7925,
      "step": 26
    },
    {
      "epoch": 0.010025993316004456,
      "grad_norm": 0.561542272567749,
      "learning_rate": 9.169e-05,
      "loss": 10.761,
      "step": 27
    },
    {
      "epoch": 0.0103973264017824,
      "grad_norm": 0.5838537812232971,
      "learning_rate": 9.116e-05,
      "loss": 10.753,
      "step": 28
    },
    {
      "epoch": 0.010768659487560341,
      "grad_norm": 0.5075663328170776,
      "learning_rate": 9.062999999999999e-05,
      "loss": 10.777,
      "step": 29
    },
    {
      "epoch": 0.011139992573338284,
      "grad_norm": 0.5487096309661865,
      "learning_rate": 9.01e-05,
      "loss": 10.7593,
      "step": 30
    },
    {
      "epoch": 0.011511325659116228,
      "grad_norm": 0.4488137662410736,
      "learning_rate": 8.957e-05,
      "loss": 10.7719,
      "step": 31
    },
    {
      "epoch": 0.01188265874489417,
      "grad_norm": 0.5524377226829529,
      "learning_rate": 8.903999999999999e-05,
      "loss": 10.7879,
      "step": 32
    },
    {
      "epoch": 0.012253991830672113,
      "grad_norm": 0.5395157933235168,
      "learning_rate": 8.850999999999999e-05,
      "loss": 10.775,
      "step": 33
    },
    {
      "epoch": 0.012625324916450055,
      "grad_norm": 0.5032263994216919,
      "learning_rate": 8.798e-05,
      "loss": 10.7386,
      "step": 34
    },
    {
      "epoch": 0.012996658002227999,
      "grad_norm": 0.5099305510520935,
      "learning_rate": 8.745e-05,
      "loss": 10.7548,
      "step": 35
    },
    {
      "epoch": 0.013367991088005942,
      "grad_norm": 0.5281259417533875,
      "learning_rate": 8.692e-05,
      "loss": 10.7409,
      "step": 36
    },
    {
      "epoch": 0.013739324173783884,
      "grad_norm": 0.5443010330200195,
      "learning_rate": 8.638999999999999e-05,
      "loss": 10.7454,
      "step": 37
    },
    {
      "epoch": 0.014110657259561827,
      "grad_norm": 0.5503028631210327,
      "learning_rate": 8.586e-05,
      "loss": 10.7472,
      "step": 38
    },
    {
      "epoch": 0.014481990345339769,
      "grad_norm": 0.5560601949691772,
      "learning_rate": 8.533e-05,
      "loss": 10.7289,
      "step": 39
    },
    {
      "epoch": 0.014853323431117713,
      "grad_norm": 0.5327764749526978,
      "learning_rate": 8.479999999999999e-05,
      "loss": 10.7278,
      "step": 40
    },
    {
      "epoch": 0.015224656516895656,
      "grad_norm": 0.49340856075286865,
      "learning_rate": 8.427e-05,
      "loss": 10.7534,
      "step": 41
    },
    {
      "epoch": 0.015595989602673598,
      "grad_norm": 0.5683452486991882,
      "learning_rate": 8.374e-05,
      "loss": 10.7333,
      "step": 42
    },
    {
      "epoch": 0.01596732268845154,
      "grad_norm": 0.5234541893005371,
      "learning_rate": 8.321e-05,
      "loss": 10.7106,
      "step": 43
    },
    {
      "epoch": 0.016338655774229483,
      "grad_norm": 0.5029370188713074,
      "learning_rate": 8.268e-05,
      "loss": 10.7453,
      "step": 44
    },
    {
      "epoch": 0.01670998886000743,
      "grad_norm": 0.5557736158370972,
      "learning_rate": 8.214999999999999e-05,
      "loss": 10.7304,
      "step": 45
    },
    {
      "epoch": 0.01708132194578537,
      "grad_norm": 0.5473191142082214,
      "learning_rate": 8.162e-05,
      "loss": 10.7416,
      "step": 46
    },
    {
      "epoch": 0.017452655031563312,
      "grad_norm": 0.6747854948043823,
      "learning_rate": 8.108999999999998e-05,
      "loss": 10.7589,
      "step": 47
    },
    {
      "epoch": 0.017823988117341254,
      "grad_norm": 0.5004291534423828,
      "learning_rate": 8.056e-05,
      "loss": 10.7202,
      "step": 48
    },
    {
      "epoch": 0.0181953212031192,
      "grad_norm": 0.524412214756012,
      "learning_rate": 8.003e-05,
      "loss": 10.7175,
      "step": 49
    },
    {
      "epoch": 0.01856665428889714,
      "grad_norm": 0.5781667828559875,
      "learning_rate": 7.95e-05,
      "loss": 10.7257,
      "step": 50
    },
    {
      "epoch": 0.01856665428889714,
      "eval_loss": 10.735980033874512,
      "eval_runtime": 8.5155,
      "eval_samples_per_second": 133.168,
      "eval_steps_per_second": 33.351,
      "step": 50
    },
    {
      "epoch": 0.018937987374675083,
      "grad_norm": 0.4127328395843506,
      "learning_rate": 7.897e-05,
      "loss": 10.7444,
      "step": 51
    },
    {
      "epoch": 0.019309320460453028,
      "grad_norm": 0.3050692081451416,
      "learning_rate": 7.843999999999999e-05,
      "loss": 10.7566,
      "step": 52
    },
    {
      "epoch": 0.01968065354623097,
      "grad_norm": 0.3271315395832062,
      "learning_rate": 7.790999999999999e-05,
      "loss": 10.7442,
      "step": 53
    },
    {
      "epoch": 0.02005198663200891,
      "grad_norm": 0.32859012484550476,
      "learning_rate": 7.738e-05,
      "loss": 10.7451,
      "step": 54
    },
    {
      "epoch": 0.020423319717786857,
      "grad_norm": 0.3305034637451172,
      "learning_rate": 7.685e-05,
      "loss": 10.7568,
      "step": 55
    },
    {
      "epoch": 0.0207946528035648,
      "grad_norm": 0.3300938308238983,
      "learning_rate": 7.632e-05,
      "loss": 10.7359,
      "step": 56
    },
    {
      "epoch": 0.02116598588934274,
      "grad_norm": 0.3706151247024536,
      "learning_rate": 7.578999999999999e-05,
      "loss": 10.7507,
      "step": 57
    },
    {
      "epoch": 0.021537318975120682,
      "grad_norm": 0.3721340000629425,
      "learning_rate": 7.526e-05,
      "loss": 10.7443,
      "step": 58
    },
    {
      "epoch": 0.021908652060898627,
      "grad_norm": 0.30731654167175293,
      "learning_rate": 7.473e-05,
      "loss": 10.7477,
      "step": 59
    },
    {
      "epoch": 0.02227998514667657,
      "grad_norm": 0.3816874623298645,
      "learning_rate": 7.419999999999999e-05,
      "loss": 10.7548,
      "step": 60
    },
    {
      "epoch": 0.02265131823245451,
      "grad_norm": 0.3089187443256378,
      "learning_rate": 7.367e-05,
      "loss": 10.7363,
      "step": 61
    },
    {
      "epoch": 0.023022651318232456,
      "grad_norm": 0.3170040547847748,
      "learning_rate": 7.314e-05,
      "loss": 10.7385,
      "step": 62
    },
    {
      "epoch": 0.023393984404010398,
      "grad_norm": 0.3177466094493866,
      "learning_rate": 7.261e-05,
      "loss": 10.7296,
      "step": 63
    },
    {
      "epoch": 0.02376531748978834,
      "grad_norm": 0.32649269700050354,
      "learning_rate": 7.208e-05,
      "loss": 10.7279,
      "step": 64
    },
    {
      "epoch": 0.024136650575566285,
      "grad_norm": 0.31671473383903503,
      "learning_rate": 7.154999999999999e-05,
      "loss": 10.7275,
      "step": 65
    },
    {
      "epoch": 0.024507983661344226,
      "grad_norm": 0.3684324622154236,
      "learning_rate": 7.102e-05,
      "loss": 10.7562,
      "step": 66
    },
    {
      "epoch": 0.024879316747122168,
      "grad_norm": 0.35255691409111023,
      "learning_rate": 7.049e-05,
      "loss": 10.7501,
      "step": 67
    },
    {
      "epoch": 0.02525064983290011,
      "grad_norm": 0.3559291362762451,
      "learning_rate": 6.996e-05,
      "loss": 10.7445,
      "step": 68
    },
    {
      "epoch": 0.025621982918678055,
      "grad_norm": 0.29838132858276367,
      "learning_rate": 6.943e-05,
      "loss": 10.7373,
      "step": 69
    },
    {
      "epoch": 0.025993316004455997,
      "grad_norm": 0.32272014021873474,
      "learning_rate": 6.89e-05,
      "loss": 10.7402,
      "step": 70
    },
    {
      "epoch": 0.02636464909023394,
      "grad_norm": 0.29716819524765015,
      "learning_rate": 6.837e-05,
      "loss": 10.7499,
      "step": 71
    },
    {
      "epoch": 0.026735982176011884,
      "grad_norm": 0.2992607057094574,
      "learning_rate": 6.784e-05,
      "loss": 10.7316,
      "step": 72
    },
    {
      "epoch": 0.027107315261789826,
      "grad_norm": 0.3100895285606384,
      "learning_rate": 6.730999999999999e-05,
      "loss": 10.7266,
      "step": 73
    },
    {
      "epoch": 0.027478648347567768,
      "grad_norm": 0.3643105924129486,
      "learning_rate": 6.678e-05,
      "loss": 10.7325,
      "step": 74
    },
    {
      "epoch": 0.027849981433345713,
      "grad_norm": 0.3808096945285797,
      "learning_rate": 6.625e-05,
      "loss": 10.715,
      "step": 75
    },
    {
      "epoch": 0.028221314519123655,
      "grad_norm": 0.3222029209136963,
      "learning_rate": 6.572e-05,
      "loss": 10.7365,
      "step": 76
    },
    {
      "epoch": 0.028592647604901596,
      "grad_norm": 0.32192742824554443,
      "learning_rate": 6.519e-05,
      "loss": 10.7343,
      "step": 77
    },
    {
      "epoch": 0.028963980690679538,
      "grad_norm": 0.40252402424812317,
      "learning_rate": 6.466e-05,
      "loss": 10.7201,
      "step": 78
    },
    {
      "epoch": 0.029335313776457483,
      "grad_norm": 0.3803711533546448,
      "learning_rate": 6.413e-05,
      "loss": 10.7316,
      "step": 79
    },
    {
      "epoch": 0.029706646862235425,
      "grad_norm": 0.3504716455936432,
      "learning_rate": 6.359999999999999e-05,
      "loss": 10.7356,
      "step": 80
    },
    {
      "epoch": 0.030077979948013367,
      "grad_norm": 0.35962575674057007,
      "learning_rate": 6.306999999999999e-05,
      "loss": 10.7201,
      "step": 81
    },
    {
      "epoch": 0.030449313033791312,
      "grad_norm": 0.36944904923439026,
      "learning_rate": 6.254000000000001e-05,
      "loss": 10.7067,
      "step": 82
    },
    {
      "epoch": 0.030820646119569254,
      "grad_norm": 0.38394877314567566,
      "learning_rate": 6.201e-05,
      "loss": 10.7409,
      "step": 83
    },
    {
      "epoch": 0.031191979205347196,
      "grad_norm": 0.41426077485084534,
      "learning_rate": 6.148e-05,
      "loss": 10.748,
      "step": 84
    },
    {
      "epoch": 0.03156331229112514,
      "grad_norm": 0.3880663812160492,
      "learning_rate": 6.095e-05,
      "loss": 10.7121,
      "step": 85
    },
    {
      "epoch": 0.03193464537690308,
      "grad_norm": 0.4019233286380768,
      "learning_rate": 6.0419999999999994e-05,
      "loss": 10.7155,
      "step": 86
    },
    {
      "epoch": 0.032305978462681025,
      "grad_norm": 0.4152543544769287,
      "learning_rate": 5.988999999999999e-05,
      "loss": 10.7029,
      "step": 87
    },
    {
      "epoch": 0.032677311548458966,
      "grad_norm": 0.47186049818992615,
      "learning_rate": 5.9359999999999994e-05,
      "loss": 10.7208,
      "step": 88
    },
    {
      "epoch": 0.03304864463423691,
      "grad_norm": 0.4307488203048706,
      "learning_rate": 5.8830000000000004e-05,
      "loss": 10.701,
      "step": 89
    },
    {
      "epoch": 0.03341997772001486,
      "grad_norm": 0.47585317492485046,
      "learning_rate": 5.83e-05,
      "loss": 10.7097,
      "step": 90
    },
    {
      "epoch": 0.0337913108057928,
      "grad_norm": 0.4662454426288605,
      "learning_rate": 5.777e-05,
      "loss": 10.6999,
      "step": 91
    },
    {
      "epoch": 0.03416264389157074,
      "grad_norm": 0.45331087708473206,
      "learning_rate": 5.7239999999999994e-05,
      "loss": 10.6841,
      "step": 92
    },
    {
      "epoch": 0.03453397697734868,
      "grad_norm": 0.5249789357185364,
      "learning_rate": 5.671e-05,
      "loss": 10.7172,
      "step": 93
    },
    {
      "epoch": 0.034905310063126624,
      "grad_norm": 0.5332587361335754,
      "learning_rate": 5.6179999999999994e-05,
      "loss": 10.7276,
      "step": 94
    },
    {
      "epoch": 0.035276643148904566,
      "grad_norm": 0.4843442440032959,
      "learning_rate": 5.5650000000000004e-05,
      "loss": 10.7224,
      "step": 95
    },
    {
      "epoch": 0.03564797623468251,
      "grad_norm": 0.5148628950119019,
      "learning_rate": 5.512e-05,
      "loss": 10.6775,
      "step": 96
    },
    {
      "epoch": 0.036019309320460456,
      "grad_norm": 0.5223995447158813,
      "learning_rate": 5.459e-05,
      "loss": 10.7094,
      "step": 97
    },
    {
      "epoch": 0.0363906424062384,
      "grad_norm": 0.5281713604927063,
      "learning_rate": 5.406e-05,
      "loss": 10.6981,
      "step": 98
    },
    {
      "epoch": 0.03676197549201634,
      "grad_norm": 0.6166062951087952,
      "learning_rate": 5.353e-05,
      "loss": 10.7082,
      "step": 99
    },
    {
      "epoch": 0.03713330857779428,
      "grad_norm": 0.7465858459472656,
      "learning_rate": 5.2999999999999994e-05,
      "loss": 10.6922,
      "step": 100
    },
    {
      "epoch": 0.03713330857779428,
      "eval_loss": 10.71716022491455,
      "eval_runtime": 8.5368,
      "eval_samples_per_second": 132.837,
      "eval_steps_per_second": 33.268,
      "step": 100
    },
    {
      "epoch": 0.03750464166357222,
      "grad_norm": 0.30814334750175476,
      "learning_rate": 5.246999999999999e-05,
      "loss": 10.7375,
      "step": 101
    },
    {
      "epoch": 0.037875974749350165,
      "grad_norm": 0.34333688020706177,
      "learning_rate": 5.194e-05,
      "loss": 10.7405,
      "step": 102
    },
    {
      "epoch": 0.03824730783512811,
      "grad_norm": 0.396445631980896,
      "learning_rate": 5.141e-05,
      "loss": 10.7225,
      "step": 103
    },
    {
      "epoch": 0.038618640920906055,
      "grad_norm": 0.3750787079334259,
      "learning_rate": 5.088e-05,
      "loss": 10.7264,
      "step": 104
    },
    {
      "epoch": 0.038989974006684,
      "grad_norm": 0.30554020404815674,
      "learning_rate": 5.035e-05,
      "loss": 10.7261,
      "step": 105
    },
    {
      "epoch": 0.03936130709246194,
      "grad_norm": 0.35283979773521423,
      "learning_rate": 4.9819999999999994e-05,
      "loss": 10.7309,
      "step": 106
    },
    {
      "epoch": 0.03973264017823988,
      "grad_norm": 0.39950183033943176,
      "learning_rate": 4.929e-05,
      "loss": 10.7223,
      "step": 107
    },
    {
      "epoch": 0.04010397326401782,
      "grad_norm": 0.38336852192878723,
      "learning_rate": 4.876e-05,
      "loss": 10.7209,
      "step": 108
    },
    {
      "epoch": 0.040475306349795764,
      "grad_norm": 0.362358421087265,
      "learning_rate": 4.823e-05,
      "loss": 10.735,
      "step": 109
    },
    {
      "epoch": 0.04084663943557371,
      "grad_norm": 0.397513747215271,
      "learning_rate": 4.7699999999999994e-05,
      "loss": 10.7304,
      "step": 110
    },
    {
      "epoch": 0.041217972521351655,
      "grad_norm": 0.4068267047405243,
      "learning_rate": 4.717e-05,
      "loss": 10.7297,
      "step": 111
    },
    {
      "epoch": 0.0415893056071296,
      "grad_norm": 0.38471511006355286,
      "learning_rate": 4.6639999999999994e-05,
      "loss": 10.754,
      "step": 112
    },
    {
      "epoch": 0.04196063869290754,
      "grad_norm": 0.3469901978969574,
      "learning_rate": 4.611e-05,
      "loss": 10.7255,
      "step": 113
    },
    {
      "epoch": 0.04233197177868548,
      "grad_norm": 0.3917114734649658,
      "learning_rate": 4.558e-05,
      "loss": 10.72,
      "step": 114
    },
    {
      "epoch": 0.04270330486446342,
      "grad_norm": 0.3135025203227997,
      "learning_rate": 4.505e-05,
      "loss": 10.7204,
      "step": 115
    },
    {
      "epoch": 0.043074637950241364,
      "grad_norm": 0.32310202717781067,
      "learning_rate": 4.4519999999999994e-05,
      "loss": 10.7334,
      "step": 116
    },
    {
      "epoch": 0.04344597103601931,
      "grad_norm": 0.316651314496994,
      "learning_rate": 4.399e-05,
      "loss": 10.7357,
      "step": 117
    },
    {
      "epoch": 0.043817304121797254,
      "grad_norm": 0.3423415422439575,
      "learning_rate": 4.346e-05,
      "loss": 10.7195,
      "step": 118
    },
    {
      "epoch": 0.044188637207575196,
      "grad_norm": 0.3235597610473633,
      "learning_rate": 4.293e-05,
      "loss": 10.7321,
      "step": 119
    },
    {
      "epoch": 0.04455997029335314,
      "grad_norm": 0.33004501461982727,
      "learning_rate": 4.2399999999999994e-05,
      "loss": 10.7148,
      "step": 120
    },
    {
      "epoch": 0.04493130337913108,
      "grad_norm": 0.3902532756328583,
      "learning_rate": 4.187e-05,
      "loss": 10.7344,
      "step": 121
    },
    {
      "epoch": 0.04530263646490902,
      "grad_norm": 0.30236899852752686,
      "learning_rate": 4.134e-05,
      "loss": 10.7194,
      "step": 122
    },
    {
      "epoch": 0.04567396955068696,
      "grad_norm": 0.38607534766197205,
      "learning_rate": 4.081e-05,
      "loss": 10.7046,
      "step": 123
    },
    {
      "epoch": 0.04604530263646491,
      "grad_norm": 0.3628384470939636,
      "learning_rate": 4.028e-05,
      "loss": 10.7198,
      "step": 124
    },
    {
      "epoch": 0.046416635722242854,
      "grad_norm": 0.36593976616859436,
      "learning_rate": 3.975e-05,
      "loss": 10.7281,
      "step": 125
    },
    {
      "epoch": 0.046787968808020795,
      "grad_norm": 0.3433733880519867,
      "learning_rate": 3.9219999999999994e-05,
      "loss": 10.7148,
      "step": 126
    },
    {
      "epoch": 0.04715930189379874,
      "grad_norm": 0.37441202998161316,
      "learning_rate": 3.869e-05,
      "loss": 10.7132,
      "step": 127
    },
    {
      "epoch": 0.04753063497957668,
      "grad_norm": 0.3574247658252716,
      "learning_rate": 3.816e-05,
      "loss": 10.7207,
      "step": 128
    },
    {
      "epoch": 0.04790196806535462,
      "grad_norm": 0.3976617157459259,
      "learning_rate": 3.763e-05,
      "loss": 10.7027,
      "step": 129
    },
    {
      "epoch": 0.04827330115113257,
      "grad_norm": 0.4094492495059967,
      "learning_rate": 3.7099999999999994e-05,
      "loss": 10.7168,
      "step": 130
    },
    {
      "epoch": 0.04864463423691051,
      "grad_norm": 0.3910907506942749,
      "learning_rate": 3.657e-05,
      "loss": 10.6894,
      "step": 131
    },
    {
      "epoch": 0.04901596732268845,
      "grad_norm": 0.347520112991333,
      "learning_rate": 3.604e-05,
      "loss": 10.7075,
      "step": 132
    },
    {
      "epoch": 0.049387300408466395,
      "grad_norm": 0.33360084891319275,
      "learning_rate": 3.551e-05,
      "loss": 10.7175,
      "step": 133
    },
    {
      "epoch": 0.049758633494244336,
      "grad_norm": 0.3098675608634949,
      "learning_rate": 3.498e-05,
      "loss": 10.6943,
      "step": 134
    },
    {
      "epoch": 0.05012996658002228,
      "grad_norm": 0.417607843875885,
      "learning_rate": 3.445e-05,
      "loss": 10.696,
      "step": 135
    },
    {
      "epoch": 0.05050129966580022,
      "grad_norm": 0.37847384810447693,
      "learning_rate": 3.392e-05,
      "loss": 10.7031,
      "step": 136
    },
    {
      "epoch": 0.05087263275157817,
      "grad_norm": 0.34361228346824646,
      "learning_rate": 3.339e-05,
      "loss": 10.714,
      "step": 137
    },
    {
      "epoch": 0.05124396583735611,
      "grad_norm": 0.4933507740497589,
      "learning_rate": 3.286e-05,
      "loss": 10.6758,
      "step": 138
    },
    {
      "epoch": 0.05161529892313405,
      "grad_norm": 0.412986695766449,
      "learning_rate": 3.233e-05,
      "loss": 10.6963,
      "step": 139
    },
    {
      "epoch": 0.051986632008911994,
      "grad_norm": 0.43772485852241516,
      "learning_rate": 3.1799999999999994e-05,
      "loss": 10.7149,
      "step": 140
    },
    {
      "epoch": 0.052357965094689936,
      "grad_norm": 0.37518948316574097,
      "learning_rate": 3.1270000000000004e-05,
      "loss": 10.6951,
      "step": 141
    },
    {
      "epoch": 0.05272929818046788,
      "grad_norm": 0.36650022864341736,
      "learning_rate": 3.074e-05,
      "loss": 10.6925,
      "step": 142
    },
    {
      "epoch": 0.05310063126624582,
      "grad_norm": 0.4543534219264984,
      "learning_rate": 3.0209999999999997e-05,
      "loss": 10.683,
      "step": 143
    },
    {
      "epoch": 0.05347196435202377,
      "grad_norm": 0.47826236486434937,
      "learning_rate": 2.9679999999999997e-05,
      "loss": 10.7077,
      "step": 144
    },
    {
      "epoch": 0.05384329743780171,
      "grad_norm": 0.3722932040691376,
      "learning_rate": 2.915e-05,
      "loss": 10.6916,
      "step": 145
    },
    {
      "epoch": 0.05421463052357965,
      "grad_norm": 0.5447331666946411,
      "learning_rate": 2.8619999999999997e-05,
      "loss": 10.7065,
      "step": 146
    },
    {
      "epoch": 0.05458596360935759,
      "grad_norm": 0.42991727590560913,
      "learning_rate": 2.8089999999999997e-05,
      "loss": 10.6734,
      "step": 147
    },
    {
      "epoch": 0.054957296695135535,
      "grad_norm": 0.5424541234970093,
      "learning_rate": 2.756e-05,
      "loss": 10.6609,
      "step": 148
    },
    {
      "epoch": 0.05532862978091348,
      "grad_norm": 0.6233406066894531,
      "learning_rate": 2.703e-05,
      "loss": 10.6865,
      "step": 149
    },
    {
      "epoch": 0.055699962866691426,
      "grad_norm": 0.635688066482544,
      "learning_rate": 2.6499999999999997e-05,
      "loss": 10.6352,
      "step": 150
    },
    {
      "epoch": 0.055699962866691426,
      "eval_loss": 10.709113121032715,
      "eval_runtime": 8.5126,
      "eval_samples_per_second": 133.215,
      "eval_steps_per_second": 33.362,
      "step": 150
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 25616292249600.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}