{
  "best_metric": 0.21266202628612518,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.16359918200409,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008179959100204499,
      "grad_norm": 0.44888365268707275,
      "learning_rate": 1.012e-05,
      "loss": 0.8848,
      "step": 1
    },
    {
      "epoch": 0.0008179959100204499,
      "eval_loss": 0.5526403784751892,
      "eval_runtime": 164.606,
      "eval_samples_per_second": 3.129,
      "eval_steps_per_second": 0.784,
      "step": 1
    },
    {
      "epoch": 0.0016359918200408998,
      "grad_norm": 0.3820202052593231,
      "learning_rate": 2.024e-05,
      "loss": 0.6338,
      "step": 2
    },
    {
      "epoch": 0.00245398773006135,
      "grad_norm": 1.0369555950164795,
      "learning_rate": 3.0359999999999997e-05,
      "loss": 1.3722,
      "step": 3
    },
    {
      "epoch": 0.0032719836400817996,
      "grad_norm": 0.5409644842147827,
      "learning_rate": 4.048e-05,
      "loss": 1.027,
      "step": 4
    },
    {
      "epoch": 0.00408997955010225,
      "grad_norm": 0.5290982127189636,
      "learning_rate": 5.06e-05,
      "loss": 0.7956,
      "step": 5
    },
    {
      "epoch": 0.0049079754601227,
      "grad_norm": 0.4899097979068756,
      "learning_rate": 6.0719999999999995e-05,
      "loss": 0.8705,
      "step": 6
    },
    {
      "epoch": 0.0057259713701431495,
      "grad_norm": 0.7995485663414001,
      "learning_rate": 7.083999999999999e-05,
      "loss": 1.2093,
      "step": 7
    },
    {
      "epoch": 0.006543967280163599,
      "grad_norm": 0.5504206418991089,
      "learning_rate": 8.096e-05,
      "loss": 0.9514,
      "step": 8
    },
    {
      "epoch": 0.007361963190184049,
      "grad_norm": 0.5944662690162659,
      "learning_rate": 9.108e-05,
      "loss": 0.955,
      "step": 9
    },
    {
      "epoch": 0.0081799591002045,
      "grad_norm": 0.6484851837158203,
      "learning_rate": 0.0001012,
      "loss": 0.9926,
      "step": 10
    },
    {
      "epoch": 0.00899795501022495,
      "grad_norm": 0.6986644864082336,
      "learning_rate": 0.00010066736842105262,
      "loss": 0.8318,
      "step": 11
    },
    {
      "epoch": 0.0098159509202454,
      "grad_norm": 0.6467517018318176,
      "learning_rate": 0.00010013473684210525,
      "loss": 0.7294,
      "step": 12
    },
    {
      "epoch": 0.01063394683026585,
      "grad_norm": 0.7561603784561157,
      "learning_rate": 9.960210526315788e-05,
      "loss": 0.8644,
      "step": 13
    },
    {
      "epoch": 0.011451942740286299,
      "grad_norm": 0.7822588086128235,
      "learning_rate": 9.906947368421052e-05,
      "loss": 0.9813,
      "step": 14
    },
    {
      "epoch": 0.012269938650306749,
      "grad_norm": 0.6852607727050781,
      "learning_rate": 9.853684210526316e-05,
      "loss": 0.9142,
      "step": 15
    },
    {
      "epoch": 0.013087934560327199,
      "grad_norm": 0.6556634902954102,
      "learning_rate": 9.800421052631579e-05,
      "loss": 0.5068,
      "step": 16
    },
    {
      "epoch": 0.013905930470347648,
      "grad_norm": 0.954055905342102,
      "learning_rate": 9.747157894736841e-05,
      "loss": 1.126,
      "step": 17
    },
    {
      "epoch": 0.014723926380368098,
      "grad_norm": 0.8079794645309448,
      "learning_rate": 9.693894736842104e-05,
      "loss": 0.963,
      "step": 18
    },
    {
      "epoch": 0.015541922290388548,
      "grad_norm": 0.8518968820571899,
      "learning_rate": 9.640631578947367e-05,
      "loss": 0.9903,
      "step": 19
    },
    {
      "epoch": 0.016359918200409,
      "grad_norm": 0.5757415890693665,
      "learning_rate": 9.58736842105263e-05,
      "loss": 0.4356,
      "step": 20
    },
    {
      "epoch": 0.01717791411042945,
      "grad_norm": 1.0395272970199585,
      "learning_rate": 9.534105263157894e-05,
      "loss": 1.2191,
      "step": 21
    },
    {
      "epoch": 0.0179959100204499,
      "grad_norm": 1.0517971515655518,
      "learning_rate": 9.480842105263158e-05,
      "loss": 1.1783,
      "step": 22
    },
    {
      "epoch": 0.01881390593047035,
      "grad_norm": 0.9631999135017395,
      "learning_rate": 9.427578947368421e-05,
      "loss": 0.9933,
      "step": 23
    },
    {
      "epoch": 0.0196319018404908,
      "grad_norm": 1.0935978889465332,
      "learning_rate": 9.374315789473684e-05,
      "loss": 0.8218,
      "step": 24
    },
    {
      "epoch": 0.02044989775051125,
      "grad_norm": 0.8494012951850891,
      "learning_rate": 9.321052631578946e-05,
      "loss": 0.953,
      "step": 25
    },
    {
      "epoch": 0.0212678936605317,
      "grad_norm": 0.6680853962898254,
      "learning_rate": 9.267789473684209e-05,
      "loss": 0.4734,
      "step": 26
    },
    {
      "epoch": 0.022085889570552148,
      "grad_norm": 0.7398406267166138,
      "learning_rate": 9.214526315789473e-05,
      "loss": 0.6016,
      "step": 27
    },
    {
      "epoch": 0.022903885480572598,
      "grad_norm": 0.9350616335868835,
      "learning_rate": 9.161263157894736e-05,
      "loss": 0.7371,
      "step": 28
    },
    {
      "epoch": 0.023721881390593048,
      "grad_norm": 0.886838436126709,
      "learning_rate": 9.108e-05,
      "loss": 0.8018,
      "step": 29
    },
    {
      "epoch": 0.024539877300613498,
      "grad_norm": 0.6444780826568604,
      "learning_rate": 9.054736842105263e-05,
      "loss": 0.551,
      "step": 30
    },
    {
      "epoch": 0.025357873210633947,
      "grad_norm": 0.8645835518836975,
      "learning_rate": 9.001473684210526e-05,
      "loss": 0.8206,
      "step": 31
    },
    {
      "epoch": 0.026175869120654397,
      "grad_norm": 1.1317236423492432,
      "learning_rate": 8.948210526315789e-05,
      "loss": 0.4924,
      "step": 32
    },
    {
      "epoch": 0.026993865030674847,
      "grad_norm": 0.7311250567436218,
      "learning_rate": 8.894947368421051e-05,
      "loss": 0.4553,
      "step": 33
    },
    {
      "epoch": 0.027811860940695297,
      "grad_norm": 0.5183742046356201,
      "learning_rate": 8.841684210526315e-05,
      "loss": 0.3488,
      "step": 34
    },
    {
      "epoch": 0.028629856850715747,
      "grad_norm": 0.7674701809883118,
      "learning_rate": 8.788421052631578e-05,
      "loss": 0.617,
      "step": 35
    },
    {
      "epoch": 0.029447852760736196,
      "grad_norm": 0.36555930972099304,
      "learning_rate": 8.735157894736842e-05,
      "loss": 0.1531,
      "step": 36
    },
    {
      "epoch": 0.030265848670756646,
      "grad_norm": 0.23393642902374268,
      "learning_rate": 8.681894736842105e-05,
      "loss": 0.0187,
      "step": 37
    },
    {
      "epoch": 0.031083844580777096,
      "grad_norm": 0.09792309999465942,
      "learning_rate": 8.628631578947368e-05,
      "loss": 0.0083,
      "step": 38
    },
    {
      "epoch": 0.03190184049079755,
      "grad_norm": 0.07559319585561752,
      "learning_rate": 8.575368421052631e-05,
      "loss": 0.0057,
      "step": 39
    },
    {
      "epoch": 0.032719836400818,
      "grad_norm": 0.05586693063378334,
      "learning_rate": 8.522105263157893e-05,
      "loss": 0.0034,
      "step": 40
    },
    {
      "epoch": 0.03353783231083845,
      "grad_norm": 0.08591938763856888,
      "learning_rate": 8.468842105263158e-05,
      "loss": 0.0034,
      "step": 41
    },
    {
      "epoch": 0.0343558282208589,
      "grad_norm": 0.06896223872900009,
      "learning_rate": 8.41557894736842e-05,
      "loss": 0.0034,
      "step": 42
    },
    {
      "epoch": 0.03517382413087935,
      "grad_norm": 0.11430584639310837,
      "learning_rate": 8.362315789473683e-05,
      "loss": 0.0046,
      "step": 43
    },
    {
      "epoch": 0.0359918200408998,
      "grad_norm": 0.1037866622209549,
      "learning_rate": 8.309052631578947e-05,
      "loss": 0.0026,
      "step": 44
    },
    {
      "epoch": 0.03680981595092025,
      "grad_norm": 0.29823267459869385,
      "learning_rate": 8.25578947368421e-05,
      "loss": 0.0045,
      "step": 45
    },
    {
      "epoch": 0.0376278118609407,
      "grad_norm": 0.017684394493699074,
      "learning_rate": 8.202526315789473e-05,
      "loss": 0.0005,
      "step": 46
    },
    {
      "epoch": 0.03844580777096115,
      "grad_norm": 0.021542565897107124,
      "learning_rate": 8.149263157894736e-05,
      "loss": 0.0007,
      "step": 47
    },
    {
      "epoch": 0.0392638036809816,
      "grad_norm": 0.01819503679871559,
      "learning_rate": 8.096e-05,
      "loss": 0.0003,
      "step": 48
    },
    {
      "epoch": 0.04008179959100205,
      "grad_norm": 0.31022578477859497,
      "learning_rate": 8.042736842105263e-05,
      "loss": 0.0054,
      "step": 49
    },
    {
      "epoch": 0.0408997955010225,
      "grad_norm": 0.00996240321546793,
      "learning_rate": 7.989473684210525e-05,
      "loss": 0.0003,
      "step": 50
    },
    {
      "epoch": 0.0408997955010225,
      "eval_loss": 0.3140620291233063,
      "eval_runtime": 164.9522,
      "eval_samples_per_second": 3.122,
      "eval_steps_per_second": 0.782,
      "step": 50
    },
    {
      "epoch": 0.04171779141104295,
      "grad_norm": 0.9700417518615723,
      "learning_rate": 7.93621052631579e-05,
      "loss": 0.975,
      "step": 51
    },
    {
      "epoch": 0.0425357873210634,
      "grad_norm": 0.7017274498939514,
      "learning_rate": 7.882947368421052e-05,
      "loss": 0.801,
      "step": 52
    },
    {
      "epoch": 0.043353783231083846,
      "grad_norm": 0.5045299530029297,
      "learning_rate": 7.829684210526315e-05,
      "loss": 0.7498,
      "step": 53
    },
    {
      "epoch": 0.044171779141104296,
      "grad_norm": 0.5897574424743652,
      "learning_rate": 7.776421052631578e-05,
      "loss": 1.052,
      "step": 54
    },
    {
      "epoch": 0.044989775051124746,
      "grad_norm": 0.6167125105857849,
      "learning_rate": 7.723157894736842e-05,
      "loss": 1.0903,
      "step": 55
    },
    {
      "epoch": 0.045807770961145196,
      "grad_norm": 0.488067090511322,
      "learning_rate": 7.669894736842105e-05,
      "loss": 0.9046,
      "step": 56
    },
    {
      "epoch": 0.046625766871165646,
      "grad_norm": 0.4243190884590149,
      "learning_rate": 7.616631578947367e-05,
      "loss": 0.6169,
      "step": 57
    },
    {
      "epoch": 0.047443762781186095,
      "grad_norm": 0.4573240876197815,
      "learning_rate": 7.563368421052632e-05,
      "loss": 0.5286,
      "step": 58
    },
    {
      "epoch": 0.048261758691206545,
      "grad_norm": 0.35533568263053894,
      "learning_rate": 7.510105263157894e-05,
      "loss": 0.3841,
      "step": 59
    },
    {
      "epoch": 0.049079754601226995,
      "grad_norm": 3.222236394882202,
      "learning_rate": 7.456842105263157e-05,
      "loss": 0.4826,
      "step": 60
    },
    {
      "epoch": 0.049897750511247445,
      "grad_norm": 0.6317524909973145,
      "learning_rate": 7.403578947368421e-05,
      "loss": 0.6264,
      "step": 61
    },
    {
      "epoch": 0.050715746421267895,
      "grad_norm": 0.9670488238334656,
      "learning_rate": 7.350315789473684e-05,
      "loss": 0.3956,
      "step": 62
    },
    {
      "epoch": 0.051533742331288344,
      "grad_norm": 0.6406692266464233,
      "learning_rate": 7.297052631578947e-05,
      "loss": 0.471,
      "step": 63
    },
    {
      "epoch": 0.052351738241308794,
      "grad_norm": 0.5081653594970703,
      "learning_rate": 7.24378947368421e-05,
      "loss": 0.6125,
      "step": 64
    },
    {
      "epoch": 0.053169734151329244,
      "grad_norm": 0.5987796783447266,
      "learning_rate": 7.190526315789474e-05,
      "loss": 0.6309,
      "step": 65
    },
    {
      "epoch": 0.053987730061349694,
      "grad_norm": 0.6003880500793457,
      "learning_rate": 7.137263157894736e-05,
      "loss": 0.7208,
      "step": 66
    },
    {
      "epoch": 0.054805725971370144,
      "grad_norm": 0.8808057904243469,
      "learning_rate": 7.083999999999999e-05,
      "loss": 0.9311,
      "step": 67
    },
    {
      "epoch": 0.05562372188139059,
      "grad_norm": 0.8602137565612793,
      "learning_rate": 7.030736842105263e-05,
      "loss": 0.9426,
      "step": 68
    },
    {
      "epoch": 0.05644171779141104,
      "grad_norm": 0.5142946243286133,
      "learning_rate": 6.977473684210526e-05,
      "loss": 0.5403,
      "step": 69
    },
    {
      "epoch": 0.05725971370143149,
      "grad_norm": 0.5149444937705994,
      "learning_rate": 6.924210526315789e-05,
      "loss": 0.597,
      "step": 70
    },
    {
      "epoch": 0.05807770961145194,
      "grad_norm": 0.5740354061126709,
      "learning_rate": 6.870947368421052e-05,
      "loss": 0.5234,
      "step": 71
    },
    {
      "epoch": 0.05889570552147239,
      "grad_norm": 3.2240636348724365,
      "learning_rate": 6.817684210526316e-05,
      "loss": 0.9182,
      "step": 72
    },
    {
      "epoch": 0.05971370143149284,
      "grad_norm": 4.175367832183838,
      "learning_rate": 6.764421052631579e-05,
      "loss": 0.8576,
      "step": 73
    },
    {
      "epoch": 0.06053169734151329,
      "grad_norm": 0.5963009595870972,
      "learning_rate": 6.711157894736841e-05,
      "loss": 0.6773,
      "step": 74
    },
    {
      "epoch": 0.06134969325153374,
      "grad_norm": 0.9091707468032837,
      "learning_rate": 6.657894736842106e-05,
      "loss": 0.793,
      "step": 75
    },
    {
      "epoch": 0.06216768916155419,
      "grad_norm": 0.706394374370575,
      "learning_rate": 6.604631578947368e-05,
      "loss": 0.6347,
      "step": 76
    },
    {
      "epoch": 0.06298568507157465,
      "grad_norm": 0.711338222026825,
      "learning_rate": 6.551368421052631e-05,
      "loss": 0.7409,
      "step": 77
    },
    {
      "epoch": 0.0638036809815951,
      "grad_norm": 0.7073589563369751,
      "learning_rate": 6.498105263157894e-05,
      "loss": 0.5461,
      "step": 78
    },
    {
      "epoch": 0.06462167689161555,
      "grad_norm": 0.6385952234268188,
      "learning_rate": 6.444842105263157e-05,
      "loss": 0.5371,
      "step": 79
    },
    {
      "epoch": 0.065439672801636,
      "grad_norm": 0.671297013759613,
      "learning_rate": 6.391578947368421e-05,
      "loss": 0.4195,
      "step": 80
    },
    {
      "epoch": 0.06625766871165645,
      "grad_norm": 0.7944509387016296,
      "learning_rate": 6.338315789473684e-05,
      "loss": 0.2961,
      "step": 81
    },
    {
      "epoch": 0.0670756646216769,
      "grad_norm": 0.4690554440021515,
      "learning_rate": 6.285052631578948e-05,
      "loss": 0.3195,
      "step": 82
    },
    {
      "epoch": 0.06789366053169735,
      "grad_norm": 0.40818992257118225,
      "learning_rate": 6.23178947368421e-05,
      "loss": 0.324,
      "step": 83
    },
    {
      "epoch": 0.0687116564417178,
      "grad_norm": 0.19155316054821014,
      "learning_rate": 6.178526315789473e-05,
      "loss": 0.076,
      "step": 84
    },
    {
      "epoch": 0.06952965235173825,
      "grad_norm": 0.26963499188423157,
      "learning_rate": 6.125263157894736e-05,
      "loss": 0.1005,
      "step": 85
    },
    {
      "epoch": 0.0703476482617587,
      "grad_norm": 0.46540749073028564,
      "learning_rate": 6.0719999999999995e-05,
      "loss": 0.1703,
      "step": 86
    },
    {
      "epoch": 0.07116564417177915,
      "grad_norm": 0.026622101664543152,
      "learning_rate": 6.018736842105262e-05,
      "loss": 0.001,
      "step": 87
    },
    {
      "epoch": 0.0719836400817996,
      "grad_norm": 0.2342638075351715,
      "learning_rate": 5.965473684210526e-05,
      "loss": 0.0101,
      "step": 88
    },
    {
      "epoch": 0.07280163599182005,
      "grad_norm": 0.37686291337013245,
      "learning_rate": 5.912210526315789e-05,
      "loss": 0.0078,
      "step": 89
    },
    {
      "epoch": 0.0736196319018405,
      "grad_norm": 0.10179778933525085,
      "learning_rate": 5.8589473684210526e-05,
      "loss": 0.0014,
      "step": 90
    },
    {
      "epoch": 0.07443762781186095,
      "grad_norm": 0.1271056979894638,
      "learning_rate": 5.8056842105263154e-05,
      "loss": 0.0045,
      "step": 91
    },
    {
      "epoch": 0.0752556237218814,
      "grad_norm": 0.03991863876581192,
      "learning_rate": 5.752421052631578e-05,
      "loss": 0.002,
      "step": 92
    },
    {
      "epoch": 0.07607361963190185,
      "grad_norm": 0.3088296055793762,
      "learning_rate": 5.6991578947368416e-05,
      "loss": 0.0097,
      "step": 93
    },
    {
      "epoch": 0.0768916155419223,
      "grad_norm": 0.01231884490698576,
      "learning_rate": 5.6458947368421044e-05,
      "loss": 0.0005,
      "step": 94
    },
    {
      "epoch": 0.07770961145194274,
      "grad_norm": 0.0380236841738224,
      "learning_rate": 5.5926315789473685e-05,
      "loss": 0.0017,
      "step": 95
    },
    {
      "epoch": 0.0785276073619632,
      "grad_norm": 0.018580930307507515,
      "learning_rate": 5.539368421052631e-05,
      "loss": 0.0007,
      "step": 96
    },
    {
      "epoch": 0.07934560327198364,
      "grad_norm": 0.3009152114391327,
      "learning_rate": 5.486105263157895e-05,
      "loss": 0.0008,
      "step": 97
    },
    {
      "epoch": 0.0801635991820041,
      "grad_norm": 0.14345374703407288,
      "learning_rate": 5.4328421052631575e-05,
      "loss": 0.0096,
      "step": 98
    },
    {
      "epoch": 0.08098159509202454,
      "grad_norm": 0.0597989596426487,
      "learning_rate": 5.37957894736842e-05,
      "loss": 0.0013,
      "step": 99
    },
    {
      "epoch": 0.081799591002045,
      "grad_norm": 0.025775019079446793,
      "learning_rate": 5.326315789473684e-05,
      "loss": 0.0011,
      "step": 100
    },
    {
      "epoch": 0.081799591002045,
      "eval_loss": 0.2703871726989746,
      "eval_runtime": 165.4305,
      "eval_samples_per_second": 3.113,
      "eval_steps_per_second": 0.78,
      "step": 100
    },
    {
      "epoch": 0.08261758691206544,
      "grad_norm": 3.3351776599884033,
      "learning_rate": 5.2730526315789465e-05,
      "loss": 0.8922,
      "step": 101
    },
    {
      "epoch": 0.0834355828220859,
      "grad_norm": 0.621583878993988,
      "learning_rate": 5.2197894736842107e-05,
      "loss": 0.8219,
      "step": 102
    },
    {
      "epoch": 0.08425357873210634,
      "grad_norm": 0.4286845624446869,
      "learning_rate": 5.1665263157894734e-05,
      "loss": 0.6638,
      "step": 103
    },
    {
      "epoch": 0.0850715746421268,
      "grad_norm": 0.5275766253471375,
      "learning_rate": 5.113263157894737e-05,
      "loss": 0.9051,
      "step": 104
    },
    {
      "epoch": 0.08588957055214724,
      "grad_norm": 0.5137267112731934,
      "learning_rate": 5.06e-05,
      "loss": 0.7615,
      "step": 105
    },
    {
      "epoch": 0.08670756646216769,
      "grad_norm": 0.4253179430961609,
      "learning_rate": 5.0067368421052624e-05,
      "loss": 0.6455,
      "step": 106
    },
    {
      "epoch": 0.08752556237218814,
      "grad_norm": 0.4956965148448944,
      "learning_rate": 4.953473684210526e-05,
      "loss": 0.8425,
      "step": 107
    },
    {
      "epoch": 0.08834355828220859,
      "grad_norm": 0.4571160674095154,
      "learning_rate": 4.9002105263157893e-05,
      "loss": 0.6951,
      "step": 108
    },
    {
      "epoch": 0.08916155419222904,
      "grad_norm": 0.48802193999290466,
      "learning_rate": 4.846947368421052e-05,
      "loss": 0.7291,
      "step": 109
    },
    {
      "epoch": 0.08997955010224949,
      "grad_norm": 0.5465656518936157,
      "learning_rate": 4.793684210526315e-05,
      "loss": 0.8595,
      "step": 110
    },
    {
      "epoch": 0.09079754601226994,
      "grad_norm": 0.4221843183040619,
      "learning_rate": 4.740421052631579e-05,
      "loss": 0.5227,
      "step": 111
    },
    {
      "epoch": 0.09161554192229039,
      "grad_norm": 0.40702882409095764,
      "learning_rate": 4.687157894736842e-05,
      "loss": 0.4758,
      "step": 112
    },
    {
      "epoch": 0.09243353783231084,
      "grad_norm": 0.4591318964958191,
      "learning_rate": 4.6338947368421046e-05,
      "loss": 0.5884,
      "step": 113
    },
    {
      "epoch": 0.09325153374233129,
      "grad_norm": 0.3259945809841156,
      "learning_rate": 4.580631578947368e-05,
      "loss": 0.3064,
      "step": 114
    },
    {
      "epoch": 0.09406952965235174,
      "grad_norm": 0.41009268164634705,
      "learning_rate": 4.5273684210526315e-05,
      "loss": 0.4763,
      "step": 115
    },
    {
      "epoch": 0.09488752556237219,
      "grad_norm": 0.49340561032295227,
      "learning_rate": 4.474105263157894e-05,
      "loss": 0.5706,
      "step": 116
    },
    {
      "epoch": 0.09570552147239264,
      "grad_norm": 0.41743770241737366,
      "learning_rate": 4.420842105263158e-05,
      "loss": 0.3968,
      "step": 117
    },
    {
      "epoch": 0.09652351738241309,
      "grad_norm": 0.5831127166748047,
      "learning_rate": 4.367578947368421e-05,
      "loss": 0.789,
      "step": 118
    },
    {
      "epoch": 0.09734151329243354,
      "grad_norm": 0.540946900844574,
      "learning_rate": 4.314315789473684e-05,
      "loss": 0.5618,
      "step": 119
    },
    {
      "epoch": 0.09815950920245399,
      "grad_norm": 0.5608387589454651,
      "learning_rate": 4.261052631578947e-05,
      "loss": 0.7326,
      "step": 120
    },
    {
      "epoch": 0.09897750511247444,
      "grad_norm": 0.5865150690078735,
      "learning_rate": 4.20778947368421e-05,
      "loss": 0.6434,
      "step": 121
    },
    {
      "epoch": 0.09979550102249489,
      "grad_norm": 0.4052663743495941,
      "learning_rate": 4.1545263157894736e-05,
      "loss": 0.4434,
      "step": 122
    },
    {
      "epoch": 0.10061349693251534,
      "grad_norm": 0.5830983519554138,
      "learning_rate": 4.1012631578947364e-05,
      "loss": 0.533,
      "step": 123
    },
    {
      "epoch": 0.10143149284253579,
      "grad_norm": 0.5231256484985352,
      "learning_rate": 4.048e-05,
      "loss": 0.54,
      "step": 124
    },
    {
      "epoch": 0.10224948875255624,
      "grad_norm": 0.655725359916687,
      "learning_rate": 3.9947368421052626e-05,
      "loss": 0.7891,
      "step": 125
    },
    {
      "epoch": 0.10306748466257669,
      "grad_norm": 0.6883142590522766,
      "learning_rate": 3.941473684210526e-05,
      "loss": 0.848,
      "step": 126
    },
    {
      "epoch": 0.10388548057259714,
      "grad_norm": 0.5699670314788818,
      "learning_rate": 3.888210526315789e-05,
      "loss": 0.5417,
      "step": 127
    },
    {
      "epoch": 0.10470347648261759,
      "grad_norm": 0.6029432415962219,
      "learning_rate": 3.834947368421052e-05,
      "loss": 0.5477,
      "step": 128
    },
    {
      "epoch": 0.10552147239263804,
      "grad_norm": 0.5479352474212646,
      "learning_rate": 3.781684210526316e-05,
      "loss": 0.5703,
      "step": 129
    },
    {
      "epoch": 0.10633946830265849,
      "grad_norm": 0.6330269575119019,
      "learning_rate": 3.7284210526315786e-05,
      "loss": 0.7119,
      "step": 130
    },
    {
      "epoch": 0.10715746421267894,
      "grad_norm": 0.3221192955970764,
      "learning_rate": 3.675157894736842e-05,
      "loss": 0.2668,
      "step": 131
    },
    {
      "epoch": 0.10797546012269939,
      "grad_norm": 0.4486640989780426,
      "learning_rate": 3.621894736842105e-05,
      "loss": 0.2938,
      "step": 132
    },
    {
      "epoch": 0.10879345603271984,
      "grad_norm": 0.6219035983085632,
      "learning_rate": 3.568631578947368e-05,
      "loss": 0.7169,
      "step": 133
    },
    {
      "epoch": 0.10961145194274029,
      "grad_norm": 0.5056197047233582,
      "learning_rate": 3.515368421052632e-05,
      "loss": 0.5306,
      "step": 134
    },
    {
      "epoch": 0.11042944785276074,
      "grad_norm": 0.3415873646736145,
      "learning_rate": 3.4621052631578945e-05,
      "loss": 0.2147,
      "step": 135
    },
    {
      "epoch": 0.11124744376278119,
      "grad_norm": 0.7372704744338989,
      "learning_rate": 3.408842105263158e-05,
      "loss": 0.5961,
      "step": 136
    },
    {
      "epoch": 0.11206543967280164,
      "grad_norm": 0.356452614068985,
      "learning_rate": 3.355578947368421e-05,
      "loss": 0.2583,
      "step": 137
    },
    {
      "epoch": 0.11288343558282209,
      "grad_norm": 0.3617746829986572,
      "learning_rate": 3.302315789473684e-05,
      "loss": 0.1477,
      "step": 138
    },
    {
      "epoch": 0.11370143149284254,
      "grad_norm": 0.18670551478862762,
      "learning_rate": 3.249052631578947e-05,
      "loss": 0.0075,
      "step": 139
    },
    {
      "epoch": 0.11451942740286299,
      "grad_norm": 0.05176525190472603,
      "learning_rate": 3.1957894736842104e-05,
      "loss": 0.0016,
      "step": 140
    },
    {
      "epoch": 0.11533742331288344,
      "grad_norm": 0.04952479153871536,
      "learning_rate": 3.142526315789474e-05,
      "loss": 0.0018,
      "step": 141
    },
    {
      "epoch": 0.11615541922290389,
      "grad_norm": 0.07239986956119537,
      "learning_rate": 3.0892631578947366e-05,
      "loss": 0.001,
      "step": 142
    },
    {
      "epoch": 0.11697341513292434,
      "grad_norm": 0.021206321194767952,
      "learning_rate": 3.0359999999999997e-05,
      "loss": 0.0005,
      "step": 143
    },
    {
      "epoch": 0.11779141104294479,
      "grad_norm": 0.00947723537683487,
      "learning_rate": 2.982736842105263e-05,
      "loss": 0.0005,
      "step": 144
    },
    {
      "epoch": 0.11860940695296524,
      "grad_norm": 0.17608602344989777,
      "learning_rate": 2.9294736842105263e-05,
      "loss": 0.0033,
      "step": 145
    },
    {
      "epoch": 0.11942740286298568,
      "grad_norm": 0.02094121463596821,
      "learning_rate": 2.876210526315789e-05,
      "loss": 0.0006,
      "step": 146
    },
    {
      "epoch": 0.12024539877300613,
      "grad_norm": 0.07817188650369644,
      "learning_rate": 2.8229473684210522e-05,
      "loss": 0.0015,
      "step": 147
    },
    {
      "epoch": 0.12106339468302658,
      "grad_norm": 0.42592841386795044,
      "learning_rate": 2.7696842105263156e-05,
      "loss": 0.0054,
      "step": 148
    },
    {
      "epoch": 0.12188139059304703,
      "grad_norm": 0.007265524938702583,
      "learning_rate": 2.7164210526315788e-05,
      "loss": 0.0002,
      "step": 149
    },
    {
      "epoch": 0.12269938650306748,
      "grad_norm": 0.008094916120171547,
      "learning_rate": 2.663157894736842e-05,
      "loss": 0.0004,
      "step": 150
    },
    {
      "epoch": 0.12269938650306748,
      "eval_loss": 0.2351786196231842,
      "eval_runtime": 165.3057,
      "eval_samples_per_second": 3.115,
      "eval_steps_per_second": 0.78,
      "step": 150
    },
    {
      "epoch": 0.12351738241308793,
      "grad_norm": 0.430698424577713,
      "learning_rate": 2.6098947368421053e-05,
      "loss": 0.838,
      "step": 151
    },
    {
      "epoch": 0.12433537832310838,
      "grad_norm": 0.4114360213279724,
      "learning_rate": 2.5566315789473684e-05,
      "loss": 0.6347,
      "step": 152
    },
    {
      "epoch": 0.12515337423312883,
      "grad_norm": 5.368963241577148,
      "learning_rate": 2.5033684210526312e-05,
      "loss": 1.7455,
      "step": 153
    },
    {
      "epoch": 0.1259713701431493,
      "grad_norm": 0.36379197239875793,
      "learning_rate": 2.4501052631578947e-05,
      "loss": 0.5644,
      "step": 154
    },
    {
      "epoch": 0.12678936605316973,
      "grad_norm": 0.3544858694076538,
      "learning_rate": 2.3968421052631575e-05,
      "loss": 0.5105,
      "step": 155
    },
    {
      "epoch": 0.1276073619631902,
      "grad_norm": 0.3365378975868225,
      "learning_rate": 2.343578947368421e-05,
      "loss": 0.4261,
      "step": 156
    },
    {
      "epoch": 0.12842535787321063,
      "grad_norm": 0.4293052852153778,
      "learning_rate": 2.290315789473684e-05,
      "loss": 0.695,
      "step": 157
    },
    {
      "epoch": 0.1292433537832311,
      "grad_norm": 0.5024716854095459,
      "learning_rate": 2.237052631578947e-05,
      "loss": 0.7874,
      "step": 158
    },
    {
      "epoch": 0.13006134969325153,
      "grad_norm": 0.4503779113292694,
      "learning_rate": 2.1837894736842106e-05,
      "loss": 0.6787,
      "step": 159
    },
    {
      "epoch": 0.130879345603272,
      "grad_norm": 0.5354055166244507,
      "learning_rate": 2.1305263157894734e-05,
      "loss": 0.8901,
      "step": 160
    },
    {
      "epoch": 0.13169734151329243,
      "grad_norm": 0.6013686656951904,
      "learning_rate": 2.0772631578947368e-05,
      "loss": 0.6101,
      "step": 161
    },
    {
      "epoch": 0.1325153374233129,
      "grad_norm": 0.5253039002418518,
      "learning_rate": 2.024e-05,
      "loss": 0.6207,
      "step": 162
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 0.5484157800674438,
      "learning_rate": 1.970736842105263e-05,
      "loss": 0.63,
      "step": 163
    },
    {
      "epoch": 0.1341513292433538,
      "grad_norm": 0.376302570104599,
      "learning_rate": 1.917473684210526e-05,
      "loss": 0.3893,
      "step": 164
    },
    {
      "epoch": 0.13496932515337423,
      "grad_norm": 0.41201335191726685,
      "learning_rate": 1.8642105263157893e-05,
      "loss": 0.4192,
      "step": 165
    },
    {
      "epoch": 0.1357873210633947,
      "grad_norm": 0.69189453125,
      "learning_rate": 1.8109473684210524e-05,
      "loss": 0.8196,
      "step": 166
    },
    {
      "epoch": 0.13660531697341513,
      "grad_norm": 0.3967001140117645,
      "learning_rate": 1.757684210526316e-05,
      "loss": 0.4478,
      "step": 167
    },
    {
      "epoch": 0.1374233128834356,
      "grad_norm": 0.40037596225738525,
      "learning_rate": 1.704421052631579e-05,
      "loss": 0.437,
      "step": 168
    },
    {
      "epoch": 0.13824130879345603,
      "grad_norm": 0.4589173197746277,
      "learning_rate": 1.651157894736842e-05,
      "loss": 0.5041,
      "step": 169
    },
    {
      "epoch": 0.1390593047034765,
      "grad_norm": 0.5317126512527466,
      "learning_rate": 1.5978947368421052e-05,
      "loss": 0.5844,
      "step": 170
    },
    {
      "epoch": 0.13987730061349693,
      "grad_norm": 0.6097099184989929,
      "learning_rate": 1.5446315789473683e-05,
      "loss": 0.8981,
      "step": 171
    },
    {
      "epoch": 0.1406952965235174,
      "grad_norm": 0.5526396632194519,
      "learning_rate": 1.4913684210526314e-05,
      "loss": 0.5214,
      "step": 172
    },
    {
      "epoch": 0.14151329243353783,
      "grad_norm": 0.49050372838974,
      "learning_rate": 1.4381052631578945e-05,
      "loss": 0.3992,
      "step": 173
    },
    {
      "epoch": 0.1423312883435583,
      "grad_norm": 0.6631816029548645,
      "learning_rate": 1.3848421052631578e-05,
      "loss": 0.7089,
      "step": 174
    },
    {
      "epoch": 0.14314928425357873,
      "grad_norm": 0.6019887328147888,
      "learning_rate": 1.331578947368421e-05,
      "loss": 0.6868,
      "step": 175
    },
    {
      "epoch": 0.1439672801635992,
      "grad_norm": 0.46158432960510254,
      "learning_rate": 1.2783157894736842e-05,
      "loss": 0.4053,
      "step": 176
    },
    {
      "epoch": 0.14478527607361963,
      "grad_norm": 3.7597968578338623,
      "learning_rate": 1.2250526315789473e-05,
      "loss": 0.6116,
      "step": 177
    },
    {
      "epoch": 0.1456032719836401,
      "grad_norm": 0.3607610762119293,
      "learning_rate": 1.1717894736842105e-05,
      "loss": 0.2041,
      "step": 178
    },
    {
      "epoch": 0.14642126789366053,
      "grad_norm": 0.6213468313217163,
      "learning_rate": 1.1185263157894736e-05,
      "loss": 0.6546,
      "step": 179
    },
    {
      "epoch": 0.147239263803681,
      "grad_norm": 0.4181584119796753,
      "learning_rate": 1.0652631578947367e-05,
      "loss": 0.3367,
      "step": 180
    },
    {
      "epoch": 0.14805725971370143,
      "grad_norm": 0.7935755848884583,
      "learning_rate": 1.012e-05,
      "loss": 0.7382,
      "step": 181
    },
    {
      "epoch": 0.1488752556237219,
      "grad_norm": 0.7149289846420288,
      "learning_rate": 9.58736842105263e-06,
      "loss": 0.6995,
      "step": 182
    },
    {
      "epoch": 0.14969325153374233,
      "grad_norm": 0.6175360083580017,
      "learning_rate": 9.054736842105262e-06,
      "loss": 0.515,
      "step": 183
    },
    {
      "epoch": 0.1505112474437628,
      "grad_norm": 0.432099848985672,
      "learning_rate": 8.522105263157895e-06,
      "loss": 0.222,
      "step": 184
    },
    {
      "epoch": 0.15132924335378323,
      "grad_norm": 0.14052408933639526,
      "learning_rate": 7.989473684210526e-06,
      "loss": 0.0522,
      "step": 185
    },
    {
      "epoch": 0.1521472392638037,
      "grad_norm": 0.06722941249608994,
      "learning_rate": 7.456842105263157e-06,
      "loss": 0.0043,
      "step": 186
    },
    {
      "epoch": 0.15296523517382413,
      "grad_norm": 0.015035667456686497,
      "learning_rate": 6.924210526315789e-06,
      "loss": 0.0008,
      "step": 187
    },
    {
      "epoch": 0.1537832310838446,
      "grad_norm": 0.15017950534820557,
      "learning_rate": 6.391578947368421e-06,
      "loss": 0.003,
      "step": 188
    },
    {
      "epoch": 0.15460122699386503,
      "grad_norm": 0.018131496384739876,
      "learning_rate": 5.858947368421052e-06,
      "loss": 0.001,
      "step": 189
    },
    {
      "epoch": 0.1554192229038855,
      "grad_norm": 0.015157670713961124,
      "learning_rate": 5.326315789473683e-06,
      "loss": 0.0008,
      "step": 190
    },
    {
      "epoch": 0.15623721881390593,
      "grad_norm": 0.11725586652755737,
      "learning_rate": 4.793684210526315e-06,
      "loss": 0.0067,
      "step": 191
    },
    {
      "epoch": 0.1570552147239264,
      "grad_norm": 0.043377045542001724,
      "learning_rate": 4.261052631578947e-06,
      "loss": 0.0024,
      "step": 192
    },
    {
      "epoch": 0.15787321063394683,
      "grad_norm": 0.10283850133419037,
      "learning_rate": 3.7284210526315786e-06,
      "loss": 0.004,
      "step": 193
    },
    {
      "epoch": 0.1586912065439673,
      "grad_norm": 0.1786062866449356,
      "learning_rate": 3.1957894736842106e-06,
      "loss": 0.0071,
      "step": 194
    },
    {
      "epoch": 0.15950920245398773,
      "grad_norm": 0.13341274857521057,
      "learning_rate": 2.6631578947368417e-06,
      "loss": 0.0007,
      "step": 195
    },
    {
      "epoch": 0.1603271983640082,
      "grad_norm": 0.1136331856250763,
      "learning_rate": 2.1305263157894737e-06,
      "loss": 0.0033,
      "step": 196
    },
    {
      "epoch": 0.16114519427402862,
      "grad_norm": 0.052339375019073486,
      "learning_rate": 1.5978947368421053e-06,
      "loss": 0.002,
      "step": 197
    },
    {
      "epoch": 0.1619631901840491,
      "grad_norm": 0.01893027499318123,
      "learning_rate": 1.0652631578947369e-06,
      "loss": 0.0008,
      "step": 198
    },
    {
      "epoch": 0.16278118609406952,
      "grad_norm": 0.01076345145702362,
      "learning_rate": 5.326315789473684e-07,
      "loss": 0.0006,
      "step": 199
    },
    {
      "epoch": 0.16359918200409,
      "grad_norm": 0.029393598437309265,
      "learning_rate": 0.0,
      "loss": 0.0015,
      "step": 200
    },
    {
      "epoch": 0.16359918200409,
      "eval_loss": 0.21266202628612518,
      "eval_runtime": 165.9021,
      "eval_samples_per_second": 3.104,
      "eval_steps_per_second": 0.778,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.484049111154688e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|