|
{ |
|
"best_metric": 0.00024003432190511376, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-150", |
|
"epoch": 1.7278617710583153, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008639308855291577, |
|
"grad_norm": 0.7260720729827881, |
|
"learning_rate": 7e-06, |
|
"loss": 0.0407, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008639308855291577, |
|
"eval_loss": 0.15066038072109222, |
|
"eval_runtime": 20.6837, |
|
"eval_samples_per_second": 9.428, |
|
"eval_steps_per_second": 2.369, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.017278617710583154, |
|
"grad_norm": 0.557333767414093, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.034, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02591792656587473, |
|
"grad_norm": 0.3476514518260956, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.0328, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03455723542116631, |
|
"grad_norm": 0.18027208745479584, |
|
"learning_rate": 2.8e-05, |
|
"loss": 0.0306, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04319654427645788, |
|
"grad_norm": 0.15658758580684662, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0283, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05183585313174946, |
|
"grad_norm": 0.2384180724620819, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.0351, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.06047516198704104, |
|
"grad_norm": 0.31615033745765686, |
|
"learning_rate": 4.899999999999999e-05, |
|
"loss": 0.0305, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.06911447084233262, |
|
"grad_norm": 0.1833355873823166, |
|
"learning_rate": 5.6e-05, |
|
"loss": 0.03, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07775377969762419, |
|
"grad_norm": 0.24100624024868011, |
|
"learning_rate": 6.3e-05, |
|
"loss": 0.0226, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.08639308855291576, |
|
"grad_norm": 0.23082295060157776, |
|
"learning_rate": 7e-05, |
|
"loss": 0.0112, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09503239740820735, |
|
"grad_norm": 0.09087604284286499, |
|
"learning_rate": 6.999521567473641e-05, |
|
"loss": 0.0045, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.10367170626349892, |
|
"grad_norm": 0.5570639371871948, |
|
"learning_rate": 6.998086400693241e-05, |
|
"loss": 0.0162, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.11231101511879049, |
|
"grad_norm": 0.08612176030874252, |
|
"learning_rate": 6.995694892019065e-05, |
|
"loss": 0.0043, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.12095032397408208, |
|
"grad_norm": 0.062429703772068024, |
|
"learning_rate": 6.99234769526571e-05, |
|
"loss": 0.0019, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.12958963282937366, |
|
"grad_norm": 0.3207145035266876, |
|
"learning_rate": 6.988045725523343e-05, |
|
"loss": 0.0064, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.13822894168466524, |
|
"grad_norm": 0.11560860276222229, |
|
"learning_rate": 6.982790158907539e-05, |
|
"loss": 0.003, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.1468682505399568, |
|
"grad_norm": 0.04708254709839821, |
|
"learning_rate": 6.976582432237733e-05, |
|
"loss": 0.0016, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.15550755939524838, |
|
"grad_norm": 0.07981427013874054, |
|
"learning_rate": 6.969424242644413e-05, |
|
"loss": 0.0017, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.16414686825053995, |
|
"grad_norm": 0.09298226237297058, |
|
"learning_rate": 6.961317547105138e-05, |
|
"loss": 0.0006, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.17278617710583152, |
|
"grad_norm": 0.02390686422586441, |
|
"learning_rate": 6.952264561909527e-05, |
|
"loss": 0.0008, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18142548596112312, |
|
"grad_norm": 0.06466929614543915, |
|
"learning_rate": 6.942267762053337e-05, |
|
"loss": 0.0014, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1900647948164147, |
|
"grad_norm": 0.05321163311600685, |
|
"learning_rate": 6.931329880561832e-05, |
|
"loss": 0.001, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.19870410367170627, |
|
"grad_norm": 0.02070157416164875, |
|
"learning_rate": 6.919453907742597e-05, |
|
"loss": 0.0006, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.20734341252699784, |
|
"grad_norm": 0.16782431304454803, |
|
"learning_rate": 6.90664309036802e-05, |
|
"loss": 0.0049, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.2159827213822894, |
|
"grad_norm": 0.14060857892036438, |
|
"learning_rate": 6.892900930787656e-05, |
|
"loss": 0.0012, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.22462203023758098, |
|
"grad_norm": 0.43972817063331604, |
|
"learning_rate": 6.87823118597072e-05, |
|
"loss": 0.0071, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.23326133909287258, |
|
"grad_norm": 0.05086011067032814, |
|
"learning_rate": 6.862637866478969e-05, |
|
"loss": 0.0013, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.24190064794816415, |
|
"grad_norm": 0.06401721388101578, |
|
"learning_rate": 6.846125235370252e-05, |
|
"loss": 0.0013, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.2505399568034557, |
|
"grad_norm": 0.09170061349868774, |
|
"learning_rate": 6.828697807033038e-05, |
|
"loss": 0.0183, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.2591792656587473, |
|
"grad_norm": 0.06554131209850311, |
|
"learning_rate": 6.81036034595222e-05, |
|
"loss": 0.018, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2678185745140389, |
|
"grad_norm": 0.08401723951101303, |
|
"learning_rate": 6.791117865406564e-05, |
|
"loss": 0.0175, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.27645788336933047, |
|
"grad_norm": 0.06230723112821579, |
|
"learning_rate": 6.770975626098112e-05, |
|
"loss": 0.0122, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.28509719222462204, |
|
"grad_norm": 0.066391222178936, |
|
"learning_rate": 6.749939134713974e-05, |
|
"loss": 0.0137, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.2937365010799136, |
|
"grad_norm": 0.01942325197160244, |
|
"learning_rate": 6.728014142420846e-05, |
|
"loss": 0.0023, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.3023758099352052, |
|
"grad_norm": 0.05515532195568085, |
|
"learning_rate": 6.7052066432927e-05, |
|
"loss": 0.0023, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.31101511879049676, |
|
"grad_norm": 0.038139645010232925, |
|
"learning_rate": 6.681522872672069e-05, |
|
"loss": 0.0015, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.31965442764578833, |
|
"grad_norm": 0.03340466320514679, |
|
"learning_rate": 6.656969305465356e-05, |
|
"loss": 0.0016, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3282937365010799, |
|
"grad_norm": 0.04159877821803093, |
|
"learning_rate": 6.631552654372672e-05, |
|
"loss": 0.0015, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.3369330453563715, |
|
"grad_norm": 0.03693181276321411, |
|
"learning_rate": 6.60527986805264e-05, |
|
"loss": 0.0017, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.34557235421166305, |
|
"grad_norm": 0.034342411905527115, |
|
"learning_rate": 6.578158129222711e-05, |
|
"loss": 0.0013, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3542116630669546, |
|
"grad_norm": 0.022351600229740143, |
|
"learning_rate": 6.550194852695469e-05, |
|
"loss": 0.0013, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.36285097192224625, |
|
"grad_norm": 0.07802402973175049, |
|
"learning_rate": 6.521397683351509e-05, |
|
"loss": 0.0012, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3714902807775378, |
|
"grad_norm": 0.011767297983169556, |
|
"learning_rate": 6.491774494049386e-05, |
|
"loss": 0.0004, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.3801295896328294, |
|
"grad_norm": 0.0234123133122921, |
|
"learning_rate": 6.461333383473272e-05, |
|
"loss": 0.0013, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.38876889848812096, |
|
"grad_norm": 0.007028356194496155, |
|
"learning_rate": 6.430082673918849e-05, |
|
"loss": 0.0004, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.39740820734341253, |
|
"grad_norm": 0.011285451240837574, |
|
"learning_rate": 6.398030909018069e-05, |
|
"loss": 0.0003, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.4060475161987041, |
|
"grad_norm": 0.07014564424753189, |
|
"learning_rate": 6.365186851403423e-05, |
|
"loss": 0.001, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.4146868250539957, |
|
"grad_norm": 0.023154348134994507, |
|
"learning_rate": 6.331559480312315e-05, |
|
"loss": 0.0003, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.42332613390928725, |
|
"grad_norm": 0.08951613306999207, |
|
"learning_rate": 6.297157989132236e-05, |
|
"loss": 0.0011, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.4319654427645788, |
|
"grad_norm": 0.03926246613264084, |
|
"learning_rate": 6.261991782887377e-05, |
|
"loss": 0.0006, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4319654427645788, |
|
"eval_loss": 0.0019684885628521442, |
|
"eval_runtime": 20.5581, |
|
"eval_samples_per_second": 9.485, |
|
"eval_steps_per_second": 2.383, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4406047516198704, |
|
"grad_norm": 0.17613102495670319, |
|
"learning_rate": 6.226070475667393e-05, |
|
"loss": 0.0021, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.44924406047516197, |
|
"grad_norm": 0.05827736854553223, |
|
"learning_rate": 6.189403887999006e-05, |
|
"loss": 0.001, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.45788336933045354, |
|
"grad_norm": 0.12556667625904083, |
|
"learning_rate": 6.152002044161171e-05, |
|
"loss": 0.0015, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.46652267818574517, |
|
"grad_norm": 0.07447590678930283, |
|
"learning_rate": 6.113875169444539e-05, |
|
"loss": 0.0009, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.47516198704103674, |
|
"grad_norm": 0.27384987473487854, |
|
"learning_rate": 6.0750336873559605e-05, |
|
"loss": 0.01, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.4838012958963283, |
|
"grad_norm": 0.005780680105090141, |
|
"learning_rate": 6.035488216768811e-05, |
|
"loss": 0.0002, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.4924406047516199, |
|
"grad_norm": 0.04053672403097153, |
|
"learning_rate": 5.9952495690198894e-05, |
|
"loss": 0.0051, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.5010799136069114, |
|
"grad_norm": 0.04079966992139816, |
|
"learning_rate": 5.954328744953709e-05, |
|
"loss": 0.0057, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.509719222462203, |
|
"grad_norm": 0.03938170522451401, |
|
"learning_rate": 5.91273693191498e-05, |
|
"loss": 0.0049, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.5183585313174947, |
|
"grad_norm": 0.06116793677210808, |
|
"learning_rate": 5.870485500690094e-05, |
|
"loss": 0.0053, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5269978401727862, |
|
"grad_norm": 0.06775252521038055, |
|
"learning_rate": 5.827586002398468e-05, |
|
"loss": 0.0034, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.5356371490280778, |
|
"grad_norm": 0.04742324352264404, |
|
"learning_rate": 5.784050165334589e-05, |
|
"loss": 0.0009, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.5442764578833693, |
|
"grad_norm": 0.0582570843398571, |
|
"learning_rate": 5.739889891761608e-05, |
|
"loss": 0.0021, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.5529157667386609, |
|
"grad_norm": 0.024544579908251762, |
|
"learning_rate": 5.6951172546573794e-05, |
|
"loss": 0.0006, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.5615550755939525, |
|
"grad_norm": 0.07139912247657776, |
|
"learning_rate": 5.6497444944138376e-05, |
|
"loss": 0.0017, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5701943844492441, |
|
"grad_norm": 0.02395671233534813, |
|
"learning_rate": 5.603784015490587e-05, |
|
"loss": 0.0005, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.5788336933045356, |
|
"grad_norm": 0.004293499980121851, |
|
"learning_rate": 5.557248383023655e-05, |
|
"loss": 0.0003, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.5874730021598272, |
|
"grad_norm": 0.029220029711723328, |
|
"learning_rate": 5.510150319390302e-05, |
|
"loss": 0.0005, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.5961123110151187, |
|
"grad_norm": 0.037274319678545, |
|
"learning_rate": 5.4625027007308546e-05, |
|
"loss": 0.0015, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.6047516198704104, |
|
"grad_norm": 0.00902900006622076, |
|
"learning_rate": 5.414318553428494e-05, |
|
"loss": 0.0003, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6133909287257019, |
|
"grad_norm": 0.01143543142825365, |
|
"learning_rate": 5.3656110505479776e-05, |
|
"loss": 0.0003, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.6220302375809935, |
|
"grad_norm": 0.005858670454472303, |
|
"learning_rate": 5.316393508234253e-05, |
|
"loss": 0.0004, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.6306695464362851, |
|
"grad_norm": 0.006607948802411556, |
|
"learning_rate": 5.266679382071953e-05, |
|
"loss": 0.0004, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.6393088552915767, |
|
"grad_norm": 0.05994042009115219, |
|
"learning_rate": 5.216482263406778e-05, |
|
"loss": 0.0006, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.6479481641468683, |
|
"grad_norm": 0.003944529686123133, |
|
"learning_rate": 5.1658158756297576e-05, |
|
"loss": 0.0002, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6565874730021598, |
|
"grad_norm": 0.005714634899049997, |
|
"learning_rate": 5.114694070425407e-05, |
|
"loss": 0.0002, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.6652267818574514, |
|
"grad_norm": 0.24551953375339508, |
|
"learning_rate": 5.063130823984823e-05, |
|
"loss": 0.0005, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.673866090712743, |
|
"grad_norm": 0.10831040889024734, |
|
"learning_rate": 5.011140233184724e-05, |
|
"loss": 0.0027, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.6825053995680346, |
|
"grad_norm": 0.0029632514342665672, |
|
"learning_rate": 4.958736511733516e-05, |
|
"loss": 0.0002, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.6911447084233261, |
|
"grad_norm": 0.007232643198221922, |
|
"learning_rate": 4.905933986285393e-05, |
|
"loss": 0.0001, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6997840172786177, |
|
"grad_norm": 0.010217340663075447, |
|
"learning_rate": 4.8527470925235824e-05, |
|
"loss": 0.0002, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.7084233261339092, |
|
"grad_norm": 0.2681877613067627, |
|
"learning_rate": 4.799190371213772e-05, |
|
"loss": 0.0037, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.7170626349892009, |
|
"grad_norm": 0.006039237137883902, |
|
"learning_rate": 4.745278464228808e-05, |
|
"loss": 0.0002, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.7257019438444925, |
|
"grad_norm": 0.005269297398626804, |
|
"learning_rate": 4.69102611054575e-05, |
|
"loss": 0.0003, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.734341252699784, |
|
"grad_norm": 0.06765911728143692, |
|
"learning_rate": 4.6364481422163926e-05, |
|
"loss": 0.0019, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7429805615550756, |
|
"grad_norm": 0.05636543780565262, |
|
"learning_rate": 4.581559480312316e-05, |
|
"loss": 0.0023, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.7516198704103672, |
|
"grad_norm": 0.026066439226269722, |
|
"learning_rate": 4.526375130845627e-05, |
|
"loss": 0.0015, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.7602591792656588, |
|
"grad_norm": 0.018351661041378975, |
|
"learning_rate": 4.4709101806664554e-05, |
|
"loss": 0.0014, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.7688984881209503, |
|
"grad_norm": 0.1412251740694046, |
|
"learning_rate": 4.4151797933383685e-05, |
|
"loss": 0.0096, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.7775377969762419, |
|
"grad_norm": 0.21160076558589935, |
|
"learning_rate": 4.359199204992797e-05, |
|
"loss": 0.0059, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7861771058315334, |
|
"grad_norm": 0.059807900339365005, |
|
"learning_rate": 4.30298372016363e-05, |
|
"loss": 0.0005, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.7948164146868251, |
|
"grad_norm": 0.010206430219113827, |
|
"learning_rate": 4.246548707603114e-05, |
|
"loss": 0.0003, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.8034557235421166, |
|
"grad_norm": 0.021596243605017662, |
|
"learning_rate": 4.1899095960801805e-05, |
|
"loss": 0.0004, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.8120950323974082, |
|
"grad_norm": 0.0017379262717440724, |
|
"learning_rate": 4.133081870162385e-05, |
|
"loss": 0.0002, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.8207343412526998, |
|
"grad_norm": 0.01433930266648531, |
|
"learning_rate": 4.076081065982569e-05, |
|
"loss": 0.0003, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8293736501079914, |
|
"grad_norm": 0.03355858847498894, |
|
"learning_rate": 4.018922766991447e-05, |
|
"loss": 0.0006, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.838012958963283, |
|
"grad_norm": 0.1033296138048172, |
|
"learning_rate": 3.961622599697241e-05, |
|
"loss": 0.0013, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.8466522678185745, |
|
"grad_norm": 0.10396935045719147, |
|
"learning_rate": 3.9041962293935516e-05, |
|
"loss": 0.0035, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.8552915766738661, |
|
"grad_norm": 0.007392291445285082, |
|
"learning_rate": 3.84665935587662e-05, |
|
"loss": 0.0003, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.8639308855291576, |
|
"grad_norm": 0.06569644808769226, |
|
"learning_rate": 3.7890277091531636e-05, |
|
"loss": 0.0013, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8639308855291576, |
|
"eval_loss": 0.0009457149426452816, |
|
"eval_runtime": 20.7602, |
|
"eval_samples_per_second": 9.393, |
|
"eval_steps_per_second": 2.36, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8725701943844493, |
|
"grad_norm": 0.06337860226631165, |
|
"learning_rate": 3.7313170451399475e-05, |
|
"loss": 0.0019, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.8812095032397408, |
|
"grad_norm": 0.07296153157949448, |
|
"learning_rate": 3.673543141356278e-05, |
|
"loss": 0.0033, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.8898488120950324, |
|
"grad_norm": 0.09170746803283691, |
|
"learning_rate": 3.6157217926105783e-05, |
|
"loss": 0.0004, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.8984881209503239, |
|
"grad_norm": 0.0043894099071621895, |
|
"learning_rate": 3.557868806682255e-05, |
|
"loss": 0.0002, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.9071274298056156, |
|
"grad_norm": 0.004214062821120024, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0003, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.9157667386609071, |
|
"grad_norm": 0.004896323662251234, |
|
"learning_rate": 3.442131193317745e-05, |
|
"loss": 0.0002, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.9244060475161987, |
|
"grad_norm": 0.04607084020972252, |
|
"learning_rate": 3.384278207389421e-05, |
|
"loss": 0.0005, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.9330453563714903, |
|
"grad_norm": 0.024103185161948204, |
|
"learning_rate": 3.3264568586437216e-05, |
|
"loss": 0.0004, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.9416846652267818, |
|
"grad_norm": 0.19529423117637634, |
|
"learning_rate": 3.268682954860052e-05, |
|
"loss": 0.0016, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.9503239740820735, |
|
"grad_norm": 0.013852166011929512, |
|
"learning_rate": 3.210972290846837e-05, |
|
"loss": 0.0003, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.958963282937365, |
|
"grad_norm": 0.005281697493046522, |
|
"learning_rate": 3.15334064412338e-05, |
|
"loss": 0.0003, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.9676025917926566, |
|
"grad_norm": 0.04835696145892143, |
|
"learning_rate": 3.0958037706064485e-05, |
|
"loss": 0.0007, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.9762419006479481, |
|
"grad_norm": 0.007758499588817358, |
|
"learning_rate": 3.038377400302758e-05, |
|
"loss": 0.0003, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.9848812095032398, |
|
"grad_norm": 0.006247072480618954, |
|
"learning_rate": 2.9810772330085524e-05, |
|
"loss": 0.0006, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.9935205183585313, |
|
"grad_norm": 0.06823667138814926, |
|
"learning_rate": 2.9239189340174306e-05, |
|
"loss": 0.0006, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.0021598272138228, |
|
"grad_norm": 0.13855108618736267, |
|
"learning_rate": 2.8669181298376163e-05, |
|
"loss": 0.0025, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.0107991360691144, |
|
"grad_norm": 0.013171014375984669, |
|
"learning_rate": 2.8100904039198193e-05, |
|
"loss": 0.0007, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.019438444924406, |
|
"grad_norm": 0.012365025468170643, |
|
"learning_rate": 2.7534512923968863e-05, |
|
"loss": 0.0006, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.0280777537796977, |
|
"grad_norm": 0.009904728271067142, |
|
"learning_rate": 2.6970162798363695e-05, |
|
"loss": 0.0008, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.0367170626349893, |
|
"grad_norm": 0.006425977218896151, |
|
"learning_rate": 2.640800795007203e-05, |
|
"loss": 0.0004, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0453563714902807, |
|
"grad_norm": 0.01372888870537281, |
|
"learning_rate": 2.5848202066616305e-05, |
|
"loss": 0.0002, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.0539956803455723, |
|
"grad_norm": 0.0021302136592566967, |
|
"learning_rate": 2.5290898193335446e-05, |
|
"loss": 0.0003, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.062634989200864, |
|
"grad_norm": 0.0052225952968001366, |
|
"learning_rate": 2.4736248691543736e-05, |
|
"loss": 0.0002, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.0712742980561556, |
|
"grad_norm": 0.0029196590185165405, |
|
"learning_rate": 2.4184405196876842e-05, |
|
"loss": 0.0001, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.079913606911447, |
|
"grad_norm": 0.09910155832767487, |
|
"learning_rate": 2.363551857783608e-05, |
|
"loss": 0.0003, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.0885529157667386, |
|
"grad_norm": 0.0008615689584985375, |
|
"learning_rate": 2.308973889454249e-05, |
|
"loss": 0.0002, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.0971922246220303, |
|
"grad_norm": 0.0013229359174147248, |
|
"learning_rate": 2.2547215357711918e-05, |
|
"loss": 0.0001, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.1058315334773219, |
|
"grad_norm": 0.000881396175827831, |
|
"learning_rate": 2.2008096287862266e-05, |
|
"loss": 0.0001, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.1144708423326133, |
|
"grad_norm": 0.0023514782078564167, |
|
"learning_rate": 2.1472529074764177e-05, |
|
"loss": 0.0002, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.123110151187905, |
|
"grad_norm": 0.013889284804463387, |
|
"learning_rate": 2.0940660137146074e-05, |
|
"loss": 0.0003, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.1317494600431965, |
|
"grad_norm": 0.0022602914832532406, |
|
"learning_rate": 2.041263488266484e-05, |
|
"loss": 0.0001, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.1403887688984882, |
|
"grad_norm": 0.0012188655091449618, |
|
"learning_rate": 1.988859766815275e-05, |
|
"loss": 0.0001, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.1490280777537798, |
|
"grad_norm": 0.0018668539123609662, |
|
"learning_rate": 1.9368691760151773e-05, |
|
"loss": 0.0001, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.1576673866090712, |
|
"grad_norm": 0.0017291579861193895, |
|
"learning_rate": 1.885305929574593e-05, |
|
"loss": 0.0001, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.1663066954643628, |
|
"grad_norm": 0.0010335007682442665, |
|
"learning_rate": 1.8341841243702424e-05, |
|
"loss": 0.0001, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.1749460043196545, |
|
"grad_norm": 0.18275120854377747, |
|
"learning_rate": 1.7835177365932225e-05, |
|
"loss": 0.0022, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.183585313174946, |
|
"grad_norm": 0.0107800904661417, |
|
"learning_rate": 1.7333206179280478e-05, |
|
"loss": 0.0002, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.1922246220302375, |
|
"grad_norm": 0.0653991624712944, |
|
"learning_rate": 1.6836064917657478e-05, |
|
"loss": 0.0004, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.2008639308855291, |
|
"grad_norm": 0.005070838611572981, |
|
"learning_rate": 1.6343889494520224e-05, |
|
"loss": 0.0002, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 1.2095032397408207, |
|
"grad_norm": 0.004730647429823875, |
|
"learning_rate": 1.5856814465715064e-05, |
|
"loss": 0.0001, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.2181425485961124, |
|
"grad_norm": 0.0011722528142854571, |
|
"learning_rate": 1.5374972992691458e-05, |
|
"loss": 0.0001, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.226781857451404, |
|
"grad_norm": 0.001693835249170661, |
|
"learning_rate": 1.4898496806096974e-05, |
|
"loss": 0.0001, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.2354211663066954, |
|
"grad_norm": 0.003972134552896023, |
|
"learning_rate": 1.4427516169763444e-05, |
|
"loss": 0.0001, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.244060475161987, |
|
"grad_norm": 0.01975773461163044, |
|
"learning_rate": 1.396215984509412e-05, |
|
"loss": 0.0005, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.2526997840172787, |
|
"grad_norm": 0.007931800559163094, |
|
"learning_rate": 1.3502555055861625e-05, |
|
"loss": 0.0004, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.26133909287257, |
|
"grad_norm": 0.022132746875286102, |
|
"learning_rate": 1.3048827453426203e-05, |
|
"loss": 0.0005, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.2699784017278617, |
|
"grad_norm": 0.010564382188022137, |
|
"learning_rate": 1.2601101082383917e-05, |
|
"loss": 0.0004, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.2786177105831533, |
|
"grad_norm": 0.009835362434387207, |
|
"learning_rate": 1.2159498346654094e-05, |
|
"loss": 0.0005, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.287257019438445, |
|
"grad_norm": 0.006598853040486574, |
|
"learning_rate": 1.1724139976015306e-05, |
|
"loss": 0.0003, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.2958963282937366, |
|
"grad_norm": 0.00920469593256712, |
|
"learning_rate": 1.1295144993099068e-05, |
|
"loss": 0.0005, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.2958963282937366, |
|
"eval_loss": 0.00024003432190511376, |
|
"eval_runtime": 20.7717, |
|
"eval_samples_per_second": 9.388, |
|
"eval_steps_per_second": 2.359, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.3045356371490282, |
|
"grad_norm": 0.0014002382522448897, |
|
"learning_rate": 1.0872630680850196e-05, |
|
"loss": 0.0001, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.3131749460043196, |
|
"grad_norm": 0.0010238329414278269, |
|
"learning_rate": 1.0456712550462898e-05, |
|
"loss": 0.0002, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.3218142548596112, |
|
"grad_norm": 0.0012431687209755182, |
|
"learning_rate": 1.0047504309801104e-05, |
|
"loss": 0.0001, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.3304535637149029, |
|
"grad_norm": 0.0026770096737891436, |
|
"learning_rate": 9.645117832311886e-06, |
|
"loss": 0.0001, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.3390928725701943, |
|
"grad_norm": 0.01820327155292034, |
|
"learning_rate": 9.249663126440394e-06, |
|
"loss": 0.0002, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.347732181425486, |
|
"grad_norm": 0.01555480808019638, |
|
"learning_rate": 8.861248305554624e-06, |
|
"loss": 0.0002, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.3563714902807775, |
|
"grad_norm": 0.0015801583649590611, |
|
"learning_rate": 8.47997955838829e-06, |
|
"loss": 0.0002, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.3650107991360692, |
|
"grad_norm": 0.10153518617153168, |
|
"learning_rate": 8.10596112000994e-06, |
|
"loss": 0.0004, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.3736501079913608, |
|
"grad_norm": 0.0009876766707748175, |
|
"learning_rate": 7.739295243326067e-06, |
|
"loss": 0.0001, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.3822894168466522, |
|
"grad_norm": 0.0026555354706943035, |
|
"learning_rate": 7.380082171126228e-06, |
|
"loss": 0.0002, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.3909287257019438, |
|
"grad_norm": 0.0006382952560670674, |
|
"learning_rate": 7.028420108677635e-06, |
|
"loss": 0.0001, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.3995680345572354, |
|
"grad_norm": 0.0014154494274407625, |
|
"learning_rate": 6.684405196876842e-06, |
|
"loss": 0.0001, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.408207343412527, |
|
"grad_norm": 0.0011231210082769394, |
|
"learning_rate": 6.3481314859657675e-06, |
|
"loss": 0.0001, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.4168466522678185, |
|
"grad_norm": 0.0014514840440824628, |
|
"learning_rate": 6.019690909819298e-06, |
|
"loss": 0.0001, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.42548596112311, |
|
"grad_norm": 0.0013826994691044092, |
|
"learning_rate": 5.6991732608115e-06, |
|
"loss": 0.0001, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.4341252699784017, |
|
"grad_norm": 0.0013565586414188147, |
|
"learning_rate": 5.386666165267256e-06, |
|
"loss": 0.0001, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.4427645788336934, |
|
"grad_norm": 0.007900253869593143, |
|
"learning_rate": 5.08225505950613e-06, |
|
"loss": 0.0001, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.451403887688985, |
|
"grad_norm": 0.0012855289969593287, |
|
"learning_rate": 4.786023166484913e-06, |
|
"loss": 0.0001, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.4600431965442764, |
|
"grad_norm": 0.01582699827849865, |
|
"learning_rate": 4.498051473045291e-06, |
|
"loss": 0.0002, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.468682505399568, |
|
"grad_norm": 0.0007394661079160869, |
|
"learning_rate": 4.218418707772886e-06, |
|
"loss": 0.0001, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.4773218142548596, |
|
"grad_norm": 0.0008164668688550591, |
|
"learning_rate": 3.947201319473587e-06, |
|
"loss": 0.0001, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.485961123110151, |
|
"grad_norm": 0.002124256454408169, |
|
"learning_rate": 3.684473456273278e-06, |
|
"loss": 0.0002, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.4946004319654427, |
|
"grad_norm": 0.00903933122754097, |
|
"learning_rate": 3.4303069453464383e-06, |
|
"loss": 0.0003, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.5032397408207343, |
|
"grad_norm": 0.017047259956598282, |
|
"learning_rate": 3.184771273279312e-06, |
|
"loss": 0.0006, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.511879049676026, |
|
"grad_norm": 0.01836921088397503, |
|
"learning_rate": 2.947933567072987e-06, |
|
"loss": 0.0006, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.5205183585313176, |
|
"grad_norm": 0.0054769194684922695, |
|
"learning_rate": 2.719858575791534e-06, |
|
"loss": 0.0003, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.5291576673866092, |
|
"grad_norm": 0.05894150957465172, |
|
"learning_rate": 2.500608652860256e-06, |
|
"loss": 0.0009, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.5377969762419006, |
|
"grad_norm": 0.0017646638443693519, |
|
"learning_rate": 2.2902437390188737e-06, |
|
"loss": 0.0002, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.5464362850971922, |
|
"grad_norm": 0.0016004899516701698, |
|
"learning_rate": 2.0888213459343587e-06, |
|
"loss": 0.0002, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.5550755939524838, |
|
"grad_norm": 0.0009360113763250411, |
|
"learning_rate": 1.8963965404777875e-06, |
|
"loss": 0.0001, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.5637149028077753, |
|
"grad_norm": 0.0009685845579952002, |
|
"learning_rate": 1.7130219296696263e-06, |
|
"loss": 0.0001, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.5723542116630669, |
|
"grad_norm": 0.0022252460476011038, |
|
"learning_rate": 1.5387476462974824e-06, |
|
"loss": 0.0003, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.5809935205183585, |
|
"grad_norm": 0.0034834735561162233, |
|
"learning_rate": 1.3736213352103147e-06, |
|
"loss": 0.0001, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.5896328293736501, |
|
"grad_norm": 0.0007790013332851231, |
|
"learning_rate": 1.2176881402928002e-06, |
|
"loss": 0.0001, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.5982721382289418, |
|
"grad_norm": 0.0013246826129034162, |
|
"learning_rate": 1.0709906921234367e-06, |
|
"loss": 0.0001, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.6069114470842334, |
|
"grad_norm": 0.0007768021896481514, |
|
"learning_rate": 9.33569096319799e-07, |
|
"loss": 0.0001, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.6155507559395248, |
|
"grad_norm": 0.0009022291051223874, |
|
"learning_rate": 8.054609225740255e-07, |
|
"loss": 0.0001, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.6241900647948164, |
|
"grad_norm": 0.0007956126355566084, |
|
"learning_rate": 6.867011943816724e-07, |
|
"loss": 0.0001, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.6328293736501078, |
|
"grad_norm": 0.0012011040234938264, |
|
"learning_rate": 5.77322379466617e-07, |
|
"loss": 0.0001, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.6414686825053995, |
|
"grad_norm": 0.0011474161874502897, |
|
"learning_rate": 4.773543809047186e-07, |
|
"loss": 0.0001, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.650107991360691, |
|
"grad_norm": 0.0006911220261827111, |
|
"learning_rate": 3.868245289486027e-07, |
|
"loss": 0.0001, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.6587473002159827, |
|
"grad_norm": 0.0015587140806019306, |
|
"learning_rate": 3.0575757355586817e-07, |
|
"loss": 0.0001, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.6673866090712743, |
|
"grad_norm": 0.0011123515432700515, |
|
"learning_rate": 2.3417567762266497e-07, |
|
"loss": 0.0001, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.676025917926566, |
|
"grad_norm": 0.002342136111110449, |
|
"learning_rate": 1.7209841092460043e-07, |
|
"loss": 0.0001, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.6846652267818576, |
|
"grad_norm": 0.0016477032331749797, |
|
"learning_rate": 1.1954274476655534e-07, |
|
"loss": 0.0001, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.693304535637149, |
|
"grad_norm": 0.0014787918189540505, |
|
"learning_rate": 7.652304734289127e-08, |
|
"loss": 0.0001, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.7019438444924406, |
|
"grad_norm": 0.00296723167411983, |
|
"learning_rate": 4.30510798093342e-08, |
|
"loss": 0.0001, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.710583153347732, |
|
"grad_norm": 0.0015325212152674794, |
|
"learning_rate": 1.9135993067588284e-08, |
|
"loss": 0.0001, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.7192224622030237, |
|
"grad_norm": 0.00978434830904007, |
|
"learning_rate": 4.784325263584854e-09, |
|
"loss": 0.0002, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.7278617710583153, |
|
"grad_norm": 0.0022683811839669943, |
|
"learning_rate": 0.0, |
|
"loss": 0.0002, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.7278617710583153, |
|
"eval_loss": 0.00024209167168010026, |
|
"eval_runtime": 20.7713, |
|
"eval_samples_per_second": 9.388, |
|
"eval_steps_per_second": 2.359, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.970690585555108e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|