{ "best_metric": 0.00024003432190511376, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 1.7278617710583153, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008639308855291577, "grad_norm": 0.7260720729827881, "learning_rate": 7e-06, "loss": 0.0407, "step": 1 }, { "epoch": 0.008639308855291577, "eval_loss": 0.15066038072109222, "eval_runtime": 20.6837, "eval_samples_per_second": 9.428, "eval_steps_per_second": 2.369, "step": 1 }, { "epoch": 0.017278617710583154, "grad_norm": 0.557333767414093, "learning_rate": 1.4e-05, "loss": 0.034, "step": 2 }, { "epoch": 0.02591792656587473, "grad_norm": 0.3476514518260956, "learning_rate": 2.1e-05, "loss": 0.0328, "step": 3 }, { "epoch": 0.03455723542116631, "grad_norm": 0.18027208745479584, "learning_rate": 2.8e-05, "loss": 0.0306, "step": 4 }, { "epoch": 0.04319654427645788, "grad_norm": 0.15658758580684662, "learning_rate": 3.5e-05, "loss": 0.0283, "step": 5 }, { "epoch": 0.05183585313174946, "grad_norm": 0.2384180724620819, "learning_rate": 4.2e-05, "loss": 0.0351, "step": 6 }, { "epoch": 0.06047516198704104, "grad_norm": 0.31615033745765686, "learning_rate": 4.899999999999999e-05, "loss": 0.0305, "step": 7 }, { "epoch": 0.06911447084233262, "grad_norm": 0.1833355873823166, "learning_rate": 5.6e-05, "loss": 0.03, "step": 8 }, { "epoch": 0.07775377969762419, "grad_norm": 0.24100624024868011, "learning_rate": 6.3e-05, "loss": 0.0226, "step": 9 }, { "epoch": 0.08639308855291576, "grad_norm": 0.23082295060157776, "learning_rate": 7e-05, "loss": 0.0112, "step": 10 }, { "epoch": 0.09503239740820735, "grad_norm": 0.09087604284286499, "learning_rate": 6.999521567473641e-05, "loss": 0.0045, "step": 11 }, { "epoch": 0.10367170626349892, "grad_norm": 0.5570639371871948, "learning_rate": 6.998086400693241e-05, "loss": 0.0162, "step": 12 }, { "epoch": 0.11231101511879049, "grad_norm": 0.08612176030874252, "learning_rate": 6.995694892019065e-05, "loss": 0.0043, "step": 13 }, { "epoch": 0.12095032397408208, "grad_norm": 0.062429703772068024, "learning_rate": 6.99234769526571e-05, "loss": 0.0019, "step": 14 }, { "epoch": 0.12958963282937366, "grad_norm": 0.3207145035266876, "learning_rate": 6.988045725523343e-05, "loss": 0.0064, "step": 15 }, { "epoch": 0.13822894168466524, "grad_norm": 0.11560860276222229, "learning_rate": 6.982790158907539e-05, "loss": 0.003, "step": 16 }, { "epoch": 0.1468682505399568, "grad_norm": 0.04708254709839821, "learning_rate": 6.976582432237733e-05, "loss": 0.0016, "step": 17 }, { "epoch": 0.15550755939524838, "grad_norm": 0.07981427013874054, "learning_rate": 6.969424242644413e-05, "loss": 0.0017, "step": 18 }, { "epoch": 0.16414686825053995, "grad_norm": 0.09298226237297058, "learning_rate": 6.961317547105138e-05, "loss": 0.0006, "step": 19 }, { "epoch": 0.17278617710583152, "grad_norm": 0.02390686422586441, "learning_rate": 6.952264561909527e-05, "loss": 0.0008, "step": 20 }, { "epoch": 0.18142548596112312, "grad_norm": 0.06466929614543915, "learning_rate": 6.942267762053337e-05, "loss": 0.0014, "step": 21 }, { "epoch": 0.1900647948164147, "grad_norm": 0.05321163311600685, "learning_rate": 6.931329880561832e-05, "loss": 0.001, "step": 22 }, { "epoch": 0.19870410367170627, "grad_norm": 0.02070157416164875, "learning_rate": 6.919453907742597e-05, "loss": 0.0006, "step": 23 }, { "epoch": 0.20734341252699784, "grad_norm": 0.16782431304454803, "learning_rate": 6.90664309036802e-05, "loss": 0.0049, "step": 24 }, { "epoch": 0.2159827213822894, "grad_norm": 0.14060857892036438, "learning_rate": 6.892900930787656e-05, "loss": 0.0012, "step": 25 }, { "epoch": 0.22462203023758098, "grad_norm": 0.43972817063331604, "learning_rate": 6.87823118597072e-05, "loss": 0.0071, "step": 26 }, { "epoch": 0.23326133909287258, "grad_norm": 0.05086011067032814, "learning_rate": 6.862637866478969e-05, "loss": 0.0013, "step": 27 }, { "epoch": 0.24190064794816415, "grad_norm": 0.06401721388101578, "learning_rate": 6.846125235370252e-05, "loss": 0.0013, "step": 28 }, { "epoch": 0.2505399568034557, "grad_norm": 0.09170061349868774, "learning_rate": 6.828697807033038e-05, "loss": 0.0183, "step": 29 }, { "epoch": 0.2591792656587473, "grad_norm": 0.06554131209850311, "learning_rate": 6.81036034595222e-05, "loss": 0.018, "step": 30 }, { "epoch": 0.2678185745140389, "grad_norm": 0.08401723951101303, "learning_rate": 6.791117865406564e-05, "loss": 0.0175, "step": 31 }, { "epoch": 0.27645788336933047, "grad_norm": 0.06230723112821579, "learning_rate": 6.770975626098112e-05, "loss": 0.0122, "step": 32 }, { "epoch": 0.28509719222462204, "grad_norm": 0.066391222178936, "learning_rate": 6.749939134713974e-05, "loss": 0.0137, "step": 33 }, { "epoch": 0.2937365010799136, "grad_norm": 0.01942325197160244, "learning_rate": 6.728014142420846e-05, "loss": 0.0023, "step": 34 }, { "epoch": 0.3023758099352052, "grad_norm": 0.05515532195568085, "learning_rate": 6.7052066432927e-05, "loss": 0.0023, "step": 35 }, { "epoch": 0.31101511879049676, "grad_norm": 0.038139645010232925, "learning_rate": 6.681522872672069e-05, "loss": 0.0015, "step": 36 }, { "epoch": 0.31965442764578833, "grad_norm": 0.03340466320514679, "learning_rate": 6.656969305465356e-05, "loss": 0.0016, "step": 37 }, { "epoch": 0.3282937365010799, "grad_norm": 0.04159877821803093, "learning_rate": 6.631552654372672e-05, "loss": 0.0015, "step": 38 }, { "epoch": 0.3369330453563715, "grad_norm": 0.03693181276321411, "learning_rate": 6.60527986805264e-05, "loss": 0.0017, "step": 39 }, { "epoch": 0.34557235421166305, "grad_norm": 0.034342411905527115, "learning_rate": 6.578158129222711e-05, "loss": 0.0013, "step": 40 }, { "epoch": 0.3542116630669546, "grad_norm": 0.022351600229740143, "learning_rate": 6.550194852695469e-05, "loss": 0.0013, "step": 41 }, { "epoch": 0.36285097192224625, "grad_norm": 0.07802402973175049, "learning_rate": 6.521397683351509e-05, "loss": 0.0012, "step": 42 }, { "epoch": 0.3714902807775378, "grad_norm": 0.011767297983169556, "learning_rate": 6.491774494049386e-05, "loss": 0.0004, "step": 43 }, { "epoch": 0.3801295896328294, "grad_norm": 0.0234123133122921, "learning_rate": 6.461333383473272e-05, "loss": 0.0013, "step": 44 }, { "epoch": 0.38876889848812096, "grad_norm": 0.007028356194496155, "learning_rate": 6.430082673918849e-05, "loss": 0.0004, "step": 45 }, { "epoch": 0.39740820734341253, "grad_norm": 0.011285451240837574, "learning_rate": 6.398030909018069e-05, "loss": 0.0003, "step": 46 }, { "epoch": 0.4060475161987041, "grad_norm": 0.07014564424753189, "learning_rate": 6.365186851403423e-05, "loss": 0.001, "step": 47 }, { "epoch": 0.4146868250539957, "grad_norm": 0.023154348134994507, "learning_rate": 6.331559480312315e-05, "loss": 0.0003, "step": 48 }, { "epoch": 0.42332613390928725, "grad_norm": 0.08951613306999207, "learning_rate": 6.297157989132236e-05, "loss": 0.0011, "step": 49 }, { "epoch": 0.4319654427645788, "grad_norm": 0.03926246613264084, "learning_rate": 6.261991782887377e-05, "loss": 0.0006, "step": 50 }, { "epoch": 0.4319654427645788, "eval_loss": 0.0019684885628521442, "eval_runtime": 20.5581, "eval_samples_per_second": 9.485, "eval_steps_per_second": 2.383, "step": 50 }, { "epoch": 0.4406047516198704, "grad_norm": 0.17613102495670319, "learning_rate": 6.226070475667393e-05, "loss": 0.0021, "step": 51 }, { "epoch": 0.44924406047516197, "grad_norm": 0.05827736854553223, "learning_rate": 6.189403887999006e-05, "loss": 0.001, "step": 52 }, { "epoch": 0.45788336933045354, "grad_norm": 0.12556667625904083, "learning_rate": 6.152002044161171e-05, "loss": 0.0015, "step": 53 }, { "epoch": 0.46652267818574517, "grad_norm": 0.07447590678930283, "learning_rate": 6.113875169444539e-05, "loss": 0.0009, "step": 54 }, { "epoch": 0.47516198704103674, "grad_norm": 0.27384987473487854, "learning_rate": 6.0750336873559605e-05, "loss": 0.01, "step": 55 }, { "epoch": 0.4838012958963283, "grad_norm": 0.005780680105090141, "learning_rate": 6.035488216768811e-05, "loss": 0.0002, "step": 56 }, { "epoch": 0.4924406047516199, "grad_norm": 0.04053672403097153, "learning_rate": 5.9952495690198894e-05, "loss": 0.0051, "step": 57 }, { "epoch": 0.5010799136069114, "grad_norm": 0.04079966992139816, "learning_rate": 5.954328744953709e-05, "loss": 0.0057, "step": 58 }, { "epoch": 0.509719222462203, "grad_norm": 0.03938170522451401, "learning_rate": 5.91273693191498e-05, "loss": 0.0049, "step": 59 }, { "epoch": 0.5183585313174947, "grad_norm": 0.06116793677210808, "learning_rate": 5.870485500690094e-05, "loss": 0.0053, "step": 60 }, { "epoch": 0.5269978401727862, "grad_norm": 0.06775252521038055, "learning_rate": 5.827586002398468e-05, "loss": 0.0034, "step": 61 }, { "epoch": 0.5356371490280778, "grad_norm": 0.04742324352264404, "learning_rate": 5.784050165334589e-05, "loss": 0.0009, "step": 62 }, { "epoch": 0.5442764578833693, "grad_norm": 0.0582570843398571, "learning_rate": 5.739889891761608e-05, "loss": 0.0021, "step": 63 }, { "epoch": 0.5529157667386609, "grad_norm": 0.024544579908251762, "learning_rate": 5.6951172546573794e-05, "loss": 0.0006, "step": 64 }, { "epoch": 0.5615550755939525, "grad_norm": 0.07139912247657776, "learning_rate": 5.6497444944138376e-05, "loss": 0.0017, "step": 65 }, { "epoch": 0.5701943844492441, "grad_norm": 0.02395671233534813, "learning_rate": 5.603784015490587e-05, "loss": 0.0005, "step": 66 }, { "epoch": 0.5788336933045356, "grad_norm": 0.004293499980121851, "learning_rate": 5.557248383023655e-05, "loss": 0.0003, "step": 67 }, { "epoch": 0.5874730021598272, "grad_norm": 0.029220029711723328, "learning_rate": 5.510150319390302e-05, "loss": 0.0005, "step": 68 }, { "epoch": 0.5961123110151187, "grad_norm": 0.037274319678545, "learning_rate": 5.4625027007308546e-05, "loss": 0.0015, "step": 69 }, { "epoch": 0.6047516198704104, "grad_norm": 0.00902900006622076, "learning_rate": 5.414318553428494e-05, "loss": 0.0003, "step": 70 }, { "epoch": 0.6133909287257019, "grad_norm": 0.01143543142825365, "learning_rate": 5.3656110505479776e-05, "loss": 0.0003, "step": 71 }, { "epoch": 0.6220302375809935, "grad_norm": 0.005858670454472303, "learning_rate": 5.316393508234253e-05, "loss": 0.0004, "step": 72 }, { "epoch": 0.6306695464362851, "grad_norm": 0.006607948802411556, "learning_rate": 5.266679382071953e-05, "loss": 0.0004, "step": 73 }, { "epoch": 0.6393088552915767, "grad_norm": 0.05994042009115219, "learning_rate": 5.216482263406778e-05, "loss": 0.0006, "step": 74 }, { "epoch": 0.6479481641468683, "grad_norm": 0.003944529686123133, "learning_rate": 5.1658158756297576e-05, "loss": 0.0002, "step": 75 }, { "epoch": 0.6565874730021598, "grad_norm": 0.005714634899049997, "learning_rate": 5.114694070425407e-05, "loss": 0.0002, "step": 76 }, { "epoch": 0.6652267818574514, "grad_norm": 0.24551953375339508, "learning_rate": 5.063130823984823e-05, "loss": 0.0005, "step": 77 }, { "epoch": 0.673866090712743, "grad_norm": 0.10831040889024734, "learning_rate": 5.011140233184724e-05, "loss": 0.0027, "step": 78 }, { "epoch": 0.6825053995680346, "grad_norm": 0.0029632514342665672, "learning_rate": 4.958736511733516e-05, "loss": 0.0002, "step": 79 }, { "epoch": 0.6911447084233261, "grad_norm": 0.007232643198221922, "learning_rate": 4.905933986285393e-05, "loss": 0.0001, "step": 80 }, { "epoch": 0.6997840172786177, "grad_norm": 0.010217340663075447, "learning_rate": 4.8527470925235824e-05, "loss": 0.0002, "step": 81 }, { "epoch": 0.7084233261339092, "grad_norm": 0.2681877613067627, "learning_rate": 4.799190371213772e-05, "loss": 0.0037, "step": 82 }, { "epoch": 0.7170626349892009, "grad_norm": 0.006039237137883902, "learning_rate": 4.745278464228808e-05, "loss": 0.0002, "step": 83 }, { "epoch": 0.7257019438444925, "grad_norm": 0.005269297398626804, "learning_rate": 4.69102611054575e-05, "loss": 0.0003, "step": 84 }, { "epoch": 0.734341252699784, "grad_norm": 0.06765911728143692, "learning_rate": 4.6364481422163926e-05, "loss": 0.0019, "step": 85 }, { "epoch": 0.7429805615550756, "grad_norm": 0.05636543780565262, "learning_rate": 4.581559480312316e-05, "loss": 0.0023, "step": 86 }, { "epoch": 0.7516198704103672, "grad_norm": 0.026066439226269722, "learning_rate": 4.526375130845627e-05, "loss": 0.0015, "step": 87 }, { "epoch": 0.7602591792656588, "grad_norm": 0.018351661041378975, "learning_rate": 4.4709101806664554e-05, "loss": 0.0014, "step": 88 }, { "epoch": 0.7688984881209503, "grad_norm": 0.1412251740694046, "learning_rate": 4.4151797933383685e-05, "loss": 0.0096, "step": 89 }, { "epoch": 0.7775377969762419, "grad_norm": 0.21160076558589935, "learning_rate": 4.359199204992797e-05, "loss": 0.0059, "step": 90 }, { "epoch": 0.7861771058315334, "grad_norm": 0.059807900339365005, "learning_rate": 4.30298372016363e-05, "loss": 0.0005, "step": 91 }, { "epoch": 0.7948164146868251, "grad_norm": 0.010206430219113827, "learning_rate": 4.246548707603114e-05, "loss": 0.0003, "step": 92 }, { "epoch": 0.8034557235421166, "grad_norm": 0.021596243605017662, "learning_rate": 4.1899095960801805e-05, "loss": 0.0004, "step": 93 }, { "epoch": 0.8120950323974082, "grad_norm": 0.0017379262717440724, "learning_rate": 4.133081870162385e-05, "loss": 0.0002, "step": 94 }, { "epoch": 0.8207343412526998, "grad_norm": 0.01433930266648531, "learning_rate": 4.076081065982569e-05, "loss": 0.0003, "step": 95 }, { "epoch": 0.8293736501079914, "grad_norm": 0.03355858847498894, "learning_rate": 4.018922766991447e-05, "loss": 0.0006, "step": 96 }, { "epoch": 0.838012958963283, "grad_norm": 0.1033296138048172, "learning_rate": 3.961622599697241e-05, "loss": 0.0013, "step": 97 }, { "epoch": 0.8466522678185745, "grad_norm": 0.10396935045719147, "learning_rate": 3.9041962293935516e-05, "loss": 0.0035, "step": 98 }, { "epoch": 0.8552915766738661, "grad_norm": 0.007392291445285082, "learning_rate": 3.84665935587662e-05, "loss": 0.0003, "step": 99 }, { "epoch": 0.8639308855291576, "grad_norm": 0.06569644808769226, "learning_rate": 3.7890277091531636e-05, "loss": 0.0013, "step": 100 }, { "epoch": 0.8639308855291576, "eval_loss": 0.0009457149426452816, "eval_runtime": 20.7602, "eval_samples_per_second": 9.393, "eval_steps_per_second": 2.36, "step": 100 }, { "epoch": 0.8725701943844493, "grad_norm": 0.06337860226631165, "learning_rate": 3.7313170451399475e-05, "loss": 0.0019, "step": 101 }, { "epoch": 0.8812095032397408, "grad_norm": 0.07296153157949448, "learning_rate": 3.673543141356278e-05, "loss": 0.0033, "step": 102 }, { "epoch": 0.8898488120950324, "grad_norm": 0.09170746803283691, "learning_rate": 3.6157217926105783e-05, "loss": 0.0004, "step": 103 }, { "epoch": 0.8984881209503239, "grad_norm": 0.0043894099071621895, "learning_rate": 3.557868806682255e-05, "loss": 0.0002, "step": 104 }, { "epoch": 0.9071274298056156, "grad_norm": 0.004214062821120024, "learning_rate": 3.5e-05, "loss": 0.0003, "step": 105 }, { "epoch": 0.9157667386609071, "grad_norm": 0.004896323662251234, "learning_rate": 3.442131193317745e-05, "loss": 0.0002, "step": 106 }, { "epoch": 0.9244060475161987, "grad_norm": 0.04607084020972252, "learning_rate": 3.384278207389421e-05, "loss": 0.0005, "step": 107 }, { "epoch": 0.9330453563714903, "grad_norm": 0.024103185161948204, "learning_rate": 3.3264568586437216e-05, "loss": 0.0004, "step": 108 }, { "epoch": 0.9416846652267818, "grad_norm": 0.19529423117637634, "learning_rate": 3.268682954860052e-05, "loss": 0.0016, "step": 109 }, { "epoch": 0.9503239740820735, "grad_norm": 0.013852166011929512, "learning_rate": 3.210972290846837e-05, "loss": 0.0003, "step": 110 }, { "epoch": 0.958963282937365, "grad_norm": 0.005281697493046522, "learning_rate": 3.15334064412338e-05, "loss": 0.0003, "step": 111 }, { "epoch": 0.9676025917926566, "grad_norm": 0.04835696145892143, "learning_rate": 3.0958037706064485e-05, "loss": 0.0007, "step": 112 }, { "epoch": 0.9762419006479481, "grad_norm": 0.007758499588817358, "learning_rate": 3.038377400302758e-05, "loss": 0.0003, "step": 113 }, { "epoch": 0.9848812095032398, "grad_norm": 0.006247072480618954, "learning_rate": 2.9810772330085524e-05, "loss": 0.0006, "step": 114 }, { "epoch": 0.9935205183585313, "grad_norm": 0.06823667138814926, "learning_rate": 2.9239189340174306e-05, "loss": 0.0006, "step": 115 }, { "epoch": 1.0021598272138228, "grad_norm": 0.13855108618736267, "learning_rate": 2.8669181298376163e-05, "loss": 0.0025, "step": 116 }, { "epoch": 1.0107991360691144, "grad_norm": 0.013171014375984669, "learning_rate": 2.8100904039198193e-05, "loss": 0.0007, "step": 117 }, { "epoch": 1.019438444924406, "grad_norm": 0.012365025468170643, "learning_rate": 2.7534512923968863e-05, "loss": 0.0006, "step": 118 }, { "epoch": 1.0280777537796977, "grad_norm": 0.009904728271067142, "learning_rate": 2.6970162798363695e-05, "loss": 0.0008, "step": 119 }, { "epoch": 1.0367170626349893, "grad_norm": 0.006425977218896151, "learning_rate": 2.640800795007203e-05, "loss": 0.0004, "step": 120 }, { "epoch": 1.0453563714902807, "grad_norm": 0.01372888870537281, "learning_rate": 2.5848202066616305e-05, "loss": 0.0002, "step": 121 }, { "epoch": 1.0539956803455723, "grad_norm": 0.0021302136592566967, "learning_rate": 2.5290898193335446e-05, "loss": 0.0003, "step": 122 }, { "epoch": 1.062634989200864, "grad_norm": 0.0052225952968001366, "learning_rate": 2.4736248691543736e-05, "loss": 0.0002, "step": 123 }, { "epoch": 1.0712742980561556, "grad_norm": 0.0029196590185165405, "learning_rate": 2.4184405196876842e-05, "loss": 0.0001, "step": 124 }, { "epoch": 1.079913606911447, "grad_norm": 0.09910155832767487, "learning_rate": 2.363551857783608e-05, "loss": 0.0003, "step": 125 }, { "epoch": 1.0885529157667386, "grad_norm": 0.0008615689584985375, "learning_rate": 2.308973889454249e-05, "loss": 0.0002, "step": 126 }, { "epoch": 1.0971922246220303, "grad_norm": 0.0013229359174147248, "learning_rate": 2.2547215357711918e-05, "loss": 0.0001, "step": 127 }, { "epoch": 1.1058315334773219, "grad_norm": 0.000881396175827831, "learning_rate": 2.2008096287862266e-05, "loss": 0.0001, "step": 128 }, { "epoch": 1.1144708423326133, "grad_norm": 0.0023514782078564167, "learning_rate": 2.1472529074764177e-05, "loss": 0.0002, "step": 129 }, { "epoch": 1.123110151187905, "grad_norm": 0.013889284804463387, "learning_rate": 2.0940660137146074e-05, "loss": 0.0003, "step": 130 }, { "epoch": 1.1317494600431965, "grad_norm": 0.0022602914832532406, "learning_rate": 2.041263488266484e-05, "loss": 0.0001, "step": 131 }, { "epoch": 1.1403887688984882, "grad_norm": 0.0012188655091449618, "learning_rate": 1.988859766815275e-05, "loss": 0.0001, "step": 132 }, { "epoch": 1.1490280777537798, "grad_norm": 0.0018668539123609662, "learning_rate": 1.9368691760151773e-05, "loss": 0.0001, "step": 133 }, { "epoch": 1.1576673866090712, "grad_norm": 0.0017291579861193895, "learning_rate": 1.885305929574593e-05, "loss": 0.0001, "step": 134 }, { "epoch": 1.1663066954643628, "grad_norm": 0.0010335007682442665, "learning_rate": 1.8341841243702424e-05, "loss": 0.0001, "step": 135 }, { "epoch": 1.1749460043196545, "grad_norm": 0.18275120854377747, "learning_rate": 1.7835177365932225e-05, "loss": 0.0022, "step": 136 }, { "epoch": 1.183585313174946, "grad_norm": 0.0107800904661417, "learning_rate": 1.7333206179280478e-05, "loss": 0.0002, "step": 137 }, { "epoch": 1.1922246220302375, "grad_norm": 0.0653991624712944, "learning_rate": 1.6836064917657478e-05, "loss": 0.0004, "step": 138 }, { "epoch": 1.2008639308855291, "grad_norm": 0.005070838611572981, "learning_rate": 1.6343889494520224e-05, "loss": 0.0002, "step": 139 }, { "epoch": 1.2095032397408207, "grad_norm": 0.004730647429823875, "learning_rate": 1.5856814465715064e-05, "loss": 0.0001, "step": 140 }, { "epoch": 1.2181425485961124, "grad_norm": 0.0011722528142854571, "learning_rate": 1.5374972992691458e-05, "loss": 0.0001, "step": 141 }, { "epoch": 1.226781857451404, "grad_norm": 0.001693835249170661, "learning_rate": 1.4898496806096974e-05, "loss": 0.0001, "step": 142 }, { "epoch": 1.2354211663066954, "grad_norm": 0.003972134552896023, "learning_rate": 1.4427516169763444e-05, "loss": 0.0001, "step": 143 }, { "epoch": 1.244060475161987, "grad_norm": 0.01975773461163044, "learning_rate": 1.396215984509412e-05, "loss": 0.0005, "step": 144 }, { "epoch": 1.2526997840172787, "grad_norm": 0.007931800559163094, "learning_rate": 1.3502555055861625e-05, "loss": 0.0004, "step": 145 }, { "epoch": 1.26133909287257, "grad_norm": 0.022132746875286102, "learning_rate": 1.3048827453426203e-05, "loss": 0.0005, "step": 146 }, { "epoch": 1.2699784017278617, "grad_norm": 0.010564382188022137, "learning_rate": 1.2601101082383917e-05, "loss": 0.0004, "step": 147 }, { "epoch": 1.2786177105831533, "grad_norm": 0.009835362434387207, "learning_rate": 1.2159498346654094e-05, "loss": 0.0005, "step": 148 }, { "epoch": 1.287257019438445, "grad_norm": 0.006598853040486574, "learning_rate": 1.1724139976015306e-05, "loss": 0.0003, "step": 149 }, { "epoch": 1.2958963282937366, "grad_norm": 0.00920469593256712, "learning_rate": 1.1295144993099068e-05, "loss": 0.0005, "step": 150 }, { "epoch": 1.2958963282937366, "eval_loss": 0.00024003432190511376, "eval_runtime": 20.7717, "eval_samples_per_second": 9.388, "eval_steps_per_second": 2.359, "step": 150 }, { "epoch": 1.3045356371490282, "grad_norm": 0.0014002382522448897, "learning_rate": 1.0872630680850196e-05, "loss": 0.0001, "step": 151 }, { "epoch": 1.3131749460043196, "grad_norm": 0.0010238329414278269, "learning_rate": 1.0456712550462898e-05, "loss": 0.0002, "step": 152 }, { "epoch": 1.3218142548596112, "grad_norm": 0.0012431687209755182, "learning_rate": 1.0047504309801104e-05, "loss": 0.0001, "step": 153 }, { "epoch": 1.3304535637149029, "grad_norm": 0.0026770096737891436, "learning_rate": 9.645117832311886e-06, "loss": 0.0001, "step": 154 }, { "epoch": 1.3390928725701943, "grad_norm": 0.01820327155292034, "learning_rate": 9.249663126440394e-06, "loss": 0.0002, "step": 155 }, { "epoch": 1.347732181425486, "grad_norm": 0.01555480808019638, "learning_rate": 8.861248305554624e-06, "loss": 0.0002, "step": 156 }, { "epoch": 1.3563714902807775, "grad_norm": 0.0015801583649590611, "learning_rate": 8.47997955838829e-06, "loss": 0.0002, "step": 157 }, { "epoch": 1.3650107991360692, "grad_norm": 0.10153518617153168, "learning_rate": 8.10596112000994e-06, "loss": 0.0004, "step": 158 }, { "epoch": 1.3736501079913608, "grad_norm": 0.0009876766707748175, "learning_rate": 7.739295243326067e-06, "loss": 0.0001, "step": 159 }, { "epoch": 1.3822894168466522, "grad_norm": 0.0026555354706943035, "learning_rate": 7.380082171126228e-06, "loss": 0.0002, "step": 160 }, { "epoch": 1.3909287257019438, "grad_norm": 0.0006382952560670674, "learning_rate": 7.028420108677635e-06, "loss": 0.0001, "step": 161 }, { "epoch": 1.3995680345572354, "grad_norm": 0.0014154494274407625, "learning_rate": 6.684405196876842e-06, "loss": 0.0001, "step": 162 }, { "epoch": 1.408207343412527, "grad_norm": 0.0011231210082769394, "learning_rate": 6.3481314859657675e-06, "loss": 0.0001, "step": 163 }, { "epoch": 1.4168466522678185, "grad_norm": 0.0014514840440824628, "learning_rate": 6.019690909819298e-06, "loss": 0.0001, "step": 164 }, { "epoch": 1.42548596112311, "grad_norm": 0.0013826994691044092, "learning_rate": 5.6991732608115e-06, "loss": 0.0001, "step": 165 }, { "epoch": 1.4341252699784017, "grad_norm": 0.0013565586414188147, "learning_rate": 5.386666165267256e-06, "loss": 0.0001, "step": 166 }, { "epoch": 1.4427645788336934, "grad_norm": 0.007900253869593143, "learning_rate": 5.08225505950613e-06, "loss": 0.0001, "step": 167 }, { "epoch": 1.451403887688985, "grad_norm": 0.0012855289969593287, "learning_rate": 4.786023166484913e-06, "loss": 0.0001, "step": 168 }, { "epoch": 1.4600431965442764, "grad_norm": 0.01582699827849865, "learning_rate": 4.498051473045291e-06, "loss": 0.0002, "step": 169 }, { "epoch": 1.468682505399568, "grad_norm": 0.0007394661079160869, "learning_rate": 4.218418707772886e-06, "loss": 0.0001, "step": 170 }, { "epoch": 1.4773218142548596, "grad_norm": 0.0008164668688550591, "learning_rate": 3.947201319473587e-06, "loss": 0.0001, "step": 171 }, { "epoch": 1.485961123110151, "grad_norm": 0.002124256454408169, "learning_rate": 3.684473456273278e-06, "loss": 0.0002, "step": 172 }, { "epoch": 1.4946004319654427, "grad_norm": 0.00903933122754097, "learning_rate": 3.4303069453464383e-06, "loss": 0.0003, "step": 173 }, { "epoch": 1.5032397408207343, "grad_norm": 0.017047259956598282, "learning_rate": 3.184771273279312e-06, "loss": 0.0006, "step": 174 }, { "epoch": 1.511879049676026, "grad_norm": 0.01836921088397503, "learning_rate": 2.947933567072987e-06, "loss": 0.0006, "step": 175 }, { "epoch": 1.5205183585313176, "grad_norm": 0.0054769194684922695, "learning_rate": 2.719858575791534e-06, "loss": 0.0003, "step": 176 }, { "epoch": 1.5291576673866092, "grad_norm": 0.05894150957465172, "learning_rate": 2.500608652860256e-06, "loss": 0.0009, "step": 177 }, { "epoch": 1.5377969762419006, "grad_norm": 0.0017646638443693519, "learning_rate": 2.2902437390188737e-06, "loss": 0.0002, "step": 178 }, { "epoch": 1.5464362850971922, "grad_norm": 0.0016004899516701698, "learning_rate": 2.0888213459343587e-06, "loss": 0.0002, "step": 179 }, { "epoch": 1.5550755939524838, "grad_norm": 0.0009360113763250411, "learning_rate": 1.8963965404777875e-06, "loss": 0.0001, "step": 180 }, { "epoch": 1.5637149028077753, "grad_norm": 0.0009685845579952002, "learning_rate": 1.7130219296696263e-06, "loss": 0.0001, "step": 181 }, { "epoch": 1.5723542116630669, "grad_norm": 0.0022252460476011038, "learning_rate": 1.5387476462974824e-06, "loss": 0.0003, "step": 182 }, { "epoch": 1.5809935205183585, "grad_norm": 0.0034834735561162233, "learning_rate": 1.3736213352103147e-06, "loss": 0.0001, "step": 183 }, { "epoch": 1.5896328293736501, "grad_norm": 0.0007790013332851231, "learning_rate": 1.2176881402928002e-06, "loss": 0.0001, "step": 184 }, { "epoch": 1.5982721382289418, "grad_norm": 0.0013246826129034162, "learning_rate": 1.0709906921234367e-06, "loss": 0.0001, "step": 185 }, { "epoch": 1.6069114470842334, "grad_norm": 0.0007768021896481514, "learning_rate": 9.33569096319799e-07, "loss": 0.0001, "step": 186 }, { "epoch": 1.6155507559395248, "grad_norm": 0.0009022291051223874, "learning_rate": 8.054609225740255e-07, "loss": 0.0001, "step": 187 }, { "epoch": 1.6241900647948164, "grad_norm": 0.0007956126355566084, "learning_rate": 6.867011943816724e-07, "loss": 0.0001, "step": 188 }, { "epoch": 1.6328293736501078, "grad_norm": 0.0012011040234938264, "learning_rate": 5.77322379466617e-07, "loss": 0.0001, "step": 189 }, { "epoch": 1.6414686825053995, "grad_norm": 0.0011474161874502897, "learning_rate": 4.773543809047186e-07, "loss": 0.0001, "step": 190 }, { "epoch": 1.650107991360691, "grad_norm": 0.0006911220261827111, "learning_rate": 3.868245289486027e-07, "loss": 0.0001, "step": 191 }, { "epoch": 1.6587473002159827, "grad_norm": 0.0015587140806019306, "learning_rate": 3.0575757355586817e-07, "loss": 0.0001, "step": 192 }, { "epoch": 1.6673866090712743, "grad_norm": 0.0011123515432700515, "learning_rate": 2.3417567762266497e-07, "loss": 0.0001, "step": 193 }, { "epoch": 1.676025917926566, "grad_norm": 0.002342136111110449, "learning_rate": 1.7209841092460043e-07, "loss": 0.0001, "step": 194 }, { "epoch": 1.6846652267818576, "grad_norm": 0.0016477032331749797, "learning_rate": 1.1954274476655534e-07, "loss": 0.0001, "step": 195 }, { "epoch": 1.693304535637149, "grad_norm": 0.0014787918189540505, "learning_rate": 7.652304734289127e-08, "loss": 0.0001, "step": 196 }, { "epoch": 1.7019438444924406, "grad_norm": 0.00296723167411983, "learning_rate": 4.30510798093342e-08, "loss": 0.0001, "step": 197 }, { "epoch": 1.710583153347732, "grad_norm": 0.0015325212152674794, "learning_rate": 1.9135993067588284e-08, "loss": 0.0001, "step": 198 }, { "epoch": 1.7192224622030237, "grad_norm": 0.00978434830904007, "learning_rate": 4.784325263584854e-09, "loss": 0.0002, "step": 199 }, { "epoch": 1.7278617710583153, "grad_norm": 0.0022683811839669943, "learning_rate": 0.0, "loss": 0.0002, "step": 200 }, { "epoch": 1.7278617710583153, "eval_loss": 0.00024209167168010026, "eval_runtime": 20.7713, "eval_samples_per_second": 9.388, "eval_steps_per_second": 2.359, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.970690585555108e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }