{ "best_metric": 1.1125794649124146, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.13065490772497143, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006532745386248571, "grad_norm": 1.8516772985458374, "learning_rate": 7e-06, "loss": 1.0302, "step": 1 }, { "epoch": 0.0006532745386248571, "eval_loss": 1.9036271572113037, "eval_runtime": 361.5362, "eval_samples_per_second": 7.131, "eval_steps_per_second": 1.784, "step": 1 }, { "epoch": 0.0013065490772497142, "grad_norm": 1.7827223539352417, "learning_rate": 1.4e-05, "loss": 1.0844, "step": 2 }, { "epoch": 0.0019598236158745713, "grad_norm": 1.8511314392089844, "learning_rate": 2.1e-05, "loss": 1.0716, "step": 3 }, { "epoch": 0.0026130981544994283, "grad_norm": 1.8042787313461304, "learning_rate": 2.8e-05, "loss": 1.1179, "step": 4 }, { "epoch": 0.0032663726931242854, "grad_norm": 1.379598617553711, "learning_rate": 3.5e-05, "loss": 1.146, "step": 5 }, { "epoch": 0.0039196472317491425, "grad_norm": 0.7377445101737976, "learning_rate": 4.2e-05, "loss": 0.9583, "step": 6 }, { "epoch": 0.004572921770374, "grad_norm": 0.816500186920166, "learning_rate": 4.899999999999999e-05, "loss": 1.0773, "step": 7 }, { "epoch": 0.005226196308998857, "grad_norm": 0.7636451721191406, "learning_rate": 5.6e-05, "loss": 1.067, "step": 8 }, { "epoch": 0.005879470847623714, "grad_norm": 0.7429799437522888, "learning_rate": 6.3e-05, "loss": 1.0436, "step": 9 }, { "epoch": 0.006532745386248571, "grad_norm": 0.6683851480484009, "learning_rate": 7e-05, "loss": 1.0759, "step": 10 }, { "epoch": 0.007186019924873428, "grad_norm": 0.7098096609115601, "learning_rate": 6.999521567473641e-05, "loss": 1.053, "step": 11 }, { "epoch": 0.007839294463498285, "grad_norm": 0.667305588722229, "learning_rate": 6.998086400693241e-05, "loss": 1.0688, "step": 12 }, { "epoch": 0.008492569002123142, "grad_norm": 0.529070258140564, "learning_rate": 6.995694892019065e-05, "loss": 1.0495, "step": 13 }, { "epoch": 0.009145843540748, "grad_norm": 0.48540985584259033, "learning_rate": 6.99234769526571e-05, "loss": 0.9939, "step": 14 }, { "epoch": 0.009799118079372856, "grad_norm": 0.4808014929294586, "learning_rate": 6.988045725523343e-05, "loss": 1.0301, "step": 15 }, { "epoch": 0.010452392617997713, "grad_norm": 0.5085527896881104, "learning_rate": 6.982790158907539e-05, "loss": 1.0252, "step": 16 }, { "epoch": 0.01110566715662257, "grad_norm": 0.5372496843338013, "learning_rate": 6.976582432237733e-05, "loss": 1.0514, "step": 17 }, { "epoch": 0.011758941695247428, "grad_norm": 0.5005790591239929, "learning_rate": 6.969424242644413e-05, "loss": 0.9802, "step": 18 }, { "epoch": 0.012412216233872285, "grad_norm": 0.5220534801483154, "learning_rate": 6.961317547105138e-05, "loss": 0.994, "step": 19 }, { "epoch": 0.013065490772497142, "grad_norm": 0.5052276253700256, "learning_rate": 6.952264561909527e-05, "loss": 0.983, "step": 20 }, { "epoch": 0.013718765311121999, "grad_norm": 0.5826500058174133, "learning_rate": 6.942267762053337e-05, "loss": 0.9843, "step": 21 }, { "epoch": 0.014372039849746856, "grad_norm": 0.5644847750663757, "learning_rate": 6.931329880561832e-05, "loss": 0.987, "step": 22 }, { "epoch": 0.015025314388371713, "grad_norm": 0.5492434501647949, "learning_rate": 6.919453907742597e-05, "loss": 1.0731, "step": 23 }, { "epoch": 0.01567858892699657, "grad_norm": 0.5545996427536011, "learning_rate": 6.90664309036802e-05, "loss": 1.0625, "step": 24 }, { "epoch": 0.01633186346562143, "grad_norm": 0.6682809591293335, "learning_rate": 6.892900930787656e-05, "loss": 1.2169, "step": 25 }, { "epoch": 0.016985138004246284, "grad_norm": 0.6673710942268372, "learning_rate": 6.87823118597072e-05, "loss": 1.1868, "step": 26 }, { "epoch": 0.017638412542871143, "grad_norm": 0.6434420347213745, "learning_rate": 6.862637866478969e-05, "loss": 1.1481, "step": 27 }, { "epoch": 0.018291687081496, "grad_norm": 0.632610023021698, "learning_rate": 6.846125235370252e-05, "loss": 0.9949, "step": 28 }, { "epoch": 0.018944961620120857, "grad_norm": 0.7760617733001709, "learning_rate": 6.828697807033038e-05, "loss": 1.0732, "step": 29 }, { "epoch": 0.019598236158745713, "grad_norm": 0.8459382653236389, "learning_rate": 6.81036034595222e-05, "loss": 1.0873, "step": 30 }, { "epoch": 0.02025151069737057, "grad_norm": 0.8089104294776917, "learning_rate": 6.791117865406564e-05, "loss": 1.0646, "step": 31 }, { "epoch": 0.020904785235995427, "grad_norm": 0.9536648392677307, "learning_rate": 6.770975626098112e-05, "loss": 1.0986, "step": 32 }, { "epoch": 0.021558059774620286, "grad_norm": 1.1872056722640991, "learning_rate": 6.749939134713974e-05, "loss": 1.0859, "step": 33 }, { "epoch": 0.02221133431324514, "grad_norm": 1.1158490180969238, "learning_rate": 6.728014142420846e-05, "loss": 1.1656, "step": 34 }, { "epoch": 0.02286460885187, "grad_norm": 1.1185222864151, "learning_rate": 6.7052066432927e-05, "loss": 1.3207, "step": 35 }, { "epoch": 0.023517883390494855, "grad_norm": 1.4036815166473389, "learning_rate": 6.681522872672069e-05, "loss": 1.3948, "step": 36 }, { "epoch": 0.024171157929119714, "grad_norm": 1.1611055135726929, "learning_rate": 6.656969305465356e-05, "loss": 1.2411, "step": 37 }, { "epoch": 0.02482443246774457, "grad_norm": 1.248063325881958, "learning_rate": 6.631552654372672e-05, "loss": 1.2205, "step": 38 }, { "epoch": 0.025477707006369428, "grad_norm": 1.3313448429107666, "learning_rate": 6.60527986805264e-05, "loss": 1.2172, "step": 39 }, { "epoch": 0.026130981544994283, "grad_norm": 1.3370778560638428, "learning_rate": 6.578158129222711e-05, "loss": 1.1935, "step": 40 }, { "epoch": 0.026784256083619142, "grad_norm": 1.5255675315856934, "learning_rate": 6.550194852695469e-05, "loss": 1.2351, "step": 41 }, { "epoch": 0.027437530622243998, "grad_norm": 1.7755012512207031, "learning_rate": 6.521397683351509e-05, "loss": 1.227, "step": 42 }, { "epoch": 0.028090805160868856, "grad_norm": 1.9421392679214478, "learning_rate": 6.491774494049386e-05, "loss": 1.3949, "step": 43 }, { "epoch": 0.02874407969949371, "grad_norm": 1.7597455978393555, "learning_rate": 6.461333383473272e-05, "loss": 1.4456, "step": 44 }, { "epoch": 0.02939735423811857, "grad_norm": 2.684230089187622, "learning_rate": 6.430082673918849e-05, "loss": 1.326, "step": 45 }, { "epoch": 0.030050628776743426, "grad_norm": 2.200204849243164, "learning_rate": 6.398030909018069e-05, "loss": 1.4689, "step": 46 }, { "epoch": 0.030703903315368285, "grad_norm": 2.003971815109253, "learning_rate": 6.365186851403423e-05, "loss": 1.4395, "step": 47 }, { "epoch": 0.03135717785399314, "grad_norm": 2.6442818641662598, "learning_rate": 6.331559480312315e-05, "loss": 1.972, "step": 48 }, { "epoch": 0.032010452392617995, "grad_norm": 3.0363879203796387, "learning_rate": 6.297157989132236e-05, "loss": 1.7881, "step": 49 }, { "epoch": 0.03266372693124286, "grad_norm": 5.096224308013916, "learning_rate": 6.261991782887377e-05, "loss": 2.3222, "step": 50 }, { "epoch": 0.03266372693124286, "eval_loss": 1.4000219106674194, "eval_runtime": 364.054, "eval_samples_per_second": 7.081, "eval_steps_per_second": 1.772, "step": 50 }, { "epoch": 0.03331700146986771, "grad_norm": 2.7139570713043213, "learning_rate": 6.226070475667393e-05, "loss": 1.1416, "step": 51 }, { "epoch": 0.03397027600849257, "grad_norm": 2.7785696983337402, "learning_rate": 6.189403887999006e-05, "loss": 1.2402, "step": 52 }, { "epoch": 0.034623550547117424, "grad_norm": 1.7651437520980835, "learning_rate": 6.152002044161171e-05, "loss": 1.1096, "step": 53 }, { "epoch": 0.035276825085742286, "grad_norm": 0.9755071401596069, "learning_rate": 6.113875169444539e-05, "loss": 1.0095, "step": 54 }, { "epoch": 0.03593009962436714, "grad_norm": 0.7062902450561523, "learning_rate": 6.0750336873559605e-05, "loss": 0.9351, "step": 55 }, { "epoch": 0.036583374162992, "grad_norm": 0.6608876585960388, "learning_rate": 6.035488216768811e-05, "loss": 0.9881, "step": 56 }, { "epoch": 0.03723664870161685, "grad_norm": 0.5747250914573669, "learning_rate": 5.9952495690198894e-05, "loss": 0.9328, "step": 57 }, { "epoch": 0.037889923240241714, "grad_norm": 0.45825669169425964, "learning_rate": 5.954328744953709e-05, "loss": 0.9784, "step": 58 }, { "epoch": 0.03854319777886657, "grad_norm": 0.471233993768692, "learning_rate": 5.91273693191498e-05, "loss": 0.9657, "step": 59 }, { "epoch": 0.039196472317491425, "grad_norm": 0.48014357686042786, "learning_rate": 5.870485500690094e-05, "loss": 0.9516, "step": 60 }, { "epoch": 0.03984974685611628, "grad_norm": 0.543270468711853, "learning_rate": 5.827586002398468e-05, "loss": 0.9755, "step": 61 }, { "epoch": 0.04050302139474114, "grad_norm": 0.47643303871154785, "learning_rate": 5.784050165334589e-05, "loss": 0.9195, "step": 62 }, { "epoch": 0.041156295933366, "grad_norm": 0.4851834177970886, "learning_rate": 5.739889891761608e-05, "loss": 0.8976, "step": 63 }, { "epoch": 0.04180957047199085, "grad_norm": 0.44153380393981934, "learning_rate": 5.6951172546573794e-05, "loss": 0.9761, "step": 64 }, { "epoch": 0.04246284501061571, "grad_norm": 0.4306458830833435, "learning_rate": 5.6497444944138376e-05, "loss": 0.8513, "step": 65 }, { "epoch": 0.04311611954924057, "grad_norm": 0.45442384481430054, "learning_rate": 5.603784015490587e-05, "loss": 0.9658, "step": 66 }, { "epoch": 0.043769394087865426, "grad_norm": 0.4480721950531006, "learning_rate": 5.557248383023655e-05, "loss": 0.9132, "step": 67 }, { "epoch": 0.04442266862649028, "grad_norm": 0.47314023971557617, "learning_rate": 5.510150319390302e-05, "loss": 0.9718, "step": 68 }, { "epoch": 0.04507594316511514, "grad_norm": 0.45936113595962524, "learning_rate": 5.4625027007308546e-05, "loss": 0.9377, "step": 69 }, { "epoch": 0.04572921770374, "grad_norm": 0.4822114109992981, "learning_rate": 5.414318553428494e-05, "loss": 1.0319, "step": 70 }, { "epoch": 0.046382492242364855, "grad_norm": 0.5402209758758545, "learning_rate": 5.3656110505479776e-05, "loss": 1.053, "step": 71 }, { "epoch": 0.04703576678098971, "grad_norm": 0.5175288915634155, "learning_rate": 5.316393508234253e-05, "loss": 1.0582, "step": 72 }, { "epoch": 0.047689041319614565, "grad_norm": 0.599414050579071, "learning_rate": 5.266679382071953e-05, "loss": 1.1909, "step": 73 }, { "epoch": 0.04834231585823943, "grad_norm": 0.5590885877609253, "learning_rate": 5.216482263406778e-05, "loss": 1.0945, "step": 74 }, { "epoch": 0.04899559039686428, "grad_norm": 0.570393443107605, "learning_rate": 5.1658158756297576e-05, "loss": 1.0452, "step": 75 }, { "epoch": 0.04964886493548914, "grad_norm": 0.6049505472183228, "learning_rate": 5.114694070425407e-05, "loss": 1.0949, "step": 76 }, { "epoch": 0.050302139474113994, "grad_norm": 0.6091915965080261, "learning_rate": 5.063130823984823e-05, "loss": 1.0116, "step": 77 }, { "epoch": 0.050955414012738856, "grad_norm": 0.6413677334785461, "learning_rate": 5.011140233184724e-05, "loss": 1.0694, "step": 78 }, { "epoch": 0.05160868855136371, "grad_norm": 0.6690137386322021, "learning_rate": 4.958736511733516e-05, "loss": 1.0193, "step": 79 }, { "epoch": 0.05226196308998857, "grad_norm": 0.7686389088630676, "learning_rate": 4.905933986285393e-05, "loss": 1.1433, "step": 80 }, { "epoch": 0.05291523762861342, "grad_norm": 0.8502298593521118, "learning_rate": 4.8527470925235824e-05, "loss": 1.0882, "step": 81 }, { "epoch": 0.053568512167238284, "grad_norm": 0.7803910970687866, "learning_rate": 4.799190371213772e-05, "loss": 1.065, "step": 82 }, { "epoch": 0.05422178670586314, "grad_norm": 0.9237357378005981, "learning_rate": 4.745278464228808e-05, "loss": 1.1578, "step": 83 }, { "epoch": 0.054875061244487995, "grad_norm": 0.9249285459518433, "learning_rate": 4.69102611054575e-05, "loss": 1.1609, "step": 84 }, { "epoch": 0.05552833578311285, "grad_norm": 0.9805899858474731, "learning_rate": 4.6364481422163926e-05, "loss": 1.1561, "step": 85 }, { "epoch": 0.05618161032173771, "grad_norm": 0.9704437851905823, "learning_rate": 4.581559480312316e-05, "loss": 0.9433, "step": 86 }, { "epoch": 0.05683488486036257, "grad_norm": 1.1474000215530396, "learning_rate": 4.526375130845627e-05, "loss": 1.208, "step": 87 }, { "epoch": 0.05748815939898742, "grad_norm": 1.274498701095581, "learning_rate": 4.4709101806664554e-05, "loss": 1.232, "step": 88 }, { "epoch": 0.05814143393761228, "grad_norm": 1.8896253108978271, "learning_rate": 4.4151797933383685e-05, "loss": 1.1602, "step": 89 }, { "epoch": 0.05879470847623714, "grad_norm": 1.4872145652770996, "learning_rate": 4.359199204992797e-05, "loss": 1.3486, "step": 90 }, { "epoch": 0.059447983014861996, "grad_norm": 1.4222277402877808, "learning_rate": 4.30298372016363e-05, "loss": 1.4133, "step": 91 }, { "epoch": 0.06010125755348685, "grad_norm": 1.8111426830291748, "learning_rate": 4.246548707603114e-05, "loss": 1.5896, "step": 92 }, { "epoch": 0.06075453209211171, "grad_norm": 1.4717673063278198, "learning_rate": 4.1899095960801805e-05, "loss": 1.3622, "step": 93 }, { "epoch": 0.06140780663073657, "grad_norm": 1.6247068643569946, "learning_rate": 4.133081870162385e-05, "loss": 1.3382, "step": 94 }, { "epoch": 0.062061081169361425, "grad_norm": 1.5674505233764648, "learning_rate": 4.076081065982569e-05, "loss": 1.4386, "step": 95 }, { "epoch": 0.06271435570798628, "grad_norm": 1.7394001483917236, "learning_rate": 4.018922766991447e-05, "loss": 1.575, "step": 96 }, { "epoch": 0.06336763024661114, "grad_norm": 2.449448585510254, "learning_rate": 3.961622599697241e-05, "loss": 1.8127, "step": 97 }, { "epoch": 0.06402090478523599, "grad_norm": 2.9365131855010986, "learning_rate": 3.9041962293935516e-05, "loss": 2.335, "step": 98 }, { "epoch": 0.06467417932386085, "grad_norm": 2.3340282440185547, "learning_rate": 3.84665935587662e-05, "loss": 1.8717, "step": 99 }, { "epoch": 0.06532745386248572, "grad_norm": 3.9780924320220947, "learning_rate": 3.7890277091531636e-05, "loss": 2.2112, "step": 100 }, { "epoch": 0.06532745386248572, "eval_loss": 1.2186014652252197, "eval_runtime": 364.2382, "eval_samples_per_second": 7.078, "eval_steps_per_second": 1.771, "step": 100 }, { "epoch": 0.06598072840111056, "grad_norm": 0.880546510219574, "learning_rate": 3.7313170451399475e-05, "loss": 0.831, "step": 101 }, { "epoch": 0.06663400293973543, "grad_norm": 0.8998001217842102, "learning_rate": 3.673543141356278e-05, "loss": 0.9768, "step": 102 }, { "epoch": 0.06728727747836027, "grad_norm": 0.9604887366294861, "learning_rate": 3.6157217926105783e-05, "loss": 0.9947, "step": 103 }, { "epoch": 0.06794055201698514, "grad_norm": 0.690131425857544, "learning_rate": 3.557868806682255e-05, "loss": 0.9427, "step": 104 }, { "epoch": 0.06859382655561, "grad_norm": 0.6709505319595337, "learning_rate": 3.5e-05, "loss": 0.9457, "step": 105 }, { "epoch": 0.06924710109423485, "grad_norm": 0.49770456552505493, "learning_rate": 3.442131193317745e-05, "loss": 0.9816, "step": 106 }, { "epoch": 0.06990037563285971, "grad_norm": 0.44334131479263306, "learning_rate": 3.384278207389421e-05, "loss": 0.943, "step": 107 }, { "epoch": 0.07055365017148457, "grad_norm": 0.35040923953056335, "learning_rate": 3.3264568586437216e-05, "loss": 1.0121, "step": 108 }, { "epoch": 0.07120692471010942, "grad_norm": 0.35298407077789307, "learning_rate": 3.268682954860052e-05, "loss": 0.9131, "step": 109 }, { "epoch": 0.07186019924873428, "grad_norm": 0.32489556074142456, "learning_rate": 3.210972290846837e-05, "loss": 0.9123, "step": 110 }, { "epoch": 0.07251347378735913, "grad_norm": 0.36420565843582153, "learning_rate": 3.15334064412338e-05, "loss": 0.8552, "step": 111 }, { "epoch": 0.073166748325984, "grad_norm": 0.37501949071884155, "learning_rate": 3.0958037706064485e-05, "loss": 0.9162, "step": 112 }, { "epoch": 0.07382002286460886, "grad_norm": 0.3867112398147583, "learning_rate": 3.038377400302758e-05, "loss": 0.8919, "step": 113 }, { "epoch": 0.0744732974032337, "grad_norm": 0.4006747603416443, "learning_rate": 2.9810772330085524e-05, "loss": 0.8862, "step": 114 }, { "epoch": 0.07512657194185857, "grad_norm": 0.45074766874313354, "learning_rate": 2.9239189340174306e-05, "loss": 0.9592, "step": 115 }, { "epoch": 0.07577984648048343, "grad_norm": 0.4480585753917694, "learning_rate": 2.8669181298376163e-05, "loss": 1.0181, "step": 116 }, { "epoch": 0.07643312101910828, "grad_norm": 0.48356470465660095, "learning_rate": 2.8100904039198193e-05, "loss": 1.0788, "step": 117 }, { "epoch": 0.07708639555773314, "grad_norm": 0.46879181265830994, "learning_rate": 2.7534512923968863e-05, "loss": 1.0395, "step": 118 }, { "epoch": 0.07773967009635799, "grad_norm": 0.5236515998840332, "learning_rate": 2.6970162798363695e-05, "loss": 1.1158, "step": 119 }, { "epoch": 0.07839294463498285, "grad_norm": 0.5310538411140442, "learning_rate": 2.640800795007203e-05, "loss": 0.9777, "step": 120 }, { "epoch": 0.07904621917360771, "grad_norm": 0.4761475622653961, "learning_rate": 2.5848202066616305e-05, "loss": 0.9904, "step": 121 }, { "epoch": 0.07969949371223256, "grad_norm": 0.5263852477073669, "learning_rate": 2.5290898193335446e-05, "loss": 1.02, "step": 122 }, { "epoch": 0.08035276825085742, "grad_norm": 0.5361368060112, "learning_rate": 2.4736248691543736e-05, "loss": 1.0196, "step": 123 }, { "epoch": 0.08100604278948229, "grad_norm": 0.5235584378242493, "learning_rate": 2.4184405196876842e-05, "loss": 1.0654, "step": 124 }, { "epoch": 0.08165931732810713, "grad_norm": 0.588202178478241, "learning_rate": 2.363551857783608e-05, "loss": 1.0997, "step": 125 }, { "epoch": 0.082312591866732, "grad_norm": 0.5942503809928894, "learning_rate": 2.308973889454249e-05, "loss": 1.0127, "step": 126 }, { "epoch": 0.08296586640535684, "grad_norm": 0.579181969165802, "learning_rate": 2.2547215357711918e-05, "loss": 0.9425, "step": 127 }, { "epoch": 0.0836191409439817, "grad_norm": 0.6416296362876892, "learning_rate": 2.2008096287862266e-05, "loss": 1.0125, "step": 128 }, { "epoch": 0.08427241548260657, "grad_norm": 0.6700341105461121, "learning_rate": 2.1472529074764177e-05, "loss": 1.0656, "step": 129 }, { "epoch": 0.08492569002123142, "grad_norm": 0.8234823942184448, "learning_rate": 2.0940660137146074e-05, "loss": 1.1097, "step": 130 }, { "epoch": 0.08557896455985628, "grad_norm": 0.756790280342102, "learning_rate": 2.041263488266484e-05, "loss": 1.1005, "step": 131 }, { "epoch": 0.08623223909848114, "grad_norm": 0.9773803353309631, "learning_rate": 1.988859766815275e-05, "loss": 1.0462, "step": 132 }, { "epoch": 0.08688551363710599, "grad_norm": 0.9532663226127625, "learning_rate": 1.9368691760151773e-05, "loss": 1.1081, "step": 133 }, { "epoch": 0.08753878817573085, "grad_norm": 1.0115333795547485, "learning_rate": 1.885305929574593e-05, "loss": 1.1444, "step": 134 }, { "epoch": 0.0881920627143557, "grad_norm": 1.1368460655212402, "learning_rate": 1.8341841243702424e-05, "loss": 1.1458, "step": 135 }, { "epoch": 0.08884533725298056, "grad_norm": 1.1344200372695923, "learning_rate": 1.7835177365932225e-05, "loss": 1.1039, "step": 136 }, { "epoch": 0.08949861179160543, "grad_norm": 1.142113447189331, "learning_rate": 1.7333206179280478e-05, "loss": 0.9681, "step": 137 }, { "epoch": 0.09015188633023027, "grad_norm": 1.1623753309249878, "learning_rate": 1.6836064917657478e-05, "loss": 1.348, "step": 138 }, { "epoch": 0.09080516086885514, "grad_norm": 1.138903260231018, "learning_rate": 1.6343889494520224e-05, "loss": 1.1893, "step": 139 }, { "epoch": 0.09145843540748, "grad_norm": 1.4059568643569946, "learning_rate": 1.5856814465715064e-05, "loss": 1.1307, "step": 140 }, { "epoch": 0.09211170994610485, "grad_norm": 1.434770941734314, "learning_rate": 1.5374972992691458e-05, "loss": 1.2641, "step": 141 }, { "epoch": 0.09276498448472971, "grad_norm": 1.6972754001617432, "learning_rate": 1.4898496806096974e-05, "loss": 0.9313, "step": 142 }, { "epoch": 0.09341825902335456, "grad_norm": 1.7282185554504395, "learning_rate": 1.4427516169763444e-05, "loss": 1.2348, "step": 143 }, { "epoch": 0.09407153356197942, "grad_norm": 1.6720625162124634, "learning_rate": 1.396215984509412e-05, "loss": 1.4805, "step": 144 }, { "epoch": 0.09472480810060428, "grad_norm": 1.8798261880874634, "learning_rate": 1.3502555055861625e-05, "loss": 1.8364, "step": 145 }, { "epoch": 0.09537808263922913, "grad_norm": 1.8419171571731567, "learning_rate": 1.3048827453426203e-05, "loss": 1.4047, "step": 146 }, { "epoch": 0.096031357177854, "grad_norm": 1.905526876449585, "learning_rate": 1.2601101082383917e-05, "loss": 1.5292, "step": 147 }, { "epoch": 0.09668463171647886, "grad_norm": 1.815724492073059, "learning_rate": 1.2159498346654094e-05, "loss": 1.511, "step": 148 }, { "epoch": 0.0973379062551037, "grad_norm": 2.371288299560547, "learning_rate": 1.1724139976015306e-05, "loss": 2.0157, "step": 149 }, { "epoch": 0.09799118079372857, "grad_norm": 3.6831870079040527, "learning_rate": 1.1295144993099068e-05, "loss": 1.9797, "step": 150 }, { "epoch": 0.09799118079372857, "eval_loss": 1.1195086240768433, "eval_runtime": 364.2777, "eval_samples_per_second": 7.077, "eval_steps_per_second": 1.771, "step": 150 }, { "epoch": 0.09864445533235343, "grad_norm": 0.29028820991516113, "learning_rate": 1.0872630680850196e-05, "loss": 0.7943, "step": 151 }, { "epoch": 0.09929772987097828, "grad_norm": 0.3445136845111847, "learning_rate": 1.0456712550462898e-05, "loss": 0.9206, "step": 152 }, { "epoch": 0.09995100440960314, "grad_norm": 0.3625146746635437, "learning_rate": 1.0047504309801104e-05, "loss": 0.9081, "step": 153 }, { "epoch": 0.10060427894822799, "grad_norm": 0.3956086337566376, "learning_rate": 9.645117832311886e-06, "loss": 0.8597, "step": 154 }, { "epoch": 0.10125755348685285, "grad_norm": 0.32289180159568787, "learning_rate": 9.249663126440394e-06, "loss": 0.8973, "step": 155 }, { "epoch": 0.10191082802547771, "grad_norm": 0.3416202664375305, "learning_rate": 8.861248305554624e-06, "loss": 0.8879, "step": 156 }, { "epoch": 0.10256410256410256, "grad_norm": 0.3258891701698303, "learning_rate": 8.47997955838829e-06, "loss": 0.842, "step": 157 }, { "epoch": 0.10321737710272742, "grad_norm": 0.3462526798248291, "learning_rate": 8.10596112000994e-06, "loss": 0.8148, "step": 158 }, { "epoch": 0.10387065164135229, "grad_norm": 0.3501952290534973, "learning_rate": 7.739295243326067e-06, "loss": 0.8469, "step": 159 }, { "epoch": 0.10452392617997713, "grad_norm": 0.3751770853996277, "learning_rate": 7.380082171126228e-06, "loss": 0.9513, "step": 160 }, { "epoch": 0.105177200718602, "grad_norm": 0.35325130820274353, "learning_rate": 7.028420108677635e-06, "loss": 0.8966, "step": 161 }, { "epoch": 0.10583047525722684, "grad_norm": 0.3733804225921631, "learning_rate": 6.684405196876842e-06, "loss": 0.9438, "step": 162 }, { "epoch": 0.1064837497958517, "grad_norm": 0.3731028437614441, "learning_rate": 6.3481314859657675e-06, "loss": 0.9872, "step": 163 }, { "epoch": 0.10713702433447657, "grad_norm": 0.37592631578445435, "learning_rate": 6.019690909819298e-06, "loss": 0.9131, "step": 164 }, { "epoch": 0.10779029887310142, "grad_norm": 0.3725065290927887, "learning_rate": 5.6991732608115e-06, "loss": 0.9644, "step": 165 }, { "epoch": 0.10844357341172628, "grad_norm": 0.41977131366729736, "learning_rate": 5.386666165267256e-06, "loss": 0.9352, "step": 166 }, { "epoch": 0.10909684795035114, "grad_norm": 0.42900514602661133, "learning_rate": 5.08225505950613e-06, "loss": 1.0494, "step": 167 }, { "epoch": 0.10975012248897599, "grad_norm": 0.45085152983665466, "learning_rate": 4.786023166484913e-06, "loss": 1.0391, "step": 168 }, { "epoch": 0.11040339702760085, "grad_norm": 0.48264625668525696, "learning_rate": 4.498051473045291e-06, "loss": 0.8983, "step": 169 }, { "epoch": 0.1110566715662257, "grad_norm": 0.4835262596607208, "learning_rate": 4.218418707772886e-06, "loss": 1.0586, "step": 170 }, { "epoch": 0.11170994610485056, "grad_norm": 0.48784199357032776, "learning_rate": 3.947201319473587e-06, "loss": 0.8897, "step": 171 }, { "epoch": 0.11236322064347543, "grad_norm": 0.5302643179893494, "learning_rate": 3.684473456273278e-06, "loss": 0.9997, "step": 172 }, { "epoch": 0.11301649518210027, "grad_norm": 0.5575405359268188, "learning_rate": 3.4303069453464383e-06, "loss": 0.8987, "step": 173 }, { "epoch": 0.11366976972072514, "grad_norm": 0.5708325505256653, "learning_rate": 3.184771273279312e-06, "loss": 1.1024, "step": 174 }, { "epoch": 0.11432304425935, "grad_norm": 0.6621981263160706, "learning_rate": 2.947933567072987e-06, "loss": 1.0761, "step": 175 }, { "epoch": 0.11497631879797485, "grad_norm": 0.7082878351211548, "learning_rate": 2.719858575791534e-06, "loss": 1.0525, "step": 176 }, { "epoch": 0.11562959333659971, "grad_norm": 0.6509903073310852, "learning_rate": 2.500608652860256e-06, "loss": 0.9682, "step": 177 }, { "epoch": 0.11628286787522456, "grad_norm": 0.705852210521698, "learning_rate": 2.2902437390188737e-06, "loss": 0.9737, "step": 178 }, { "epoch": 0.11693614241384942, "grad_norm": 0.7518905997276306, "learning_rate": 2.0888213459343587e-06, "loss": 1.0402, "step": 179 }, { "epoch": 0.11758941695247428, "grad_norm": 0.7310596704483032, "learning_rate": 1.8963965404777875e-06, "loss": 0.9668, "step": 180 }, { "epoch": 0.11824269149109913, "grad_norm": 0.6952759027481079, "learning_rate": 1.7130219296696263e-06, "loss": 1.0489, "step": 181 }, { "epoch": 0.11889596602972399, "grad_norm": 0.7776533961296082, "learning_rate": 1.5387476462974824e-06, "loss": 1.215, "step": 182 }, { "epoch": 0.11954924056834886, "grad_norm": 0.8356504440307617, "learning_rate": 1.3736213352103147e-06, "loss": 0.9922, "step": 183 }, { "epoch": 0.1202025151069737, "grad_norm": 0.9396088123321533, "learning_rate": 1.2176881402928002e-06, "loss": 1.1072, "step": 184 }, { "epoch": 0.12085578964559857, "grad_norm": 1.1654841899871826, "learning_rate": 1.0709906921234367e-06, "loss": 1.2322, "step": 185 }, { "epoch": 0.12150906418422341, "grad_norm": 1.0885088443756104, "learning_rate": 9.33569096319799e-07, "loss": 1.121, "step": 186 }, { "epoch": 0.12216233872284828, "grad_norm": 1.069108247756958, "learning_rate": 8.054609225740255e-07, "loss": 1.1185, "step": 187 }, { "epoch": 0.12281561326147314, "grad_norm": 1.1217502355575562, "learning_rate": 6.867011943816724e-07, "loss": 1.305, "step": 188 }, { "epoch": 0.12346888780009799, "grad_norm": 1.2212369441986084, "learning_rate": 5.77322379466617e-07, "loss": 1.0351, "step": 189 }, { "epoch": 0.12412216233872285, "grad_norm": 1.4058793783187866, "learning_rate": 4.773543809047186e-07, "loss": 1.3516, "step": 190 }, { "epoch": 0.12477543687734771, "grad_norm": 1.4786542654037476, "learning_rate": 3.868245289486027e-07, "loss": 1.5069, "step": 191 }, { "epoch": 0.12542871141597256, "grad_norm": 1.6085865497589111, "learning_rate": 3.0575757355586817e-07, "loss": 1.2685, "step": 192 }, { "epoch": 0.1260819859545974, "grad_norm": 1.7137620449066162, "learning_rate": 2.3417567762266497e-07, "loss": 1.5837, "step": 193 }, { "epoch": 0.12673526049322228, "grad_norm": 1.631637692451477, "learning_rate": 1.7209841092460043e-07, "loss": 1.3906, "step": 194 }, { "epoch": 0.12738853503184713, "grad_norm": 1.6230666637420654, "learning_rate": 1.1954274476655534e-07, "loss": 1.3065, "step": 195 }, { "epoch": 0.12804180957047198, "grad_norm": 3.3031904697418213, "learning_rate": 7.652304734289127e-08, "loss": 1.6598, "step": 196 }, { "epoch": 0.12869508410909686, "grad_norm": 3.922074556350708, "learning_rate": 4.30510798093342e-08, "loss": 1.4547, "step": 197 }, { "epoch": 0.1293483586477217, "grad_norm": 2.128213882446289, "learning_rate": 1.9135993067588284e-08, "loss": 1.9439, "step": 198 }, { "epoch": 0.13000163318634655, "grad_norm": 3.5827889442443848, "learning_rate": 4.784325263584854e-09, "loss": 2.1012, "step": 199 }, { "epoch": 0.13065490772497143, "grad_norm": 6.226241111755371, "learning_rate": 0.0, "loss": 2.4371, "step": 200 }, { "epoch": 0.13065490772497143, "eval_loss": 1.1125794649124146, "eval_runtime": 363.9439, "eval_samples_per_second": 7.084, "eval_steps_per_second": 1.772, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.458761861544018e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }