{ "best_metric": 2.8477718830108643, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.11417697431018078, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007611798287345385, "grad_norm": 62.14710235595703, "learning_rate": 7e-06, "loss": 11.9374, "step": 1 }, { "epoch": 0.0007611798287345385, "eval_loss": 3.837273597717285, "eval_runtime": 157.4082, "eval_samples_per_second": 14.059, "eval_steps_per_second": 3.52, "step": 1 }, { "epoch": 0.001522359657469077, "grad_norm": 32.307308197021484, "learning_rate": 1.4e-05, "loss": 12.6976, "step": 2 }, { "epoch": 0.002283539486203616, "grad_norm": 26.411426544189453, "learning_rate": 2.1e-05, "loss": 13.2102, "step": 3 }, { "epoch": 0.003044719314938154, "grad_norm": 26.69647789001465, "learning_rate": 2.8e-05, "loss": 12.9265, "step": 4 }, { "epoch": 0.003805899143672693, "grad_norm": 21.253070831298828, "learning_rate": 3.5e-05, "loss": 12.6845, "step": 5 }, { "epoch": 0.004567078972407232, "grad_norm": 18.088212966918945, "learning_rate": 4.2e-05, "loss": 12.0568, "step": 6 }, { "epoch": 0.00532825880114177, "grad_norm": 17.1400089263916, "learning_rate": 4.899999999999999e-05, "loss": 11.9566, "step": 7 }, { "epoch": 0.006089438629876308, "grad_norm": 17.086406707763672, "learning_rate": 5.6e-05, "loss": 11.8525, "step": 8 }, { "epoch": 0.0068506184586108465, "grad_norm": 16.366493225097656, "learning_rate": 6.3e-05, "loss": 11.2577, "step": 9 }, { "epoch": 0.007611798287345386, "grad_norm": 14.400430679321289, "learning_rate": 7e-05, "loss": 11.5039, "step": 10 }, { "epoch": 0.008372978116079923, "grad_norm": 14.400092124938965, "learning_rate": 6.999521567473641e-05, "loss": 10.997, "step": 11 }, { "epoch": 0.009134157944814463, "grad_norm": 17.782190322875977, "learning_rate": 6.998086400693241e-05, "loss": 11.153, "step": 12 }, { "epoch": 0.009895337773549001, "grad_norm": 15.323582649230957, "learning_rate": 6.995694892019065e-05, "loss": 11.3713, "step": 13 }, { "epoch": 0.01065651760228354, "grad_norm": 18.582822799682617, "learning_rate": 6.99234769526571e-05, "loss": 11.6407, "step": 14 }, { "epoch": 0.011417697431018078, "grad_norm": 18.919496536254883, "learning_rate": 6.988045725523343e-05, "loss": 11.4483, "step": 15 }, { "epoch": 0.012178877259752616, "grad_norm": 15.953317642211914, "learning_rate": 6.982790158907539e-05, "loss": 11.4306, "step": 16 }, { "epoch": 0.012940057088487155, "grad_norm": 34.4675178527832, "learning_rate": 6.976582432237733e-05, "loss": 11.3819, "step": 17 }, { "epoch": 0.013701236917221693, "grad_norm": 16.293533325195312, "learning_rate": 6.969424242644413e-05, "loss": 11.3279, "step": 18 }, { "epoch": 0.014462416745956231, "grad_norm": 14.284806251525879, "learning_rate": 6.961317547105138e-05, "loss": 10.9124, "step": 19 }, { "epoch": 0.015223596574690771, "grad_norm": 16.047115325927734, "learning_rate": 6.952264561909527e-05, "loss": 11.1305, "step": 20 }, { "epoch": 0.01598477640342531, "grad_norm": 16.61277961730957, "learning_rate": 6.942267762053337e-05, "loss": 11.2785, "step": 21 }, { "epoch": 0.016745956232159846, "grad_norm": 16.367738723754883, "learning_rate": 6.931329880561832e-05, "loss": 11.1815, "step": 22 }, { "epoch": 0.017507136060894386, "grad_norm": 16.200246810913086, "learning_rate": 6.919453907742597e-05, "loss": 11.2894, "step": 23 }, { "epoch": 0.018268315889628926, "grad_norm": 15.94240665435791, "learning_rate": 6.90664309036802e-05, "loss": 10.8494, "step": 24 }, { "epoch": 0.019029495718363463, "grad_norm": 15.30321216583252, "learning_rate": 6.892900930787656e-05, "loss": 10.9201, "step": 25 }, { "epoch": 0.019790675547098003, "grad_norm": 18.01148223876953, "learning_rate": 6.87823118597072e-05, "loss": 11.3518, "step": 26 }, { "epoch": 0.02055185537583254, "grad_norm": 16.691192626953125, "learning_rate": 6.862637866478969e-05, "loss": 10.8088, "step": 27 }, { "epoch": 0.02131303520456708, "grad_norm": 16.822267532348633, "learning_rate": 6.846125235370252e-05, "loss": 11.7738, "step": 28 }, { "epoch": 0.022074215033301616, "grad_norm": 17.422344207763672, "learning_rate": 6.828697807033038e-05, "loss": 11.9039, "step": 29 }, { "epoch": 0.022835394862036156, "grad_norm": 15.227424621582031, "learning_rate": 6.81036034595222e-05, "loss": 10.5233, "step": 30 }, { "epoch": 0.023596574690770696, "grad_norm": 16.58725929260254, "learning_rate": 6.791117865406564e-05, "loss": 10.2275, "step": 31 }, { "epoch": 0.024357754519505233, "grad_norm": 15.96583080291748, "learning_rate": 6.770975626098112e-05, "loss": 10.0431, "step": 32 }, { "epoch": 0.025118934348239773, "grad_norm": 16.036426544189453, "learning_rate": 6.749939134713974e-05, "loss": 9.8158, "step": 33 }, { "epoch": 0.02588011417697431, "grad_norm": 15.925104141235352, "learning_rate": 6.728014142420846e-05, "loss": 9.9834, "step": 34 }, { "epoch": 0.02664129400570885, "grad_norm": 17.323747634887695, "learning_rate": 6.7052066432927e-05, "loss": 10.1752, "step": 35 }, { "epoch": 0.027402473834443386, "grad_norm": 17.286712646484375, "learning_rate": 6.681522872672069e-05, "loss": 10.1717, "step": 36 }, { "epoch": 0.028163653663177926, "grad_norm": 21.032140731811523, "learning_rate": 6.656969305465356e-05, "loss": 9.4869, "step": 37 }, { "epoch": 0.028924833491912463, "grad_norm": 18.7946834564209, "learning_rate": 6.631552654372672e-05, "loss": 10.5547, "step": 38 }, { "epoch": 0.029686013320647003, "grad_norm": 19.529447555541992, "learning_rate": 6.60527986805264e-05, "loss": 10.4017, "step": 39 }, { "epoch": 0.030447193149381543, "grad_norm": 18.256792068481445, "learning_rate": 6.578158129222711e-05, "loss": 9.9321, "step": 40 }, { "epoch": 0.03120837297811608, "grad_norm": 19.373693466186523, "learning_rate": 6.550194852695469e-05, "loss": 10.2322, "step": 41 }, { "epoch": 0.03196955280685062, "grad_norm": 19.30985450744629, "learning_rate": 6.521397683351509e-05, "loss": 10.274, "step": 42 }, { "epoch": 0.03273073263558516, "grad_norm": 19.986684799194336, "learning_rate": 6.491774494049386e-05, "loss": 9.9905, "step": 43 }, { "epoch": 0.03349191246431969, "grad_norm": 20.237152099609375, "learning_rate": 6.461333383473272e-05, "loss": 10.3248, "step": 44 }, { "epoch": 0.03425309229305423, "grad_norm": 16.88189125061035, "learning_rate": 6.430082673918849e-05, "loss": 9.3653, "step": 45 }, { "epoch": 0.03501427212178877, "grad_norm": 25.35614776611328, "learning_rate": 6.398030909018069e-05, "loss": 10.3682, "step": 46 }, { "epoch": 0.03577545195052331, "grad_norm": 22.58070945739746, "learning_rate": 6.365186851403423e-05, "loss": 10.2078, "step": 47 }, { "epoch": 0.03653663177925785, "grad_norm": 18.730236053466797, "learning_rate": 6.331559480312315e-05, "loss": 9.5991, "step": 48 }, { "epoch": 0.037297811607992386, "grad_norm": 24.788707733154297, "learning_rate": 6.297157989132236e-05, "loss": 9.0081, "step": 49 }, { "epoch": 0.038058991436726926, "grad_norm": 37.831748962402344, "learning_rate": 6.261991782887377e-05, "loss": 8.2863, "step": 50 }, { "epoch": 0.038058991436726926, "eval_loss": 3.2690162658691406, "eval_runtime": 159.7447, "eval_samples_per_second": 13.853, "eval_steps_per_second": 3.468, "step": 50 }, { "epoch": 0.038820171265461466, "grad_norm": 40.48371887207031, "learning_rate": 6.226070475667393e-05, "loss": 12.8986, "step": 51 }, { "epoch": 0.039581351094196006, "grad_norm": 28.599153518676758, "learning_rate": 6.189403887999006e-05, "loss": 12.1586, "step": 52 }, { "epoch": 0.04034253092293054, "grad_norm": 16.86196517944336, "learning_rate": 6.152002044161171e-05, "loss": 11.3232, "step": 53 }, { "epoch": 0.04110371075166508, "grad_norm": 13.314998626708984, "learning_rate": 6.113875169444539e-05, "loss": 11.3059, "step": 54 }, { "epoch": 0.04186489058039962, "grad_norm": 12.314648628234863, "learning_rate": 6.0750336873559605e-05, "loss": 11.3874, "step": 55 }, { "epoch": 0.04262607040913416, "grad_norm": 16.70978355407715, "learning_rate": 6.035488216768811e-05, "loss": 11.2571, "step": 56 }, { "epoch": 0.0433872502378687, "grad_norm": 12.304032325744629, "learning_rate": 5.9952495690198894e-05, "loss": 10.8975, "step": 57 }, { "epoch": 0.04414843006660323, "grad_norm": 12.1074800491333, "learning_rate": 5.954328744953709e-05, "loss": 10.5291, "step": 58 }, { "epoch": 0.04490960989533777, "grad_norm": 11.482349395751953, "learning_rate": 5.91273693191498e-05, "loss": 10.8876, "step": 59 }, { "epoch": 0.04567078972407231, "grad_norm": 11.817330360412598, "learning_rate": 5.870485500690094e-05, "loss": 10.9615, "step": 60 }, { "epoch": 0.04643196955280685, "grad_norm": 11.868997573852539, "learning_rate": 5.827586002398468e-05, "loss": 10.8039, "step": 61 }, { "epoch": 0.04719314938154139, "grad_norm": 11.931234359741211, "learning_rate": 5.784050165334589e-05, "loss": 10.3456, "step": 62 }, { "epoch": 0.047954329210275926, "grad_norm": 12.019116401672363, "learning_rate": 5.739889891761608e-05, "loss": 10.3588, "step": 63 }, { "epoch": 0.048715509039010466, "grad_norm": 12.904118537902832, "learning_rate": 5.6951172546573794e-05, "loss": 10.4684, "step": 64 }, { "epoch": 0.049476688867745006, "grad_norm": 11.970799446105957, "learning_rate": 5.6497444944138376e-05, "loss": 10.8854, "step": 65 }, { "epoch": 0.050237868696479546, "grad_norm": 12.086244583129883, "learning_rate": 5.603784015490587e-05, "loss": 10.4556, "step": 66 }, { "epoch": 0.05099904852521408, "grad_norm": 12.689579010009766, "learning_rate": 5.557248383023655e-05, "loss": 11.5082, "step": 67 }, { "epoch": 0.05176022835394862, "grad_norm": 12.876420974731445, "learning_rate": 5.510150319390302e-05, "loss": 10.8254, "step": 68 }, { "epoch": 0.05252140818268316, "grad_norm": 12.876321792602539, "learning_rate": 5.4625027007308546e-05, "loss": 10.6497, "step": 69 }, { "epoch": 0.0532825880114177, "grad_norm": 12.555021286010742, "learning_rate": 5.414318553428494e-05, "loss": 10.7097, "step": 70 }, { "epoch": 0.05404376784015224, "grad_norm": 14.044441223144531, "learning_rate": 5.3656110505479776e-05, "loss": 11.4296, "step": 71 }, { "epoch": 0.05480494766888677, "grad_norm": 12.55296802520752, "learning_rate": 5.316393508234253e-05, "loss": 10.8332, "step": 72 }, { "epoch": 0.05556612749762131, "grad_norm": 16.553260803222656, "learning_rate": 5.266679382071953e-05, "loss": 10.2235, "step": 73 }, { "epoch": 0.05632730732635585, "grad_norm": 12.925923347473145, "learning_rate": 5.216482263406778e-05, "loss": 11.0243, "step": 74 }, { "epoch": 0.05708848715509039, "grad_norm": 13.123712539672852, "learning_rate": 5.1658158756297576e-05, "loss": 10.5786, "step": 75 }, { "epoch": 0.057849666983824925, "grad_norm": 15.51366901397705, "learning_rate": 5.114694070425407e-05, "loss": 10.7227, "step": 76 }, { "epoch": 0.058610846812559465, "grad_norm": 13.080121994018555, "learning_rate": 5.063130823984823e-05, "loss": 10.228, "step": 77 }, { "epoch": 0.059372026641294005, "grad_norm": 13.711491584777832, "learning_rate": 5.011140233184724e-05, "loss": 10.5938, "step": 78 }, { "epoch": 0.060133206470028545, "grad_norm": 15.03261947631836, "learning_rate": 4.958736511733516e-05, "loss": 10.9826, "step": 79 }, { "epoch": 0.060894386298763085, "grad_norm": 12.706965446472168, "learning_rate": 4.905933986285393e-05, "loss": 9.9897, "step": 80 }, { "epoch": 0.06165556612749762, "grad_norm": 16.334020614624023, "learning_rate": 4.8527470925235824e-05, "loss": 11.1604, "step": 81 }, { "epoch": 0.06241674595623216, "grad_norm": 15.609319686889648, "learning_rate": 4.799190371213772e-05, "loss": 11.1633, "step": 82 }, { "epoch": 0.0631779257849667, "grad_norm": 14.876952171325684, "learning_rate": 4.745278464228808e-05, "loss": 10.7648, "step": 83 }, { "epoch": 0.06393910561370124, "grad_norm": 15.908063888549805, "learning_rate": 4.69102611054575e-05, "loss": 10.5009, "step": 84 }, { "epoch": 0.06470028544243578, "grad_norm": 15.217329025268555, "learning_rate": 4.6364481422163926e-05, "loss": 10.9934, "step": 85 }, { "epoch": 0.06546146527117032, "grad_norm": 15.298270225524902, "learning_rate": 4.581559480312316e-05, "loss": 10.359, "step": 86 }, { "epoch": 0.06622264509990486, "grad_norm": 14.394475936889648, "learning_rate": 4.526375130845627e-05, "loss": 10.089, "step": 87 }, { "epoch": 0.06698382492863938, "grad_norm": 15.546106338500977, "learning_rate": 4.4709101806664554e-05, "loss": 10.7069, "step": 88 }, { "epoch": 0.06774500475737393, "grad_norm": 16.768985748291016, "learning_rate": 4.4151797933383685e-05, "loss": 10.2604, "step": 89 }, { "epoch": 0.06850618458610847, "grad_norm": 15.508210182189941, "learning_rate": 4.359199204992797e-05, "loss": 10.2447, "step": 90 }, { "epoch": 0.069267364414843, "grad_norm": 15.253734588623047, "learning_rate": 4.30298372016363e-05, "loss": 9.6666, "step": 91 }, { "epoch": 0.07002854424357755, "grad_norm": 17.911222457885742, "learning_rate": 4.246548707603114e-05, "loss": 10.0422, "step": 92 }, { "epoch": 0.07078972407231209, "grad_norm": 17.074037551879883, "learning_rate": 4.1899095960801805e-05, "loss": 9.4583, "step": 93 }, { "epoch": 0.07155090390104663, "grad_norm": 16.951196670532227, "learning_rate": 4.133081870162385e-05, "loss": 9.2461, "step": 94 }, { "epoch": 0.07231208372978117, "grad_norm": 16.778108596801758, "learning_rate": 4.076081065982569e-05, "loss": 8.8192, "step": 95 }, { "epoch": 0.0730732635585157, "grad_norm": 19.881837844848633, "learning_rate": 4.018922766991447e-05, "loss": 8.6392, "step": 96 }, { "epoch": 0.07383444338725023, "grad_norm": 24.723371505737305, "learning_rate": 3.961622599697241e-05, "loss": 9.6325, "step": 97 }, { "epoch": 0.07459562321598477, "grad_norm": 21.536834716796875, "learning_rate": 3.9041962293935516e-05, "loss": 8.0487, "step": 98 }, { "epoch": 0.07535680304471931, "grad_norm": 21.819488525390625, "learning_rate": 3.84665935587662e-05, "loss": 8.5147, "step": 99 }, { "epoch": 0.07611798287345385, "grad_norm": 28.30029296875, "learning_rate": 3.7890277091531636e-05, "loss": 5.6556, "step": 100 }, { "epoch": 0.07611798287345385, "eval_loss": 3.2521181106567383, "eval_runtime": 159.753, "eval_samples_per_second": 13.853, "eval_steps_per_second": 3.468, "step": 100 }, { "epoch": 0.07687916270218839, "grad_norm": 40.049400329589844, "learning_rate": 3.7313170451399475e-05, "loss": 11.8576, "step": 101 }, { "epoch": 0.07764034253092293, "grad_norm": 33.80797576904297, "learning_rate": 3.673543141356278e-05, "loss": 12.1796, "step": 102 }, { "epoch": 0.07840152235965747, "grad_norm": 45.65495300292969, "learning_rate": 3.6157217926105783e-05, "loss": 11.9832, "step": 103 }, { "epoch": 0.07916270218839201, "grad_norm": 35.831092834472656, "learning_rate": 3.557868806682255e-05, "loss": 12.129, "step": 104 }, { "epoch": 0.07992388201712655, "grad_norm": 19.742515563964844, "learning_rate": 3.5e-05, "loss": 11.3112, "step": 105 }, { "epoch": 0.08068506184586108, "grad_norm": 12.872544288635254, "learning_rate": 3.442131193317745e-05, "loss": 10.8966, "step": 106 }, { "epoch": 0.08144624167459562, "grad_norm": 16.163219451904297, "learning_rate": 3.384278207389421e-05, "loss": 10.8695, "step": 107 }, { "epoch": 0.08220742150333016, "grad_norm": 15.760530471801758, "learning_rate": 3.3264568586437216e-05, "loss": 11.1322, "step": 108 }, { "epoch": 0.0829686013320647, "grad_norm": 12.737210273742676, "learning_rate": 3.268682954860052e-05, "loss": 11.1206, "step": 109 }, { "epoch": 0.08372978116079924, "grad_norm": 12.96513843536377, "learning_rate": 3.210972290846837e-05, "loss": 10.6524, "step": 110 }, { "epoch": 0.08449096098953378, "grad_norm": 15.09691333770752, "learning_rate": 3.15334064412338e-05, "loss": 10.8931, "step": 111 }, { "epoch": 0.08525214081826832, "grad_norm": 12.536844253540039, "learning_rate": 3.0958037706064485e-05, "loss": 10.828, "step": 112 }, { "epoch": 0.08601332064700286, "grad_norm": 12.665741920471191, "learning_rate": 3.038377400302758e-05, "loss": 11.0088, "step": 113 }, { "epoch": 0.0867745004757374, "grad_norm": 11.625788688659668, "learning_rate": 2.9810772330085524e-05, "loss": 10.7514, "step": 114 }, { "epoch": 0.08753568030447194, "grad_norm": 13.06114673614502, "learning_rate": 2.9239189340174306e-05, "loss": 10.7538, "step": 115 }, { "epoch": 0.08829686013320646, "grad_norm": 13.02463150024414, "learning_rate": 2.8669181298376163e-05, "loss": 10.5935, "step": 116 }, { "epoch": 0.089058039961941, "grad_norm": 11.050668716430664, "learning_rate": 2.8100904039198193e-05, "loss": 10.4296, "step": 117 }, { "epoch": 0.08981921979067554, "grad_norm": 11.642556190490723, "learning_rate": 2.7534512923968863e-05, "loss": 10.5926, "step": 118 }, { "epoch": 0.09058039961941008, "grad_norm": 12.23384952545166, "learning_rate": 2.6970162798363695e-05, "loss": 10.8684, "step": 119 }, { "epoch": 0.09134157944814462, "grad_norm": 12.222782135009766, "learning_rate": 2.640800795007203e-05, "loss": 11.1264, "step": 120 }, { "epoch": 0.09210275927687916, "grad_norm": 11.30429458618164, "learning_rate": 2.5848202066616305e-05, "loss": 10.1724, "step": 121 }, { "epoch": 0.0928639391056137, "grad_norm": 14.014923095703125, "learning_rate": 2.5290898193335446e-05, "loss": 11.1533, "step": 122 }, { "epoch": 0.09362511893434824, "grad_norm": 14.214231491088867, "learning_rate": 2.4736248691543736e-05, "loss": 10.6027, "step": 123 }, { "epoch": 0.09438629876308278, "grad_norm": 13.18128776550293, "learning_rate": 2.4184405196876842e-05, "loss": 10.7151, "step": 124 }, { "epoch": 0.09514747859181731, "grad_norm": 12.126294136047363, "learning_rate": 2.363551857783608e-05, "loss": 10.0795, "step": 125 }, { "epoch": 0.09590865842055185, "grad_norm": 12.301307678222656, "learning_rate": 2.308973889454249e-05, "loss": 10.0897, "step": 126 }, { "epoch": 0.09666983824928639, "grad_norm": 12.24648666381836, "learning_rate": 2.2547215357711918e-05, "loss": 10.0434, "step": 127 }, { "epoch": 0.09743101807802093, "grad_norm": 13.534128189086914, "learning_rate": 2.2008096287862266e-05, "loss": 10.5216, "step": 128 }, { "epoch": 0.09819219790675547, "grad_norm": 13.032715797424316, "learning_rate": 2.1472529074764177e-05, "loss": 10.751, "step": 129 }, { "epoch": 0.09895337773549001, "grad_norm": 13.36697006225586, "learning_rate": 2.0940660137146074e-05, "loss": 10.4866, "step": 130 }, { "epoch": 0.09971455756422455, "grad_norm": 14.086920738220215, "learning_rate": 2.041263488266484e-05, "loss": 10.4804, "step": 131 }, { "epoch": 0.10047573739295909, "grad_norm": 13.180987358093262, "learning_rate": 1.988859766815275e-05, "loss": 9.7492, "step": 132 }, { "epoch": 0.10123691722169363, "grad_norm": 14.282618522644043, "learning_rate": 1.9368691760151773e-05, "loss": 11.3742, "step": 133 }, { "epoch": 0.10199809705042816, "grad_norm": 14.527554512023926, "learning_rate": 1.885305929574593e-05, "loss": 10.8801, "step": 134 }, { "epoch": 0.1027592768791627, "grad_norm": 12.420266151428223, "learning_rate": 1.8341841243702424e-05, "loss": 9.6681, "step": 135 }, { "epoch": 0.10352045670789724, "grad_norm": 15.356304168701172, "learning_rate": 1.7835177365932225e-05, "loss": 10.354, "step": 136 }, { "epoch": 0.10428163653663178, "grad_norm": 13.979833602905273, "learning_rate": 1.7333206179280478e-05, "loss": 10.2454, "step": 137 }, { "epoch": 0.10504281636536632, "grad_norm": 13.443976402282715, "learning_rate": 1.6836064917657478e-05, "loss": 9.7525, "step": 138 }, { "epoch": 0.10580399619410086, "grad_norm": 13.744975090026855, "learning_rate": 1.6343889494520224e-05, "loss": 9.7207, "step": 139 }, { "epoch": 0.1065651760228354, "grad_norm": 15.326041221618652, "learning_rate": 1.5856814465715064e-05, "loss": 9.8803, "step": 140 }, { "epoch": 0.10732635585156994, "grad_norm": 14.626502990722656, "learning_rate": 1.5374972992691458e-05, "loss": 9.7187, "step": 141 }, { "epoch": 0.10808753568030448, "grad_norm": 15.12524700164795, "learning_rate": 1.4898496806096974e-05, "loss": 9.5225, "step": 142 }, { "epoch": 0.108848715509039, "grad_norm": 15.535211563110352, "learning_rate": 1.4427516169763444e-05, "loss": 9.0, "step": 143 }, { "epoch": 0.10960989533777354, "grad_norm": 16.60105323791504, "learning_rate": 1.396215984509412e-05, "loss": 9.1394, "step": 144 }, { "epoch": 0.11037107516650808, "grad_norm": 14.278752326965332, "learning_rate": 1.3502555055861625e-05, "loss": 8.6505, "step": 145 }, { "epoch": 0.11113225499524262, "grad_norm": 17.42091941833496, "learning_rate": 1.3048827453426203e-05, "loss": 8.7583, "step": 146 }, { "epoch": 0.11189343482397716, "grad_norm": 18.650836944580078, "learning_rate": 1.2601101082383917e-05, "loss": 9.2693, "step": 147 }, { "epoch": 0.1126546146527117, "grad_norm": 19.8348331451416, "learning_rate": 1.2159498346654094e-05, "loss": 8.5282, "step": 148 }, { "epoch": 0.11341579448144624, "grad_norm": 22.303525924682617, "learning_rate": 1.1724139976015306e-05, "loss": 8.885, "step": 149 }, { "epoch": 0.11417697431018078, "grad_norm": 27.490812301635742, "learning_rate": 1.1295144993099068e-05, "loss": 8.4182, "step": 150 }, { "epoch": 0.11417697431018078, "eval_loss": 2.8477718830108643, "eval_runtime": 159.7361, "eval_samples_per_second": 13.854, "eval_steps_per_second": 3.468, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.1510037676281037e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }