{ "best_metric": 0.6657014489173889, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 3.0047846889952154, "eval_steps": 50, "global_step": 157, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019138755980861243, "grad_norm": 110.49056243896484, "learning_rate": 1.04e-05, "loss": 7.9662, "step": 1 }, { "epoch": 0.019138755980861243, "eval_loss": 3.50419020652771, "eval_runtime": 6.0611, "eval_samples_per_second": 116.15, "eval_steps_per_second": 3.63, "step": 1 }, { "epoch": 0.03827751196172249, "grad_norm": 167.85272216796875, "learning_rate": 2.08e-05, "loss": 10.7207, "step": 2 }, { "epoch": 0.05741626794258373, "grad_norm": 132.39137268066406, "learning_rate": 3.12e-05, "loss": 9.804, "step": 3 }, { "epoch": 0.07655502392344497, "grad_norm": 54.92786407470703, "learning_rate": 4.16e-05, "loss": 8.6958, "step": 4 }, { "epoch": 0.09569377990430622, "grad_norm": 73.43879699707031, "learning_rate": 5.2e-05, "loss": 9.0657, "step": 5 }, { "epoch": 0.11483253588516747, "grad_norm": 110.9149398803711, "learning_rate": 6.24e-05, "loss": 7.6876, "step": 6 }, { "epoch": 0.1339712918660287, "grad_norm": 20.857194900512695, "learning_rate": 7.28e-05, "loss": 4.8196, "step": 7 }, { "epoch": 0.15311004784688995, "grad_norm": 13.20616340637207, "learning_rate": 8.32e-05, "loss": 4.1929, "step": 8 }, { "epoch": 0.1722488038277512, "grad_norm": 16.594614028930664, "learning_rate": 9.36e-05, "loss": 4.156, "step": 9 }, { "epoch": 0.19138755980861244, "grad_norm": 15.613114356994629, "learning_rate": 0.000104, "loss": 3.779, "step": 10 }, { "epoch": 0.21052631578947367, "grad_norm": 15.915609359741211, "learning_rate": 0.000103988125320576, "loss": 3.99, "step": 11 }, { "epoch": 0.22966507177033493, "grad_norm": 18.94052505493164, "learning_rate": 0.0001039525067056891, "loss": 4.4262, "step": 12 }, { "epoch": 0.24880382775119617, "grad_norm": 19.97093391418457, "learning_rate": 0.00010389316042301748, "loss": 5.0256, "step": 13 }, { "epoch": 0.2679425837320574, "grad_norm": 22.92121696472168, "learning_rate": 0.00010381011357710274, "loss": 5.0772, "step": 14 }, { "epoch": 0.28708133971291866, "grad_norm": 13.574360847473145, "learning_rate": 0.00010370340409697079, "loss": 3.6064, "step": 15 }, { "epoch": 0.3062200956937799, "grad_norm": 57.79045486450195, "learning_rate": 0.00010357308071880879, "loss": 4.5663, "step": 16 }, { "epoch": 0.3253588516746411, "grad_norm": 21.614091873168945, "learning_rate": 0.00010341920296370667, "loss": 4.6561, "step": 17 }, { "epoch": 0.3444976076555024, "grad_norm": 25.760377883911133, "learning_rate": 0.0001032418411104726, "loss": 5.1199, "step": 18 }, { "epoch": 0.36363636363636365, "grad_norm": 19.697742462158203, "learning_rate": 0.00010304107616353539, "loss": 5.0533, "step": 19 }, { "epoch": 0.3827751196172249, "grad_norm": 7.635573387145996, "learning_rate": 0.00010281699981594828, "loss": 3.4386, "step": 20 }, { "epoch": 0.4019138755980861, "grad_norm": 10.854726791381836, "learning_rate": 0.00010256971440751105, "loss": 3.3948, "step": 21 }, { "epoch": 0.42105263157894735, "grad_norm": 9.287283897399902, "learning_rate": 0.00010229933287802952, "loss": 3.3784, "step": 22 }, { "epoch": 0.44019138755980863, "grad_norm": 9.869610786437988, "learning_rate": 0.00010200597871573389, "loss": 3.358, "step": 23 }, { "epoch": 0.45933014354066987, "grad_norm": 14.878267288208008, "learning_rate": 0.00010168978590087931, "loss": 3.7016, "step": 24 }, { "epoch": 0.4784688995215311, "grad_norm": 17.4876651763916, "learning_rate": 0.00010135089884455476, "loss": 5.1854, "step": 25 }, { "epoch": 0.49760765550239233, "grad_norm": 13.26728630065918, "learning_rate": 0.00010098947232272773, "loss": 3.3226, "step": 26 }, { "epoch": 0.5167464114832536, "grad_norm": 8.66011905670166, "learning_rate": 0.0001006056714055552, "loss": 3.2001, "step": 27 }, { "epoch": 0.5358851674641149, "grad_norm": 7.708569526672363, "learning_rate": 0.00010019967138199313, "loss": 3.1519, "step": 28 }, { "epoch": 0.5550239234449761, "grad_norm": 8.281988143920898, "learning_rate": 9.977165767973862e-05, "loss": 3.2212, "step": 29 }, { "epoch": 0.5741626794258373, "grad_norm": 12.27998161315918, "learning_rate": 9.932182578054185e-05, "loss": 3.7744, "step": 30 }, { "epoch": 0.5933014354066986, "grad_norm": 15.959146499633789, "learning_rate": 9.885038113092579e-05, "loss": 4.9725, "step": 31 }, { "epoch": 0.6124401913875598, "grad_norm": 8.665043830871582, "learning_rate": 9.835753904835505e-05, "loss": 3.7758, "step": 32 }, { "epoch": 0.631578947368421, "grad_norm": 6.562826633453369, "learning_rate": 9.784352462289645e-05, "loss": 2.755, "step": 33 }, { "epoch": 0.6507177033492823, "grad_norm": 6.567618370056152, "learning_rate": 9.730857261441625e-05, "loss": 3.1445, "step": 34 }, { "epoch": 0.6698564593301436, "grad_norm": 6.928153038024902, "learning_rate": 9.675292734536085e-05, "loss": 2.9476, "step": 35 }, { "epoch": 0.6889952153110048, "grad_norm": 8.07654857635498, "learning_rate": 9.61768425891704e-05, "loss": 3.2723, "step": 36 }, { "epoch": 0.7081339712918661, "grad_norm": 12.363129615783691, "learning_rate": 9.55805814543757e-05, "loss": 4.231, "step": 37 }, { "epoch": 0.7272727272727273, "grad_norm": 9.718721389770508, "learning_rate": 9.496441626443173e-05, "loss": 4.0451, "step": 38 }, { "epoch": 0.7464114832535885, "grad_norm": 7.053831100463867, "learning_rate": 9.43286284333424e-05, "loss": 2.936, "step": 39 }, { "epoch": 0.7655502392344498, "grad_norm": 7.714199542999268, "learning_rate": 9.367350833713373e-05, "loss": 2.589, "step": 40 }, { "epoch": 0.784688995215311, "grad_norm": 7.965484619140625, "learning_rate": 9.299935518123368e-05, "loss": 3.1633, "step": 41 }, { "epoch": 0.8038277511961722, "grad_norm": 8.742942810058594, "learning_rate": 9.230647686381949e-05, "loss": 3.0296, "step": 42 }, { "epoch": 0.8229665071770335, "grad_norm": 11.011368751525879, "learning_rate": 9.159518983519499e-05, "loss": 3.8187, "step": 43 }, { "epoch": 0.8421052631578947, "grad_norm": 11.43237018585205, "learning_rate": 9.086581895326183e-05, "loss": 4.2593, "step": 44 }, { "epoch": 0.861244019138756, "grad_norm": 5.459627151489258, "learning_rate": 9.011869733515096e-05, "loss": 2.5255, "step": 45 }, { "epoch": 0.8803827751196173, "grad_norm": 10.84112548828125, "learning_rate": 8.935416620508183e-05, "loss": 2.4228, "step": 46 }, { "epoch": 0.8995215311004785, "grad_norm": 6.377012729644775, "learning_rate": 8.8572574738519e-05, "loss": 2.748, "step": 47 }, { "epoch": 0.9186602870813397, "grad_norm": 7.533714294433594, "learning_rate": 8.777427990269736e-05, "loss": 2.9774, "step": 48 }, { "epoch": 0.937799043062201, "grad_norm": 10.710916519165039, "learning_rate": 8.695964629358846e-05, "loss": 3.6151, "step": 49 }, { "epoch": 0.9569377990430622, "grad_norm": 12.071314811706543, "learning_rate": 8.612904596938294e-05, "loss": 4.5794, "step": 50 }, { "epoch": 0.9569377990430622, "eval_loss": 0.7570522427558899, "eval_runtime": 6.351, "eval_samples_per_second": 110.849, "eval_steps_per_second": 3.464, "step": 50 }, { "epoch": 0.9760765550239234, "grad_norm": 5.633584499359131, "learning_rate": 8.528285828056462e-05, "loss": 2.3701, "step": 51 }, { "epoch": 0.9952153110047847, "grad_norm": 7.215916156768799, "learning_rate": 8.442146969665414e-05, "loss": 3.0782, "step": 52 }, { "epoch": 1.014354066985646, "grad_norm": 5.383790493011475, "learning_rate": 8.354527362970128e-05, "loss": 2.6831, "step": 53 }, { "epoch": 1.0334928229665072, "grad_norm": 3.5272979736328125, "learning_rate": 8.26546702546063e-05, "loss": 1.6875, "step": 54 }, { "epoch": 1.0526315789473684, "grad_norm": 4.227339744567871, "learning_rate": 8.175006632635281e-05, "loss": 1.7057, "step": 55 }, { "epoch": 1.0717703349282297, "grad_norm": 5.322114944458008, "learning_rate": 8.083187499423519e-05, "loss": 1.7467, "step": 56 }, { "epoch": 1.0909090909090908, "grad_norm": 6.478522777557373, "learning_rate": 7.990051561316567e-05, "loss": 2.0903, "step": 57 }, { "epoch": 1.1100478468899522, "grad_norm": 10.717913627624512, "learning_rate": 7.895641355214731e-05, "loss": 2.7574, "step": 58 }, { "epoch": 1.1291866028708135, "grad_norm": 9.820627212524414, "learning_rate": 7.8e-05, "loss": 2.8287, "step": 59 }, { "epoch": 1.1483253588516746, "grad_norm": 6.401808738708496, "learning_rate": 7.703171176842872e-05, "loss": 1.692, "step": 60 }, { "epoch": 1.167464114832536, "grad_norm": 5.5038371086120605, "learning_rate": 7.605199109252345e-05, "loss": 1.6249, "step": 61 }, { "epoch": 1.186602870813397, "grad_norm": 6.921739101409912, "learning_rate": 7.50612854287823e-05, "loss": 1.8119, "step": 62 }, { "epoch": 1.2057416267942584, "grad_norm": 5.326653480529785, "learning_rate": 7.406004725074992e-05, "loss": 1.7043, "step": 63 }, { "epoch": 1.2248803827751196, "grad_norm": 7.478108882904053, "learning_rate": 7.304873384236448e-05, "loss": 2.0332, "step": 64 }, { "epoch": 1.244019138755981, "grad_norm": 8.804676055908203, "learning_rate": 7.202780708910751e-05, "loss": 2.2396, "step": 65 }, { "epoch": 1.263157894736842, "grad_norm": 3.768653392791748, "learning_rate": 7.099773326705254e-05, "loss": 1.7259, "step": 66 }, { "epoch": 1.2822966507177034, "grad_norm": 4.256533622741699, "learning_rate": 6.995898282990799e-05, "loss": 1.7037, "step": 67 }, { "epoch": 1.3014354066985647, "grad_norm": 5.0931596755981445, "learning_rate": 6.891203019415246e-05, "loss": 1.6244, "step": 68 }, { "epoch": 1.3205741626794258, "grad_norm": 5.528133869171143, "learning_rate": 6.785735352235982e-05, "loss": 1.9309, "step": 69 }, { "epoch": 1.339712918660287, "grad_norm": 7.430482864379883, "learning_rate": 6.679543450481368e-05, "loss": 1.8413, "step": 70 }, { "epoch": 1.3588516746411483, "grad_norm": 10.80150318145752, "learning_rate": 6.572675813951051e-05, "loss": 2.761, "step": 71 }, { "epoch": 1.3779904306220097, "grad_norm": 4.056700706481934, "learning_rate": 6.465181251065205e-05, "loss": 1.525, "step": 72 }, { "epoch": 1.3971291866028708, "grad_norm": 4.227428913116455, "learning_rate": 6.357108856572834e-05, "loss": 1.4852, "step": 73 }, { "epoch": 1.4162679425837321, "grad_norm": 4.832366943359375, "learning_rate": 6.248507989129293e-05, "loss": 1.4578, "step": 74 }, { "epoch": 1.4354066985645932, "grad_norm": 5.4759039878845215, "learning_rate": 6.139428248753274e-05, "loss": 1.6232, "step": 75 }, { "epoch": 1.4545454545454546, "grad_norm": 6.998035907745361, "learning_rate": 6.0299194541735714e-05, "loss": 1.8466, "step": 76 }, { "epoch": 1.4736842105263157, "grad_norm": 8.820535659790039, "learning_rate": 5.9200316200759446e-05, "loss": 2.6151, "step": 77 }, { "epoch": 1.492822966507177, "grad_norm": 5.1287617683410645, "learning_rate": 5.80981493426049e-05, "loss": 1.9835, "step": 78 }, { "epoch": 1.5119617224880382, "grad_norm": 4.314144611358643, "learning_rate": 5.699319734719945e-05, "loss": 1.4555, "step": 79 }, { "epoch": 1.5311004784688995, "grad_norm": 5.008012294769287, "learning_rate": 5.5885964866494066e-05, "loss": 1.4348, "step": 80 }, { "epoch": 1.5502392344497609, "grad_norm": 5.556500434875488, "learning_rate": 5.4776957593979344e-05, "loss": 1.6667, "step": 81 }, { "epoch": 1.569377990430622, "grad_norm": 6.523804664611816, "learning_rate": 5.366668203372608e-05, "loss": 1.6892, "step": 82 }, { "epoch": 1.588516746411483, "grad_norm": 8.506819725036621, "learning_rate": 5.255564526905541e-05, "loss": 2.4417, "step": 83 }, { "epoch": 1.6076555023923444, "grad_norm": 7.113678932189941, "learning_rate": 5.1444354730944594e-05, "loss": 2.1147, "step": 84 }, { "epoch": 1.6267942583732058, "grad_norm": 3.6180498600006104, "learning_rate": 5.0333317966273924e-05, "loss": 1.4088, "step": 85 }, { "epoch": 1.6459330143540671, "grad_norm": 4.513885498046875, "learning_rate": 4.922304240602065e-05, "loss": 1.5087, "step": 86 }, { "epoch": 1.6650717703349283, "grad_norm": 5.088216304779053, "learning_rate": 4.8114035133505934e-05, "loss": 1.6036, "step": 87 }, { "epoch": 1.6842105263157894, "grad_norm": 5.699124813079834, "learning_rate": 4.7006802652800545e-05, "loss": 1.7486, "step": 88 }, { "epoch": 1.7033492822966507, "grad_norm": 8.198583602905273, "learning_rate": 4.590185065739511e-05, "loss": 2.3003, "step": 89 }, { "epoch": 1.722488038277512, "grad_norm": 7.6968302726745605, "learning_rate": 4.479968379924054e-05, "loss": 2.3539, "step": 90 }, { "epoch": 1.7416267942583732, "grad_norm": 3.240316390991211, "learning_rate": 4.3700805458264286e-05, "loss": 1.2323, "step": 91 }, { "epoch": 1.7607655502392343, "grad_norm": 4.187992572784424, "learning_rate": 4.260571751246726e-05, "loss": 1.2988, "step": 92 }, { "epoch": 1.7799043062200957, "grad_norm": 5.019561290740967, "learning_rate": 4.151492010870708e-05, "loss": 1.5268, "step": 93 }, { "epoch": 1.799043062200957, "grad_norm": 5.367182731628418, "learning_rate": 4.042891143427165e-05, "loss": 1.6519, "step": 94 }, { "epoch": 1.8181818181818183, "grad_norm": 7.2573957443237305, "learning_rate": 3.934818748934795e-05, "loss": 2.0782, "step": 95 }, { "epoch": 1.8373205741626795, "grad_norm": 9.014602661132812, "learning_rate": 3.827324186048949e-05, "loss": 2.9166, "step": 96 }, { "epoch": 1.8564593301435406, "grad_norm": 3.1976170539855957, "learning_rate": 3.7204565495186314e-05, "loss": 1.4677, "step": 97 }, { "epoch": 1.875598086124402, "grad_norm": 4.183046817779541, "learning_rate": 3.6142646477640186e-05, "loss": 1.3428, "step": 98 }, { "epoch": 1.8947368421052633, "grad_norm": 5.039797782897949, "learning_rate": 3.508796980584756e-05, "loss": 1.5467, "step": 99 }, { "epoch": 1.9138755980861244, "grad_norm": 5.642466068267822, "learning_rate": 3.404101717009201e-05, "loss": 1.6141, "step": 100 }, { "epoch": 1.9138755980861244, "eval_loss": 0.6657014489173889, "eval_runtime": 5.8474, "eval_samples_per_second": 120.394, "eval_steps_per_second": 3.762, "step": 100 }, { "epoch": 1.9330143540669855, "grad_norm": 6.372761249542236, "learning_rate": 3.3002266732947456e-05, "loss": 1.8084, "step": 101 }, { "epoch": 1.9521531100478469, "grad_norm": 8.915667533874512, "learning_rate": 3.197219291089247e-05, "loss": 2.5761, "step": 102 }, { "epoch": 1.9712918660287082, "grad_norm": 5.144289016723633, "learning_rate": 3.095126615763552e-05, "loss": 1.6011, "step": 103 }, { "epoch": 1.9904306220095693, "grad_norm": 5.723019599914551, "learning_rate": 2.9939952749250044e-05, "loss": 1.6758, "step": 104 }, { "epoch": 2.0095693779904304, "grad_norm": 5.689238548278809, "learning_rate": 2.8938714571217698e-05, "loss": 1.7935, "step": 105 }, { "epoch": 2.028708133971292, "grad_norm": 2.925431251525879, "learning_rate": 2.7948008907476577e-05, "loss": 0.8744, "step": 106 }, { "epoch": 2.047846889952153, "grad_norm": 3.4465460777282715, "learning_rate": 2.6968288231571308e-05, "loss": 0.8857, "step": 107 }, { "epoch": 2.0669856459330145, "grad_norm": 3.8623275756835938, "learning_rate": 2.600000000000001e-05, "loss": 0.8412, "step": 108 }, { "epoch": 2.0861244019138754, "grad_norm": 4.409061908721924, "learning_rate": 2.5043586447852696e-05, "loss": 0.9833, "step": 109 }, { "epoch": 2.1052631578947367, "grad_norm": 6.342567443847656, "learning_rate": 2.4099484386834334e-05, "loss": 1.2921, "step": 110 }, { "epoch": 2.124401913875598, "grad_norm": 6.614299297332764, "learning_rate": 2.316812500576481e-05, "loss": 1.3417, "step": 111 }, { "epoch": 2.1435406698564594, "grad_norm": 3.4653584957122803, "learning_rate": 2.224993367364719e-05, "loss": 0.8101, "step": 112 }, { "epoch": 2.1626794258373208, "grad_norm": 3.501373767852783, "learning_rate": 2.1345329745393693e-05, "loss": 0.6439, "step": 113 }, { "epoch": 2.1818181818181817, "grad_norm": 5.044320583343506, "learning_rate": 2.045472637029872e-05, "loss": 0.9163, "step": 114 }, { "epoch": 2.200956937799043, "grad_norm": 5.2914018630981445, "learning_rate": 1.9578530303345857e-05, "loss": 0.859, "step": 115 }, { "epoch": 2.2200956937799043, "grad_norm": 6.567808151245117, "learning_rate": 1.8717141719435375e-05, "loss": 0.9853, "step": 116 }, { "epoch": 2.2392344497607657, "grad_norm": 9.96963882446289, "learning_rate": 1.7870954030617056e-05, "loss": 1.4191, "step": 117 }, { "epoch": 2.258373205741627, "grad_norm": 3.649057149887085, "learning_rate": 1.704035370641152e-05, "loss": 1.0996, "step": 118 }, { "epoch": 2.277511961722488, "grad_norm": 4.023001670837402, "learning_rate": 1.6225720097302633e-05, "loss": 0.7567, "step": 119 }, { "epoch": 2.2966507177033493, "grad_norm": 4.509950637817383, "learning_rate": 1.5427425261480974e-05, "loss": 0.7814, "step": 120 }, { "epoch": 2.3157894736842106, "grad_norm": 4.699758529663086, "learning_rate": 1.464583379491817e-05, "loss": 0.7338, "step": 121 }, { "epoch": 2.334928229665072, "grad_norm": 5.8576340675354, "learning_rate": 1.3881302664849041e-05, "loss": 0.9532, "step": 122 }, { "epoch": 2.354066985645933, "grad_norm": 8.96512508392334, "learning_rate": 1.3134181046738168e-05, "loss": 1.2896, "step": 123 }, { "epoch": 2.373205741626794, "grad_norm": 4.900047302246094, "learning_rate": 1.240481016480501e-05, "loss": 0.9493, "step": 124 }, { "epoch": 2.3923444976076556, "grad_norm": 3.7494747638702393, "learning_rate": 1.1693523136180511e-05, "loss": 0.7405, "step": 125 }, { "epoch": 2.411483253588517, "grad_norm": 4.123234272003174, "learning_rate": 1.1000644818766316e-05, "loss": 0.7796, "step": 126 }, { "epoch": 2.430622009569378, "grad_norm": 4.546311855316162, "learning_rate": 1.032649166286626e-05, "loss": 0.7723, "step": 127 }, { "epoch": 2.449760765550239, "grad_norm": 4.600128650665283, "learning_rate": 9.671371566657587e-06, "loss": 0.83, "step": 128 }, { "epoch": 2.4688995215311005, "grad_norm": 7.553520202636719, "learning_rate": 9.035583735568276e-06, "loss": 1.2396, "step": 129 }, { "epoch": 2.488038277511962, "grad_norm": 5.458954334259033, "learning_rate": 8.419418545624288e-06, "loss": 1.1108, "step": 130 }, { "epoch": 2.507177033492823, "grad_norm": 3.184964179992676, "learning_rate": 7.823157410829596e-06, "loss": 0.7275, "step": 131 }, { "epoch": 2.526315789473684, "grad_norm": 3.7187752723693848, "learning_rate": 7.247072654639154e-06, "loss": 0.6998, "step": 132 }, { "epoch": 2.5454545454545454, "grad_norm": 4.585022449493408, "learning_rate": 6.69142738558375e-06, "loss": 0.7814, "step": 133 }, { "epoch": 2.5645933014354068, "grad_norm": 4.955410957336426, "learning_rate": 6.1564753771035305e-06, "loss": 0.84, "step": 134 }, { "epoch": 2.583732057416268, "grad_norm": 8.629329681396484, "learning_rate": 5.642460951644947e-06, "loss": 1.1407, "step": 135 }, { "epoch": 2.6028708133971294, "grad_norm": 8.311567306518555, "learning_rate": 5.14961886907421e-06, "loss": 1.2298, "step": 136 }, { "epoch": 2.6220095693779903, "grad_norm": 3.3478479385375977, "learning_rate": 4.678174219458142e-06, "loss": 0.7663, "step": 137 }, { "epoch": 2.6411483253588517, "grad_norm": 3.7112197875976562, "learning_rate": 4.228342320261366e-06, "loss": 0.5858, "step": 138 }, { "epoch": 2.660287081339713, "grad_norm": 6.350260257720947, "learning_rate": 3.8003286180068746e-06, "loss": 0.753, "step": 139 }, { "epoch": 2.679425837320574, "grad_norm": 4.954045295715332, "learning_rate": 3.3943285944447863e-06, "loss": 0.7933, "step": 140 }, { "epoch": 2.6985645933014353, "grad_norm": 6.56000280380249, "learning_rate": 3.0105276772722636e-06, "loss": 0.9958, "step": 141 }, { "epoch": 2.7177033492822966, "grad_norm": 8.548569679260254, "learning_rate": 2.649101155445231e-06, "loss": 1.315, "step": 142 }, { "epoch": 2.736842105263158, "grad_norm": 2.8328959941864014, "learning_rate": 2.310214099120679e-06, "loss": 0.864, "step": 143 }, { "epoch": 2.7559808612440193, "grad_norm": 3.963765859603882, "learning_rate": 1.9940212842661166e-06, "loss": 0.7689, "step": 144 }, { "epoch": 2.77511961722488, "grad_norm": 4.467096328735352, "learning_rate": 1.7006671219704684e-06, "loss": 0.7818, "step": 145 }, { "epoch": 2.7942583732057416, "grad_norm": 5.01872444152832, "learning_rate": 1.4302855924889446e-06, "loss": 0.7031, "step": 146 }, { "epoch": 2.813397129186603, "grad_norm": 6.861513137817383, "learning_rate": 1.1830001840517118e-06, "loss": 0.9363, "step": 147 }, { "epoch": 2.8325358851674642, "grad_norm": 8.302877426147461, "learning_rate": 9.58923836464597e-07, "loss": 1.2896, "step": 148 }, { "epoch": 2.8516746411483256, "grad_norm": 4.442171096801758, "learning_rate": 7.581588895273881e-07, "loss": 1.0246, "step": 149 }, { "epoch": 2.8708133971291865, "grad_norm": 3.28583025932312, "learning_rate": 5.807970362933168e-07, "loss": 0.6213, "step": 150 }, { "epoch": 2.8708133971291865, "eval_loss": 0.700709879398346, "eval_runtime": 5.7803, "eval_samples_per_second": 121.793, "eval_steps_per_second": 3.806, "step": 150 }, { "epoch": 2.889952153110048, "grad_norm": 4.2848358154296875, "learning_rate": 4.2691928119120525e-07, "loss": 0.8242, "step": 151 }, { "epoch": 2.909090909090909, "grad_norm": 4.562102317810059, "learning_rate": 2.9659590302921624e-07, "loss": 0.8105, "step": 152 }, { "epoch": 2.92822966507177, "grad_norm": 5.600813865661621, "learning_rate": 1.8988642289723988e-07, "loss": 0.8056, "step": 153 }, { "epoch": 2.9473684210526314, "grad_norm": 7.6773858070373535, "learning_rate": 1.0683957698251055e-07, "loss": 1.2432, "step": 154 }, { "epoch": 2.9665071770334928, "grad_norm": 5.841709613800049, "learning_rate": 4.749329431088922e-08, "loss": 1.0688, "step": 155 }, { "epoch": 2.985645933014354, "grad_norm": 4.864152431488037, "learning_rate": 1.1874679423989675e-08, "loss": 0.8732, "step": 156 }, { "epoch": 3.0047846889952154, "grad_norm": 6.73012638092041, "learning_rate": 0.0, "loss": 1.3435, "step": 157 } ], "logging_steps": 1, "max_steps": 157, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8020472473706824e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }