{ "best_metric": 1.700640082359314, "best_model_checkpoint": "miner_id_24/checkpoint-300", "epoch": 0.11954274898513187, "eval_steps": 100, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00039847582995043956, "grad_norm": 0.8249958157539368, "learning_rate": 2e-05, "loss": 3.7705, "step": 1 }, { "epoch": 0.00039847582995043956, "eval_loss": 3.994760274887085, "eval_runtime": 599.7212, "eval_samples_per_second": 14.097, "eval_steps_per_second": 3.525, "step": 1 }, { "epoch": 0.0007969516599008791, "grad_norm": 0.9214300513267517, "learning_rate": 4e-05, "loss": 3.9395, "step": 2 }, { "epoch": 0.0011954274898513188, "grad_norm": 0.7693865299224854, "learning_rate": 6e-05, "loss": 3.7306, "step": 3 }, { "epoch": 0.0015939033198017582, "grad_norm": 0.8011671304702759, "learning_rate": 8e-05, "loss": 3.3512, "step": 4 }, { "epoch": 0.001992379149752198, "grad_norm": 0.8984593749046326, "learning_rate": 0.0001, "loss": 3.6375, "step": 5 }, { "epoch": 0.0023908549797026376, "grad_norm": 1.0371809005737305, "learning_rate": 0.00012, "loss": 3.8592, "step": 6 }, { "epoch": 0.002789330809653077, "grad_norm": 1.1792114973068237, "learning_rate": 0.00014, "loss": 3.8321, "step": 7 }, { "epoch": 0.0031878066396035165, "grad_norm": 1.3046059608459473, "learning_rate": 0.00016, "loss": 3.3336, "step": 8 }, { "epoch": 0.003586282469553956, "grad_norm": 1.4238650798797607, "learning_rate": 0.00018, "loss": 2.9386, "step": 9 }, { "epoch": 0.003984758299504396, "grad_norm": 1.1028273105621338, "learning_rate": 0.0002, "loss": 2.7003, "step": 10 }, { "epoch": 0.0043832341294548355, "grad_norm": 1.1997171640396118, "learning_rate": 0.00019999741592564903, "loss": 2.4654, "step": 11 }, { "epoch": 0.004781709959405275, "grad_norm": 1.5336440801620483, "learning_rate": 0.00019998966383614488, "loss": 2.4423, "step": 12 }, { "epoch": 0.005180185789355715, "grad_norm": 1.8468828201293945, "learning_rate": 0.00019997674413212708, "loss": 2.4003, "step": 13 }, { "epoch": 0.005578661619306154, "grad_norm": 1.7681652307510376, "learning_rate": 0.00019995865748130516, "loss": 2.3207, "step": 14 }, { "epoch": 0.005977137449256593, "grad_norm": 1.0762815475463867, "learning_rate": 0.0001999354048184241, "loss": 2.2285, "step": 15 }, { "epoch": 0.006375613279207033, "grad_norm": 0.8622324466705322, "learning_rate": 0.00019990698734521613, "loss": 1.9898, "step": 16 }, { "epoch": 0.006774089109157473, "grad_norm": 0.9870002865791321, "learning_rate": 0.0001998734065303385, "loss": 1.923, "step": 17 }, { "epoch": 0.007172564939107912, "grad_norm": 1.0574088096618652, "learning_rate": 0.00019983466410929764, "loss": 2.08, "step": 18 }, { "epoch": 0.007571040769058352, "grad_norm": 0.8685367107391357, "learning_rate": 0.0001997907620843595, "loss": 1.9477, "step": 19 }, { "epoch": 0.007969516599008792, "grad_norm": 0.7225976586341858, "learning_rate": 0.00019974170272444604, "loss": 1.9725, "step": 20 }, { "epoch": 0.00836799242895923, "grad_norm": 0.7834375500679016, "learning_rate": 0.00019968748856501788, "loss": 1.9877, "step": 21 }, { "epoch": 0.008766468258909671, "grad_norm": 0.7260671257972717, "learning_rate": 0.00019962812240794343, "loss": 1.9946, "step": 22 }, { "epoch": 0.00916494408886011, "grad_norm": 0.8711982369422913, "learning_rate": 0.000199563607321354, "loss": 1.8275, "step": 23 }, { "epoch": 0.00956341991881055, "grad_norm": 0.5631409287452698, "learning_rate": 0.0001994939466394851, "loss": 1.7041, "step": 24 }, { "epoch": 0.009961895748760989, "grad_norm": 0.5470976829528809, "learning_rate": 0.00019941914396250446, "loss": 1.7129, "step": 25 }, { "epoch": 0.01036037157871143, "grad_norm": 0.5513713359832764, "learning_rate": 0.00019933920315632557, "loss": 1.8914, "step": 26 }, { "epoch": 0.010758847408661868, "grad_norm": 0.5356898903846741, "learning_rate": 0.00019925412835240826, "loss": 1.9072, "step": 27 }, { "epoch": 0.011157323238612307, "grad_norm": 0.5035513043403625, "learning_rate": 0.0001991639239475448, "loss": 1.9054, "step": 28 }, { "epoch": 0.011555799068562748, "grad_norm": 0.5602848529815674, "learning_rate": 0.00019906859460363307, "loss": 1.8175, "step": 29 }, { "epoch": 0.011954274898513187, "grad_norm": 0.5118929743766785, "learning_rate": 0.00019896814524743528, "loss": 1.7964, "step": 30 }, { "epoch": 0.012352750728463627, "grad_norm": 0.45721983909606934, "learning_rate": 0.0001988625810703235, "loss": 1.8377, "step": 31 }, { "epoch": 0.012751226558414066, "grad_norm": 0.44319745898246765, "learning_rate": 0.0001987519075280114, "loss": 1.8383, "step": 32 }, { "epoch": 0.013149702388364506, "grad_norm": 0.49430009722709656, "learning_rate": 0.00019863613034027224, "loss": 1.7603, "step": 33 }, { "epoch": 0.013548178218314945, "grad_norm": 0.4535483419895172, "learning_rate": 0.00019851525549064323, "loss": 1.6921, "step": 34 }, { "epoch": 0.013946654048265384, "grad_norm": 0.48142871260643005, "learning_rate": 0.00019838928922611632, "loss": 1.8217, "step": 35 }, { "epoch": 0.014345129878215825, "grad_norm": 0.6872768402099609, "learning_rate": 0.00019825823805681543, "loss": 1.8212, "step": 36 }, { "epoch": 0.014743605708166263, "grad_norm": 0.4731607437133789, "learning_rate": 0.0001981221087556598, "loss": 1.6508, "step": 37 }, { "epoch": 0.015142081538116704, "grad_norm": 0.42979615926742554, "learning_rate": 0.00019798090835801418, "loss": 1.4883, "step": 38 }, { "epoch": 0.015540557368067143, "grad_norm": 0.5217059254646301, "learning_rate": 0.00019783464416132506, "loss": 1.6702, "step": 39 }, { "epoch": 0.015939033198017583, "grad_norm": 0.5023307204246521, "learning_rate": 0.00019768332372474366, "loss": 1.7604, "step": 40 }, { "epoch": 0.016337509027968024, "grad_norm": 0.5037193894386292, "learning_rate": 0.00019752695486873517, "loss": 1.7254, "step": 41 }, { "epoch": 0.01673598485791846, "grad_norm": 0.4970403015613556, "learning_rate": 0.00019736554567467452, "loss": 1.726, "step": 42 }, { "epoch": 0.0171344606878689, "grad_norm": 0.4856554865837097, "learning_rate": 0.00019719910448442893, "loss": 1.5681, "step": 43 }, { "epoch": 0.017532936517819342, "grad_norm": 0.4940697252750397, "learning_rate": 0.00019702763989992662, "loss": 1.58, "step": 44 }, { "epoch": 0.01793141234776978, "grad_norm": 0.46332672238349915, "learning_rate": 0.00019685116078271223, "loss": 1.4937, "step": 45 }, { "epoch": 0.01832988817772022, "grad_norm": 0.4996075928211212, "learning_rate": 0.00019666967625348906, "loss": 1.8633, "step": 46 }, { "epoch": 0.01872836400767066, "grad_norm": 0.4581417143344879, "learning_rate": 0.00019648319569164736, "loss": 1.6708, "step": 47 }, { "epoch": 0.0191268398376211, "grad_norm": 0.49805447459220886, "learning_rate": 0.00019629172873477995, "loss": 1.6956, "step": 48 }, { "epoch": 0.019525315667571538, "grad_norm": 0.5399609208106995, "learning_rate": 0.0001960952852781838, "loss": 1.8536, "step": 49 }, { "epoch": 0.019923791497521978, "grad_norm": 0.5043647289276123, "learning_rate": 0.0001958938754743489, "loss": 1.9073, "step": 50 }, { "epoch": 0.02032226732747242, "grad_norm": 0.5041136741638184, "learning_rate": 0.0001956875097324334, "loss": 1.8891, "step": 51 }, { "epoch": 0.02072074315742286, "grad_norm": 0.48425522446632385, "learning_rate": 0.00019547619871772574, "loss": 1.7636, "step": 52 }, { "epoch": 0.021119218987373296, "grad_norm": 0.501112163066864, "learning_rate": 0.00019525995335109334, "loss": 1.6945, "step": 53 }, { "epoch": 0.021517694817323737, "grad_norm": 0.4796481132507324, "learning_rate": 0.0001950387848084183, "loss": 1.7757, "step": 54 }, { "epoch": 0.021916170647274177, "grad_norm": 0.5773142576217651, "learning_rate": 0.00019481270452001987, "loss": 1.7415, "step": 55 }, { "epoch": 0.022314646477224614, "grad_norm": 0.4900719225406647, "learning_rate": 0.00019458172417006347, "loss": 1.6834, "step": 56 }, { "epoch": 0.022713122307175055, "grad_norm": 0.4695626497268677, "learning_rate": 0.00019434585569595708, "loss": 1.7537, "step": 57 }, { "epoch": 0.023111598137125496, "grad_norm": 0.49179011583328247, "learning_rate": 0.00019410511128773418, "loss": 1.685, "step": 58 }, { "epoch": 0.023510073967075936, "grad_norm": 0.5011521577835083, "learning_rate": 0.0001938595033874238, "loss": 1.6088, "step": 59 }, { "epoch": 0.023908549797026373, "grad_norm": 0.47553250193595886, "learning_rate": 0.0001936090446884074, "loss": 1.6502, "step": 60 }, { "epoch": 0.024307025626976814, "grad_norm": 0.4945161044597626, "learning_rate": 0.00019335374813476302, "loss": 1.7799, "step": 61 }, { "epoch": 0.024705501456927254, "grad_norm": 0.5294601917266846, "learning_rate": 0.00019309362692059617, "loss": 1.6993, "step": 62 }, { "epoch": 0.02510397728687769, "grad_norm": 0.5426062941551208, "learning_rate": 0.00019282869448935798, "loss": 1.915, "step": 63 }, { "epoch": 0.025502453116828132, "grad_norm": 0.49963027238845825, "learning_rate": 0.00019255896453315052, "loss": 1.7709, "step": 64 }, { "epoch": 0.025900928946778572, "grad_norm": 0.5086182355880737, "learning_rate": 0.000192284450992019, "loss": 1.6436, "step": 65 }, { "epoch": 0.026299404776729013, "grad_norm": 0.4630321264266968, "learning_rate": 0.0001920051680532314, "loss": 1.6038, "step": 66 }, { "epoch": 0.02669788060667945, "grad_norm": 0.4538319408893585, "learning_rate": 0.00019172113015054532, "loss": 1.5576, "step": 67 }, { "epoch": 0.02709635643662989, "grad_norm": 0.511547863483429, "learning_rate": 0.0001914323519634619, "loss": 1.6114, "step": 68 }, { "epoch": 0.02749483226658033, "grad_norm": 0.5467154383659363, "learning_rate": 0.00019113884841646736, "loss": 1.8275, "step": 69 }, { "epoch": 0.027893308096530768, "grad_norm": 0.44893792271614075, "learning_rate": 0.00019084063467826137, "loss": 1.6006, "step": 70 }, { "epoch": 0.02829178392648121, "grad_norm": 0.4380510747432709, "learning_rate": 0.00019053772616097337, "loss": 1.5283, "step": 71 }, { "epoch": 0.02869025975643165, "grad_norm": 0.4855054020881653, "learning_rate": 0.000190230138519366, "loss": 1.949, "step": 72 }, { "epoch": 0.02908873558638209, "grad_norm": 0.509174108505249, "learning_rate": 0.000189917887650026, "loss": 1.9279, "step": 73 }, { "epoch": 0.029487211416332527, "grad_norm": 0.4690839946269989, "learning_rate": 0.00018960098969054255, "loss": 1.8505, "step": 74 }, { "epoch": 0.029885687246282967, "grad_norm": 0.5497538447380066, "learning_rate": 0.00018927946101867347, "loss": 1.9063, "step": 75 }, { "epoch": 0.030284163076233408, "grad_norm": 0.5328514575958252, "learning_rate": 0.0001889533182514986, "loss": 1.8781, "step": 76 }, { "epoch": 0.03068263890618385, "grad_norm": 0.42769238352775574, "learning_rate": 0.0001886225782445612, "loss": 1.5811, "step": 77 }, { "epoch": 0.031081114736134285, "grad_norm": 0.6662131547927856, "learning_rate": 0.00018828725809099655, "loss": 1.8193, "step": 78 }, { "epoch": 0.03147959056608472, "grad_norm": 0.4854792058467865, "learning_rate": 0.0001879473751206489, "loss": 1.6416, "step": 79 }, { "epoch": 0.031878066396035167, "grad_norm": 0.5450350046157837, "learning_rate": 0.00018760294689917553, "loss": 1.9132, "step": 80 }, { "epoch": 0.032276542225985604, "grad_norm": 0.5068654417991638, "learning_rate": 0.00018725399122713912, "loss": 1.7539, "step": 81 }, { "epoch": 0.03267501805593605, "grad_norm": 0.500575065612793, "learning_rate": 0.00018690052613908772, "loss": 1.7219, "step": 82 }, { "epoch": 0.033073493885886485, "grad_norm": 0.481229692697525, "learning_rate": 0.0001865425699026226, "loss": 1.6467, "step": 83 }, { "epoch": 0.03347196971583692, "grad_norm": 0.5118649005889893, "learning_rate": 0.00018618014101745442, "loss": 1.8061, "step": 84 }, { "epoch": 0.033870445545787366, "grad_norm": 0.5075271725654602, "learning_rate": 0.0001858132582144469, "loss": 1.7179, "step": 85 }, { "epoch": 0.0342689213757378, "grad_norm": 0.455424427986145, "learning_rate": 0.00018544194045464886, "loss": 1.5479, "step": 86 }, { "epoch": 0.03466739720568824, "grad_norm": 0.5024828910827637, "learning_rate": 0.00018506620692831428, "loss": 1.726, "step": 87 }, { "epoch": 0.035065873035638684, "grad_norm": 0.5479703545570374, "learning_rate": 0.0001846860770539105, "loss": 1.7982, "step": 88 }, { "epoch": 0.03546434886558912, "grad_norm": 0.4849150478839874, "learning_rate": 0.00018430157047711474, "loss": 1.6496, "step": 89 }, { "epoch": 0.03586282469553956, "grad_norm": 0.5472909212112427, "learning_rate": 0.00018391270706979862, "loss": 1.8333, "step": 90 }, { "epoch": 0.03626130052549, "grad_norm": 0.4992446005344391, "learning_rate": 0.00018351950692900126, "loss": 1.761, "step": 91 }, { "epoch": 0.03665977635544044, "grad_norm": 0.4649079144001007, "learning_rate": 0.00018312199037589068, "loss": 1.5256, "step": 92 }, { "epoch": 0.03705825218539088, "grad_norm": 0.5369842648506165, "learning_rate": 0.00018272017795471345, "loss": 1.78, "step": 93 }, { "epoch": 0.03745672801534132, "grad_norm": 0.4625012278556824, "learning_rate": 0.000182314090431733, "loss": 1.5817, "step": 94 }, { "epoch": 0.03785520384529176, "grad_norm": 0.5325396060943604, "learning_rate": 0.00018190374879415632, "loss": 1.698, "step": 95 }, { "epoch": 0.0382536796752422, "grad_norm": 0.4603791832923889, "learning_rate": 0.00018148917424904953, "loss": 1.5092, "step": 96 }, { "epoch": 0.03865215550519264, "grad_norm": 0.4525664746761322, "learning_rate": 0.0001810703882222415, "loss": 1.4083, "step": 97 }, { "epoch": 0.039050631335143075, "grad_norm": 0.5617400407791138, "learning_rate": 0.00018064741235721687, "loss": 1.6626, "step": 98 }, { "epoch": 0.03944910716509352, "grad_norm": 0.5286575555801392, "learning_rate": 0.00018022026851399737, "loss": 1.7368, "step": 99 }, { "epoch": 0.039847582995043956, "grad_norm": 0.5342997908592224, "learning_rate": 0.0001797889787680119, "loss": 1.8665, "step": 100 }, { "epoch": 0.039847582995043956, "eval_loss": 1.7500040531158447, "eval_runtime": 602.8533, "eval_samples_per_second": 14.023, "eval_steps_per_second": 3.507, "step": 100 }, { "epoch": 0.04024605882499439, "grad_norm": 0.4774135649204254, "learning_rate": 0.00017935356540895597, "loss": 1.6952, "step": 101 }, { "epoch": 0.04064453465494484, "grad_norm": 0.6040295362472534, "learning_rate": 0.00017891405093963938, "loss": 1.8068, "step": 102 }, { "epoch": 0.041043010484895275, "grad_norm": 0.5168588757514954, "learning_rate": 0.00017847045807482345, "loss": 1.7252, "step": 103 }, { "epoch": 0.04144148631484572, "grad_norm": 0.4739170968532562, "learning_rate": 0.00017802280974004716, "loss": 1.7146, "step": 104 }, { "epoch": 0.041839962144796156, "grad_norm": 0.5354509353637695, "learning_rate": 0.000177571129070442, "loss": 1.868, "step": 105 }, { "epoch": 0.04223843797474659, "grad_norm": 0.46404990553855896, "learning_rate": 0.00017711543940953668, "loss": 1.698, "step": 106 }, { "epoch": 0.04263691380469704, "grad_norm": 0.45692166686058044, "learning_rate": 0.00017665576430805053, "loss": 1.649, "step": 107 }, { "epoch": 0.043035389634647474, "grad_norm": 0.4770927131175995, "learning_rate": 0.0001761921275226763, "loss": 1.8552, "step": 108 }, { "epoch": 0.04343386546459791, "grad_norm": 0.4677036702632904, "learning_rate": 0.00017572455301485249, "loss": 1.6415, "step": 109 }, { "epoch": 0.043832341294548355, "grad_norm": 0.49166563153266907, "learning_rate": 0.00017525306494952498, "loss": 1.5864, "step": 110 }, { "epoch": 0.04423081712449879, "grad_norm": 0.551313579082489, "learning_rate": 0.0001747776876938981, "loss": 1.8246, "step": 111 }, { "epoch": 0.04462929295444923, "grad_norm": 0.5011689066886902, "learning_rate": 0.00017429844581617532, "loss": 1.6335, "step": 112 }, { "epoch": 0.04502776878439967, "grad_norm": 0.510704755783081, "learning_rate": 0.00017381536408428948, "loss": 1.7084, "step": 113 }, { "epoch": 0.04542624461435011, "grad_norm": 0.4545946419239044, "learning_rate": 0.00017332846746462288, "loss": 1.5209, "step": 114 }, { "epoch": 0.04582472044430055, "grad_norm": 0.4840458631515503, "learning_rate": 0.0001728377811207168, "loss": 1.7235, "step": 115 }, { "epoch": 0.04622319627425099, "grad_norm": 0.4970642626285553, "learning_rate": 0.00017234333041197126, "loss": 1.6567, "step": 116 }, { "epoch": 0.04662167210420143, "grad_norm": 0.5095712542533875, "learning_rate": 0.00017184514089233405, "loss": 1.7205, "step": 117 }, { "epoch": 0.04702014793415187, "grad_norm": 0.5349841713905334, "learning_rate": 0.00017134323830898037, "loss": 1.7285, "step": 118 }, { "epoch": 0.04741862376410231, "grad_norm": 0.5475637316703796, "learning_rate": 0.00017083764860098205, "loss": 1.6601, "step": 119 }, { "epoch": 0.047817099594052746, "grad_norm": 0.5360651016235352, "learning_rate": 0.0001703283978979671, "loss": 1.6062, "step": 120 }, { "epoch": 0.04821557542400319, "grad_norm": 0.49149248003959656, "learning_rate": 0.00016981551251876904, "loss": 1.7332, "step": 121 }, { "epoch": 0.04861405125395363, "grad_norm": 0.46091562509536743, "learning_rate": 0.00016929901897006698, "loss": 1.5933, "step": 122 }, { "epoch": 0.049012527083904064, "grad_norm": 0.5389519333839417, "learning_rate": 0.0001687789439450156, "loss": 1.9277, "step": 123 }, { "epoch": 0.04941100291385451, "grad_norm": 0.4960940182209015, "learning_rate": 0.00016825531432186543, "loss": 1.7398, "step": 124 }, { "epoch": 0.049809478743804945, "grad_norm": 0.5458736419677734, "learning_rate": 0.00016772815716257412, "loss": 1.7824, "step": 125 }, { "epoch": 0.05020795457375538, "grad_norm": 0.5217113494873047, "learning_rate": 0.00016719749971140754, "loss": 1.8169, "step": 126 }, { "epoch": 0.05060643040370583, "grad_norm": 0.49912309646606445, "learning_rate": 0.0001666633693935319, "loss": 1.7099, "step": 127 }, { "epoch": 0.051004906233656264, "grad_norm": 0.519611120223999, "learning_rate": 0.00016612579381359622, "loss": 1.5635, "step": 128 }, { "epoch": 0.05140338206360671, "grad_norm": 0.5147966146469116, "learning_rate": 0.00016558480075430594, "loss": 1.6643, "step": 129 }, { "epoch": 0.051801857893557145, "grad_norm": 0.539840817451477, "learning_rate": 0.00016504041817498678, "loss": 1.796, "step": 130 }, { "epoch": 0.05220033372350758, "grad_norm": 0.5166546106338501, "learning_rate": 0.00016449267421013994, "loss": 1.5948, "step": 131 }, { "epoch": 0.052598809553458026, "grad_norm": 0.5418827533721924, "learning_rate": 0.00016394159716798807, "loss": 1.8935, "step": 132 }, { "epoch": 0.05299728538340846, "grad_norm": 0.5098159313201904, "learning_rate": 0.00016338721552901212, "loss": 1.6439, "step": 133 }, { "epoch": 0.0533957612133589, "grad_norm": 0.5153954029083252, "learning_rate": 0.0001628295579444796, "loss": 1.7484, "step": 134 }, { "epoch": 0.053794237043309344, "grad_norm": 0.5499337315559387, "learning_rate": 0.0001622686532349637, "loss": 1.665, "step": 135 }, { "epoch": 0.05419271287325978, "grad_norm": 0.5195444226264954, "learning_rate": 0.00016170453038885394, "loss": 1.6813, "step": 136 }, { "epoch": 0.05459118870321022, "grad_norm": 0.5251967310905457, "learning_rate": 0.0001611372185608578, "loss": 1.7325, "step": 137 }, { "epoch": 0.05498966453316066, "grad_norm": 0.5916984677314758, "learning_rate": 0.0001605667470704942, "loss": 1.8083, "step": 138 }, { "epoch": 0.0553881403631111, "grad_norm": 0.5287485718727112, "learning_rate": 0.0001599931454005781, "loss": 1.8145, "step": 139 }, { "epoch": 0.055786616193061536, "grad_norm": 0.5018830299377441, "learning_rate": 0.00015941644319569665, "loss": 1.6038, "step": 140 }, { "epoch": 0.05618509202301198, "grad_norm": 0.4870772063732147, "learning_rate": 0.00015883667026067745, "loss": 1.6, "step": 141 }, { "epoch": 0.05658356785296242, "grad_norm": 0.4964452087879181, "learning_rate": 0.00015825385655904788, "loss": 1.7953, "step": 142 }, { "epoch": 0.05698204368291286, "grad_norm": 0.4588499367237091, "learning_rate": 0.00015766803221148673, "loss": 1.5708, "step": 143 }, { "epoch": 0.0573805195128633, "grad_norm": 0.521144449710846, "learning_rate": 0.00015707922749426737, "loss": 1.7794, "step": 144 }, { "epoch": 0.057778995342813735, "grad_norm": 0.5591409206390381, "learning_rate": 0.00015648747283769317, "loss": 1.9377, "step": 145 }, { "epoch": 0.05817747117276418, "grad_norm": 0.43348705768585205, "learning_rate": 0.00015589279882452476, "loss": 1.396, "step": 146 }, { "epoch": 0.058575947002714616, "grad_norm": 0.46277427673339844, "learning_rate": 0.0001552952361883994, "loss": 1.6397, "step": 147 }, { "epoch": 0.058974422832665054, "grad_norm": 0.49379056692123413, "learning_rate": 0.00015469481581224272, "loss": 1.7281, "step": 148 }, { "epoch": 0.0593728986626155, "grad_norm": 0.49816733598709106, "learning_rate": 0.00015409156872667258, "loss": 1.6383, "step": 149 }, { "epoch": 0.059771374492565935, "grad_norm": 0.44194599986076355, "learning_rate": 0.0001534855261083954, "loss": 1.552, "step": 150 }, { "epoch": 0.06016985032251637, "grad_norm": 0.4125038683414459, "learning_rate": 0.00015287671927859494, "loss": 1.4642, "step": 151 }, { "epoch": 0.060568326152466816, "grad_norm": 0.4800686836242676, "learning_rate": 0.00015226517970131343, "loss": 1.5824, "step": 152 }, { "epoch": 0.06096680198241725, "grad_norm": 0.5546180605888367, "learning_rate": 0.00015165093898182562, "loss": 1.8809, "step": 153 }, { "epoch": 0.0613652778123677, "grad_norm": 0.45037057995796204, "learning_rate": 0.00015103402886500525, "loss": 1.4952, "step": 154 }, { "epoch": 0.061763753642318134, "grad_norm": 0.531578004360199, "learning_rate": 0.00015041448123368455, "loss": 1.7191, "step": 155 }, { "epoch": 0.06216222947226857, "grad_norm": 0.48476216197013855, "learning_rate": 0.00014979232810700637, "loss": 1.7732, "step": 156 }, { "epoch": 0.06256070530221901, "grad_norm": 0.48990944027900696, "learning_rate": 0.0001491676016387694, "loss": 1.6629, "step": 157 }, { "epoch": 0.06295918113216945, "grad_norm": 0.5288809537887573, "learning_rate": 0.00014854033411576659, "loss": 1.6989, "step": 158 }, { "epoch": 0.0633576569621199, "grad_norm": 0.520767331123352, "learning_rate": 0.00014791055795611624, "loss": 1.6098, "step": 159 }, { "epoch": 0.06375613279207033, "grad_norm": 0.49702882766723633, "learning_rate": 0.00014727830570758678, "loss": 1.5923, "step": 160 }, { "epoch": 0.06415460862202077, "grad_norm": 0.555203378200531, "learning_rate": 0.0001466436100459146, "loss": 1.8835, "step": 161 }, { "epoch": 0.06455308445197121, "grad_norm": 0.49902498722076416, "learning_rate": 0.00014600650377311522, "loss": 1.6217, "step": 162 }, { "epoch": 0.06495156028192164, "grad_norm": 0.4641701877117157, "learning_rate": 0.0001453670198157883, "loss": 1.6337, "step": 163 }, { "epoch": 0.0653500361118721, "grad_norm": 0.5301873087882996, "learning_rate": 0.00014472519122341566, "loss": 1.7073, "step": 164 }, { "epoch": 0.06574851194182253, "grad_norm": 0.5231285095214844, "learning_rate": 0.00014408105116665336, "loss": 1.8093, "step": 165 }, { "epoch": 0.06614698777177297, "grad_norm": 0.45359745621681213, "learning_rate": 0.00014343463293561734, "loss": 1.6956, "step": 166 }, { "epoch": 0.0665454636017234, "grad_norm": 0.44360700249671936, "learning_rate": 0.00014278596993816308, "loss": 1.6548, "step": 167 }, { "epoch": 0.06694393943167384, "grad_norm": 0.4991564452648163, "learning_rate": 0.00014213509569815884, "loss": 1.7701, "step": 168 }, { "epoch": 0.06734241526162428, "grad_norm": 0.4743082821369171, "learning_rate": 0.00014148204385375321, "loss": 1.7401, "step": 169 }, { "epoch": 0.06774089109157473, "grad_norm": 0.5044605731964111, "learning_rate": 0.0001408268481556366, "loss": 1.6009, "step": 170 }, { "epoch": 0.06813936692152517, "grad_norm": 0.49447423219680786, "learning_rate": 0.00014016954246529696, "loss": 1.6208, "step": 171 }, { "epoch": 0.0685378427514756, "grad_norm": 0.46173223853111267, "learning_rate": 0.0001395101607532698, "loss": 1.6046, "step": 172 }, { "epoch": 0.06893631858142604, "grad_norm": 0.5149829983711243, "learning_rate": 0.00013884873709738257, "loss": 1.6744, "step": 173 }, { "epoch": 0.06933479441137648, "grad_norm": 0.4951649010181427, "learning_rate": 0.00013818530568099327, "loss": 1.7894, "step": 174 }, { "epoch": 0.06973327024132693, "grad_norm": 0.4963831603527069, "learning_rate": 0.00013751990079122412, "loss": 1.7119, "step": 175 }, { "epoch": 0.07013174607127737, "grad_norm": 0.48155760765075684, "learning_rate": 0.00013685255681718922, "loss": 1.6867, "step": 176 }, { "epoch": 0.0705302219012278, "grad_norm": 0.4654506742954254, "learning_rate": 0.0001361833082482175, "loss": 1.535, "step": 177 }, { "epoch": 0.07092869773117824, "grad_norm": 0.4506324827671051, "learning_rate": 0.0001355121896720703, "loss": 1.5751, "step": 178 }, { "epoch": 0.07132717356112868, "grad_norm": 0.5030463933944702, "learning_rate": 0.00013483923577315348, "loss": 1.7494, "step": 179 }, { "epoch": 0.07172564939107912, "grad_norm": 0.4498404264450073, "learning_rate": 0.00013416448133072526, "loss": 1.5368, "step": 180 }, { "epoch": 0.07212412522102957, "grad_norm": 0.5255778431892395, "learning_rate": 0.00013348796121709862, "loss": 1.6465, "step": 181 }, { "epoch": 0.07252260105098, "grad_norm": 0.566211998462677, "learning_rate": 0.00013280971039583906, "loss": 1.6151, "step": 182 }, { "epoch": 0.07292107688093044, "grad_norm": 0.5025095343589783, "learning_rate": 0.0001321297639199575, "loss": 1.6099, "step": 183 }, { "epoch": 0.07331955271088088, "grad_norm": 0.5001662969589233, "learning_rate": 0.000131448156930099, "loss": 1.5435, "step": 184 }, { "epoch": 0.07371802854083132, "grad_norm": 0.5104225277900696, "learning_rate": 0.0001307649246527263, "loss": 1.7083, "step": 185 }, { "epoch": 0.07411650437078177, "grad_norm": 0.4568442404270172, "learning_rate": 0.0001300801023982995, "loss": 1.555, "step": 186 }, { "epoch": 0.0745149802007322, "grad_norm": 0.5155336856842041, "learning_rate": 0.00012939372555945112, "loss": 1.6897, "step": 187 }, { "epoch": 0.07491345603068264, "grad_norm": 0.5319402813911438, "learning_rate": 0.0001287058296091567, "loss": 1.7909, "step": 188 }, { "epoch": 0.07531193186063308, "grad_norm": 0.5743489861488342, "learning_rate": 0.00012801645009890195, "loss": 1.9705, "step": 189 }, { "epoch": 0.07571040769058351, "grad_norm": 0.4790689945220947, "learning_rate": 0.0001273256226568451, "loss": 1.4495, "step": 190 }, { "epoch": 0.07610888352053395, "grad_norm": 0.47618716955184937, "learning_rate": 0.00012663338298597563, "loss": 1.7269, "step": 191 }, { "epoch": 0.0765073593504844, "grad_norm": 0.429544597864151, "learning_rate": 0.00012593976686226904, "loss": 1.488, "step": 192 }, { "epoch": 0.07690583518043484, "grad_norm": 0.5185227394104004, "learning_rate": 0.0001252448101328381, "loss": 1.6479, "step": 193 }, { "epoch": 0.07730431101038528, "grad_norm": 0.44675058126449585, "learning_rate": 0.00012454854871407994, "loss": 1.5254, "step": 194 }, { "epoch": 0.07770278684033571, "grad_norm": 0.5390235185623169, "learning_rate": 0.00012385101858982005, "loss": 1.4862, "step": 195 }, { "epoch": 0.07810126267028615, "grad_norm": 0.4766441583633423, "learning_rate": 0.00012315225580945252, "loss": 1.6441, "step": 196 }, { "epoch": 0.0784997385002366, "grad_norm": 0.46869808435440063, "learning_rate": 0.0001224522964860769, "loss": 1.7038, "step": 197 }, { "epoch": 0.07889821433018704, "grad_norm": 0.42946845293045044, "learning_rate": 0.00012175117679463187, "loss": 1.5641, "step": 198 }, { "epoch": 0.07929669016013748, "grad_norm": 0.4762909412384033, "learning_rate": 0.00012104893297002567, "loss": 1.6807, "step": 199 }, { "epoch": 0.07969516599008791, "grad_norm": 0.4462391138076782, "learning_rate": 0.0001203456013052634, "loss": 1.5805, "step": 200 }, { "epoch": 0.07969516599008791, "eval_loss": 1.7198740243911743, "eval_runtime": 603.1346, "eval_samples_per_second": 14.017, "eval_steps_per_second": 3.505, "step": 200 }, { "epoch": 0.08009364182003835, "grad_norm": 0.5120438933372498, "learning_rate": 0.00011964121814957137, "loss": 1.7626, "step": 201 }, { "epoch": 0.08049211764998879, "grad_norm": 0.5081213712692261, "learning_rate": 0.00011893581990651848, "loss": 1.7664, "step": 202 }, { "epoch": 0.08089059347993924, "grad_norm": 0.45303136110305786, "learning_rate": 0.00011822944303213486, "loss": 1.4845, "step": 203 }, { "epoch": 0.08128906930988967, "grad_norm": 0.4521328806877136, "learning_rate": 0.00011752212403302784, "loss": 1.4534, "step": 204 }, { "epoch": 0.08168754513984011, "grad_norm": 0.4870966970920563, "learning_rate": 0.00011681389946449504, "loss": 1.568, "step": 205 }, { "epoch": 0.08208602096979055, "grad_norm": 0.5694287419319153, "learning_rate": 0.00011610480592863531, "loss": 1.9597, "step": 206 }, { "epoch": 0.08248449679974099, "grad_norm": 0.5484179258346558, "learning_rate": 0.00011539488007245702, "loss": 1.8557, "step": 207 }, { "epoch": 0.08288297262969144, "grad_norm": 0.44166100025177, "learning_rate": 0.00011468415858598411, "loss": 1.5371, "step": 208 }, { "epoch": 0.08328144845964187, "grad_norm": 0.5365661978721619, "learning_rate": 0.00011397267820035986, "loss": 1.7778, "step": 209 }, { "epoch": 0.08367992428959231, "grad_norm": 0.45911017060279846, "learning_rate": 0.00011326047568594851, "loss": 1.5729, "step": 210 }, { "epoch": 0.08407840011954275, "grad_norm": 0.5417599678039551, "learning_rate": 0.00011254758785043515, "loss": 1.8296, "step": 211 }, { "epoch": 0.08447687594949319, "grad_norm": 0.48594942688941956, "learning_rate": 0.0001118340515369232, "loss": 1.7837, "step": 212 }, { "epoch": 0.08487535177944362, "grad_norm": 0.4888298511505127, "learning_rate": 0.00011111990362203033, "loss": 1.6575, "step": 213 }, { "epoch": 0.08527382760939407, "grad_norm": 0.5313907265663147, "learning_rate": 0.00011040518101398276, "loss": 1.7803, "step": 214 }, { "epoch": 0.08567230343934451, "grad_norm": 0.5065906643867493, "learning_rate": 0.00010968992065070769, "loss": 1.6539, "step": 215 }, { "epoch": 0.08607077926929495, "grad_norm": 0.46294957399368286, "learning_rate": 0.00010897415949792427, "loss": 1.6412, "step": 216 }, { "epoch": 0.08646925509924538, "grad_norm": 0.5068647861480713, "learning_rate": 0.00010825793454723325, "loss": 1.7173, "step": 217 }, { "epoch": 0.08686773092919582, "grad_norm": 0.45219966769218445, "learning_rate": 0.0001075412828142051, "loss": 1.4531, "step": 218 }, { "epoch": 0.08726620675914626, "grad_norm": 0.48035022616386414, "learning_rate": 0.0001068242413364671, "loss": 1.6187, "step": 219 }, { "epoch": 0.08766468258909671, "grad_norm": 0.5463985204696655, "learning_rate": 0.00010610684717178905, "loss": 1.749, "step": 220 }, { "epoch": 0.08806315841904715, "grad_norm": 0.4818764626979828, "learning_rate": 0.00010538913739616816, "loss": 1.4508, "step": 221 }, { "epoch": 0.08846163424899758, "grad_norm": 0.5018213987350464, "learning_rate": 0.00010467114910191289, "loss": 1.6853, "step": 222 }, { "epoch": 0.08886011007894802, "grad_norm": 0.5122075080871582, "learning_rate": 0.00010395291939572593, "loss": 1.6991, "step": 223 }, { "epoch": 0.08925858590889846, "grad_norm": 0.48191148042678833, "learning_rate": 0.00010323448539678653, "loss": 1.6428, "step": 224 }, { "epoch": 0.08965706173884891, "grad_norm": 0.4748276472091675, "learning_rate": 0.00010251588423483205, "loss": 1.7059, "step": 225 }, { "epoch": 0.09005553756879935, "grad_norm": 0.5150067806243896, "learning_rate": 0.0001017971530482392, "loss": 1.7409, "step": 226 }, { "epoch": 0.09045401339874978, "grad_norm": 0.5893855094909668, "learning_rate": 0.00010107832898210439, "loss": 1.7183, "step": 227 }, { "epoch": 0.09085248922870022, "grad_norm": 0.5195055603981018, "learning_rate": 0.00010035944918632429, "loss": 1.8396, "step": 228 }, { "epoch": 0.09125096505865066, "grad_norm": 0.5996953845024109, "learning_rate": 9.96405508136757e-05, "loss": 1.9944, "step": 229 }, { "epoch": 0.0916494408886011, "grad_norm": 0.5057780146598816, "learning_rate": 9.892167101789564e-05, "loss": 1.6473, "step": 230 }, { "epoch": 0.09204791671855155, "grad_norm": 0.46774283051490784, "learning_rate": 9.820284695176082e-05, "loss": 1.5973, "step": 231 }, { "epoch": 0.09244639254850198, "grad_norm": 0.46982142329216003, "learning_rate": 9.748411576516794e-05, "loss": 1.6464, "step": 232 }, { "epoch": 0.09284486837845242, "grad_norm": 0.4873621165752411, "learning_rate": 9.676551460321349e-05, "loss": 1.6629, "step": 233 }, { "epoch": 0.09324334420840286, "grad_norm": 0.4866909682750702, "learning_rate": 9.60470806042741e-05, "loss": 1.6262, "step": 234 }, { "epoch": 0.0936418200383533, "grad_norm": 0.5320809483528137, "learning_rate": 9.532885089808713e-05, "loss": 1.7158, "step": 235 }, { "epoch": 0.09404029586830374, "grad_norm": 0.47346270084381104, "learning_rate": 9.461086260383187e-05, "loss": 1.6044, "step": 236 }, { "epoch": 0.09443877169825418, "grad_norm": 0.5696609616279602, "learning_rate": 9.389315282821097e-05, "loss": 1.7883, "step": 237 }, { "epoch": 0.09483724752820462, "grad_norm": 0.4926949441432953, "learning_rate": 9.317575866353292e-05, "loss": 1.7306, "step": 238 }, { "epoch": 0.09523572335815506, "grad_norm": 0.5241943001747131, "learning_rate": 9.245871718579491e-05, "loss": 1.732, "step": 239 }, { "epoch": 0.09563419918810549, "grad_norm": 0.5425236225128174, "learning_rate": 9.174206545276677e-05, "loss": 1.6209, "step": 240 }, { "epoch": 0.09603267501805593, "grad_norm": 0.5216458439826965, "learning_rate": 9.102584050207578e-05, "loss": 1.74, "step": 241 }, { "epoch": 0.09643115084800638, "grad_norm": 0.5082316994667053, "learning_rate": 9.031007934929236e-05, "loss": 1.6836, "step": 242 }, { "epoch": 0.09682962667795682, "grad_norm": 0.48965132236480713, "learning_rate": 8.959481898601728e-05, "loss": 1.7055, "step": 243 }, { "epoch": 0.09722810250790725, "grad_norm": 0.514946699142456, "learning_rate": 8.888009637796968e-05, "loss": 1.684, "step": 244 }, { "epoch": 0.09762657833785769, "grad_norm": 0.551802396774292, "learning_rate": 8.81659484630768e-05, "loss": 1.8566, "step": 245 }, { "epoch": 0.09802505416780813, "grad_norm": 0.4790934920310974, "learning_rate": 8.745241214956483e-05, "loss": 1.6461, "step": 246 }, { "epoch": 0.09842352999775858, "grad_norm": 0.5450412631034851, "learning_rate": 8.673952431405148e-05, "loss": 1.7215, "step": 247 }, { "epoch": 0.09882200582770902, "grad_norm": 0.5299497842788696, "learning_rate": 8.602732179964017e-05, "loss": 1.7454, "step": 248 }, { "epoch": 0.09922048165765945, "grad_norm": 0.5010784268379211, "learning_rate": 8.531584141401591e-05, "loss": 1.6028, "step": 249 }, { "epoch": 0.09961895748760989, "grad_norm": 0.4926188886165619, "learning_rate": 8.4605119927543e-05, "loss": 1.6837, "step": 250 }, { "epoch": 0.10001743331756033, "grad_norm": 0.5703017115592957, "learning_rate": 8.38951940713647e-05, "loss": 1.8639, "step": 251 }, { "epoch": 0.10041590914751077, "grad_norm": 0.5429261326789856, "learning_rate": 8.318610053550497e-05, "loss": 1.7258, "step": 252 }, { "epoch": 0.10081438497746122, "grad_norm": 0.48338782787323, "learning_rate": 8.247787596697218e-05, "loss": 1.5873, "step": 253 }, { "epoch": 0.10121286080741165, "grad_norm": 0.506877601146698, "learning_rate": 8.177055696786516e-05, "loss": 1.6736, "step": 254 }, { "epoch": 0.10161133663736209, "grad_norm": 0.537820041179657, "learning_rate": 8.106418009348157e-05, "loss": 1.9075, "step": 255 }, { "epoch": 0.10200981246731253, "grad_norm": 0.4729152023792267, "learning_rate": 8.035878185042868e-05, "loss": 1.5359, "step": 256 }, { "epoch": 0.10240828829726296, "grad_norm": 0.4413747191429138, "learning_rate": 7.965439869473664e-05, "loss": 1.6245, "step": 257 }, { "epoch": 0.10280676412721342, "grad_norm": 0.5398510694503784, "learning_rate": 7.895106702997437e-05, "loss": 1.6318, "step": 258 }, { "epoch": 0.10320523995716385, "grad_norm": 0.5172785520553589, "learning_rate": 7.824882320536814e-05, "loss": 1.6601, "step": 259 }, { "epoch": 0.10360371578711429, "grad_norm": 0.4824993908405304, "learning_rate": 7.754770351392311e-05, "loss": 1.5672, "step": 260 }, { "epoch": 0.10400219161706473, "grad_norm": 0.4745709300041199, "learning_rate": 7.684774419054747e-05, "loss": 1.7128, "step": 261 }, { "epoch": 0.10440066744701516, "grad_norm": 0.5071855783462524, "learning_rate": 7.614898141017996e-05, "loss": 1.7368, "step": 262 }, { "epoch": 0.1047991432769656, "grad_norm": 0.5377690196037292, "learning_rate": 7.54514512859201e-05, "loss": 1.8659, "step": 263 }, { "epoch": 0.10519761910691605, "grad_norm": 0.4762866199016571, "learning_rate": 7.475518986716194e-05, "loss": 1.6012, "step": 264 }, { "epoch": 0.10559609493686649, "grad_norm": 0.46296924352645874, "learning_rate": 7.406023313773097e-05, "loss": 1.5484, "step": 265 }, { "epoch": 0.10599457076681693, "grad_norm": 0.47845426201820374, "learning_rate": 7.336661701402439e-05, "loss": 1.6248, "step": 266 }, { "epoch": 0.10639304659676736, "grad_norm": 0.48351001739501953, "learning_rate": 7.267437734315492e-05, "loss": 1.5549, "step": 267 }, { "epoch": 0.1067915224267178, "grad_norm": 0.48554375767707825, "learning_rate": 7.198354990109805e-05, "loss": 1.5708, "step": 268 }, { "epoch": 0.10718999825666825, "grad_norm": 0.47755077481269836, "learning_rate": 7.129417039084333e-05, "loss": 1.5864, "step": 269 }, { "epoch": 0.10758847408661869, "grad_norm": 0.4970269799232483, "learning_rate": 7.060627444054893e-05, "loss": 1.6373, "step": 270 }, { "epoch": 0.10798694991656912, "grad_norm": 0.47547978162765503, "learning_rate": 6.99198976017005e-05, "loss": 1.7433, "step": 271 }, { "epoch": 0.10838542574651956, "grad_norm": 0.5408848524093628, "learning_rate": 6.923507534727373e-05, "loss": 1.77, "step": 272 }, { "epoch": 0.10878390157647, "grad_norm": 0.49777430295944214, "learning_rate": 6.855184306990106e-05, "loss": 1.6071, "step": 273 }, { "epoch": 0.10918237740642044, "grad_norm": 0.4691534638404846, "learning_rate": 6.78702360800425e-05, "loss": 1.5913, "step": 274 }, { "epoch": 0.10958085323637089, "grad_norm": 0.5284269452095032, "learning_rate": 6.719028960416098e-05, "loss": 1.8038, "step": 275 }, { "epoch": 0.10997932906632132, "grad_norm": 0.49061042070388794, "learning_rate": 6.651203878290139e-05, "loss": 1.5991, "step": 276 }, { "epoch": 0.11037780489627176, "grad_norm": 0.5676330327987671, "learning_rate": 6.583551866927475e-05, "loss": 1.8924, "step": 277 }, { "epoch": 0.1107762807262222, "grad_norm": 0.5392544865608215, "learning_rate": 6.516076422684654e-05, "loss": 1.7611, "step": 278 }, { "epoch": 0.11117475655617264, "grad_norm": 0.5719506740570068, "learning_rate": 6.448781032792972e-05, "loss": 1.756, "step": 279 }, { "epoch": 0.11157323238612307, "grad_norm": 0.4809233248233795, "learning_rate": 6.381669175178248e-05, "loss": 1.641, "step": 280 }, { "epoch": 0.11197170821607352, "grad_norm": 0.48434188961982727, "learning_rate": 6.31474431828108e-05, "loss": 1.579, "step": 281 }, { "epoch": 0.11237018404602396, "grad_norm": 0.5024405717849731, "learning_rate": 6.248009920877592e-05, "loss": 1.6653, "step": 282 }, { "epoch": 0.1127686598759744, "grad_norm": 0.441279798746109, "learning_rate": 6.181469431900672e-05, "loss": 1.5105, "step": 283 }, { "epoch": 0.11316713570592483, "grad_norm": 0.5233234763145447, "learning_rate": 6.115126290261745e-05, "loss": 1.7695, "step": 284 }, { "epoch": 0.11356561153587527, "grad_norm": 0.5281261801719666, "learning_rate": 6.048983924673022e-05, "loss": 1.76, "step": 285 }, { "epoch": 0.11396408736582572, "grad_norm": 0.534590482711792, "learning_rate": 5.983045753470308e-05, "loss": 1.8155, "step": 286 }, { "epoch": 0.11436256319577616, "grad_norm": 0.5247072577476501, "learning_rate": 5.917315184436345e-05, "loss": 1.6073, "step": 287 }, { "epoch": 0.1147610390257266, "grad_norm": 0.4829355776309967, "learning_rate": 5.851795614624682e-05, "loss": 1.5224, "step": 288 }, { "epoch": 0.11515951485567703, "grad_norm": 0.516015887260437, "learning_rate": 5.786490430184115e-05, "loss": 1.6813, "step": 289 }, { "epoch": 0.11555799068562747, "grad_norm": 0.48894891142845154, "learning_rate": 5.72140300618369e-05, "loss": 1.7965, "step": 290 }, { "epoch": 0.11595646651557791, "grad_norm": 0.49149996042251587, "learning_rate": 5.656536706438267e-05, "loss": 1.6388, "step": 291 }, { "epoch": 0.11635494234552836, "grad_norm": 0.4835774898529053, "learning_rate": 5.591894883334667e-05, "loss": 1.6856, "step": 292 }, { "epoch": 0.1167534181754788, "grad_norm": 0.5278857946395874, "learning_rate": 5.5274808776584367e-05, "loss": 1.6883, "step": 293 }, { "epoch": 0.11715189400542923, "grad_norm": 0.4995588958263397, "learning_rate": 5.463298018421171e-05, "loss": 1.519, "step": 294 }, { "epoch": 0.11755036983537967, "grad_norm": 0.5236543416976929, "learning_rate": 5.399349622688479e-05, "loss": 1.7372, "step": 295 }, { "epoch": 0.11794884566533011, "grad_norm": 0.45699524879455566, "learning_rate": 5.335638995408545e-05, "loss": 1.6082, "step": 296 }, { "epoch": 0.11834732149528056, "grad_norm": 0.5191316604614258, "learning_rate": 5.272169429241325e-05, "loss": 1.7123, "step": 297 }, { "epoch": 0.118745797325231, "grad_norm": 0.42880895733833313, "learning_rate": 5.208944204388377e-05, "loss": 1.4809, "step": 298 }, { "epoch": 0.11914427315518143, "grad_norm": 0.5574065446853638, "learning_rate": 5.145966588423341e-05, "loss": 1.8128, "step": 299 }, { "epoch": 0.11954274898513187, "grad_norm": 0.47847244143486023, "learning_rate": 5.0832398361230596e-05, "loss": 1.5699, "step": 300 }, { "epoch": 0.11954274898513187, "eval_loss": 1.700640082359314, "eval_runtime": 603.0633, "eval_samples_per_second": 14.018, "eval_steps_per_second": 3.505, "step": 300 } ], "logging_steps": 1, "max_steps": 447, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.388464319922176e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }