{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.13992537313432835, "eval_steps": 38, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009328358208955224, "grad_norm": 2.931856870651245, "learning_rate": 4e-05, "loss": 3.1183, "step": 1 }, { "epoch": 0.0009328358208955224, "eval_loss": 0.7989240884780884, "eval_runtime": 61.068, "eval_samples_per_second": 7.402, "eval_steps_per_second": 3.701, "step": 1 }, { "epoch": 0.0018656716417910447, "grad_norm": 3.0857083797454834, "learning_rate": 8e-05, "loss": 3.3967, "step": 2 }, { "epoch": 0.002798507462686567, "grad_norm": 2.9495201110839844, "learning_rate": 0.00012, "loss": 3.4361, "step": 3 }, { "epoch": 0.0037313432835820895, "grad_norm": 2.4459776878356934, "learning_rate": 0.00016, "loss": 3.2275, "step": 4 }, { "epoch": 0.0046641791044776115, "grad_norm": 2.410581588745117, "learning_rate": 0.0002, "loss": 3.0069, "step": 5 }, { "epoch": 0.005597014925373134, "grad_norm": 1.99094820022583, "learning_rate": 0.00024, "loss": 2.6997, "step": 6 }, { "epoch": 0.0065298507462686565, "grad_norm": 1.850408911705017, "learning_rate": 0.00028, "loss": 2.6469, "step": 7 }, { "epoch": 0.007462686567164179, "grad_norm": 2.972104072570801, "learning_rate": 0.00032, "loss": 3.351, "step": 8 }, { "epoch": 0.008395522388059701, "grad_norm": 2.8936173915863037, "learning_rate": 0.00036, "loss": 2.8041, "step": 9 }, { "epoch": 0.009328358208955223, "grad_norm": 2.354464530944824, "learning_rate": 0.0004, "loss": 2.7652, "step": 10 }, { "epoch": 0.010261194029850746, "grad_norm": 2.024070978164673, "learning_rate": 0.0003999496469885013, "loss": 2.8786, "step": 11 }, { "epoch": 0.011194029850746268, "grad_norm": 1.8775830268859863, "learning_rate": 0.00039979861330826294, "loss": 2.0356, "step": 12 }, { "epoch": 0.012126865671641791, "grad_norm": 1.7130846977233887, "learning_rate": 0.0003995469750092912, "loss": 2.6772, "step": 13 }, { "epoch": 0.013059701492537313, "grad_norm": 1.9945520162582397, "learning_rate": 0.00039919485879904784, "loss": 2.8959, "step": 14 }, { "epoch": 0.013992537313432836, "grad_norm": 1.6973563432693481, "learning_rate": 0.00039874244197864856, "loss": 2.3531, "step": 15 }, { "epoch": 0.014925373134328358, "grad_norm": 1.5936486721038818, "learning_rate": 0.00039818995235358696, "loss": 1.9481, "step": 16 }, { "epoch": 0.01585820895522388, "grad_norm": 1.828995943069458, "learning_rate": 0.00039753766811902755, "loss": 3.6787, "step": 17 }, { "epoch": 0.016791044776119403, "grad_norm": 1.553421139717102, "learning_rate": 0.0003967859177197259, "loss": 2.2696, "step": 18 }, { "epoch": 0.017723880597014924, "grad_norm": 1.2893372774124146, "learning_rate": 0.00039593507968464716, "loss": 1.9452, "step": 19 }, { "epoch": 0.018656716417910446, "grad_norm": 1.674631953239441, "learning_rate": 0.0003949855824363647, "loss": 2.2916, "step": 20 }, { "epoch": 0.01958955223880597, "grad_norm": 1.4569449424743652, "learning_rate": 0.0003939379040753374, "loss": 2.1701, "step": 21 }, { "epoch": 0.020522388059701493, "grad_norm": 1.438828468322754, "learning_rate": 0.00039279257213917066, "loss": 2.5621, "step": 22 }, { "epoch": 0.021455223880597014, "grad_norm": 1.3770123720169067, "learning_rate": 0.0003915501633369861, "loss": 2.1366, "step": 23 }, { "epoch": 0.022388059701492536, "grad_norm": 1.4712828397750854, "learning_rate": 0.00039021130325903074, "loss": 2.319, "step": 24 }, { "epoch": 0.02332089552238806, "grad_norm": 1.465248465538025, "learning_rate": 0.00038877666606167355, "loss": 2.3959, "step": 25 }, { "epoch": 0.024253731343283583, "grad_norm": 1.4384740591049194, "learning_rate": 0.00038724697412794747, "loss": 2.097, "step": 26 }, { "epoch": 0.025186567164179104, "grad_norm": 1.3539812564849854, "learning_rate": 0.0003856229977038078, "loss": 2.2313, "step": 27 }, { "epoch": 0.026119402985074626, "grad_norm": 1.3618801832199097, "learning_rate": 0.0003839055545102902, "loss": 2.1054, "step": 28 }, { "epoch": 0.027052238805970148, "grad_norm": 1.422633409500122, "learning_rate": 0.00038209550933176323, "loss": 2.276, "step": 29 }, { "epoch": 0.027985074626865673, "grad_norm": 1.4232622385025024, "learning_rate": 0.0003801937735804838, "loss": 2.1735, "step": 30 }, { "epoch": 0.028917910447761194, "grad_norm": 1.4555679559707642, "learning_rate": 0.0003782013048376736, "loss": 2.2706, "step": 31 }, { "epoch": 0.029850746268656716, "grad_norm": 1.2929563522338867, "learning_rate": 0.0003761191063713476, "loss": 1.9037, "step": 32 }, { "epoch": 0.030783582089552237, "grad_norm": 1.2687627077102661, "learning_rate": 0.0003739482266311391, "loss": 2.1032, "step": 33 }, { "epoch": 0.03171641791044776, "grad_norm": 1.2993357181549072, "learning_rate": 0.00037168975872037323, "loss": 2.0062, "step": 34 }, { "epoch": 0.03264925373134328, "grad_norm": 1.3507018089294434, "learning_rate": 0.00036934483984565685, "loss": 2.1522, "step": 35 }, { "epoch": 0.033582089552238806, "grad_norm": 1.4183921813964844, "learning_rate": 0.00036691465074426054, "loss": 1.845, "step": 36 }, { "epoch": 0.03451492537313433, "grad_norm": 1.3370906114578247, "learning_rate": 0.00036440041508958203, "loss": 1.9448, "step": 37 }, { "epoch": 0.03544776119402985, "grad_norm": 1.4249347448349, "learning_rate": 0.0003618033988749895, "loss": 2.4594, "step": 38 }, { "epoch": 0.03544776119402985, "eval_loss": 0.5035107135772705, "eval_runtime": 60.2427, "eval_samples_per_second": 7.503, "eval_steps_per_second": 3.751, "step": 38 }, { "epoch": 0.036380597014925374, "grad_norm": 1.4865802526474, "learning_rate": 0.00035912490977635625, "loss": 2.2656, "step": 39 }, { "epoch": 0.03731343283582089, "grad_norm": 1.3488837480545044, "learning_rate": 0.000356366296493606, "loss": 1.9688, "step": 40 }, { "epoch": 0.03824626865671642, "grad_norm": 1.4332904815673828, "learning_rate": 0.0003535289480716022, "loss": 2.2615, "step": 41 }, { "epoch": 0.03917910447761194, "grad_norm": 1.516402006149292, "learning_rate": 0.00035061429320072223, "loss": 2.059, "step": 42 }, { "epoch": 0.04011194029850746, "grad_norm": 1.4039419889450073, "learning_rate": 0.00034762379949746815, "loss": 2.1295, "step": 43 }, { "epoch": 0.041044776119402986, "grad_norm": 1.2742645740509033, "learning_rate": 0.0003445589727654783, "loss": 2.1964, "step": 44 }, { "epoch": 0.04197761194029851, "grad_norm": 1.115033507347107, "learning_rate": 0.0003414213562373095, "loss": 1.8306, "step": 45 }, { "epoch": 0.04291044776119403, "grad_norm": 1.1708894968032837, "learning_rate": 0.00033821252979737297, "loss": 1.9652, "step": 46 }, { "epoch": 0.043843283582089554, "grad_norm": 1.185393214225769, "learning_rate": 0.0003349341091864149, "loss": 1.8172, "step": 47 }, { "epoch": 0.04477611940298507, "grad_norm": 1.1099275350570679, "learning_rate": 0.00033158774518794254, "loss": 1.7576, "step": 48 }, { "epoch": 0.0457089552238806, "grad_norm": 1.0703908205032349, "learning_rate": 0.0003281751227970048, "loss": 1.9036, "step": 49 }, { "epoch": 0.04664179104477612, "grad_norm": 1.1421232223510742, "learning_rate": 0.00032469796037174674, "loss": 1.9759, "step": 50 }, { "epoch": 0.04757462686567164, "grad_norm": 1.1319009065628052, "learning_rate": 0.000321158008768164, "loss": 1.7476, "step": 51 }, { "epoch": 0.048507462686567165, "grad_norm": 1.1899311542510986, "learning_rate": 0.00031755705045849464, "loss": 1.8308, "step": 52 }, { "epoch": 0.049440298507462684, "grad_norm": 1.246800184249878, "learning_rate": 0.0003138968986336904, "loss": 1.9183, "step": 53 }, { "epoch": 0.05037313432835821, "grad_norm": 1.207138180732727, "learning_rate": 0.0003101793962904205, "loss": 1.8768, "step": 54 }, { "epoch": 0.051305970149253734, "grad_norm": 0.9686072468757629, "learning_rate": 0.00030640641530306733, "loss": 1.6107, "step": 55 }, { "epoch": 0.05223880597014925, "grad_norm": 1.1268424987792969, "learning_rate": 0.00030257985548118126, "loss": 1.9169, "step": 56 }, { "epoch": 0.05317164179104478, "grad_norm": 1.3243463039398193, "learning_rate": 0.0002987016436128694, "loss": 2.1651, "step": 57 }, { "epoch": 0.054104477611940295, "grad_norm": 1.230141043663025, "learning_rate": 0.0002947737324945997, "loss": 2.0871, "step": 58 }, { "epoch": 0.05503731343283582, "grad_norm": 1.0468418598175049, "learning_rate": 0.00029079809994790937, "loss": 1.7348, "step": 59 }, { "epoch": 0.055970149253731345, "grad_norm": 1.3394778966903687, "learning_rate": 0.00028677674782351165, "loss": 2.3597, "step": 60 }, { "epoch": 0.05690298507462686, "grad_norm": 1.2126855850219727, "learning_rate": 0.00028271170099330415, "loss": 2.0063, "step": 61 }, { "epoch": 0.05783582089552239, "grad_norm": 1.2102502584457397, "learning_rate": 0.00027860500633078477, "loss": 2.0048, "step": 62 }, { "epoch": 0.058768656716417914, "grad_norm": 1.2987444400787354, "learning_rate": 0.00027445873168038907, "loss": 2.6436, "step": 63 }, { "epoch": 0.05970149253731343, "grad_norm": 1.1055057048797607, "learning_rate": 0.0002702749648162686, "loss": 1.8015, "step": 64 }, { "epoch": 0.06063432835820896, "grad_norm": 1.1894230842590332, "learning_rate": 0.00026605581239103347, "loss": 2.0075, "step": 65 }, { "epoch": 0.061567164179104475, "grad_norm": 0.9544552564620972, "learning_rate": 0.00026180339887498953, "loss": 1.5043, "step": 66 }, { "epoch": 0.0625, "grad_norm": 1.2134445905685425, "learning_rate": 0.00025751986548640346, "loss": 1.9537, "step": 67 }, { "epoch": 0.06343283582089553, "grad_norm": 1.061187505722046, "learning_rate": 0.00025320736911333503, "loss": 1.6231, "step": 68 }, { "epoch": 0.06436567164179105, "grad_norm": 1.1641594171524048, "learning_rate": 0.0002488680812275788, "loss": 1.6016, "step": 69 }, { "epoch": 0.06529850746268656, "grad_norm": 1.1650375127792358, "learning_rate": 0.0002445041867912629, "loss": 1.8811, "step": 70 }, { "epoch": 0.06623134328358209, "grad_norm": 1.319548487663269, "learning_rate": 0.00024011788315665458, "loss": 1.7969, "step": 71 }, { "epoch": 0.06716417910447761, "grad_norm": 1.009516954421997, "learning_rate": 0.00023571137895972733, "loss": 1.4261, "step": 72 }, { "epoch": 0.06809701492537314, "grad_norm": 1.1219674348831177, "learning_rate": 0.0002312868930080462, "loss": 1.5247, "step": 73 }, { "epoch": 0.06902985074626866, "grad_norm": 1.0498907566070557, "learning_rate": 0.0002268466531635311, "loss": 1.7131, "step": 74 }, { "epoch": 0.06996268656716417, "grad_norm": 1.0986140966415405, "learning_rate": 0.00022239289522066157, "loss": 1.7584, "step": 75 }, { "epoch": 0.0708955223880597, "grad_norm": 1.2214365005493164, "learning_rate": 0.00021792786178068672, "loss": 1.7782, "step": 76 }, { "epoch": 0.0708955223880597, "eval_loss": 0.47117629647254944, "eval_runtime": 60.2201, "eval_samples_per_second": 7.506, "eval_steps_per_second": 3.753, "step": 76 }, { "epoch": 0.07182835820895522, "grad_norm": 1.2228177785873413, "learning_rate": 0.00021345380112240797, "loss": 2.1149, "step": 77 }, { "epoch": 0.07276119402985075, "grad_norm": 1.0541841983795166, "learning_rate": 0.00020897296607010301, "loss": 1.6306, "step": 78 }, { "epoch": 0.07369402985074627, "grad_norm": 1.1272865533828735, "learning_rate": 0.00020448761285916104, "loss": 1.9262, "step": 79 }, { "epoch": 0.07462686567164178, "grad_norm": 1.1912484169006348, "learning_rate": 0.0002, "loss": 2.0926, "step": 80 }, { "epoch": 0.07555970149253731, "grad_norm": 1.2342007160186768, "learning_rate": 0.00019551238714083903, "loss": 2.3557, "step": 81 }, { "epoch": 0.07649253731343283, "grad_norm": 1.068264365196228, "learning_rate": 0.00019102703392989709, "loss": 1.9768, "step": 82 }, { "epoch": 0.07742537313432836, "grad_norm": 1.035170555114746, "learning_rate": 0.00018654619887759207, "loss": 1.6881, "step": 83 }, { "epoch": 0.07835820895522388, "grad_norm": 1.2881474494934082, "learning_rate": 0.00018207213821931333, "loss": 2.1289, "step": 84 }, { "epoch": 0.07929104477611941, "grad_norm": 1.1358542442321777, "learning_rate": 0.00017760710477933845, "loss": 2.2829, "step": 85 }, { "epoch": 0.08022388059701492, "grad_norm": 1.2520966529846191, "learning_rate": 0.00017315334683646897, "loss": 1.552, "step": 86 }, { "epoch": 0.08115671641791045, "grad_norm": 0.9834119081497192, "learning_rate": 0.00016871310699195379, "loss": 1.5413, "step": 87 }, { "epoch": 0.08208955223880597, "grad_norm": 1.043672800064087, "learning_rate": 0.00016428862104027268, "loss": 1.7337, "step": 88 }, { "epoch": 0.0830223880597015, "grad_norm": 1.0788074731826782, "learning_rate": 0.00015988211684334546, "loss": 1.3079, "step": 89 }, { "epoch": 0.08395522388059702, "grad_norm": 1.0924961566925049, "learning_rate": 0.00015549581320873715, "loss": 1.7402, "step": 90 }, { "epoch": 0.08488805970149253, "grad_norm": 1.1585627794265747, "learning_rate": 0.00015113191877242117, "loss": 1.7314, "step": 91 }, { "epoch": 0.08582089552238806, "grad_norm": 1.0373111963272095, "learning_rate": 0.00014679263088666499, "loss": 1.7103, "step": 92 }, { "epoch": 0.08675373134328358, "grad_norm": 1.178009271621704, "learning_rate": 0.00014248013451359656, "loss": 1.8889, "step": 93 }, { "epoch": 0.08768656716417911, "grad_norm": 1.110259771347046, "learning_rate": 0.00013819660112501054, "loss": 1.685, "step": 94 }, { "epoch": 0.08861940298507463, "grad_norm": 1.0290873050689697, "learning_rate": 0.00013394418760896666, "loss": 1.6533, "step": 95 }, { "epoch": 0.08955223880597014, "grad_norm": 1.0740941762924194, "learning_rate": 0.00012972503518373144, "loss": 1.8436, "step": 96 }, { "epoch": 0.09048507462686567, "grad_norm": 1.1851738691329956, "learning_rate": 0.00012554126831961098, "loss": 1.8081, "step": 97 }, { "epoch": 0.0914179104477612, "grad_norm": 1.0965279340744019, "learning_rate": 0.0001213949936692153, "loss": 1.8133, "step": 98 }, { "epoch": 0.09235074626865672, "grad_norm": 1.0990545749664307, "learning_rate": 0.00011728829900669591, "loss": 1.7193, "step": 99 }, { "epoch": 0.09328358208955224, "grad_norm": 1.0958396196365356, "learning_rate": 0.00011322325217648839, "loss": 1.3802, "step": 100 }, { "epoch": 0.09421641791044776, "grad_norm": 1.0536209344863892, "learning_rate": 0.00010920190005209065, "loss": 1.8329, "step": 101 }, { "epoch": 0.09514925373134328, "grad_norm": 1.0651001930236816, "learning_rate": 0.00010522626750540028, "loss": 1.6494, "step": 102 }, { "epoch": 0.0960820895522388, "grad_norm": 1.1998820304870605, "learning_rate": 0.00010129835638713063, "loss": 2.0276, "step": 103 }, { "epoch": 0.09701492537313433, "grad_norm": 0.9894289374351501, "learning_rate": 9.74201445188188e-05, "loss": 1.5171, "step": 104 }, { "epoch": 0.09794776119402986, "grad_norm": 1.2795695066452026, "learning_rate": 9.359358469693271e-05, "loss": 1.7987, "step": 105 }, { "epoch": 0.09888059701492537, "grad_norm": 1.0911823511123657, "learning_rate": 8.982060370957952e-05, "loss": 1.6949, "step": 106 }, { "epoch": 0.09981343283582089, "grad_norm": 0.9669733047485352, "learning_rate": 8.610310136630962e-05, "loss": 1.405, "step": 107 }, { "epoch": 0.10074626865671642, "grad_norm": 1.1521021127700806, "learning_rate": 8.24429495415054e-05, "loss": 1.8475, "step": 108 }, { "epoch": 0.10167910447761194, "grad_norm": 1.085671305656433, "learning_rate": 7.884199123183605e-05, "loss": 1.5308, "step": 109 }, { "epoch": 0.10261194029850747, "grad_norm": 1.141276240348816, "learning_rate": 7.530203962825331e-05, "loss": 1.8675, "step": 110 }, { "epoch": 0.10354477611940298, "grad_norm": 1.0789172649383545, "learning_rate": 7.182487720299517e-05, "loss": 1.7724, "step": 111 }, { "epoch": 0.1044776119402985, "grad_norm": 1.0939713716506958, "learning_rate": 6.841225481205749e-05, "loss": 1.5574, "step": 112 }, { "epoch": 0.10541044776119403, "grad_norm": 1.2875022888183594, "learning_rate": 6.506589081358514e-05, "loss": 1.7821, "step": 113 }, { "epoch": 0.10634328358208955, "grad_norm": 1.1060545444488525, "learning_rate": 6.178747020262707e-05, "loss": 1.543, "step": 114 }, { "epoch": 0.10634328358208955, "eval_loss": 0.44875675439834595, "eval_runtime": 61.3854, "eval_samples_per_second": 7.363, "eval_steps_per_second": 3.682, "step": 114 }, { "epoch": 0.10727611940298508, "grad_norm": 1.2963577508926392, "learning_rate": 5.857864376269051e-05, "loss": 1.577, "step": 115 }, { "epoch": 0.10820895522388059, "grad_norm": 1.2188680171966553, "learning_rate": 5.544102723452171e-05, "loss": 1.6038, "step": 116 }, { "epoch": 0.10914179104477612, "grad_norm": 0.8995980024337769, "learning_rate": 5.237620050253189e-05, "loss": 1.1796, "step": 117 }, { "epoch": 0.11007462686567164, "grad_norm": 1.1543958187103271, "learning_rate": 4.938570679927783e-05, "loss": 1.8755, "step": 118 }, { "epoch": 0.11100746268656717, "grad_norm": 1.2092195749282837, "learning_rate": 4.647105192839778e-05, "loss": 2.0984, "step": 119 }, { "epoch": 0.11194029850746269, "grad_norm": 1.0605789422988892, "learning_rate": 4.363370350639404e-05, "loss": 2.0146, "step": 120 }, { "epoch": 0.11287313432835822, "grad_norm": 1.2720032930374146, "learning_rate": 4.087509022364382e-05, "loss": 1.8382, "step": 121 }, { "epoch": 0.11380597014925373, "grad_norm": 1.2557578086853027, "learning_rate": 3.819660112501053e-05, "loss": 2.2032, "step": 122 }, { "epoch": 0.11473880597014925, "grad_norm": 1.1983158588409424, "learning_rate": 3.5599584910418035e-05, "loss": 1.6507, "step": 123 }, { "epoch": 0.11567164179104478, "grad_norm": 1.0300534963607788, "learning_rate": 3.3085349255739474e-05, "loss": 1.6445, "step": 124 }, { "epoch": 0.1166044776119403, "grad_norm": 1.1426405906677246, "learning_rate": 3.0655160154343174e-05, "loss": 2.3686, "step": 125 }, { "epoch": 0.11753731343283583, "grad_norm": 1.3037739992141724, "learning_rate": 2.831024127962678e-05, "loss": 2.1821, "step": 126 }, { "epoch": 0.11847014925373134, "grad_norm": 1.049167275428772, "learning_rate": 2.6051773368860934e-05, "loss": 1.5581, "step": 127 }, { "epoch": 0.11940298507462686, "grad_norm": 1.130072832107544, "learning_rate": 2.38808936286524e-05, "loss": 2.1087, "step": 128 }, { "epoch": 0.12033582089552239, "grad_norm": 1.1626509428024292, "learning_rate": 2.1798695162326442e-05, "loss": 1.9543, "step": 129 }, { "epoch": 0.12126865671641791, "grad_norm": 1.1691805124282837, "learning_rate": 1.9806226419516192e-05, "loss": 1.949, "step": 130 }, { "epoch": 0.12220149253731344, "grad_norm": 1.067617654800415, "learning_rate": 1.790449066823683e-05, "loss": 2.0026, "step": 131 }, { "epoch": 0.12313432835820895, "grad_norm": 1.0745978355407715, "learning_rate": 1.6094445489709885e-05, "loss": 1.5739, "step": 132 }, { "epoch": 0.12406716417910447, "grad_norm": 1.4443845748901367, "learning_rate": 1.4377002296192233e-05, "loss": 1.3747, "step": 133 }, { "epoch": 0.125, "grad_norm": 1.1256107091903687, "learning_rate": 1.275302587205256e-05, "loss": 1.9545, "step": 134 }, { "epoch": 0.1259328358208955, "grad_norm": 1.1774152517318726, "learning_rate": 1.1223333938326485e-05, "loss": 1.8889, "step": 135 }, { "epoch": 0.12686567164179105, "grad_norm": 1.1691941022872925, "learning_rate": 9.788696740969295e-06, "loss": 1.6015, "step": 136 }, { "epoch": 0.12779850746268656, "grad_norm": 1.647544026374817, "learning_rate": 8.44983666301391e-06, "loss": 1.6521, "step": 137 }, { "epoch": 0.1287313432835821, "grad_norm": 1.0957173109054565, "learning_rate": 7.2074278608293525e-06, "loss": 1.9019, "step": 138 }, { "epoch": 0.1296641791044776, "grad_norm": 1.0895917415618896, "learning_rate": 6.062095924662625e-06, "loss": 1.5371, "step": 139 }, { "epoch": 0.13059701492537312, "grad_norm": 0.8600479960441589, "learning_rate": 5.0144175636352765e-06, "loss": 1.1489, "step": 140 }, { "epoch": 0.13152985074626866, "grad_norm": 1.0613805055618286, "learning_rate": 4.064920315352904e-06, "loss": 1.2605, "step": 141 }, { "epoch": 0.13246268656716417, "grad_norm": 1.176459789276123, "learning_rate": 3.2140822802740668e-06, "loss": 2.0061, "step": 142 }, { "epoch": 0.1333955223880597, "grad_norm": 1.1844390630722046, "learning_rate": 2.462331880972468e-06, "loss": 1.7673, "step": 143 }, { "epoch": 0.13432835820895522, "grad_norm": 1.0816205739974976, "learning_rate": 1.81004764641306e-06, "loss": 1.6904, "step": 144 }, { "epoch": 0.13526119402985073, "grad_norm": 1.1789848804473877, "learning_rate": 1.2575580213514792e-06, "loss": 2.0924, "step": 145 }, { "epoch": 0.13619402985074627, "grad_norm": 1.1259171962738037, "learning_rate": 8.051412009521864e-07, "loss": 1.7072, "step": 146 }, { "epoch": 0.13712686567164178, "grad_norm": 0.9554341435432434, "learning_rate": 4.530249907087836e-07, "loss": 1.3331, "step": 147 }, { "epoch": 0.13805970149253732, "grad_norm": 1.1093275547027588, "learning_rate": 2.0138669173708213e-07, "loss": 1.5626, "step": 148 }, { "epoch": 0.13899253731343283, "grad_norm": 1.1262925863265991, "learning_rate": 5.035301149869387e-08, "loss": 1.5356, "step": 149 }, { "epoch": 0.13992537313432835, "grad_norm": 1.1811816692352295, "learning_rate": 0.0, "loss": 1.5122, "step": 150 } ], "logging_steps": 1, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 38, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9665789124608e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }