{ "best_metric": 0.0009016587864607573, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 1.0021598272138228, "eval_steps": 50, "global_step": 116, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008639308855291577, "grad_norm": 0.5806592106819153, "learning_rate": 1e-05, "loss": 0.0374, "step": 1 }, { "epoch": 0.008639308855291577, "eval_loss": 0.15225130319595337, "eval_runtime": 20.0655, "eval_samples_per_second": 9.718, "eval_steps_per_second": 2.442, "step": 1 }, { "epoch": 0.017278617710583154, "grad_norm": 0.6535840630531311, "learning_rate": 2e-05, "loss": 0.0344, "step": 2 }, { "epoch": 0.02591792656587473, "grad_norm": 0.4030662775039673, "learning_rate": 3e-05, "loss": 0.0354, "step": 3 }, { "epoch": 0.03455723542116631, "grad_norm": 0.1255313754081726, "learning_rate": 4e-05, "loss": 0.0286, "step": 4 }, { "epoch": 0.04319654427645788, "grad_norm": 0.15026195347309113, "learning_rate": 5e-05, "loss": 0.0283, "step": 5 }, { "epoch": 0.05183585313174946, "grad_norm": 0.34920528531074524, "learning_rate": 6e-05, "loss": 0.0299, "step": 6 }, { "epoch": 0.06047516198704104, "grad_norm": 0.16989673674106598, "learning_rate": 7e-05, "loss": 0.0313, "step": 7 }, { "epoch": 0.06911447084233262, "grad_norm": 0.1835932433605194, "learning_rate": 8e-05, "loss": 0.0256, "step": 8 }, { "epoch": 0.07775377969762419, "grad_norm": 0.2971993088722229, "learning_rate": 9e-05, "loss": 0.0199, "step": 9 }, { "epoch": 0.08639308855291576, "grad_norm": 0.1404445469379425, "learning_rate": 0.0001, "loss": 0.005, "step": 10 }, { "epoch": 0.09503239740820735, "grad_norm": 0.36361753940582275, "learning_rate": 9.997804182543973e-05, "loss": 0.01, "step": 11 }, { "epoch": 0.10367170626349892, "grad_norm": 0.4649268388748169, "learning_rate": 9.991218658821608e-05, "loss": 0.0044, "step": 12 }, { "epoch": 0.11231101511879049, "grad_norm": 0.08656516671180725, "learning_rate": 9.980249213076084e-05, "loss": 0.0023, "step": 13 }, { "epoch": 0.12095032397408208, "grad_norm": 0.1461178958415985, "learning_rate": 9.964905480067586e-05, "loss": 0.0051, "step": 14 }, { "epoch": 0.12958963282937366, "grad_norm": 0.10659543424844742, "learning_rate": 9.94520093661082e-05, "loss": 0.0028, "step": 15 }, { "epoch": 0.13822894168466524, "grad_norm": 0.06367552280426025, "learning_rate": 9.921152889737984e-05, "loss": 0.001, "step": 16 }, { "epoch": 0.1468682505399568, "grad_norm": 0.07228197157382965, "learning_rate": 9.89278246149752e-05, "loss": 0.0008, "step": 17 }, { "epoch": 0.15550755939524838, "grad_norm": 0.14873471856117249, "learning_rate": 9.860114570402054e-05, "loss": 0.0028, "step": 18 }, { "epoch": 0.16414686825053995, "grad_norm": 0.20919160544872284, "learning_rate": 9.823177909541794e-05, "loss": 0.0062, "step": 19 }, { "epoch": 0.17278617710583152, "grad_norm": 0.11298267543315887, "learning_rate": 9.782004921382612e-05, "loss": 0.0006, "step": 20 }, { "epoch": 0.18142548596112312, "grad_norm": 0.02028828300535679, "learning_rate": 9.736631769270957e-05, "loss": 0.0008, "step": 21 }, { "epoch": 0.1900647948164147, "grad_norm": 0.17469586431980133, "learning_rate": 9.687098305670605e-05, "loss": 0.0036, "step": 22 }, { "epoch": 0.19870410367170627, "grad_norm": 0.06677763164043427, "learning_rate": 9.633448037159167e-05, "loss": 0.0011, "step": 23 }, { "epoch": 0.20734341252699784, "grad_norm": 0.08885731548070908, "learning_rate": 9.575728086215092e-05, "loss": 0.002, "step": 24 }, { "epoch": 0.2159827213822894, "grad_norm": 0.07344262301921844, "learning_rate": 9.513989149828718e-05, "loss": 0.0016, "step": 25 }, { "epoch": 0.22462203023758098, "grad_norm": 0.012290475890040398, "learning_rate": 9.448285454973738e-05, "loss": 0.0006, "step": 26 }, { "epoch": 0.23326133909287258, "grad_norm": 0.2666710913181305, "learning_rate": 9.378674710978185e-05, "loss": 0.001, "step": 27 }, { "epoch": 0.24190064794816415, "grad_norm": 0.15356595814228058, "learning_rate": 9.305218058836778e-05, "loss": 0.0012, "step": 28 }, { "epoch": 0.2505399568034557, "grad_norm": 0.05029755458235741, "learning_rate": 9.22798001750913e-05, "loss": 0.0158, "step": 29 }, { "epoch": 0.2591792656587473, "grad_norm": 0.07061488926410675, "learning_rate": 9.14702842725101e-05, "loss": 0.0148, "step": 30 }, { "epoch": 0.2678185745140389, "grad_norm": 0.05950339511036873, "learning_rate": 9.062434390028407e-05, "loss": 0.0154, "step": 31 }, { "epoch": 0.27645788336933047, "grad_norm": 0.045820388942956924, "learning_rate": 8.974272207066767e-05, "loss": 0.0116, "step": 32 }, { "epoch": 0.28509719222462204, "grad_norm": 0.03941355645656586, "learning_rate": 8.882619313590212e-05, "loss": 0.0072, "step": 33 }, { "epoch": 0.2937365010799136, "grad_norm": 0.02324046567082405, "learning_rate": 8.787556210808101e-05, "loss": 0.0012, "step": 34 }, { "epoch": 0.3023758099352052, "grad_norm": 0.053608641028404236, "learning_rate": 8.689166395208636e-05, "loss": 0.0023, "step": 35 }, { "epoch": 0.31101511879049676, "grad_norm": 0.062077559530735016, "learning_rate": 8.587536285221656e-05, "loss": 0.0017, "step": 36 }, { "epoch": 0.31965442764578833, "grad_norm": 0.09231416881084442, "learning_rate": 8.482755145314986e-05, "loss": 0.0018, "step": 37 }, { "epoch": 0.3282937365010799, "grad_norm": 0.08266697824001312, "learning_rate": 8.374915007591053e-05, "loss": 0.0021, "step": 38 }, { "epoch": 0.3369330453563715, "grad_norm": 0.019569700583815575, "learning_rate": 8.264110590952609e-05, "loss": 0.0005, "step": 39 }, { "epoch": 0.34557235421166305, "grad_norm": 0.07023479044437408, "learning_rate": 8.150439217908556e-05, "loss": 0.0019, "step": 40 }, { "epoch": 0.3542116630669546, "grad_norm": 0.029872030019760132, "learning_rate": 8.034000729092968e-05, "loss": 0.0011, "step": 41 }, { "epoch": 0.36285097192224625, "grad_norm": 0.18075761198997498, "learning_rate": 7.91489739557236e-05, "loss": 0.0011, "step": 42 }, { "epoch": 0.3714902807775378, "grad_norm": 0.010835711844265461, "learning_rate": 7.793233829018262e-05, "loss": 0.0003, "step": 43 }, { "epoch": 0.3801295896328294, "grad_norm": 0.010719070211052895, "learning_rate": 7.669116889823955e-05, "loss": 0.0004, "step": 44 }, { "epoch": 0.38876889848812096, "grad_norm": 0.015587416477501392, "learning_rate": 7.542655593246103e-05, "loss": 0.0004, "step": 45 }, { "epoch": 0.39740820734341253, "grad_norm": 0.0032320828177034855, "learning_rate": 7.413961013653726e-05, "loss": 0.0002, "step": 46 }, { "epoch": 0.4060475161987041, "grad_norm": 0.24344860017299652, "learning_rate": 7.283146186968565e-05, "loss": 0.0009, "step": 47 }, { "epoch": 0.4146868250539957, "grad_norm": 0.1430875062942505, "learning_rate": 7.150326011382604e-05, "loss": 0.0021, "step": 48 }, { "epoch": 0.42332613390928725, "grad_norm": 0.01567676290869713, "learning_rate": 7.015617146439863e-05, "loss": 0.0002, "step": 49 }, { "epoch": 0.4319654427645788, "grad_norm": 0.11232331395149231, "learning_rate": 6.879137910571191e-05, "loss": 0.004, "step": 50 }, { "epoch": 0.4319654427645788, "eval_loss": 0.0022863983176648617, "eval_runtime": 20.6121, "eval_samples_per_second": 9.46, "eval_steps_per_second": 2.377, "step": 50 }, { "epoch": 0.4406047516198704, "grad_norm": 0.023258408531546593, "learning_rate": 6.741008177171995e-05, "loss": 0.0002, "step": 51 }, { "epoch": 0.44924406047516197, "grad_norm": 0.015541836619377136, "learning_rate": 6.601349269314188e-05, "loss": 0.0004, "step": 52 }, { "epoch": 0.45788336933045354, "grad_norm": 0.004774713423103094, "learning_rate": 6.460283853184879e-05, "loss": 0.0003, "step": 53 }, { "epoch": 0.46652267818574517, "grad_norm": 0.1771538257598877, "learning_rate": 6.317935830345338e-05, "loss": 0.0037, "step": 54 }, { "epoch": 0.47516198704103674, "grad_norm": 0.10851258039474487, "learning_rate": 6.174430228904919e-05, "loss": 0.0021, "step": 55 }, { "epoch": 0.4838012958963283, "grad_norm": 0.029805807396769524, "learning_rate": 6.029893093705492e-05, "loss": 0.0004, "step": 56 }, { "epoch": 0.4924406047516199, "grad_norm": 0.05265142768621445, "learning_rate": 5.884451375612865e-05, "loss": 0.0072, "step": 57 }, { "epoch": 0.5010799136069114, "grad_norm": 0.06926386058330536, "learning_rate": 5.738232820012407e-05, "loss": 0.0062, "step": 58 }, { "epoch": 0.509719222462203, "grad_norm": 0.08675476908683777, "learning_rate": 5.5913658546068295e-05, "loss": 0.0054, "step": 59 }, { "epoch": 0.5183585313174947, "grad_norm": 0.03324931487441063, "learning_rate": 5.4439794766146746e-05, "loss": 0.0035, "step": 60 }, { "epoch": 0.5269978401727862, "grad_norm": 0.07478857040405273, "learning_rate": 5.296203139468572e-05, "loss": 0.0024, "step": 61 }, { "epoch": 0.5356371490280778, "grad_norm": 0.08285272121429443, "learning_rate": 5.148166639112799e-05, "loss": 0.0018, "step": 62 }, { "epoch": 0.5442764578833693, "grad_norm": 0.023794766515493393, "learning_rate": 5e-05, "loss": 0.0007, "step": 63 }, { "epoch": 0.5529157667386609, "grad_norm": 0.025584915652871132, "learning_rate": 4.851833360887201e-05, "loss": 0.0005, "step": 64 }, { "epoch": 0.5615550755939525, "grad_norm": 0.04154638200998306, "learning_rate": 4.703796860531429e-05, "loss": 0.0015, "step": 65 }, { "epoch": 0.5701943844492441, "grad_norm": 0.017531629651784897, "learning_rate": 4.5560205233853266e-05, "loss": 0.0004, "step": 66 }, { "epoch": 0.5788336933045356, "grad_norm": 0.046652115881443024, "learning_rate": 4.4086341453931716e-05, "loss": 0.0005, "step": 67 }, { "epoch": 0.5874730021598272, "grad_norm": 0.21531158685684204, "learning_rate": 4.2617671799875944e-05, "loss": 0.0018, "step": 68 }, { "epoch": 0.5961123110151187, "grad_norm": 0.11707913130521774, "learning_rate": 4.115548624387137e-05, "loss": 0.0013, "step": 69 }, { "epoch": 0.6047516198704104, "grad_norm": 0.008791811764240265, "learning_rate": 3.970106906294509e-05, "loss": 0.0003, "step": 70 }, { "epoch": 0.6133909287257019, "grad_norm": 0.017406433820724487, "learning_rate": 3.825569771095082e-05, "loss": 0.0003, "step": 71 }, { "epoch": 0.6220302375809935, "grad_norm": 0.03273649513721466, "learning_rate": 3.682064169654663e-05, "loss": 0.0008, "step": 72 }, { "epoch": 0.6306695464362851, "grad_norm": 0.014443274587392807, "learning_rate": 3.539716146815122e-05, "loss": 0.0003, "step": 73 }, { "epoch": 0.6393088552915767, "grad_norm": 0.0037852220702916384, "learning_rate": 3.3986507306858125e-05, "loss": 0.0002, "step": 74 }, { "epoch": 0.6479481641468683, "grad_norm": 0.006565776187926531, "learning_rate": 3.258991822828007e-05, "loss": 0.0002, "step": 75 }, { "epoch": 0.6565874730021598, "grad_norm": 0.0011975999223068357, "learning_rate": 3.12086208942881e-05, "loss": 0.0001, "step": 76 }, { "epoch": 0.6652267818574514, "grad_norm": 0.021672353148460388, "learning_rate": 2.98438285356014e-05, "loss": 0.0003, "step": 77 }, { "epoch": 0.673866090712743, "grad_norm": 0.010406700894236565, "learning_rate": 2.8496739886173995e-05, "loss": 0.0004, "step": 78 }, { "epoch": 0.6825053995680346, "grad_norm": 0.13345706462860107, "learning_rate": 2.716853813031435e-05, "loss": 0.0008, "step": 79 }, { "epoch": 0.6911447084233261, "grad_norm": 0.0036847260780632496, "learning_rate": 2.5860389863462765e-05, "loss": 0.0002, "step": 80 }, { "epoch": 0.6997840172786177, "grad_norm": 0.031118186190724373, "learning_rate": 2.4573444067538986e-05, "loss": 0.0002, "step": 81 }, { "epoch": 0.7084233261339092, "grad_norm": 0.1681869924068451, "learning_rate": 2.3308831101760486e-05, "loss": 0.0022, "step": 82 }, { "epoch": 0.7170626349892009, "grad_norm": 0.06865206360816956, "learning_rate": 2.2067661709817383e-05, "loss": 0.0009, "step": 83 }, { "epoch": 0.7257019438444925, "grad_norm": 0.15822599828243256, "learning_rate": 2.0851026044276406e-05, "loss": 0.0014, "step": 84 }, { "epoch": 0.734341252699784, "grad_norm": 0.0194676723331213, "learning_rate": 1.9659992709070345e-05, "loss": 0.002, "step": 85 }, { "epoch": 0.7429805615550756, "grad_norm": 0.031017431989312172, "learning_rate": 1.849560782091445e-05, "loss": 0.0021, "step": 86 }, { "epoch": 0.7516198704103672, "grad_norm": 0.03133060783147812, "learning_rate": 1.7358894090473925e-05, "loss": 0.0023, "step": 87 }, { "epoch": 0.7602591792656588, "grad_norm": 0.03962257131934166, "learning_rate": 1.6250849924089484e-05, "loss": 0.0018, "step": 88 }, { "epoch": 0.7688984881209503, "grad_norm": 0.027627507224678993, "learning_rate": 1.5172448546850165e-05, "loss": 0.0015, "step": 89 }, { "epoch": 0.7775377969762419, "grad_norm": 0.021091526374220848, "learning_rate": 1.4124637147783432e-05, "loss": 0.0005, "step": 90 }, { "epoch": 0.7861771058315334, "grad_norm": 0.01683308742940426, "learning_rate": 1.3108336047913633e-05, "loss": 0.0005, "step": 91 }, { "epoch": 0.7948164146868251, "grad_norm": 0.033081360161304474, "learning_rate": 1.2124437891918993e-05, "loss": 0.0004, "step": 92 }, { "epoch": 0.8034557235421166, "grad_norm": 0.023177431896328926, "learning_rate": 1.1173806864097886e-05, "loss": 0.0008, "step": 93 }, { "epoch": 0.8120950323974082, "grad_norm": 0.03851751610636711, "learning_rate": 1.0257277929332332e-05, "loss": 0.0005, "step": 94 }, { "epoch": 0.8207343412526998, "grad_norm": 0.07173438370227814, "learning_rate": 9.375656099715934e-06, "loss": 0.0004, "step": 95 }, { "epoch": 0.8293736501079914, "grad_norm": 0.015922777354717255, "learning_rate": 8.529715727489912e-06, "loss": 0.0006, "step": 96 }, { "epoch": 0.838012958963283, "grad_norm": 0.02879387140274048, "learning_rate": 7.720199824908692e-06, "loss": 0.0004, "step": 97 }, { "epoch": 0.8466522678185745, "grad_norm": 0.0059148469008505344, "learning_rate": 6.947819411632223e-06, "loss": 0.0007, "step": 98 }, { "epoch": 0.8552915766738661, "grad_norm": 0.006823898293077946, "learning_rate": 6.213252890218163e-06, "loss": 0.0002, "step": 99 }, { "epoch": 0.8639308855291576, "grad_norm": 0.05277214199304581, "learning_rate": 5.51714545026264e-06, "loss": 0.0005, "step": 100 }, { "epoch": 0.8639308855291576, "eval_loss": 0.0009016587864607573, "eval_runtime": 20.3415, "eval_samples_per_second": 9.586, "eval_steps_per_second": 2.409, "step": 100 }, { "epoch": 0.8725701943844493, "grad_norm": 0.012060822919011116, "learning_rate": 4.860108501712824e-06, "loss": 0.0003, "step": 101 }, { "epoch": 0.8812095032397408, "grad_norm": 0.060871824622154236, "learning_rate": 4.242719137849077e-06, "loss": 0.0007, "step": 102 }, { "epoch": 0.8898488120950324, "grad_norm": 0.04358503967523575, "learning_rate": 3.6655196284083317e-06, "loss": 0.0006, "step": 103 }, { "epoch": 0.8984881209503239, "grad_norm": 0.015056795440614223, "learning_rate": 3.1290169432939553e-06, "loss": 0.0003, "step": 104 }, { "epoch": 0.9071274298056156, "grad_norm": 0.06826309114694595, "learning_rate": 2.6336823072904304e-06, "loss": 0.0019, "step": 105 }, { "epoch": 0.9157667386609071, "grad_norm": 0.004253576509654522, "learning_rate": 2.179950786173879e-06, "loss": 0.0002, "step": 106 }, { "epoch": 0.9244060475161987, "grad_norm": 0.027383577078580856, "learning_rate": 1.7682209045820686e-06, "loss": 0.0004, "step": 107 }, { "epoch": 0.9330453563714903, "grad_norm": 0.01728072762489319, "learning_rate": 1.3988542959794627e-06, "loss": 0.0003, "step": 108 }, { "epoch": 0.9416846652267818, "grad_norm": 0.11028740555047989, "learning_rate": 1.0721753850247984e-06, "loss": 0.0021, "step": 109 }, { "epoch": 0.9503239740820735, "grad_norm": 0.007546401582658291, "learning_rate": 7.884711026201585e-07, "loss": 0.0002, "step": 110 }, { "epoch": 0.958963282937365, "grad_norm": 0.011820383369922638, "learning_rate": 5.479906338917984e-07, "loss": 0.0002, "step": 111 }, { "epoch": 0.9676025917926566, "grad_norm": 0.027329521253705025, "learning_rate": 3.5094519932415417e-07, "loss": 0.0002, "step": 112 }, { "epoch": 0.9762419006479481, "grad_norm": 0.027211442589759827, "learning_rate": 1.975078692391552e-07, "loss": 0.0012, "step": 113 }, { "epoch": 0.9848812095032398, "grad_norm": 0.013164684176445007, "learning_rate": 8.781341178393244e-08, "loss": 0.0004, "step": 114 }, { "epoch": 0.9935205183585313, "grad_norm": 0.0681779533624649, "learning_rate": 2.1958174560282595e-08, "loss": 0.0021, "step": 115 }, { "epoch": 1.0021598272138228, "grad_norm": 0.34717699885368347, "learning_rate": 0.0, "loss": 0.0036, "step": 116 } ], "logging_steps": 1, "max_steps": 116, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3075543649878016e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }