|
{ |
|
"best_metric": 0.0009016587864607573, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-100", |
|
"epoch": 1.0021598272138228, |
|
"eval_steps": 50, |
|
"global_step": 116, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008639308855291577, |
|
"grad_norm": 0.5806592106819153, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0374, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008639308855291577, |
|
"eval_loss": 0.15225130319595337, |
|
"eval_runtime": 20.0655, |
|
"eval_samples_per_second": 9.718, |
|
"eval_steps_per_second": 2.442, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.017278617710583154, |
|
"grad_norm": 0.6535840630531311, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0344, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02591792656587473, |
|
"grad_norm": 0.4030662775039673, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0354, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03455723542116631, |
|
"grad_norm": 0.1255313754081726, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0286, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04319654427645788, |
|
"grad_norm": 0.15026195347309113, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0283, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05183585313174946, |
|
"grad_norm": 0.34920528531074524, |
|
"learning_rate": 6e-05, |
|
"loss": 0.0299, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.06047516198704104, |
|
"grad_norm": 0.16989673674106598, |
|
"learning_rate": 7e-05, |
|
"loss": 0.0313, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.06911447084233262, |
|
"grad_norm": 0.1835932433605194, |
|
"learning_rate": 8e-05, |
|
"loss": 0.0256, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07775377969762419, |
|
"grad_norm": 0.2971993088722229, |
|
"learning_rate": 9e-05, |
|
"loss": 0.0199, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.08639308855291576, |
|
"grad_norm": 0.1404445469379425, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09503239740820735, |
|
"grad_norm": 0.36361753940582275, |
|
"learning_rate": 9.997804182543973e-05, |
|
"loss": 0.01, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.10367170626349892, |
|
"grad_norm": 0.4649268388748169, |
|
"learning_rate": 9.991218658821608e-05, |
|
"loss": 0.0044, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.11231101511879049, |
|
"grad_norm": 0.08656516671180725, |
|
"learning_rate": 9.980249213076084e-05, |
|
"loss": 0.0023, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.12095032397408208, |
|
"grad_norm": 0.1461178958415985, |
|
"learning_rate": 9.964905480067586e-05, |
|
"loss": 0.0051, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.12958963282937366, |
|
"grad_norm": 0.10659543424844742, |
|
"learning_rate": 9.94520093661082e-05, |
|
"loss": 0.0028, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.13822894168466524, |
|
"grad_norm": 0.06367552280426025, |
|
"learning_rate": 9.921152889737984e-05, |
|
"loss": 0.001, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.1468682505399568, |
|
"grad_norm": 0.07228197157382965, |
|
"learning_rate": 9.89278246149752e-05, |
|
"loss": 0.0008, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.15550755939524838, |
|
"grad_norm": 0.14873471856117249, |
|
"learning_rate": 9.860114570402054e-05, |
|
"loss": 0.0028, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.16414686825053995, |
|
"grad_norm": 0.20919160544872284, |
|
"learning_rate": 9.823177909541794e-05, |
|
"loss": 0.0062, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.17278617710583152, |
|
"grad_norm": 0.11298267543315887, |
|
"learning_rate": 9.782004921382612e-05, |
|
"loss": 0.0006, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18142548596112312, |
|
"grad_norm": 0.02028828300535679, |
|
"learning_rate": 9.736631769270957e-05, |
|
"loss": 0.0008, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1900647948164147, |
|
"grad_norm": 0.17469586431980133, |
|
"learning_rate": 9.687098305670605e-05, |
|
"loss": 0.0036, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.19870410367170627, |
|
"grad_norm": 0.06677763164043427, |
|
"learning_rate": 9.633448037159167e-05, |
|
"loss": 0.0011, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.20734341252699784, |
|
"grad_norm": 0.08885731548070908, |
|
"learning_rate": 9.575728086215092e-05, |
|
"loss": 0.002, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.2159827213822894, |
|
"grad_norm": 0.07344262301921844, |
|
"learning_rate": 9.513989149828718e-05, |
|
"loss": 0.0016, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.22462203023758098, |
|
"grad_norm": 0.012290475890040398, |
|
"learning_rate": 9.448285454973738e-05, |
|
"loss": 0.0006, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.23326133909287258, |
|
"grad_norm": 0.2666710913181305, |
|
"learning_rate": 9.378674710978185e-05, |
|
"loss": 0.001, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.24190064794816415, |
|
"grad_norm": 0.15356595814228058, |
|
"learning_rate": 9.305218058836778e-05, |
|
"loss": 0.0012, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.2505399568034557, |
|
"grad_norm": 0.05029755458235741, |
|
"learning_rate": 9.22798001750913e-05, |
|
"loss": 0.0158, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.2591792656587473, |
|
"grad_norm": 0.07061488926410675, |
|
"learning_rate": 9.14702842725101e-05, |
|
"loss": 0.0148, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2678185745140389, |
|
"grad_norm": 0.05950339511036873, |
|
"learning_rate": 9.062434390028407e-05, |
|
"loss": 0.0154, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.27645788336933047, |
|
"grad_norm": 0.045820388942956924, |
|
"learning_rate": 8.974272207066767e-05, |
|
"loss": 0.0116, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.28509719222462204, |
|
"grad_norm": 0.03941355645656586, |
|
"learning_rate": 8.882619313590212e-05, |
|
"loss": 0.0072, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.2937365010799136, |
|
"grad_norm": 0.02324046567082405, |
|
"learning_rate": 8.787556210808101e-05, |
|
"loss": 0.0012, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.3023758099352052, |
|
"grad_norm": 0.053608641028404236, |
|
"learning_rate": 8.689166395208636e-05, |
|
"loss": 0.0023, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.31101511879049676, |
|
"grad_norm": 0.062077559530735016, |
|
"learning_rate": 8.587536285221656e-05, |
|
"loss": 0.0017, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.31965442764578833, |
|
"grad_norm": 0.09231416881084442, |
|
"learning_rate": 8.482755145314986e-05, |
|
"loss": 0.0018, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3282937365010799, |
|
"grad_norm": 0.08266697824001312, |
|
"learning_rate": 8.374915007591053e-05, |
|
"loss": 0.0021, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.3369330453563715, |
|
"grad_norm": 0.019569700583815575, |
|
"learning_rate": 8.264110590952609e-05, |
|
"loss": 0.0005, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.34557235421166305, |
|
"grad_norm": 0.07023479044437408, |
|
"learning_rate": 8.150439217908556e-05, |
|
"loss": 0.0019, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3542116630669546, |
|
"grad_norm": 0.029872030019760132, |
|
"learning_rate": 8.034000729092968e-05, |
|
"loss": 0.0011, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.36285097192224625, |
|
"grad_norm": 0.18075761198997498, |
|
"learning_rate": 7.91489739557236e-05, |
|
"loss": 0.0011, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3714902807775378, |
|
"grad_norm": 0.010835711844265461, |
|
"learning_rate": 7.793233829018262e-05, |
|
"loss": 0.0003, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.3801295896328294, |
|
"grad_norm": 0.010719070211052895, |
|
"learning_rate": 7.669116889823955e-05, |
|
"loss": 0.0004, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.38876889848812096, |
|
"grad_norm": 0.015587416477501392, |
|
"learning_rate": 7.542655593246103e-05, |
|
"loss": 0.0004, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.39740820734341253, |
|
"grad_norm": 0.0032320828177034855, |
|
"learning_rate": 7.413961013653726e-05, |
|
"loss": 0.0002, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.4060475161987041, |
|
"grad_norm": 0.24344860017299652, |
|
"learning_rate": 7.283146186968565e-05, |
|
"loss": 0.0009, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.4146868250539957, |
|
"grad_norm": 0.1430875062942505, |
|
"learning_rate": 7.150326011382604e-05, |
|
"loss": 0.0021, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.42332613390928725, |
|
"grad_norm": 0.01567676290869713, |
|
"learning_rate": 7.015617146439863e-05, |
|
"loss": 0.0002, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.4319654427645788, |
|
"grad_norm": 0.11232331395149231, |
|
"learning_rate": 6.879137910571191e-05, |
|
"loss": 0.004, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4319654427645788, |
|
"eval_loss": 0.0022863983176648617, |
|
"eval_runtime": 20.6121, |
|
"eval_samples_per_second": 9.46, |
|
"eval_steps_per_second": 2.377, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4406047516198704, |
|
"grad_norm": 0.023258408531546593, |
|
"learning_rate": 6.741008177171995e-05, |
|
"loss": 0.0002, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.44924406047516197, |
|
"grad_norm": 0.015541836619377136, |
|
"learning_rate": 6.601349269314188e-05, |
|
"loss": 0.0004, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.45788336933045354, |
|
"grad_norm": 0.004774713423103094, |
|
"learning_rate": 6.460283853184879e-05, |
|
"loss": 0.0003, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.46652267818574517, |
|
"grad_norm": 0.1771538257598877, |
|
"learning_rate": 6.317935830345338e-05, |
|
"loss": 0.0037, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.47516198704103674, |
|
"grad_norm": 0.10851258039474487, |
|
"learning_rate": 6.174430228904919e-05, |
|
"loss": 0.0021, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.4838012958963283, |
|
"grad_norm": 0.029805807396769524, |
|
"learning_rate": 6.029893093705492e-05, |
|
"loss": 0.0004, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.4924406047516199, |
|
"grad_norm": 0.05265142768621445, |
|
"learning_rate": 5.884451375612865e-05, |
|
"loss": 0.0072, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.5010799136069114, |
|
"grad_norm": 0.06926386058330536, |
|
"learning_rate": 5.738232820012407e-05, |
|
"loss": 0.0062, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.509719222462203, |
|
"grad_norm": 0.08675476908683777, |
|
"learning_rate": 5.5913658546068295e-05, |
|
"loss": 0.0054, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.5183585313174947, |
|
"grad_norm": 0.03324931487441063, |
|
"learning_rate": 5.4439794766146746e-05, |
|
"loss": 0.0035, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5269978401727862, |
|
"grad_norm": 0.07478857040405273, |
|
"learning_rate": 5.296203139468572e-05, |
|
"loss": 0.0024, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.5356371490280778, |
|
"grad_norm": 0.08285272121429443, |
|
"learning_rate": 5.148166639112799e-05, |
|
"loss": 0.0018, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.5442764578833693, |
|
"grad_norm": 0.023794766515493393, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0007, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.5529157667386609, |
|
"grad_norm": 0.025584915652871132, |
|
"learning_rate": 4.851833360887201e-05, |
|
"loss": 0.0005, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.5615550755939525, |
|
"grad_norm": 0.04154638200998306, |
|
"learning_rate": 4.703796860531429e-05, |
|
"loss": 0.0015, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5701943844492441, |
|
"grad_norm": 0.017531629651784897, |
|
"learning_rate": 4.5560205233853266e-05, |
|
"loss": 0.0004, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.5788336933045356, |
|
"grad_norm": 0.046652115881443024, |
|
"learning_rate": 4.4086341453931716e-05, |
|
"loss": 0.0005, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.5874730021598272, |
|
"grad_norm": 0.21531158685684204, |
|
"learning_rate": 4.2617671799875944e-05, |
|
"loss": 0.0018, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.5961123110151187, |
|
"grad_norm": 0.11707913130521774, |
|
"learning_rate": 4.115548624387137e-05, |
|
"loss": 0.0013, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.6047516198704104, |
|
"grad_norm": 0.008791811764240265, |
|
"learning_rate": 3.970106906294509e-05, |
|
"loss": 0.0003, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6133909287257019, |
|
"grad_norm": 0.017406433820724487, |
|
"learning_rate": 3.825569771095082e-05, |
|
"loss": 0.0003, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.6220302375809935, |
|
"grad_norm": 0.03273649513721466, |
|
"learning_rate": 3.682064169654663e-05, |
|
"loss": 0.0008, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.6306695464362851, |
|
"grad_norm": 0.014443274587392807, |
|
"learning_rate": 3.539716146815122e-05, |
|
"loss": 0.0003, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.6393088552915767, |
|
"grad_norm": 0.0037852220702916384, |
|
"learning_rate": 3.3986507306858125e-05, |
|
"loss": 0.0002, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.6479481641468683, |
|
"grad_norm": 0.006565776187926531, |
|
"learning_rate": 3.258991822828007e-05, |
|
"loss": 0.0002, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6565874730021598, |
|
"grad_norm": 0.0011975999223068357, |
|
"learning_rate": 3.12086208942881e-05, |
|
"loss": 0.0001, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.6652267818574514, |
|
"grad_norm": 0.021672353148460388, |
|
"learning_rate": 2.98438285356014e-05, |
|
"loss": 0.0003, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.673866090712743, |
|
"grad_norm": 0.010406700894236565, |
|
"learning_rate": 2.8496739886173995e-05, |
|
"loss": 0.0004, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.6825053995680346, |
|
"grad_norm": 0.13345706462860107, |
|
"learning_rate": 2.716853813031435e-05, |
|
"loss": 0.0008, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.6911447084233261, |
|
"grad_norm": 0.0036847260780632496, |
|
"learning_rate": 2.5860389863462765e-05, |
|
"loss": 0.0002, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6997840172786177, |
|
"grad_norm": 0.031118186190724373, |
|
"learning_rate": 2.4573444067538986e-05, |
|
"loss": 0.0002, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.7084233261339092, |
|
"grad_norm": 0.1681869924068451, |
|
"learning_rate": 2.3308831101760486e-05, |
|
"loss": 0.0022, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.7170626349892009, |
|
"grad_norm": 0.06865206360816956, |
|
"learning_rate": 2.2067661709817383e-05, |
|
"loss": 0.0009, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.7257019438444925, |
|
"grad_norm": 0.15822599828243256, |
|
"learning_rate": 2.0851026044276406e-05, |
|
"loss": 0.0014, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.734341252699784, |
|
"grad_norm": 0.0194676723331213, |
|
"learning_rate": 1.9659992709070345e-05, |
|
"loss": 0.002, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7429805615550756, |
|
"grad_norm": 0.031017431989312172, |
|
"learning_rate": 1.849560782091445e-05, |
|
"loss": 0.0021, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.7516198704103672, |
|
"grad_norm": 0.03133060783147812, |
|
"learning_rate": 1.7358894090473925e-05, |
|
"loss": 0.0023, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.7602591792656588, |
|
"grad_norm": 0.03962257131934166, |
|
"learning_rate": 1.6250849924089484e-05, |
|
"loss": 0.0018, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.7688984881209503, |
|
"grad_norm": 0.027627507224678993, |
|
"learning_rate": 1.5172448546850165e-05, |
|
"loss": 0.0015, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.7775377969762419, |
|
"grad_norm": 0.021091526374220848, |
|
"learning_rate": 1.4124637147783432e-05, |
|
"loss": 0.0005, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7861771058315334, |
|
"grad_norm": 0.01683308742940426, |
|
"learning_rate": 1.3108336047913633e-05, |
|
"loss": 0.0005, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.7948164146868251, |
|
"grad_norm": 0.033081360161304474, |
|
"learning_rate": 1.2124437891918993e-05, |
|
"loss": 0.0004, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.8034557235421166, |
|
"grad_norm": 0.023177431896328926, |
|
"learning_rate": 1.1173806864097886e-05, |
|
"loss": 0.0008, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.8120950323974082, |
|
"grad_norm": 0.03851751610636711, |
|
"learning_rate": 1.0257277929332332e-05, |
|
"loss": 0.0005, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.8207343412526998, |
|
"grad_norm": 0.07173438370227814, |
|
"learning_rate": 9.375656099715934e-06, |
|
"loss": 0.0004, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8293736501079914, |
|
"grad_norm": 0.015922777354717255, |
|
"learning_rate": 8.529715727489912e-06, |
|
"loss": 0.0006, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.838012958963283, |
|
"grad_norm": 0.02879387140274048, |
|
"learning_rate": 7.720199824908692e-06, |
|
"loss": 0.0004, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.8466522678185745, |
|
"grad_norm": 0.0059148469008505344, |
|
"learning_rate": 6.947819411632223e-06, |
|
"loss": 0.0007, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.8552915766738661, |
|
"grad_norm": 0.006823898293077946, |
|
"learning_rate": 6.213252890218163e-06, |
|
"loss": 0.0002, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.8639308855291576, |
|
"grad_norm": 0.05277214199304581, |
|
"learning_rate": 5.51714545026264e-06, |
|
"loss": 0.0005, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8639308855291576, |
|
"eval_loss": 0.0009016587864607573, |
|
"eval_runtime": 20.3415, |
|
"eval_samples_per_second": 9.586, |
|
"eval_steps_per_second": 2.409, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8725701943844493, |
|
"grad_norm": 0.012060822919011116, |
|
"learning_rate": 4.860108501712824e-06, |
|
"loss": 0.0003, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.8812095032397408, |
|
"grad_norm": 0.060871824622154236, |
|
"learning_rate": 4.242719137849077e-06, |
|
"loss": 0.0007, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.8898488120950324, |
|
"grad_norm": 0.04358503967523575, |
|
"learning_rate": 3.6655196284083317e-06, |
|
"loss": 0.0006, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.8984881209503239, |
|
"grad_norm": 0.015056795440614223, |
|
"learning_rate": 3.1290169432939553e-06, |
|
"loss": 0.0003, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.9071274298056156, |
|
"grad_norm": 0.06826309114694595, |
|
"learning_rate": 2.6336823072904304e-06, |
|
"loss": 0.0019, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.9157667386609071, |
|
"grad_norm": 0.004253576509654522, |
|
"learning_rate": 2.179950786173879e-06, |
|
"loss": 0.0002, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.9244060475161987, |
|
"grad_norm": 0.027383577078580856, |
|
"learning_rate": 1.7682209045820686e-06, |
|
"loss": 0.0004, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.9330453563714903, |
|
"grad_norm": 0.01728072762489319, |
|
"learning_rate": 1.3988542959794627e-06, |
|
"loss": 0.0003, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.9416846652267818, |
|
"grad_norm": 0.11028740555047989, |
|
"learning_rate": 1.0721753850247984e-06, |
|
"loss": 0.0021, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.9503239740820735, |
|
"grad_norm": 0.007546401582658291, |
|
"learning_rate": 7.884711026201585e-07, |
|
"loss": 0.0002, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.958963282937365, |
|
"grad_norm": 0.011820383369922638, |
|
"learning_rate": 5.479906338917984e-07, |
|
"loss": 0.0002, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.9676025917926566, |
|
"grad_norm": 0.027329521253705025, |
|
"learning_rate": 3.5094519932415417e-07, |
|
"loss": 0.0002, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.9762419006479481, |
|
"grad_norm": 0.027211442589759827, |
|
"learning_rate": 1.975078692391552e-07, |
|
"loss": 0.0012, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.9848812095032398, |
|
"grad_norm": 0.013164684176445007, |
|
"learning_rate": 8.781341178393244e-08, |
|
"loss": 0.0004, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.9935205183585313, |
|
"grad_norm": 0.0681779533624649, |
|
"learning_rate": 2.1958174560282595e-08, |
|
"loss": 0.0021, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.0021598272138228, |
|
"grad_norm": 0.34717699885368347, |
|
"learning_rate": 0.0, |
|
"loss": 0.0036, |
|
"step": 116 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 116, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.3075543649878016e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|