{
  "best_metric": 0.07799232006072998,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.07573247503194964,
  "eval_steps": 100,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007573247503194964,
      "grad_norm": 0.9691388607025146,
      "learning_rate": 2e-05,
      "loss": 1.3196,
      "step": 1
    },
    {
      "epoch": 0.0007573247503194964,
      "eval_loss": 1.60433828830719,
      "eval_runtime": 661.4874,
      "eval_samples_per_second": 6.724,
      "eval_steps_per_second": 1.681,
      "step": 1
    },
    {
      "epoch": 0.0015146495006389928,
      "grad_norm": 1.2349046468734741,
      "learning_rate": 4e-05,
      "loss": 1.5521,
      "step": 2
    },
    {
      "epoch": 0.002271974250958489,
      "grad_norm": 1.330872654914856,
      "learning_rate": 6e-05,
      "loss": 1.4937,
      "step": 3
    },
    {
      "epoch": 0.0030292990012779856,
      "grad_norm": 1.3608745336532593,
      "learning_rate": 8e-05,
      "loss": 1.5829,
      "step": 4
    },
    {
      "epoch": 0.003786623751597482,
      "grad_norm": 1.3821661472320557,
      "learning_rate": 0.0001,
      "loss": 1.4121,
      "step": 5
    },
    {
      "epoch": 0.004543948501916978,
      "grad_norm": 1.752526879310608,
      "learning_rate": 0.00012,
      "loss": 1.353,
      "step": 6
    },
    {
      "epoch": 0.005301273252236474,
      "grad_norm": 1.3683583736419678,
      "learning_rate": 0.00014,
      "loss": 1.2337,
      "step": 7
    },
    {
      "epoch": 0.006058598002555971,
      "grad_norm": 1.4449743032455444,
      "learning_rate": 0.00016,
      "loss": 0.9753,
      "step": 8
    },
    {
      "epoch": 0.006815922752875467,
      "grad_norm": 1.432096242904663,
      "learning_rate": 0.00018,
      "loss": 0.8742,
      "step": 9
    },
    {
      "epoch": 0.007573247503194964,
      "grad_norm": 1.2310497760772705,
      "learning_rate": 0.0002,
      "loss": 0.7185,
      "step": 10
    },
    {
      "epoch": 0.00833057225351446,
      "grad_norm": 1.158161997795105,
      "learning_rate": 0.00019996203070249516,
      "loss": 0.633,
      "step": 11
    },
    {
      "epoch": 0.009087897003833957,
      "grad_norm": 1.1543669700622559,
      "learning_rate": 0.00019984815164333163,
      "loss": 0.5801,
      "step": 12
    },
    {
      "epoch": 0.009845221754153453,
      "grad_norm": 0.9982590079307556,
      "learning_rate": 0.000199658449300667,
      "loss": 0.5354,
      "step": 13
    },
    {
      "epoch": 0.010602546504472949,
      "grad_norm": 0.8578788638114929,
      "learning_rate": 0.00019939306773179497,
      "loss": 0.5006,
      "step": 14
    },
    {
      "epoch": 0.011359871254792446,
      "grad_norm": 0.5724056363105774,
      "learning_rate": 0.00019905220846375032,
      "loss": 0.4145,
      "step": 15
    },
    {
      "epoch": 0.012117196005111942,
      "grad_norm": 0.5412246584892273,
      "learning_rate": 0.00019863613034027224,
      "loss": 0.4199,
      "step": 16
    },
    {
      "epoch": 0.012874520755431438,
      "grad_norm": 0.4610505700111389,
      "learning_rate": 0.0001981451493252418,
      "loss": 0.4001,
      "step": 17
    },
    {
      "epoch": 0.013631845505750934,
      "grad_norm": 0.47394484281539917,
      "learning_rate": 0.00019757963826274357,
      "loss": 0.3721,
      "step": 18
    },
    {
      "epoch": 0.014389170256070432,
      "grad_norm": 0.43060505390167236,
      "learning_rate": 0.00019694002659393305,
      "loss": 0.3345,
      "step": 19
    },
    {
      "epoch": 0.015146495006389928,
      "grad_norm": 0.46104079484939575,
      "learning_rate": 0.00019622680003092503,
      "loss": 0.4648,
      "step": 20
    },
    {
      "epoch": 0.015903819756709424,
      "grad_norm": 0.38638371229171753,
      "learning_rate": 0.00019544050018795075,
      "loss": 0.2985,
      "step": 21
    },
    {
      "epoch": 0.01666114450702892,
      "grad_norm": 0.4296252727508545,
      "learning_rate": 0.00019458172417006347,
      "loss": 0.2837,
      "step": 22
    },
    {
      "epoch": 0.017418469257348416,
      "grad_norm": 0.4201738238334656,
      "learning_rate": 0.0001936511241197055,
      "loss": 0.2629,
      "step": 23
    },
    {
      "epoch": 0.018175794007667914,
      "grad_norm": 0.46103158593177795,
      "learning_rate": 0.00019264940672148018,
      "loss": 0.2894,
      "step": 24
    },
    {
      "epoch": 0.01893311875798741,
      "grad_norm": 0.36362382769584656,
      "learning_rate": 0.00019157733266550575,
      "loss": 0.2094,
      "step": 25
    },
    {
      "epoch": 0.019690443508306905,
      "grad_norm": 0.42857611179351807,
      "learning_rate": 0.00019043571606975777,
      "loss": 0.2518,
      "step": 26
    },
    {
      "epoch": 0.020447768258626403,
      "grad_norm": 0.7166701555252075,
      "learning_rate": 0.0001892254238618394,
      "loss": 0.2777,
      "step": 27
    },
    {
      "epoch": 0.021205093008945897,
      "grad_norm": 0.3727664053440094,
      "learning_rate": 0.0001879473751206489,
      "loss": 0.2025,
      "step": 28
    },
    {
      "epoch": 0.021962417759265395,
      "grad_norm": 0.3549087941646576,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.1908,
      "step": 29
    },
    {
      "epoch": 0.022719742509584893,
      "grad_norm": 0.33737561106681824,
      "learning_rate": 0.00018519194088383273,
      "loss": 0.1957,
      "step": 30
    },
    {
      "epoch": 0.023477067259904387,
      "grad_norm": 0.34675121307373047,
      "learning_rate": 0.00018371664782625287,
      "loss": 0.1625,
      "step": 31
    },
    {
      "epoch": 0.024234392010223885,
      "grad_norm": 0.3653319478034973,
      "learning_rate": 0.0001821777815225245,
      "loss": 0.1832,
      "step": 32
    },
    {
      "epoch": 0.02499171676054338,
      "grad_norm": 0.31412753462791443,
      "learning_rate": 0.00018057651056609784,
      "loss": 0.1717,
      "step": 33
    },
    {
      "epoch": 0.025749041510862877,
      "grad_norm": 0.3502964675426483,
      "learning_rate": 0.00017891405093963938,
      "loss": 0.159,
      "step": 34
    },
    {
      "epoch": 0.026506366261182374,
      "grad_norm": 0.3261137008666992,
      "learning_rate": 0.0001771916650916321,
      "loss": 0.1488,
      "step": 35
    },
    {
      "epoch": 0.02726369101150187,
      "grad_norm": 0.31498923897743225,
      "learning_rate": 0.00017541066097768963,
      "loss": 0.156,
      "step": 36
    },
    {
      "epoch": 0.028021015761821366,
      "grad_norm": 0.3408859670162201,
      "learning_rate": 0.00017357239106731317,
      "loss": 0.1598,
      "step": 37
    },
    {
      "epoch": 0.028778340512140864,
      "grad_norm": 0.324367880821228,
      "learning_rate": 0.00017167825131684513,
      "loss": 0.145,
      "step": 38
    },
    {
      "epoch": 0.029535665262460358,
      "grad_norm": 0.31265532970428467,
      "learning_rate": 0.00016972968010939954,
      "loss": 0.1514,
      "step": 39
    },
    {
      "epoch": 0.030292990012779856,
      "grad_norm": 0.34943684935569763,
      "learning_rate": 0.00016772815716257412,
      "loss": 0.1219,
      "step": 40
    },
    {
      "epoch": 0.03105031476309935,
      "grad_norm": 0.3439270853996277,
      "learning_rate": 0.00016567520240477344,
      "loss": 0.135,
      "step": 41
    },
    {
      "epoch": 0.03180763951341885,
      "grad_norm": 0.29284727573394775,
      "learning_rate": 0.00016357237482099684,
      "loss": 0.1091,
      "step": 42
    },
    {
      "epoch": 0.032564964263738345,
      "grad_norm": 0.3866511881351471,
      "learning_rate": 0.0001614212712689668,
      "loss": 0.1463,
      "step": 43
    },
    {
      "epoch": 0.03332228901405784,
      "grad_norm": 0.35192635655403137,
      "learning_rate": 0.00015922352526649803,
      "loss": 0.1351,
      "step": 44
    },
    {
      "epoch": 0.034079613764377334,
      "grad_norm": 0.3105197846889496,
      "learning_rate": 0.00015698080575102661,
      "loss": 0.125,
      "step": 45
    },
    {
      "epoch": 0.03483693851469683,
      "grad_norm": 0.4554622173309326,
      "learning_rate": 0.00015469481581224272,
      "loss": 0.1594,
      "step": 46
    },
    {
      "epoch": 0.03559426326501633,
      "grad_norm": 0.34930619597435,
      "learning_rate": 0.00015236729139878782,
      "loss": 0.1222,
      "step": 47
    },
    {
      "epoch": 0.03635158801533583,
      "grad_norm": 0.35271599888801575,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.116,
      "step": 48
    },
    {
      "epoch": 0.037108912765655325,
      "grad_norm": 0.29121342301368713,
      "learning_rate": 0.00014759473930370736,
      "loss": 0.1188,
      "step": 49
    },
    {
      "epoch": 0.03786623751597482,
      "grad_norm": 0.320047527551651,
      "learning_rate": 0.00014515333583108896,
      "loss": 0.1343,
      "step": 50
    },
    {
      "epoch": 0.03862356226629431,
      "grad_norm": 0.38673272728919983,
      "learning_rate": 0.00014267764354964038,
      "loss": 0.1142,
      "step": 51
    },
    {
      "epoch": 0.03938088701661381,
      "grad_norm": 0.5377465486526489,
      "learning_rate": 0.00014016954246529696,
      "loss": 0.3442,
      "step": 52
    },
    {
      "epoch": 0.04013821176693331,
      "grad_norm": 0.33489689230918884,
      "learning_rate": 0.00013763093719478358,
      "loss": 0.113,
      "step": 53
    },
    {
      "epoch": 0.040895536517252806,
      "grad_norm": 0.29426538944244385,
      "learning_rate": 0.00013506375551927547,
      "loss": 0.092,
      "step": 54
    },
    {
      "epoch": 0.041652861267572304,
      "grad_norm": 0.2807617485523224,
      "learning_rate": 0.00013246994692046836,
      "loss": 0.1682,
      "step": 55
    },
    {
      "epoch": 0.042410186017891795,
      "grad_norm": 0.32574039697647095,
      "learning_rate": 0.00012985148110016947,
      "loss": 0.1055,
      "step": 56
    },
    {
      "epoch": 0.04316751076821129,
      "grad_norm": 0.37014421820640564,
      "learning_rate": 0.00012721034648453353,
      "loss": 0.1322,
      "step": 57
    },
    {
      "epoch": 0.04392483551853079,
      "grad_norm": 0.2937864065170288,
      "learning_rate": 0.00012454854871407994,
      "loss": 0.0958,
      "step": 58
    },
    {
      "epoch": 0.04468216026885029,
      "grad_norm": 0.2736242711544037,
      "learning_rate": 0.0001218681091206376,
      "loss": 0.0937,
      "step": 59
    },
    {
      "epoch": 0.045439485019169785,
      "grad_norm": 0.2477613240480423,
      "learning_rate": 0.00011917106319237386,
      "loss": 0.0855,
      "step": 60
    },
    {
      "epoch": 0.046196809769489276,
      "grad_norm": 0.34489548206329346,
      "learning_rate": 0.00011645945902807341,
      "loss": 0.0924,
      "step": 61
    },
    {
      "epoch": 0.046954134519808774,
      "grad_norm": 0.28233641386032104,
      "learning_rate": 0.00011373535578184082,
      "loss": 0.0963,
      "step": 62
    },
    {
      "epoch": 0.04771145927012827,
      "grad_norm": 0.259147584438324,
      "learning_rate": 0.00011100082209940795,
      "loss": 0.0803,
      "step": 63
    },
    {
      "epoch": 0.04846878402044777,
      "grad_norm": 0.2993817627429962,
      "learning_rate": 0.00010825793454723325,
      "loss": 0.1052,
      "step": 64
    },
    {
      "epoch": 0.04922610877076727,
      "grad_norm": 0.30414098501205444,
      "learning_rate": 0.00010550877603558655,
      "loss": 0.0841,
      "step": 65
    },
    {
      "epoch": 0.04998343352108676,
      "grad_norm": 0.39788779616355896,
      "learning_rate": 0.00010275543423681621,
      "loss": 0.0723,
      "step": 66
    },
    {
      "epoch": 0.050740758271406255,
      "grad_norm": 0.28072524070739746,
      "learning_rate": 0.0001,
      "loss": 0.0866,
      "step": 67
    },
    {
      "epoch": 0.05149808302172575,
      "grad_norm": 0.2475721836090088,
      "learning_rate": 9.724456576318381e-05,
      "loss": 0.089,
      "step": 68
    },
    {
      "epoch": 0.05225540777204525,
      "grad_norm": 0.4116728901863098,
      "learning_rate": 9.449122396441345e-05,
      "loss": 0.3701,
      "step": 69
    },
    {
      "epoch": 0.05301273252236475,
      "grad_norm": 0.3344607353210449,
      "learning_rate": 9.174206545276677e-05,
      "loss": 0.1114,
      "step": 70
    },
    {
      "epoch": 0.053770057272684246,
      "grad_norm": 0.27849143743515015,
      "learning_rate": 8.899917790059208e-05,
      "loss": 0.0977,
      "step": 71
    },
    {
      "epoch": 0.05452738202300374,
      "grad_norm": 0.3085162341594696,
      "learning_rate": 8.626464421815919e-05,
      "loss": 0.0901,
      "step": 72
    },
    {
      "epoch": 0.055284706773323235,
      "grad_norm": 0.28304585814476013,
      "learning_rate": 8.35405409719266e-05,
      "loss": 0.0881,
      "step": 73
    },
    {
      "epoch": 0.05604203152364273,
      "grad_norm": 0.31556403636932373,
      "learning_rate": 8.082893680762619e-05,
      "loss": 0.0758,
      "step": 74
    },
    {
      "epoch": 0.05679935627396223,
      "grad_norm": 0.25185614824295044,
      "learning_rate": 7.813189087936243e-05,
      "loss": 0.0932,
      "step": 75
    },
    {
      "epoch": 0.05755668102428173,
      "grad_norm": 0.31156933307647705,
      "learning_rate": 7.54514512859201e-05,
      "loss": 0.0927,
      "step": 76
    },
    {
      "epoch": 0.05831400577460122,
      "grad_norm": 0.29737532138824463,
      "learning_rate": 7.278965351546648e-05,
      "loss": 0.0872,
      "step": 77
    },
    {
      "epoch": 0.059071330524920716,
      "grad_norm": 0.3499886393547058,
      "learning_rate": 7.014851889983057e-05,
      "loss": 0.0822,
      "step": 78
    },
    {
      "epoch": 0.059828655275240214,
      "grad_norm": 0.31357136368751526,
      "learning_rate": 6.753005307953167e-05,
      "loss": 0.0902,
      "step": 79
    },
    {
      "epoch": 0.06058598002555971,
      "grad_norm": 0.2909204065799713,
      "learning_rate": 6.493624448072457e-05,
      "loss": 0.0752,
      "step": 80
    },
    {
      "epoch": 0.06134330477587921,
      "grad_norm": 0.2916364371776581,
      "learning_rate": 6.236906280521646e-05,
      "loss": 0.0905,
      "step": 81
    },
    {
      "epoch": 0.0621006295261987,
      "grad_norm": 0.3733891546726227,
      "learning_rate": 5.983045753470308e-05,
      "loss": 0.0775,
      "step": 82
    },
    {
      "epoch": 0.0628579542765182,
      "grad_norm": 0.23621852695941925,
      "learning_rate": 5.732235645035964e-05,
      "loss": 0.075,
      "step": 83
    },
    {
      "epoch": 0.0636152790268377,
      "grad_norm": 0.2937829792499542,
      "learning_rate": 5.484666416891109e-05,
      "loss": 0.1098,
      "step": 84
    },
    {
      "epoch": 0.06437260377715719,
      "grad_norm": 0.24242699146270752,
      "learning_rate": 5.240526069629265e-05,
      "loss": 0.0769,
      "step": 85
    },
    {
      "epoch": 0.06512992852747669,
      "grad_norm": 0.47885003685951233,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.225,
      "step": 86
    },
    {
      "epoch": 0.06588725327779618,
      "grad_norm": 0.2480865865945816,
      "learning_rate": 4.763270860121222e-05,
      "loss": 0.0739,
      "step": 87
    },
    {
      "epoch": 0.06664457802811569,
      "grad_norm": 0.33025848865509033,
      "learning_rate": 4.530518418775733e-05,
      "loss": 0.0982,
      "step": 88
    },
    {
      "epoch": 0.06740190277843518,
      "grad_norm": 0.26735973358154297,
      "learning_rate": 4.301919424897338e-05,
      "loss": 0.0774,
      "step": 89
    },
    {
      "epoch": 0.06815922752875467,
      "grad_norm": 0.3508649170398712,
      "learning_rate": 4.077647473350201e-05,
      "loss": 0.0848,
      "step": 90
    },
    {
      "epoch": 0.06891655227907417,
      "grad_norm": 0.2726826071739197,
      "learning_rate": 3.857872873103322e-05,
      "loss": 0.0847,
      "step": 91
    },
    {
      "epoch": 0.06967387702939366,
      "grad_norm": 0.2948499023914337,
      "learning_rate": 3.642762517900322e-05,
      "loss": 0.076,
      "step": 92
    },
    {
      "epoch": 0.07043120177971317,
      "grad_norm": 0.28020283579826355,
      "learning_rate": 3.4324797595226565e-05,
      "loss": 0.0618,
      "step": 93
    },
    {
      "epoch": 0.07118852653003266,
      "grad_norm": 0.293235182762146,
      "learning_rate": 3.227184283742591e-05,
      "loss": 0.0931,
      "step": 94
    },
    {
      "epoch": 0.07194585128035215,
      "grad_norm": 0.22709383070468903,
      "learning_rate": 3.0270319890600462e-05,
      "loss": 0.0661,
      "step": 95
    },
    {
      "epoch": 0.07270317603067165,
      "grad_norm": 0.2769714593887329,
      "learning_rate": 2.8321748683154893e-05,
      "loss": 0.075,
      "step": 96
    },
    {
      "epoch": 0.07346050078099114,
      "grad_norm": 0.2676548957824707,
      "learning_rate": 2.6427608932686843e-05,
      "loss": 0.0885,
      "step": 97
    },
    {
      "epoch": 0.07421782553131065,
      "grad_norm": 0.2752656936645508,
      "learning_rate": 2.4589339022310386e-05,
      "loss": 0.0751,
      "step": 98
    },
    {
      "epoch": 0.07497515028163014,
      "grad_norm": 0.32387983798980713,
      "learning_rate": 2.2808334908367914e-05,
      "loss": 0.0886,
      "step": 99
    },
    {
      "epoch": 0.07573247503194964,
      "grad_norm": 0.3434562683105469,
      "learning_rate": 2.1085949060360654e-05,
      "loss": 0.1105,
      "step": 100
    },
    {
      "epoch": 0.07573247503194964,
      "eval_loss": 0.07799232006072998,
      "eval_runtime": 665.6799,
      "eval_samples_per_second": 6.682,
      "eval_steps_per_second": 1.67,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 124,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.025880320459407e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}