|
{ |
|
"best_metric": 0.584774374961853, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.1970928800197093, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009854644000985464, |
|
"grad_norm": 0.4313327372074127, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0899, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0009854644000985464, |
|
"eval_loss": 0.9981184601783752, |
|
"eval_runtime": 158.8136, |
|
"eval_samples_per_second": 10.761, |
|
"eval_steps_per_second": 5.384, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001970928800197093, |
|
"grad_norm": 0.5051336884498596, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.3115, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0029563932002956393, |
|
"grad_norm": 0.40696489810943604, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3648, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003941857600394186, |
|
"grad_norm": 0.5555673241615295, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.4126, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.004927322000492732, |
|
"grad_norm": 0.6959850788116455, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.5118, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005912786400591279, |
|
"grad_norm": 0.6902092695236206, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6241, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.006898250800689825, |
|
"grad_norm": 0.5990986824035645, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.7635, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.007883715200788372, |
|
"grad_norm": 0.46151483058929443, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.7826, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.008869179600886918, |
|
"grad_norm": 0.40509724617004395, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8468, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.009854644000985464, |
|
"grad_norm": 0.4107632040977478, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.7862, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01084010840108401, |
|
"grad_norm": 0.4689662754535675, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.7878, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.011825572801182557, |
|
"grad_norm": 0.4455258250236511, |
|
"learning_rate": 4e-05, |
|
"loss": 0.8286, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.012811037201281104, |
|
"grad_norm": 0.4460776746273041, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.6505, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01379650160137965, |
|
"grad_norm": 0.47701597213745117, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.8125, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.014781966001478197, |
|
"grad_norm": 0.43874800205230713, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6508, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.015767430401576743, |
|
"grad_norm": 0.43464934825897217, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.7736, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01675289480167529, |
|
"grad_norm": 0.4389445185661316, |
|
"learning_rate": 5.666666666666667e-05, |
|
"loss": 0.7805, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.017738359201773836, |
|
"grad_norm": 0.33348941802978516, |
|
"learning_rate": 6e-05, |
|
"loss": 0.6069, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.018723823601872382, |
|
"grad_norm": 0.3531840443611145, |
|
"learning_rate": 6.333333333333333e-05, |
|
"loss": 0.6574, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01970928800197093, |
|
"grad_norm": 0.30243533849716187, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.6736, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.020694752402069475, |
|
"grad_norm": 0.32596221566200256, |
|
"learning_rate": 7e-05, |
|
"loss": 0.6822, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02168021680216802, |
|
"grad_norm": 0.3076384365558624, |
|
"learning_rate": 7.333333333333333e-05, |
|
"loss": 0.7694, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.022665681202266568, |
|
"grad_norm": 0.30813416838645935, |
|
"learning_rate": 7.666666666666667e-05, |
|
"loss": 0.6511, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.023651145602365115, |
|
"grad_norm": 0.3227630853652954, |
|
"learning_rate": 8e-05, |
|
"loss": 0.6856, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02463661000246366, |
|
"grad_norm": 0.3547810912132263, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.6536, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.025622074402562207, |
|
"grad_norm": 0.3056383430957794, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 0.7591, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.026607538802660754, |
|
"grad_norm": 0.33836042881011963, |
|
"learning_rate": 9e-05, |
|
"loss": 0.8528, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0275930032027593, |
|
"grad_norm": 0.3435473144054413, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.7285, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.028578467602857847, |
|
"grad_norm": 0.3219856917858124, |
|
"learning_rate": 9.666666666666667e-05, |
|
"loss": 0.7288, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.029563932002956393, |
|
"grad_norm": 0.3125074505805969, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7225, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03054939640305494, |
|
"grad_norm": 0.31725504994392395, |
|
"learning_rate": 9.999146252290264e-05, |
|
"loss": 0.7351, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.031534860803153486, |
|
"grad_norm": 0.3184851109981537, |
|
"learning_rate": 9.996585300715116e-05, |
|
"loss": 0.7631, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.032520325203252036, |
|
"grad_norm": 0.36127910017967224, |
|
"learning_rate": 9.99231801983717e-05, |
|
"loss": 0.7501, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.03350578960335058, |
|
"grad_norm": 0.32324203848838806, |
|
"learning_rate": 9.986345866928941e-05, |
|
"loss": 0.7823, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03449125400344913, |
|
"grad_norm": 0.3167076110839844, |
|
"learning_rate": 9.978670881475172e-05, |
|
"loss": 0.6496, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03547671840354767, |
|
"grad_norm": 0.3640748858451843, |
|
"learning_rate": 9.96929568447637e-05, |
|
"loss": 0.6557, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03646218280364622, |
|
"grad_norm": 0.40133535861968994, |
|
"learning_rate": 9.958223477553714e-05, |
|
"loss": 0.7987, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.037447647203744765, |
|
"grad_norm": 0.40022367238998413, |
|
"learning_rate": 9.94545804185573e-05, |
|
"loss": 0.7052, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.038433111603843315, |
|
"grad_norm": 0.4175143241882324, |
|
"learning_rate": 9.931003736767013e-05, |
|
"loss": 0.7733, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03941857600394186, |
|
"grad_norm": 0.5691938996315002, |
|
"learning_rate": 9.91486549841951e-05, |
|
"loss": 0.6385, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04040404040404041, |
|
"grad_norm": 0.36552998423576355, |
|
"learning_rate": 9.89704883800683e-05, |
|
"loss": 0.5661, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.04138950480413895, |
|
"grad_norm": 0.44288337230682373, |
|
"learning_rate": 9.877559839902184e-05, |
|
"loss": 0.7711, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0423749692042375, |
|
"grad_norm": 0.42461535334587097, |
|
"learning_rate": 9.85640515958057e-05, |
|
"loss": 0.7106, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04336043360433604, |
|
"grad_norm": 0.46416065096855164, |
|
"learning_rate": 9.833592021345937e-05, |
|
"loss": 0.7392, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.04434589800443459, |
|
"grad_norm": 0.46228498220443726, |
|
"learning_rate": 9.809128215864097e-05, |
|
"loss": 0.6193, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.045331362404533136, |
|
"grad_norm": 0.5916977524757385, |
|
"learning_rate": 9.783022097502204e-05, |
|
"loss": 0.9369, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.046316826804631686, |
|
"grad_norm": 0.509521484375, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.8942, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.04730229120473023, |
|
"grad_norm": 0.5575716495513916, |
|
"learning_rate": 9.725919140804099e-05, |
|
"loss": 0.7795, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04828775560482878, |
|
"grad_norm": 0.674028754234314, |
|
"learning_rate": 9.694941803075283e-05, |
|
"loss": 0.9203, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.04927322000492732, |
|
"grad_norm": 0.8474329113960266, |
|
"learning_rate": 9.662361147021779e-05, |
|
"loss": 0.9088, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04927322000492732, |
|
"eval_loss": 0.6253278255462646, |
|
"eval_runtime": 160.106, |
|
"eval_samples_per_second": 10.674, |
|
"eval_steps_per_second": 5.34, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05025868440502587, |
|
"grad_norm": 0.09365927428007126, |
|
"learning_rate": 9.628188298907782e-05, |
|
"loss": 0.0552, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.051244148805124415, |
|
"grad_norm": 0.13093718886375427, |
|
"learning_rate": 9.592434928729616e-05, |
|
"loss": 0.1879, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.052229613205222965, |
|
"grad_norm": 0.17437294125556946, |
|
"learning_rate": 9.555113246230442e-05, |
|
"loss": 0.23, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.05321507760532151, |
|
"grad_norm": 0.2906794250011444, |
|
"learning_rate": 9.516235996730645e-05, |
|
"loss": 0.3665, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.05420054200542006, |
|
"grad_norm": 0.2222241759300232, |
|
"learning_rate": 9.475816456775313e-05, |
|
"loss": 0.4059, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0551860064055186, |
|
"grad_norm": 0.24162358045578003, |
|
"learning_rate": 9.43386842960031e-05, |
|
"loss": 0.5642, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.05617147080561715, |
|
"grad_norm": 0.21459117531776428, |
|
"learning_rate": 9.39040624041849e-05, |
|
"loss": 0.6121, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.057156935205715693, |
|
"grad_norm": 0.2023555487394333, |
|
"learning_rate": 9.345444731527642e-05, |
|
"loss": 0.5059, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.05814239960581424, |
|
"grad_norm": 0.2224394828081131, |
|
"learning_rate": 9.298999257241863e-05, |
|
"loss": 0.6517, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.059127864005912786, |
|
"grad_norm": 0.24458082020282745, |
|
"learning_rate": 9.251085678648072e-05, |
|
"loss": 0.7442, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.060113328406011336, |
|
"grad_norm": 0.22274242341518402, |
|
"learning_rate": 9.201720358189464e-05, |
|
"loss": 0.6335, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.06109879280610988, |
|
"grad_norm": 0.2185131013393402, |
|
"learning_rate": 9.150920154077754e-05, |
|
"loss": 0.5323, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.06208425720620843, |
|
"grad_norm": 0.21939384937286377, |
|
"learning_rate": 9.098702414536107e-05, |
|
"loss": 0.6066, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.06306972160630697, |
|
"grad_norm": 0.20755380392074585, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 0.5819, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.06405518600640552, |
|
"grad_norm": 0.2545813024044037, |
|
"learning_rate": 8.9900861364012e-05, |
|
"loss": 0.7208, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06504065040650407, |
|
"grad_norm": 0.25121399760246277, |
|
"learning_rate": 8.933724690167417e-05, |
|
"loss": 0.8203, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.06602611480660261, |
|
"grad_norm": 0.23934347927570343, |
|
"learning_rate": 8.876019880555649e-05, |
|
"loss": 0.6853, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.06701157920670116, |
|
"grad_norm": 0.2448682188987732, |
|
"learning_rate": 8.816991413705516e-05, |
|
"loss": 0.8493, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0679970436067997, |
|
"grad_norm": 0.21752771735191345, |
|
"learning_rate": 8.756659447784368e-05, |
|
"loss": 0.4707, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.06898250800689826, |
|
"grad_norm": 0.2293497771024704, |
|
"learning_rate": 8.695044586103296e-05, |
|
"loss": 0.6304, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0699679724069968, |
|
"grad_norm": 0.24045786261558533, |
|
"learning_rate": 8.632167870081121e-05, |
|
"loss": 0.6314, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.07095343680709534, |
|
"grad_norm": 0.26281052827835083, |
|
"learning_rate": 8.568050772058762e-05, |
|
"loss": 0.619, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.07193890120719389, |
|
"grad_norm": 0.28175660967826843, |
|
"learning_rate": 8.502715187966455e-05, |
|
"loss": 0.7979, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.07292436560729244, |
|
"grad_norm": 0.2699892520904541, |
|
"learning_rate": 8.436183429846313e-05, |
|
"loss": 0.7732, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.07390983000739099, |
|
"grad_norm": 0.2922893166542053, |
|
"learning_rate": 8.368478218232787e-05, |
|
"loss": 0.8827, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07489529440748953, |
|
"grad_norm": 0.27122068405151367, |
|
"learning_rate": 8.299622674393614e-05, |
|
"loss": 0.6182, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.07588075880758807, |
|
"grad_norm": 0.28131553530693054, |
|
"learning_rate": 8.229640312433937e-05, |
|
"loss": 0.6974, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.07686622320768663, |
|
"grad_norm": 0.26899459958076477, |
|
"learning_rate": 8.158555031266254e-05, |
|
"loss": 0.6486, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.07785168760778517, |
|
"grad_norm": 0.31664565205574036, |
|
"learning_rate": 8.086391106448965e-05, |
|
"loss": 0.7727, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.07883715200788372, |
|
"grad_norm": 0.28200066089630127, |
|
"learning_rate": 8.013173181896283e-05, |
|
"loss": 0.643, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07982261640798226, |
|
"grad_norm": 0.3218248188495636, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 0.7016, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.08080808080808081, |
|
"grad_norm": 0.32966381311416626, |
|
"learning_rate": 7.863675700402526e-05, |
|
"loss": 0.7472, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.08179354520817936, |
|
"grad_norm": 0.31329721212387085, |
|
"learning_rate": 7.787447196714427e-05, |
|
"loss": 0.7025, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0827790096082779, |
|
"grad_norm": 0.29452916979789734, |
|
"learning_rate": 7.710266782362247e-05, |
|
"loss": 0.6234, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.08376447400837644, |
|
"grad_norm": 0.34324830770492554, |
|
"learning_rate": 7.63216081438678e-05, |
|
"loss": 0.6944, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.084749938408475, |
|
"grad_norm": 0.33645856380462646, |
|
"learning_rate": 7.553155965904535e-05, |
|
"loss": 0.6598, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.08573540280857354, |
|
"grad_norm": 0.33191972970962524, |
|
"learning_rate": 7.473279216998895e-05, |
|
"loss": 0.6206, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.08672086720867209, |
|
"grad_norm": 0.4066058099269867, |
|
"learning_rate": 7.392557845506432e-05, |
|
"loss": 0.7335, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.08770633160877063, |
|
"grad_norm": 0.37192967534065247, |
|
"learning_rate": 7.311019417701566e-05, |
|
"loss": 0.6201, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.08869179600886919, |
|
"grad_norm": 0.40204325318336487, |
|
"learning_rate": 7.228691778882693e-05, |
|
"loss": 0.691, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08967726040896773, |
|
"grad_norm": 0.3774620592594147, |
|
"learning_rate": 7.145603043863045e-05, |
|
"loss": 0.6234, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.09066272480906627, |
|
"grad_norm": 0.38905617594718933, |
|
"learning_rate": 7.061781587369519e-05, |
|
"loss": 0.5785, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.09164818920916482, |
|
"grad_norm": 0.40719079971313477, |
|
"learning_rate": 6.977256034352712e-05, |
|
"loss": 0.7717, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.09263365360926337, |
|
"grad_norm": 0.42338162660598755, |
|
"learning_rate": 6.892055250211552e-05, |
|
"loss": 0.7243, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.09361911800936192, |
|
"grad_norm": 0.4960746169090271, |
|
"learning_rate": 6.806208330935766e-05, |
|
"loss": 0.694, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.09460458240946046, |
|
"grad_norm": 0.45286256074905396, |
|
"learning_rate": 6.719744593169641e-05, |
|
"loss": 0.7173, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.095590046809559, |
|
"grad_norm": 0.47811850905418396, |
|
"learning_rate": 6.632693564200416e-05, |
|
"loss": 0.7395, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.09657551120965756, |
|
"grad_norm": 0.5401033759117126, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.7726, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0975609756097561, |
|
"grad_norm": 0.6063845157623291, |
|
"learning_rate": 6.456948734446624e-05, |
|
"loss": 0.8667, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.09854644000985464, |
|
"grad_norm": 0.7505123615264893, |
|
"learning_rate": 6.368314950360415e-05, |
|
"loss": 0.6364, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09854644000985464, |
|
"eval_loss": 0.6039547920227051, |
|
"eval_runtime": 159.9711, |
|
"eval_samples_per_second": 10.683, |
|
"eval_steps_per_second": 5.345, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09953190440995319, |
|
"grad_norm": 0.06492675840854645, |
|
"learning_rate": 6.279213887972179e-05, |
|
"loss": 0.0378, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.10051736881005174, |
|
"grad_norm": 0.10757127404212952, |
|
"learning_rate": 6.189675975213094e-05, |
|
"loss": 0.1717, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.10150283321015029, |
|
"grad_norm": 0.139942467212677, |
|
"learning_rate": 6.099731789198344e-05, |
|
"loss": 0.2134, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.10248829761024883, |
|
"grad_norm": 0.17105042934417725, |
|
"learning_rate": 6.009412045785051e-05, |
|
"loss": 0.242, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.10347376201034737, |
|
"grad_norm": 0.1677185744047165, |
|
"learning_rate": 5.918747589082853e-05, |
|
"loss": 0.2773, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.10445922641044593, |
|
"grad_norm": 0.19385424256324768, |
|
"learning_rate": 5.82776938092065e-05, |
|
"loss": 0.4616, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.10544469081054447, |
|
"grad_norm": 0.19936293363571167, |
|
"learning_rate": 5.736508490273188e-05, |
|
"loss": 0.5253, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.10643015521064302, |
|
"grad_norm": 0.225832998752594, |
|
"learning_rate": 5.644996082651017e-05, |
|
"loss": 0.6073, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.10741561961074156, |
|
"grad_norm": 0.20826251804828644, |
|
"learning_rate": 5.553263409457504e-05, |
|
"loss": 0.5133, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.10840108401084012, |
|
"grad_norm": 0.19744732975959778, |
|
"learning_rate": 5.4613417973165106e-05, |
|
"loss": 0.5992, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.10938654841093866, |
|
"grad_norm": 0.20350094139575958, |
|
"learning_rate": 5.3692626373743706e-05, |
|
"loss": 0.5886, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.1103720128110372, |
|
"grad_norm": 0.2253555953502655, |
|
"learning_rate": 5.27705737457985e-05, |
|
"loss": 0.7352, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.11135747721113574, |
|
"grad_norm": 0.1978590339422226, |
|
"learning_rate": 5.184757496945726e-05, |
|
"loss": 0.6002, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.1123429416112343, |
|
"grad_norm": 0.23348231613636017, |
|
"learning_rate": 5.092394524795649e-05, |
|
"loss": 0.5746, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.11332840601133284, |
|
"grad_norm": 0.223561093211174, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7063, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.11431387041143139, |
|
"grad_norm": 0.22021301090717316, |
|
"learning_rate": 4.907605475204352e-05, |
|
"loss": 0.5453, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.11529933481152993, |
|
"grad_norm": 0.22918701171875, |
|
"learning_rate": 4.8152425030542766e-05, |
|
"loss": 0.6408, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.11628479921162849, |
|
"grad_norm": 0.221017986536026, |
|
"learning_rate": 4.72294262542015e-05, |
|
"loss": 0.6825, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.11727026361172703, |
|
"grad_norm": 0.24463246762752533, |
|
"learning_rate": 4.6307373626256306e-05, |
|
"loss": 0.674, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.11825572801182557, |
|
"grad_norm": 0.22127676010131836, |
|
"learning_rate": 4.5386582026834906e-05, |
|
"loss": 0.5877, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11924119241192412, |
|
"grad_norm": 0.25243082642555237, |
|
"learning_rate": 4.446736590542497e-05, |
|
"loss": 0.7659, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.12022665681202267, |
|
"grad_norm": 0.25524279475212097, |
|
"learning_rate": 4.3550039173489845e-05, |
|
"loss": 0.7332, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.12121212121212122, |
|
"grad_norm": 0.2742602527141571, |
|
"learning_rate": 4.2634915097268115e-05, |
|
"loss": 0.8811, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.12219758561221976, |
|
"grad_norm": 0.2482510656118393, |
|
"learning_rate": 4.1722306190793495e-05, |
|
"loss": 0.7613, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.1231830500123183, |
|
"grad_norm": 0.2792511284351349, |
|
"learning_rate": 4.0812524109171476e-05, |
|
"loss": 0.7514, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12416851441241686, |
|
"grad_norm": 0.28039655089378357, |
|
"learning_rate": 3.99058795421495e-05, |
|
"loss": 0.6821, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1251539788125154, |
|
"grad_norm": 0.3473927080631256, |
|
"learning_rate": 3.9002682108016585e-05, |
|
"loss": 0.6593, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.12613944321261394, |
|
"grad_norm": 0.2565251290798187, |
|
"learning_rate": 3.8103240247869075e-05, |
|
"loss": 0.5399, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.1271249076127125, |
|
"grad_norm": 0.28116095066070557, |
|
"learning_rate": 3.720786112027822e-05, |
|
"loss": 0.6255, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.12811037201281103, |
|
"grad_norm": 0.28025346994400024, |
|
"learning_rate": 3.631685049639586e-05, |
|
"loss": 0.7028, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1290958364129096, |
|
"grad_norm": 0.2895910441875458, |
|
"learning_rate": 3.543051265553377e-05, |
|
"loss": 0.7361, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.13008130081300814, |
|
"grad_norm": 0.2944524884223938, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.7275, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.13106676521310667, |
|
"grad_norm": 0.334114134311676, |
|
"learning_rate": 3.367306435799584e-05, |
|
"loss": 0.8179, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.13205222961320523, |
|
"grad_norm": 0.3113299012184143, |
|
"learning_rate": 3.2802554068303596e-05, |
|
"loss": 0.7505, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.13303769401330376, |
|
"grad_norm": 0.3196057677268982, |
|
"learning_rate": 3.1937916690642356e-05, |
|
"loss": 0.7061, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.13402315841340232, |
|
"grad_norm": 0.3126354217529297, |
|
"learning_rate": 3.107944749788449e-05, |
|
"loss": 0.7001, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.13500862281350087, |
|
"grad_norm": 0.31723636388778687, |
|
"learning_rate": 3.0227439656472877e-05, |
|
"loss": 0.5961, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.1359940872135994, |
|
"grad_norm": 0.3157641291618347, |
|
"learning_rate": 2.9382184126304834e-05, |
|
"loss": 0.5105, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.13697955161369796, |
|
"grad_norm": 0.337483286857605, |
|
"learning_rate": 2.8543969561369556e-05, |
|
"loss": 0.6706, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.13796501601379652, |
|
"grad_norm": 0.35467472672462463, |
|
"learning_rate": 2.771308221117309e-05, |
|
"loss": 0.6632, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.13895048041389504, |
|
"grad_norm": 0.3754487633705139, |
|
"learning_rate": 2.688980582298435e-05, |
|
"loss": 0.6372, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1399359448139936, |
|
"grad_norm": 0.3946709930896759, |
|
"learning_rate": 2.607442154493568e-05, |
|
"loss": 0.7413, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.14092140921409213, |
|
"grad_norm": 0.37810003757476807, |
|
"learning_rate": 2.5267207830011068e-05, |
|
"loss": 0.6634, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.1419068736141907, |
|
"grad_norm": 0.3932291567325592, |
|
"learning_rate": 2.446844034095466e-05, |
|
"loss": 0.6756, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.14289233801428924, |
|
"grad_norm": 0.42648932337760925, |
|
"learning_rate": 2.3678391856132204e-05, |
|
"loss": 0.7081, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.14387780241438777, |
|
"grad_norm": 0.46039843559265137, |
|
"learning_rate": 2.2897332176377528e-05, |
|
"loss": 0.8046, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.14486326681448633, |
|
"grad_norm": 0.4435557425022125, |
|
"learning_rate": 2.2125528032855724e-05, |
|
"loss": 0.6347, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.1458487312145849, |
|
"grad_norm": 0.509733259677887, |
|
"learning_rate": 2.136324299597474e-05, |
|
"loss": 0.8318, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.14683419561468342, |
|
"grad_norm": 0.5783795714378357, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.662, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.14781966001478197, |
|
"grad_norm": 0.8224442005157471, |
|
"learning_rate": 1.9868268181037185e-05, |
|
"loss": 0.6612, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14781966001478197, |
|
"eval_loss": 0.5897608995437622, |
|
"eval_runtime": 160.4128, |
|
"eval_samples_per_second": 10.654, |
|
"eval_steps_per_second": 5.33, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1488051244148805, |
|
"grad_norm": 0.04582216218113899, |
|
"learning_rate": 1.9136088935510362e-05, |
|
"loss": 0.0314, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.14979058881497906, |
|
"grad_norm": 0.09543165564537048, |
|
"learning_rate": 1.8414449687337464e-05, |
|
"loss": 0.1812, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.15077605321507762, |
|
"grad_norm": 0.10564053803682327, |
|
"learning_rate": 1.7703596875660645e-05, |
|
"loss": 0.259, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.15176151761517614, |
|
"grad_norm": 0.1349058598279953, |
|
"learning_rate": 1.700377325606388e-05, |
|
"loss": 0.3295, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.1527469820152747, |
|
"grad_norm": 0.16049978137016296, |
|
"learning_rate": 1.631521781767214e-05, |
|
"loss": 0.4605, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.15373244641537326, |
|
"grad_norm": 0.19529177248477936, |
|
"learning_rate": 1.5638165701536868e-05, |
|
"loss": 0.6925, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.1547179108154718, |
|
"grad_norm": 0.14760489761829376, |
|
"learning_rate": 1.4972848120335453e-05, |
|
"loss": 0.4273, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.15570337521557034, |
|
"grad_norm": 0.1731565296649933, |
|
"learning_rate": 1.4319492279412388e-05, |
|
"loss": 0.4982, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.15668883961566887, |
|
"grad_norm": 0.19718369841575623, |
|
"learning_rate": 1.3678321299188801e-05, |
|
"loss": 0.5252, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.15767430401576743, |
|
"grad_norm": 0.19219151139259338, |
|
"learning_rate": 1.3049554138967051e-05, |
|
"loss": 0.6744, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.158659768415866, |
|
"grad_norm": 0.19782665371894836, |
|
"learning_rate": 1.2433405522156332e-05, |
|
"loss": 0.5857, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.15964523281596452, |
|
"grad_norm": 0.19759586453437805, |
|
"learning_rate": 1.183008586294485e-05, |
|
"loss": 0.6169, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.16063069721606307, |
|
"grad_norm": 0.2008185088634491, |
|
"learning_rate": 1.1239801194443506e-05, |
|
"loss": 0.5558, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.16161616161616163, |
|
"grad_norm": 0.20355170965194702, |
|
"learning_rate": 1.066275309832584e-05, |
|
"loss": 0.6305, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.16260162601626016, |
|
"grad_norm": 0.2057691514492035, |
|
"learning_rate": 1.0099138635988026e-05, |
|
"loss": 0.629, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.16358709041635872, |
|
"grad_norm": 0.21220219135284424, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 0.5687, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.16457255481645724, |
|
"grad_norm": 0.22451376914978027, |
|
"learning_rate": 9.012975854638949e-06, |
|
"loss": 0.7258, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.1655580192165558, |
|
"grad_norm": 0.24449403584003448, |
|
"learning_rate": 8.490798459222476e-06, |
|
"loss": 0.6929, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.16654348361665436, |
|
"grad_norm": 0.2417096346616745, |
|
"learning_rate": 7.982796418105371e-06, |
|
"loss": 0.7392, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.1675289480167529, |
|
"grad_norm": 0.27315232157707214, |
|
"learning_rate": 7.489143213519301e-06, |
|
"loss": 0.7316, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.16851441241685144, |
|
"grad_norm": 0.25194498896598816, |
|
"learning_rate": 7.010007427581378e-06, |
|
"loss": 0.5979, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.16949987681695, |
|
"grad_norm": 0.24112963676452637, |
|
"learning_rate": 6.5455526847235825e-06, |
|
"loss": 0.69, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.17048534121704853, |
|
"grad_norm": 0.2511669397354126, |
|
"learning_rate": 6.0959375958151045e-06, |
|
"loss": 0.5824, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.1714708056171471, |
|
"grad_norm": 0.24215401709079742, |
|
"learning_rate": 5.6613157039969055e-06, |
|
"loss": 0.6823, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.17245627001724562, |
|
"grad_norm": 0.28520438075065613, |
|
"learning_rate": 5.241835432246889e-06, |
|
"loss": 0.7416, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.17344173441734417, |
|
"grad_norm": 0.2623746395111084, |
|
"learning_rate": 4.837640032693558e-06, |
|
"loss": 0.6514, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.17442719881744273, |
|
"grad_norm": 0.2783282995223999, |
|
"learning_rate": 4.448867537695578e-06, |
|
"loss": 0.7159, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.17541266321754126, |
|
"grad_norm": 0.27611035108566284, |
|
"learning_rate": 4.075650712703849e-06, |
|
"loss": 0.7525, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.17639812761763982, |
|
"grad_norm": 0.270900696516037, |
|
"learning_rate": 3.71811701092219e-06, |
|
"loss": 0.7557, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.17738359201773837, |
|
"grad_norm": 0.2633064389228821, |
|
"learning_rate": 3.376388529782215e-06, |
|
"loss": 0.5719, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1783690564178369, |
|
"grad_norm": 0.2590899169445038, |
|
"learning_rate": 3.0505819692471792e-06, |
|
"loss": 0.5344, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.17935452081793546, |
|
"grad_norm": 0.28156155347824097, |
|
"learning_rate": 2.7408085919590264e-06, |
|
"loss": 0.6333, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.180339985218034, |
|
"grad_norm": 0.2902732491493225, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 0.5511, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.18132544961813254, |
|
"grad_norm": 0.3187778890132904, |
|
"learning_rate": 2.1697790249779636e-06, |
|
"loss": 0.7326, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.1823109140182311, |
|
"grad_norm": 0.2755086421966553, |
|
"learning_rate": 1.908717841359048e-06, |
|
"loss": 0.4491, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.18329637841832963, |
|
"grad_norm": 0.33460649847984314, |
|
"learning_rate": 1.6640797865406288e-06, |
|
"loss": 0.6483, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.1842818428184282, |
|
"grad_norm": 0.3185296356678009, |
|
"learning_rate": 1.4359484041943038e-06, |
|
"loss": 0.6061, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.18526730721852674, |
|
"grad_norm": 0.343330055475235, |
|
"learning_rate": 1.2244016009781701e-06, |
|
"loss": 0.7053, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.18625277161862527, |
|
"grad_norm": 0.35610195994377136, |
|
"learning_rate": 1.0295116199317057e-06, |
|
"loss": 0.6565, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.18723823601872383, |
|
"grad_norm": 0.3516658842563629, |
|
"learning_rate": 8.513450158049108e-07, |
|
"loss": 0.7232, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.18822370041882236, |
|
"grad_norm": 0.37256768345832825, |
|
"learning_rate": 6.899626323298713e-07, |
|
"loss": 0.5819, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.18920916481892092, |
|
"grad_norm": 0.4146597385406494, |
|
"learning_rate": 5.454195814427021e-07, |
|
"loss": 0.653, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.19019462921901947, |
|
"grad_norm": 0.3835974335670471, |
|
"learning_rate": 4.177652244628627e-07, |
|
"loss": 0.6009, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.191180093619118, |
|
"grad_norm": 0.5377586483955383, |
|
"learning_rate": 3.0704315523631953e-07, |
|
"loss": 0.7371, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.19216555801921656, |
|
"grad_norm": 0.4629654884338379, |
|
"learning_rate": 2.1329118524827662e-07, |
|
"loss": 0.7908, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.19315102241931512, |
|
"grad_norm": 0.4720406234264374, |
|
"learning_rate": 1.3654133071059893e-07, |
|
"loss": 0.7832, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.19413648681941365, |
|
"grad_norm": 0.5157343745231628, |
|
"learning_rate": 7.681980162830282e-08, |
|
"loss": 0.8257, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.1951219512195122, |
|
"grad_norm": 0.5014312267303467, |
|
"learning_rate": 3.4146992848854695e-08, |
|
"loss": 0.7237, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.19610741561961073, |
|
"grad_norm": 0.5394123792648315, |
|
"learning_rate": 8.537477097364522e-09, |
|
"loss": 0.6931, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.1970928800197093, |
|
"grad_norm": 0.6996267437934875, |
|
"learning_rate": 0.0, |
|
"loss": 0.7056, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1970928800197093, |
|
"eval_loss": 0.584774374961853, |
|
"eval_runtime": 160.1549, |
|
"eval_samples_per_second": 10.671, |
|
"eval_steps_per_second": 5.339, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.401698120071905e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|