{
  "best_metric": 1.1349619626998901,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.03266372693124286,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.00016331863465621427, "grad_norm": 1.671673059463501, "learning_rate": 1.001e-05, "loss": 1.0081, "step": 1 },
    { "epoch": 0.00016331863465621427, "eval_loss": 1.9000940322875977, "eval_runtime": 345.3153, "eval_samples_per_second": 7.466, "eval_steps_per_second": 1.868, "step": 1 },
    { "epoch": 0.00032663726931242854, "grad_norm": 2.1853973865509033, "learning_rate": 2.002e-05, "loss": 1.0249, "step": 2 },
    { "epoch": 0.0004899559039686428, "grad_norm": 2.061541795730591, "learning_rate": 3.0029999999999995e-05, "loss": 1.0868, "step": 3 },
    { "epoch": 0.0006532745386248571, "grad_norm": 2.0026113986968994, "learning_rate": 4.004e-05, "loss": 1.2093, "step": 4 },
    { "epoch": 0.0008165931732810714, "grad_norm": 1.3478747606277466, "learning_rate": 5.005e-05, "loss": 0.9586, "step": 5 },
    { "epoch": 0.0009799118079372856, "grad_norm": 1.2198477983474731, "learning_rate": 6.005999999999999e-05, "loss": 1.001, "step": 6 },
    { "epoch": 0.0011432304425935, "grad_norm": 1.0008301734924316, "learning_rate": 7.006999999999998e-05, "loss": 1.0274, "step": 7 },
    { "epoch": 0.0013065490772497142, "grad_norm": 0.9373151063919067, "learning_rate": 8.008e-05, "loss": 1.0383, "step": 8 },
    { "epoch": 0.0014698677119059284, "grad_norm": 0.984042763710022, "learning_rate": 9.009e-05, "loss": 0.9748, "step": 9 },
    { "epoch": 0.0016331863465621427, "grad_norm": 0.9123459458351135, "learning_rate": 0.0001001, "loss": 1.0147, "step": 10 },
    { "epoch": 0.001796504981218357, "grad_norm": 0.941821277141571, "learning_rate": 9.957315789473684e-05, "loss": 0.8924, "step": 11 },
    { "epoch": 0.0019598236158745713, "grad_norm": 0.8732932209968567, "learning_rate": 9.904631578947367e-05, "loss": 0.9978, "step": 12 },
    { "epoch": 0.0021231422505307855, "grad_norm": 0.8224309086799622, "learning_rate": 9.851947368421052e-05, "loss": 1.064, "step": 13 },
    { "epoch": 0.002286460885187, "grad_norm": 0.9332011342048645, "learning_rate": 9.799263157894736e-05, "loss": 1.0589, "step": 14 },
    { "epoch": 0.002449779519843214, "grad_norm": 0.9349180459976196, "learning_rate": 9.746578947368421e-05, "loss": 1.1028, "step": 15 },
    { "epoch": 0.0026130981544994283, "grad_norm": 0.8387799263000488, "learning_rate": 9.693894736842104e-05, "loss": 1.0083, "step": 16 },
    { "epoch": 0.0027764167891556426, "grad_norm": 0.8592500686645508, "learning_rate": 9.641210526315789e-05, "loss": 1.0184, "step": 17 },
    { "epoch": 0.002939735423811857, "grad_norm": 0.8980984687805176, "learning_rate": 9.588526315789473e-05, "loss": 0.937, "step": 18 },
    { "epoch": 0.003103054058468071, "grad_norm": 0.8308787941932678, "learning_rate": 9.535842105263157e-05, "loss": 0.9237, "step": 19 },
    { "epoch": 0.0032663726931242854, "grad_norm": 1.0007582902908325, "learning_rate": 9.483157894736841e-05, "loss": 1.0406, "step": 20 },
    { "epoch": 0.0034296913277804997, "grad_norm": 1.081726312637329, "learning_rate": 9.430473684210526e-05, "loss": 1.007, "step": 21 },
    { "epoch": 0.003593009962436714, "grad_norm": 0.9985299110412598, "learning_rate": 9.37778947368421e-05, "loss": 1.0914, "step": 22 },
    { "epoch": 0.0037563285970929282, "grad_norm": 1.0678647756576538, "learning_rate": 9.325105263157894e-05, "loss": 1.2774, "step": 23 },
    { "epoch": 0.0039196472317491425, "grad_norm": 1.0457429885864258, "learning_rate": 9.272421052631578e-05, "loss": 0.9798, "step": 24 },
    { "epoch": 0.004082965866405357, "grad_norm": 1.1700959205627441, "learning_rate": 9.219736842105263e-05, "loss": 1.2144, "step": 25 },
    { "epoch": 0.004246284501061571, "grad_norm": 1.3236618041992188, "learning_rate": 9.167052631578946e-05, "loss": 1.1766, "step": 26 },
    { "epoch": 0.004409603135717786, "grad_norm": 1.2073756456375122, "learning_rate": 9.114368421052632e-05, "loss": 0.8459, "step": 27 },
    { "epoch": 0.004572921770374, "grad_norm": 1.3933292627334595, "learning_rate": 9.061684210526315e-05, "loss": 1.0467, "step": 28 },
    { "epoch": 0.004736240405030214, "grad_norm": 1.4482253789901733, "learning_rate": 9.009e-05, "loss": 1.1401, "step": 29 },
    { "epoch": 0.004899559039686428, "grad_norm": 1.365537405014038, "learning_rate": 8.956315789473683e-05, "loss": 0.9932, "step": 30 },
    { "epoch": 0.005062877674342643, "grad_norm": 1.475220799446106, "learning_rate": 8.903631578947368e-05, "loss": 1.2499, "step": 31 },
    { "epoch": 0.005226196308998857, "grad_norm": 1.531992793083191, "learning_rate": 8.850947368421052e-05, "loss": 1.0115, "step": 32 },
    { "epoch": 0.005389514943655071, "grad_norm": 2.305574655532837, "learning_rate": 8.798263157894736e-05, "loss": 0.9368, "step": 33 },
    { "epoch": 0.005552833578311285, "grad_norm": 1.7354161739349365, "learning_rate": 8.745578947368422e-05, "loss": 1.049, "step": 34 },
    { "epoch": 0.0057161522129675, "grad_norm": 2.7026562690734863, "learning_rate": 8.692894736842105e-05, "loss": 1.3359, "step": 35 },
    { "epoch": 0.005879470847623714, "grad_norm": 2.1211535930633545, "learning_rate": 8.64021052631579e-05, "loss": 1.2523, "step": 36 },
    { "epoch": 0.0060427894822799285, "grad_norm": 2.5288071632385254, "learning_rate": 8.587526315789473e-05, "loss": 1.43, "step": 37 },
    { "epoch": 0.006206108116936142, "grad_norm": 2.433478832244873, "learning_rate": 8.534842105263157e-05, "loss": 1.214, "step": 38 },
    { "epoch": 0.006369426751592357, "grad_norm": 2.750303268432617, "learning_rate": 8.482157894736842e-05, "loss": 1.0099, "step": 39 },
    { "epoch": 0.006532745386248571, "grad_norm": 2.981351375579834, "learning_rate": 8.429473684210525e-05, "loss": 1.6271, "step": 40 },
    { "epoch": 0.0066960640209047856, "grad_norm": 3.7224624156951904, "learning_rate": 8.376789473684211e-05, "loss": 1.733, "step": 41 },
    { "epoch": 0.006859382655560999, "grad_norm": 3.8278751373291016, "learning_rate": 8.324105263157894e-05, "loss": 1.5315, "step": 42 },
    { "epoch": 0.007022701290217214, "grad_norm": 2.7715137004852295, "learning_rate": 8.271421052631579e-05, "loss": 1.103, "step": 43 },
    { "epoch": 0.007186019924873428, "grad_norm": 4.1672892570495605, "learning_rate": 8.218736842105262e-05, "loss": 1.6296, "step": 44 },
    { "epoch": 0.007349338559529643, "grad_norm": 4.840080261230469, "learning_rate": 8.166052631578947e-05, "loss": 1.3456, "step": 45 },
    { "epoch": 0.0075126571941858565, "grad_norm": 4.377506256103516, "learning_rate": 8.113368421052631e-05, "loss": 1.6744, "step": 46 },
    { "epoch": 0.007675975828842071, "grad_norm": 4.180212497711182, "learning_rate": 8.060684210526315e-05, "loss": 1.4903, "step": 47 },
    { "epoch": 0.007839294463498285, "grad_norm": 4.473518371582031, "learning_rate": 8.008e-05, "loss": 2.0731, "step": 48 },
    { "epoch": 0.008002613098154499, "grad_norm": 7.101559162139893, "learning_rate": 7.955315789473684e-05, "loss": 2.0784, "step": 49 },
    { "epoch": 0.008165931732810714, "grad_norm": 12.615863800048828, "learning_rate": 7.902631578947368e-05, "loss": 3.4556, "step": 50 },
    { "epoch": 0.008165931732810714, "eval_loss": 1.2871240377426147, "eval_runtime": 345.3086, "eval_samples_per_second": 7.466, "eval_steps_per_second": 1.868, "step": 50 },
    { "epoch": 0.008329250367466928, "grad_norm": 1.8585017919540405, "learning_rate": 7.849947368421052e-05, "loss": 0.9102, "step": 51 },
    { "epoch": 0.008492569002123142, "grad_norm": 1.3500086069107056, "learning_rate": 7.797263157894736e-05, "loss": 1.079, "step": 52 },
    { "epoch": 0.008655887636779356, "grad_norm": 0.9420222043991089, "learning_rate": 7.744578947368421e-05, "loss": 0.9128, "step": 53 },
    { "epoch": 0.008819206271435572, "grad_norm": 0.7498682141304016, "learning_rate": 7.691894736842104e-05, "loss": 0.9181, "step": 54 },
    { "epoch": 0.008982524906091785, "grad_norm": 0.6257831454277039, "learning_rate": 7.63921052631579e-05, "loss": 0.8515, "step": 55 },
    { "epoch": 0.009145843540748, "grad_norm": 0.6922722458839417, "learning_rate": 7.586526315789473e-05, "loss": 1.0347, "step": 56 },
    { "epoch": 0.009309162175404213, "grad_norm": 0.6588111519813538, "learning_rate": 7.533842105263158e-05, "loss": 0.9266, "step": 57 },
    { "epoch": 0.009472480810060429, "grad_norm": 0.6781211495399475, "learning_rate": 7.481157894736841e-05, "loss": 0.8475, "step": 58 },
    { "epoch": 0.009635799444716642, "grad_norm": 0.7402060627937317, "learning_rate": 7.428473684210526e-05, "loss": 0.9287, "step": 59 },
    { "epoch": 0.009799118079372856, "grad_norm": 0.7259023189544678, "learning_rate": 7.375789473684209e-05, "loss": 0.9206, "step": 60 },
    { "epoch": 0.00996243671402907, "grad_norm": 0.7616604566574097, "learning_rate": 7.323105263157895e-05, "loss": 1.0049, "step": 61 },
    { "epoch": 0.010125755348685286, "grad_norm": 0.7781885862350464, "learning_rate": 7.270421052631578e-05, "loss": 0.9962, "step": 62 },
    { "epoch": 0.0102890739833415, "grad_norm": 0.7381305694580078, "learning_rate": 7.217736842105263e-05, "loss": 0.9049, "step": 63 },
    { "epoch": 0.010452392617997713, "grad_norm": 0.6955409646034241, "learning_rate": 7.165052631578947e-05, "loss": 0.8963, "step": 64 },
    { "epoch": 0.010615711252653927, "grad_norm": 0.8050999045372009, "learning_rate": 7.11236842105263e-05, "loss": 1.0838, "step": 65 },
    { "epoch": 0.010779029887310143, "grad_norm": 0.7544150352478027, "learning_rate": 7.059684210526315e-05, "loss": 0.9493, "step": 66 },
    { "epoch": 0.010942348521966357, "grad_norm": 0.8155741095542908, "learning_rate": 7.006999999999998e-05, "loss": 1.1459, "step": 67 },
    { "epoch": 0.01110566715662257, "grad_norm": 0.7876653671264648, "learning_rate": 6.954315789473684e-05, "loss": 1.1572, "step": 68 },
    { "epoch": 0.011268985791278784, "grad_norm": 0.7840471267700195, "learning_rate": 6.901631578947368e-05, "loss": 0.9516, "step": 69 },
    { "epoch": 0.011432304425935, "grad_norm": 0.8437424302101135, "learning_rate": 6.848947368421052e-05, "loss": 1.0117, "step": 70 },
    { "epoch": 0.011595623060591214, "grad_norm": 1.0028568506240845, "learning_rate": 6.796263157894737e-05, "loss": 1.0057, "step": 71 },
    { "epoch": 0.011758941695247428, "grad_norm": 0.9504867792129517, "learning_rate": 6.74357894736842e-05, "loss": 0.7451, "step": 72 },
    { "epoch": 0.011922260329903641, "grad_norm": 0.9289828538894653, "learning_rate": 6.690894736842105e-05, "loss": 0.9866, "step": 73 },
    { "epoch": 0.012085578964559857, "grad_norm": 0.9018694758415222, "learning_rate": 6.638210526315788e-05, "loss": 0.9381, "step": 74 },
    { "epoch": 0.01224889759921607, "grad_norm": 1.0793242454528809, "learning_rate": 6.585526315789474e-05, "loss": 0.8213, "step": 75 },
    { "epoch": 0.012412216233872285, "grad_norm": 1.2196921110153198, "learning_rate": 6.532842105263157e-05, "loss": 1.2063, "step": 76 },
    { "epoch": 0.012575534868528498, "grad_norm": 1.1026328802108765, "learning_rate": 6.480157894736842e-05, "loss": 0.9335, "step": 77 },
    { "epoch": 0.012738853503184714, "grad_norm": 1.1513912677764893, "learning_rate": 6.427473684210526e-05, "loss": 0.8602, "step": 78 },
    { "epoch": 0.012902172137840928, "grad_norm": 1.6112369298934937, "learning_rate": 6.37478947368421e-05, "loss": 0.9575, "step": 79 },
    { "epoch": 0.013065490772497142, "grad_norm": 1.4818161725997925, "learning_rate": 6.322105263157894e-05, "loss": 1.0907, "step": 80 },
    { "epoch": 0.013228809407153356, "grad_norm": 1.3476381301879883, "learning_rate": 6.269421052631577e-05, "loss": 1.1235, "step": 81 },
    { "epoch": 0.013392128041809571, "grad_norm": 2.3844518661499023, "learning_rate": 6.216736842105263e-05, "loss": 1.5409, "step": 82 },
    { "epoch": 0.013555446676465785, "grad_norm": 3.1227309703826904, "learning_rate": 6.164052631578947e-05, "loss": 1.0545, "step": 83 },
    { "epoch": 0.013718765311121999, "grad_norm": 1.8032991886138916, "learning_rate": 6.111368421052631e-05, "loss": 1.458, "step": 84 },
    { "epoch": 0.013882083945778213, "grad_norm": 3.0967934131622314, "learning_rate": 6.058684210526315e-05, "loss": 1.4376, "step": 85 },
    { "epoch": 0.014045402580434428, "grad_norm": 1.9893653392791748, "learning_rate": 6.005999999999999e-05, "loss": 1.2981, "step": 86 },
    { "epoch": 0.014208721215090642, "grad_norm": 1.9208163022994995, "learning_rate": 5.953315789473684e-05, "loss": 0.9923, "step": 87 },
    { "epoch": 0.014372039849746856, "grad_norm": 2.3992538452148438, "learning_rate": 5.9006315789473676e-05, "loss": 1.1079, "step": 88 },
    { "epoch": 0.01453535848440307, "grad_norm": 2.272313117980957, "learning_rate": 5.847947368421053e-05, "loss": 1.1979, "step": 89 },
    { "epoch": 0.014698677119059285, "grad_norm": 2.8902785778045654, "learning_rate": 5.795263157894737e-05, "loss": 1.3671, "step": 90 },
    { "epoch": 0.014861995753715499, "grad_norm": 2.847869634628296, "learning_rate": 5.742578947368421e-05, "loss": 0.7691, "step": 91 },
    { "epoch": 0.015025314388371713, "grad_norm": 2.585005044937134, "learning_rate": 5.6898947368421046e-05, "loss": 1.2341, "step": 92 },
    { "epoch": 0.015188633023027927, "grad_norm": 3.3812036514282227, "learning_rate": 5.6372105263157886e-05, "loss": 1.3677, "step": 93 },
    { "epoch": 0.015351951657684142, "grad_norm": 4.347215175628662, "learning_rate": 5.584526315789473e-05, "loss": 1.5338, "step": 94 },
    { "epoch": 0.015515270292340356, "grad_norm": 2.856776237487793, "learning_rate": 5.531842105263158e-05, "loss": 1.4113, "step": 95 },
    { "epoch": 0.01567858892699657, "grad_norm": 3.887981414794922, "learning_rate": 5.4791578947368424e-05, "loss": 1.4782, "step": 96 },
    { "epoch": 0.015841907561652786, "grad_norm": 3.390988826751709, "learning_rate": 5.426473684210526e-05, "loss": 2.0514, "step": 97 },
    { "epoch": 0.016005226196308998, "grad_norm": 90.9446792602539, "learning_rate": 5.37378947368421e-05, "loss": 1.5698, "step": 98 },
    { "epoch": 0.016168544830965213, "grad_norm": 4.8651556968688965, "learning_rate": 5.321105263157894e-05, "loss": 1.732, "step": 99 },
    { "epoch": 0.01633186346562143, "grad_norm": 8.150487899780273, "learning_rate": 5.268421052631578e-05, "loss": 2.5082, "step": 100 },
    { "epoch": 0.01633186346562143, "eval_loss": 1.1979554891586304, "eval_runtime": 345.8196, "eval_samples_per_second": 7.455, "eval_steps_per_second": 1.865, "step": 100 },
    { "epoch": 0.01649518210027764, "grad_norm": 0.7058354020118713, "learning_rate": 5.2157368421052626e-05, "loss": 0.7955, "step": 101 },
    { "epoch": 0.016658500734933857, "grad_norm": 0.7873735427856445, "learning_rate": 5.163052631578947e-05, "loss": 0.9298, "step": 102 },
    { "epoch": 0.01682181936959007, "grad_norm": 0.6918278336524963, "learning_rate": 5.110368421052632e-05, "loss": 0.941, "step": 103 },
    { "epoch": 0.016985138004246284, "grad_norm": 0.6960136294364929, "learning_rate": 5.057684210526316e-05, "loss": 0.8583, "step": 104 },
    { "epoch": 0.0171484566389025, "grad_norm": 0.6780745983123779, "learning_rate": 5.005e-05, "loss": 0.8733, "step": 105 },
    { "epoch": 0.017311775273558712, "grad_norm": 0.6429307460784912, "learning_rate": 4.9523157894736836e-05, "loss": 0.9417, "step": 106 },
    { "epoch": 0.017475093908214927, "grad_norm": 0.6558470726013184, "learning_rate": 4.899631578947368e-05, "loss": 0.8916, "step": 107 },
    { "epoch": 0.017638412542871143, "grad_norm": 0.6242132186889648, "learning_rate": 4.846947368421052e-05, "loss": 1.0048, "step": 108 },
    { "epoch": 0.017801731177527355, "grad_norm": 0.5471253991127014, "learning_rate": 4.794263157894737e-05, "loss": 0.8926, "step": 109 },
    { "epoch": 0.01796504981218357, "grad_norm": 0.5996319055557251, "learning_rate": 4.7415789473684206e-05, "loss": 1.032, "step": 110 },
    { "epoch": 0.018128368446839783, "grad_norm": 0.6278873682022095, "learning_rate": 4.688894736842105e-05, "loss": 1.0255, "step": 111 },
    { "epoch": 0.018291687081496, "grad_norm": 0.622478723526001, "learning_rate": 4.636210526315789e-05, "loss": 0.9705, "step": 112 },
    { "epoch": 0.018455005716152214, "grad_norm": 0.6352484226226807, "learning_rate": 4.583526315789473e-05, "loss": 0.7911, "step": 113 },
    { "epoch": 0.018618324350808426, "grad_norm": 0.681956946849823, "learning_rate": 4.530842105263158e-05, "loss": 1.0143, "step": 114 },
    { "epoch": 0.01878164298546464, "grad_norm": 0.7090697288513184, "learning_rate": 4.4781578947368416e-05, "loss": 0.9628, "step": 115 },
    { "epoch": 0.018944961620120857, "grad_norm": 0.7363274097442627, "learning_rate": 4.425473684210526e-05, "loss": 1.0271, "step": 116 },
    { "epoch": 0.01910828025477707, "grad_norm": 0.7926649451255798, "learning_rate": 4.372789473684211e-05, "loss": 1.124, "step": 117 },
    { "epoch": 0.019271598889433285, "grad_norm": 0.7625167369842529, "learning_rate": 4.320105263157895e-05, "loss": 1.1374, "step": 118 },
    { "epoch": 0.019434917524089497, "grad_norm": 0.8049649000167847, "learning_rate": 4.2674210526315786e-05, "loss": 0.9706, "step": 119 },
    { "epoch": 0.019598236158745713, "grad_norm": 0.7378389239311218, "learning_rate": 4.2147368421052626e-05, "loss": 0.8136, "step": 120 },
    { "epoch": 0.019761554793401928, "grad_norm": 0.8343645930290222, "learning_rate": 4.162052631578947e-05, "loss": 0.9652, "step": 121 },
    { "epoch": 0.01992487342805814, "grad_norm": 1.196692705154419, "learning_rate": 4.109368421052631e-05, "loss": 0.9455, "step": 122 },
    { "epoch": 0.020088192062714356, "grad_norm": 0.9792311787605286, "learning_rate": 4.056684210526316e-05, "loss": 1.0111, "step": 123 },
    { "epoch": 0.02025151069737057, "grad_norm": 0.9197056889533997, "learning_rate": 4.004e-05, "loss": 0.975, "step": 124 },
    { "epoch": 0.020414829332026783, "grad_norm": 1.0094839334487915, "learning_rate": 3.951315789473684e-05, "loss": 1.1968, "step": 125 },
    { "epoch": 0.020578147966683, "grad_norm": 1.0122300386428833, "learning_rate": 3.898631578947368e-05, "loss": 0.9602, "step": 126 },
    { "epoch": 0.02074146660133921, "grad_norm": 1.0537972450256348, "learning_rate": 3.845947368421052e-05, "loss": 0.9842, "step": 127 },
    { "epoch": 0.020904785235995427, "grad_norm": 1.1098566055297852, "learning_rate": 3.7932631578947367e-05, "loss": 0.6929, "step": 128 },
    { "epoch": 0.021068103870651642, "grad_norm": 1.246183156967163, "learning_rate": 3.7405789473684206e-05, "loss": 1.0037, "step": 129 },
    { "epoch": 0.021231422505307854, "grad_norm": 1.2114588022232056, "learning_rate": 3.6878947368421045e-05, "loss": 0.9154, "step": 130 },
    { "epoch": 0.02139474113996407, "grad_norm": 1.2241871356964111, "learning_rate": 3.635210526315789e-05, "loss": 1.0221, "step": 131 },
    { "epoch": 0.021558059774620286, "grad_norm": 1.3580658435821533, "learning_rate": 3.582526315789474e-05, "loss": 1.0275, "step": 132 },
    { "epoch": 0.021721378409276498, "grad_norm": 1.6600242853164673, "learning_rate": 3.5298421052631576e-05, "loss": 1.1471, "step": 133 },
    { "epoch": 0.021884697043932713, "grad_norm": 1.9517477750778198, "learning_rate": 3.477157894736842e-05, "loss": 1.21, "step": 134 },
    { "epoch": 0.022048015678588925, "grad_norm": 2.107109308242798, "learning_rate": 3.424473684210526e-05, "loss": 1.4034, "step": 135 },
    { "epoch": 0.02221133431324514, "grad_norm": 3.42795991897583, "learning_rate": 3.37178947368421e-05, "loss": 1.2696, "step": 136 },
    { "epoch": 0.022374652947901356, "grad_norm": 2.0616705417633057, "learning_rate": 3.319105263157894e-05, "loss": 1.1452, "step": 137 },
    { "epoch": 0.02253797158255757, "grad_norm": 2.5637259483337402, "learning_rate": 3.2664210526315786e-05, "loss": 1.1689, "step": 138 },
    { "epoch": 0.022701290217213784, "grad_norm": 3.1429243087768555, "learning_rate": 3.213736842105263e-05, "loss": 1.4649, "step": 139 },
    { "epoch": 0.02286460885187, "grad_norm": 3.0663323402404785, "learning_rate": 3.161052631578947e-05, "loss": 1.4323, "step": 140 },
    { "epoch": 0.023027927486526212, "grad_norm": 3.1458401679992676, "learning_rate": 3.108368421052632e-05, "loss": 0.9673, "step": 141 },
    { "epoch": 0.023191246121182427, "grad_norm": 3.3782215118408203, "learning_rate": 3.0556842105263156e-05, "loss": 1.3108, "step": 142 },
    { "epoch": 0.02335456475583864, "grad_norm": 3.1077187061309814, "learning_rate": 3.0029999999999995e-05, "loss": 1.6808, "step": 143 },
    { "epoch": 0.023517883390494855, "grad_norm": 3.3202567100524902, "learning_rate": 2.9503157894736838e-05, "loss": 1.6597, "step": 144 },
    { "epoch": 0.02368120202515107, "grad_norm": 2.8775930404663086, "learning_rate": 2.8976315789473684e-05, "loss": 0.9313, "step": 145 },
    { "epoch": 0.023844520659807283, "grad_norm": 3.3522984981536865, "learning_rate": 2.8449473684210523e-05, "loss": 1.8898, "step": 146 },
    { "epoch": 0.0240078392944635, "grad_norm": 4.523774147033691, "learning_rate": 2.7922631578947366e-05, "loss": 1.1704, "step": 147 },
    { "epoch": 0.024171157929119714, "grad_norm": 5.291178226470947, "learning_rate": 2.7395789473684212e-05, "loss": 2.2631, "step": 148 },
    { "epoch": 0.024334476563775926, "grad_norm": 5.7989959716796875, "learning_rate": 2.686894736842105e-05, "loss": 2.0457, "step": 149 },
    { "epoch": 0.02449779519843214, "grad_norm": 10.409539222717285, "learning_rate": 2.634210526315789e-05, "loss": 1.8767, "step": 150 },
    { "epoch": 0.02449779519843214, "eval_loss": 1.1476330757141113, "eval_runtime": 345.8578, "eval_samples_per_second": 7.454, "eval_steps_per_second": 1.865, "step": 150 },
    { "epoch": 0.024661113833088357, "grad_norm": 0.5229954719543457, "learning_rate": 2.5815263157894736e-05, "loss": 0.9078, "step": 151 },
    { "epoch": 0.02482443246774457, "grad_norm": 0.4165619909763336, "learning_rate": 2.528842105263158e-05, "loss": 0.7202, "step": 152 },
    { "epoch": 0.024987751102400785, "grad_norm": 0.4411524832248688, "learning_rate": 2.4761578947368418e-05, "loss": 0.7657, "step": 153 },
    { "epoch": 0.025151069737056997, "grad_norm": 0.5870716571807861, "learning_rate": 2.423473684210526e-05, "loss": 0.9064, "step": 154 },
    { "epoch": 0.025314388371713212, "grad_norm": 0.5716325044631958, "learning_rate": 2.3707894736842103e-05, "loss": 0.7211, "step": 155 },
    { "epoch": 0.025477707006369428, "grad_norm": 0.5912550091743469, "learning_rate": 2.3181052631578946e-05, "loss": 0.9137, "step": 156 },
    { "epoch": 0.02564102564102564, "grad_norm": 0.49093204736709595, "learning_rate": 2.265421052631579e-05, "loss": 0.9175, "step": 157 },
    { "epoch": 0.025804344275681856, "grad_norm": 0.5379444360733032, "learning_rate": 2.212736842105263e-05, "loss": 1.0109, "step": 158 },
    { "epoch": 0.02596766291033807, "grad_norm": 0.5582550764083862, "learning_rate": 2.1600526315789474e-05, "loss": 0.9187, "step": 159 },
    { "epoch": 0.026130981544994283, "grad_norm": 0.5776020288467407, "learning_rate": 2.1073684210526313e-05, "loss": 0.8756, "step": 160 },
    { "epoch": 0.0262943001796505, "grad_norm": 0.6320648789405823, "learning_rate": 2.0546842105263155e-05, "loss": 1.002, "step": 161 },
    { "epoch": 0.02645761881430671, "grad_norm": 0.5884015560150146, "learning_rate": 2.002e-05, "loss": 0.82, "step": 162 },
    { "epoch": 0.026620937448962927, "grad_norm": 0.6072445511817932, "learning_rate": 1.949315789473684e-05, "loss": 0.9141, "step": 163 },
    { "epoch": 0.026784256083619142, "grad_norm": 0.578770637512207, "learning_rate": 1.8966315789473683e-05, "loss": 0.8936, "step": 164 },
    { "epoch": 0.026947574718275354, "grad_norm": 0.7762899994850159, "learning_rate": 1.8439473684210522e-05, "loss": 0.67, "step": 165 },
    { "epoch": 0.02711089335293157, "grad_norm": 0.7052147388458252, "learning_rate": 1.791263157894737e-05, "loss": 0.7984, "step": 166 },
    { "epoch": 0.027274211987587785, "grad_norm": 0.6481052041053772, "learning_rate": 1.738578947368421e-05, "loss": 0.9779, "step": 167 },
    { "epoch": 0.027437530622243998, "grad_norm": 0.6853712201118469, "learning_rate": 1.685894736842105e-05, "loss": 0.8923, "step": 168 },
    { "epoch": 0.027600849256900213, "grad_norm": 0.7015615105628967, "learning_rate": 1.6332105263157893e-05, "loss": 0.8661, "step": 169 },
    { "epoch": 0.027764167891556425, "grad_norm": 0.8164605498313904, "learning_rate": 1.5805263157894735e-05, "loss": 0.8789, "step": 170 },
    { "epoch": 0.02792748652621264, "grad_norm": 0.8073320984840393, "learning_rate": 1.5278421052631578e-05, "loss": 1.0431, "step": 171 },
    { "epoch": 0.028090805160868856, "grad_norm": 0.9141411185264587, "learning_rate": 1.4751578947368419e-05, "loss": 1.069, "step": 172 },
    { "epoch": 0.02825412379552507, "grad_norm": 1.0805171728134155, "learning_rate": 1.4224736842105262e-05, "loss": 1.0579, "step": 173 },
    { "epoch": 0.028417442430181284, "grad_norm": 0.8766102194786072, "learning_rate": 1.3697894736842106e-05, "loss": 0.7755, "step": 174 },
    { "epoch": 0.0285807610648375, "grad_norm": 0.9655460715293884, "learning_rate": 1.3171052631578945e-05, "loss": 0.967, "step": 175 },
    { "epoch": 0.02874407969949371, "grad_norm": 0.9592732787132263, "learning_rate": 1.264421052631579e-05, "loss": 0.9174, "step": 176 },
    { "epoch": 0.028907398334149927, "grad_norm": 1.3226499557495117, "learning_rate": 1.211736842105263e-05, "loss": 1.2512, "step": 177 },
    { "epoch": 0.02907071696880614, "grad_norm": 1.1680970191955566, "learning_rate": 1.1590526315789473e-05, "loss": 0.948, "step": 178 },
    { "epoch": 0.029234035603462355, "grad_norm": 1.0348116159439087, "learning_rate": 1.1063684210526316e-05, "loss": 0.8562, "step": 179 },
    { "epoch": 0.02939735423811857, "grad_norm": 1.6529384851455688, "learning_rate": 1.0536842105263156e-05, "loss": 1.1671, "step": 180 },
    { "epoch": 0.029560672872774783, "grad_norm": 1.5153594017028809, "learning_rate": 1.001e-05, "loss": 1.0921, "step": 181 },
    { "epoch": 0.029723991507430998, "grad_norm": 1.3825584650039673, "learning_rate": 9.483157894736842e-06, "loss": 1.1131, "step": 182 },
    { "epoch": 0.029887310142087214, "grad_norm": 2.2654531002044678, "learning_rate": 8.956315789473684e-06, "loss": 1.129, "step": 183 },
    { "epoch": 0.030050628776743426, "grad_norm": 1.8482561111450195, "learning_rate": 8.429473684210525e-06, "loss": 1.1118, "step": 184 },
    { "epoch": 0.03021394741139964, "grad_norm": 2.0934503078460693, "learning_rate": 7.902631578947368e-06, "loss": 1.0566, "step": 185 },
    { "epoch": 0.030377266046055854, "grad_norm": 2.0610568523406982, "learning_rate": 7.3757894736842095e-06, "loss": 1.5909, "step": 186 },
    { "epoch": 0.03054058468071207, "grad_norm": 2.510016918182373, "learning_rate": 6.848947368421053e-06, "loss": 1.1805, "step": 187 },
    { "epoch": 0.030703903315368285, "grad_norm": 2.6841964721679688, "learning_rate": 6.322105263157895e-06, "loss": 1.2795, "step": 188 },
    { "epoch": 0.030867221950024497, "grad_norm": 2.3344948291778564, "learning_rate": 5.7952631578947365e-06, "loss": 1.2345, "step": 189 },
    { "epoch": 0.031030540584680712, "grad_norm": 2.7954304218292236, "learning_rate": 5.268421052631578e-06, "loss": 1.2223, "step": 190 },
    { "epoch": 0.031193859219336928, "grad_norm": 2.858825922012329, "learning_rate": 4.741578947368421e-06, "loss": 1.7338, "step": 191 },
    { "epoch": 0.03135717785399314, "grad_norm": 3.322049856185913, "learning_rate": 4.2147368421052626e-06, "loss": 1.4427, "step": 192 },
    { "epoch": 0.03152049648864935, "grad_norm": 2.922865152359009, "learning_rate": 3.6878947368421047e-06, "loss": 1.5083, "step": 193 },
    { "epoch": 0.03168381512330557, "grad_norm": 4.792754173278809, "learning_rate": 3.1610526315789474e-06, "loss": 1.614, "step": 194 },
    { "epoch": 0.03184713375796178, "grad_norm": 2.5056612491607666, "learning_rate": 2.634210526315789e-06, "loss": 1.0081, "step": 195 },
    { "epoch": 0.032010452392617995, "grad_norm": 3.5326714515686035, "learning_rate": 2.1073684210526313e-06, "loss": 0.9609, "step": 196 },
    { "epoch": 0.032173771027274214, "grad_norm": 5.468371868133545, "learning_rate": 1.5805263157894737e-06, "loss": 2.8999, "step": 197 },
    { "epoch": 0.03233708966193043, "grad_norm": 5.357850074768066, "learning_rate": 1.0536842105263156e-06, "loss": 2.4673, "step": 198 },
    { "epoch": 0.03250040829658664, "grad_norm": 5.674871444702148, "learning_rate": 5.268421052631578e-07, "loss": 2.0768, "step": 199 },
    { "epoch": 0.03266372693124286, "grad_norm": 5.8404860496521, "learning_rate": 0.0, "loss": 1.6252, "step": 200 },
    { "epoch": 0.03266372693124286, "eval_loss": 1.1349619626998901, "eval_runtime": 346.0024, "eval_samples_per_second": 7.451, "eval_steps_per_second": 1.864, "step": 200 }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.65769979075625e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}