|
{ |
|
"best_metric": 0.10595488548278809, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-500", |
|
"epoch": 0.2720348204570185, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000544069640914037, |
|
"eval_loss": 0.3388690948486328, |
|
"eval_runtime": 65.8398, |
|
"eval_samples_per_second": 5.878, |
|
"eval_steps_per_second": 1.473, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00544069640914037, |
|
"grad_norm": 0.17145936191082, |
|
"learning_rate": 4.22e-05, |
|
"loss": 0.2495, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01088139281828074, |
|
"grad_norm": 0.27646932005882263, |
|
"learning_rate": 8.44e-05, |
|
"loss": 0.2263, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01632208922742111, |
|
"grad_norm": 0.2995341122150421, |
|
"learning_rate": 0.0001266, |
|
"loss": 0.1708, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02176278563656148, |
|
"grad_norm": 0.2981434166431427, |
|
"learning_rate": 0.0001688, |
|
"loss": 0.1959, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02720348204570185, |
|
"grad_norm": 0.16809503734111786, |
|
"learning_rate": 0.000211, |
|
"loss": 0.1967, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02720348204570185, |
|
"eval_loss": 0.14689071476459503, |
|
"eval_runtime": 66.1969, |
|
"eval_samples_per_second": 5.846, |
|
"eval_steps_per_second": 1.465, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03264417845484222, |
|
"grad_norm": 0.29836317896842957, |
|
"learning_rate": 0.00021074300730241147, |
|
"loss": 0.1934, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03808487486398259, |
|
"grad_norm": 0.5127425789833069, |
|
"learning_rate": 0.00020997328125223568, |
|
"loss": 0.2337, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04352557127312296, |
|
"grad_norm": 0.7148259878158569, |
|
"learning_rate": 0.0002086945718774165, |
|
"loss": 0.3728, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04896626768226333, |
|
"grad_norm": 0.7618590593338013, |
|
"learning_rate": 0.00020691310892149265, |
|
"loss": 0.4246, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0544069640914037, |
|
"grad_norm": 1.4085367918014526, |
|
"learning_rate": 0.00020463757149291335, |
|
"loss": 0.6418, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0544069640914037, |
|
"eval_loss": 0.18226467072963715, |
|
"eval_runtime": 66.1407, |
|
"eval_samples_per_second": 5.851, |
|
"eval_steps_per_second": 1.467, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05984766050054407, |
|
"grad_norm": 0.09314069151878357, |
|
"learning_rate": 0.0002018790457812944, |
|
"loss": 0.1106, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06528835690968444, |
|
"grad_norm": 0.19934406876564026, |
|
"learning_rate": 0.0001986509710466168, |
|
"loss": 0.1377, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07072905331882481, |
|
"grad_norm": 0.16686424612998962, |
|
"learning_rate": 0.00019496907414450293, |
|
"loss": 0.1762, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07616974972796518, |
|
"grad_norm": 0.1390169858932495, |
|
"learning_rate": 0.00019085129290655697, |
|
"loss": 0.1576, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08161044613710555, |
|
"grad_norm": 0.36551663279533386, |
|
"learning_rate": 0.00018631768874905217, |
|
"loss": 0.2147, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08161044613710555, |
|
"eval_loss": 0.1364036649465561, |
|
"eval_runtime": 66.6017, |
|
"eval_samples_per_second": 5.811, |
|
"eval_steps_per_second": 1.456, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08705114254624592, |
|
"grad_norm": 0.26904261112213135, |
|
"learning_rate": 0.0001813903489357277, |
|
"loss": 0.234, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09249183895538629, |
|
"grad_norm": 0.448312371969223, |
|
"learning_rate": 0.00017609327897085954, |
|
"loss": 0.2675, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09793253536452666, |
|
"grad_norm": 0.6152125000953674, |
|
"learning_rate": 0.00017045228564685694, |
|
"loss": 0.3036, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.10337323177366703, |
|
"grad_norm": 0.39368465542793274, |
|
"learning_rate": 0.0001644948513161638, |
|
"loss": 0.3332, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1088139281828074, |
|
"grad_norm": 1.3323695659637451, |
|
"learning_rate": 0.00015825, |
|
"loss": 0.4591, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1088139281828074, |
|
"eval_loss": 0.14527782797813416, |
|
"eval_runtime": 66.1265, |
|
"eval_samples_per_second": 5.852, |
|
"eval_steps_per_second": 1.467, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11425462459194777, |
|
"grad_norm": 0.17673972249031067, |
|
"learning_rate": 0.00015174815598624768, |
|
"loss": 0.1101, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11969532100108814, |
|
"grad_norm": 0.18985572457313538, |
|
"learning_rate": 0.00014502099560537873, |
|
"loss": 0.1234, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1251360174102285, |
|
"grad_norm": 0.1908355951309204, |
|
"learning_rate": 0.00013810129290655696, |
|
"loss": 0.13, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1305767138193689, |
|
"grad_norm": 0.2177634984254837, |
|
"learning_rate": 0.00013102275998576495, |
|
"loss": 0.1802, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13601741022850924, |
|
"grad_norm": 0.18758904933929443, |
|
"learning_rate": 0.00012381988274386116, |
|
"loss": 0.1612, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13601741022850924, |
|
"eval_loss": 0.13536955416202545, |
|
"eval_runtime": 66.6366, |
|
"eval_samples_per_second": 5.808, |
|
"eval_steps_per_second": 1.456, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14145810663764963, |
|
"grad_norm": 0.5093297958374023, |
|
"learning_rate": 0.00011652775287473745, |
|
"loss": 0.1836, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14689880304678998, |
|
"grad_norm": 0.5727118849754333, |
|
"learning_rate": 0.00010918189690211387, |
|
"loss": 0.2044, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15233949945593037, |
|
"grad_norm": 0.4531668424606323, |
|
"learning_rate": 0.00010181810309788618, |
|
"loss": 0.2924, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15778019586507072, |
|
"grad_norm": 0.9934888482093811, |
|
"learning_rate": 9.447224712526258e-05, |
|
"loss": 0.3537, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1632208922742111, |
|
"grad_norm": 0.6647962927818298, |
|
"learning_rate": 8.718011725613886e-05, |
|
"loss": 0.4845, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1632208922742111, |
|
"eval_loss": 0.12240716814994812, |
|
"eval_runtime": 66.1711, |
|
"eval_samples_per_second": 5.848, |
|
"eval_steps_per_second": 1.466, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16866158868335146, |
|
"grad_norm": 0.062041934579610825, |
|
"learning_rate": 7.997724001423507e-05, |
|
"loss": 0.0804, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17410228509249184, |
|
"grad_norm": 0.2328738570213318, |
|
"learning_rate": 7.289870709344306e-05, |
|
"loss": 0.079, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1795429815016322, |
|
"grad_norm": 0.1590886265039444, |
|
"learning_rate": 6.597900439462128e-05, |
|
"loss": 0.1257, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18498367791077258, |
|
"grad_norm": 0.205345019698143, |
|
"learning_rate": 5.9251844013752326e-05, |
|
"loss": 0.1441, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.19042437431991294, |
|
"grad_norm": 0.1600612848997116, |
|
"learning_rate": 5.275000000000002e-05, |
|
"loss": 0.1837, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19042437431991294, |
|
"eval_loss": 0.11754260957241058, |
|
"eval_runtime": 66.1447, |
|
"eval_samples_per_second": 5.851, |
|
"eval_steps_per_second": 1.466, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19586507072905332, |
|
"grad_norm": 0.26457643508911133, |
|
"learning_rate": 4.650514868383623e-05, |
|
"loss": 0.1924, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.20130576713819368, |
|
"grad_norm": 0.36637988686561584, |
|
"learning_rate": 4.054771435314305e-05, |
|
"loss": 0.1966, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.20674646354733406, |
|
"grad_norm": 0.4704027473926544, |
|
"learning_rate": 3.4906721029140495e-05, |
|
"loss": 0.2195, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.21218715995647444, |
|
"grad_norm": 0.7301682829856873, |
|
"learning_rate": 2.9609651064272323e-05, |
|
"loss": 0.3013, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2176278563656148, |
|
"grad_norm": 1.4596174955368042, |
|
"learning_rate": 2.468231125094783e-05, |
|
"loss": 0.534, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2176278563656148, |
|
"eval_loss": 0.10635092109441757, |
|
"eval_runtime": 66.0037, |
|
"eval_samples_per_second": 5.863, |
|
"eval_steps_per_second": 1.47, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.22306855277475518, |
|
"grad_norm": 0.08893619477748871, |
|
"learning_rate": 2.0148707093443057e-05, |
|
"loss": 0.0705, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22850924918389554, |
|
"grad_norm": 0.11107783019542694, |
|
"learning_rate": 1.603092585549706e-05, |
|
"loss": 0.0688, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23394994559303592, |
|
"grad_norm": 0.2446814477443695, |
|
"learning_rate": 1.2349028953383204e-05, |
|
"loss": 0.1321, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23939064200217627, |
|
"grad_norm": 0.1496814340353012, |
|
"learning_rate": 9.120954218705596e-06, |
|
"loss": 0.157, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24483133841131666, |
|
"grad_norm": 0.18574567139148712, |
|
"learning_rate": 6.362428507086673e-06, |
|
"loss": 0.1899, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24483133841131666, |
|
"eval_loss": 0.10692635923624039, |
|
"eval_runtime": 66.4273, |
|
"eval_samples_per_second": 5.826, |
|
"eval_steps_per_second": 1.46, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.250272034820457, |
|
"grad_norm": 0.14279238879680634, |
|
"learning_rate": 4.0868910785073565e-06, |
|
"loss": 0.1664, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25571273122959737, |
|
"grad_norm": 0.6236724853515625, |
|
"learning_rate": 2.3054281225835e-06, |
|
"loss": 0.2752, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2611534276387378, |
|
"grad_norm": 0.802135169506073, |
|
"learning_rate": 1.026718747764327e-06, |
|
"loss": 0.2971, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26659412404787813, |
|
"grad_norm": 0.7756718993186951, |
|
"learning_rate": 2.5699269758854715e-07, |
|
"loss": 0.3215, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2720348204570185, |
|
"grad_norm": 0.9758143424987793, |
|
"learning_rate": 0.0, |
|
"loss": 0.4517, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2720348204570185, |
|
"eval_loss": 0.10595488548278809, |
|
"eval_runtime": 66.4878, |
|
"eval_samples_per_second": 5.821, |
|
"eval_steps_per_second": 1.459, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3049289243623424e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|