{
  "best_metric": 1.727027177810669,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.008390845587464077,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 4.1954227937320384e-05,
      "eval_loss": 1.8378162384033203,
      "eval_runtime": 453.393,
      "eval_samples_per_second": 11.068,
      "eval_steps_per_second": 2.768,
      "step": 1
    },
    {
      "epoch": 0.00041954227937320384,
      "grad_norm": 2.091989755630493,
      "learning_rate": 4.22e-05,
      "loss": 1.6646,
      "step": 10
    },
    {
      "epoch": 0.0008390845587464077,
      "grad_norm": 2.7637720108032227,
      "learning_rate": 8.44e-05,
      "loss": 1.6956,
      "step": 20
    },
    {
      "epoch": 0.0012586268381196116,
      "grad_norm": 1.8225716352462769,
      "learning_rate": 0.0001266,
      "loss": 1.4925,
      "step": 30
    },
    {
      "epoch": 0.0016781691174928154,
      "grad_norm": 2.2302634716033936,
      "learning_rate": 0.0001688,
      "loss": 1.7316,
      "step": 40
    },
    {
      "epoch": 0.002097711396866019,
      "grad_norm": 3.6677043437957764,
      "learning_rate": 0.000211,
      "loss": 1.6306,
      "step": 50
    },
    {
      "epoch": 0.002097711396866019,
      "eval_loss": 1.727027177810669,
      "eval_runtime": 452.3372,
      "eval_samples_per_second": 11.093,
      "eval_steps_per_second": 2.774,
      "step": 50
    },
    {
      "epoch": 0.002517253676239223,
      "grad_norm": 4.509883403778076,
      "learning_rate": 0.00021074300730241147,
      "loss": 1.6763,
      "step": 60
    },
    {
      "epoch": 0.0029367959556124267,
      "grad_norm": 2.8632655143737793,
      "learning_rate": 0.00020997328125223568,
      "loss": 1.8204,
      "step": 70
    },
    {
      "epoch": 0.0033563382349856307,
      "grad_norm": 2.905240297317505,
      "learning_rate": 0.0002086945718774165,
      "loss": 1.822,
      "step": 80
    },
    {
      "epoch": 0.0037758805143588347,
      "grad_norm": 5.918646812438965,
      "learning_rate": 0.00020691310892149265,
      "loss": 1.7552,
      "step": 90
    },
    {
      "epoch": 0.004195422793732038,
      "grad_norm": 7.395406723022461,
      "learning_rate": 0.00020463757149291335,
      "loss": 1.5701,
      "step": 100
    },
    {
      "epoch": 0.004195422793732038,
      "eval_loss": 1.9698339700698853,
      "eval_runtime": 457.466,
      "eval_samples_per_second": 10.969,
      "eval_steps_per_second": 2.743,
      "step": 100
    },
    {
      "epoch": 0.004614965073105242,
      "grad_norm": 2.087064504623413,
      "learning_rate": 0.0002018790457812944,
      "loss": 1.7137,
      "step": 110
    },
    {
      "epoch": 0.005034507352478446,
      "grad_norm": 1.7459348440170288,
      "learning_rate": 0.0001986509710466168,
      "loss": 1.7363,
      "step": 120
    },
    {
      "epoch": 0.00545404963185165,
      "grad_norm": 2.1844427585601807,
      "learning_rate": 0.00019496907414450293,
      "loss": 1.7358,
      "step": 130
    },
    {
      "epoch": 0.0058735919112248534,
      "grad_norm": 2.2916972637176514,
      "learning_rate": 0.00019085129290655697,
      "loss": 1.7705,
      "step": 140
    },
    {
      "epoch": 0.006293134190598058,
      "grad_norm": 2.5905094146728516,
      "learning_rate": 0.00018631768874905217,
      "loss": 1.7402,
      "step": 150
    },
    {
      "epoch": 0.006293134190598058,
      "eval_loss": 1.7718510627746582,
      "eval_runtime": 452.7846,
      "eval_samples_per_second": 11.083,
      "eval_steps_per_second": 2.772,
      "step": 150
    },
    {
      "epoch": 0.0067126764699712614,
      "grad_norm": 3.6152729988098145,
      "learning_rate": 0.0001813903489357277,
      "loss": 1.7279,
      "step": 160
    },
    {
      "epoch": 0.007132218749344465,
      "grad_norm": 2.7611238956451416,
      "learning_rate": 0.00017609327897085954,
      "loss": 1.7047,
      "step": 170
    },
    {
      "epoch": 0.0075517610287176695,
      "grad_norm": 3.315603256225586,
      "learning_rate": 0.00017045228564685694,
      "loss": 1.8601,
      "step": 180
    },
    {
      "epoch": 0.007971303308090873,
      "grad_norm": 4.356021881103516,
      "learning_rate": 0.0001644948513161638,
      "loss": 1.8429,
      "step": 190
    },
    {
      "epoch": 0.008390845587464077,
      "grad_norm": 6.690892219543457,
      "learning_rate": 0.00015825,
      "loss": 1.5689,
      "step": 200
    },
    {
      "epoch": 0.008390845587464077,
      "eval_loss": 1.9429612159729004,
      "eval_runtime": 452.8314,
      "eval_samples_per_second": 11.081,
      "eval_steps_per_second": 2.771,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.203800670568448e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}