{
  "best_metric": 1.6414285898208618,
  "best_model_checkpoint": "miner_id_24/checkpoint-30",
  "epoch": 0.05364327223960662,
  "eval_steps": 5,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001788109074653554,
      "eval_loss": 2.64847469329834,
      "eval_runtime": 22.0136,
      "eval_samples_per_second": 10.721,
      "eval_steps_per_second": 5.36,
      "step": 1
    },
    {
      "epoch": 0.005364327223960662,
      "grad_norm": 3.505396842956543,
      "learning_rate": 6e-05,
      "loss": 10.5576,
      "step": 3
    },
    {
      "epoch": 0.008940545373267769,
      "eval_loss": 2.574577808380127,
      "eval_runtime": 22.2405,
      "eval_samples_per_second": 10.611,
      "eval_steps_per_second": 5.306,
      "step": 5
    },
    {
      "epoch": 0.010728654447921324,
      "grad_norm": 4.872518062591553,
      "learning_rate": 0.00012,
      "loss": 10.941,
      "step": 6
    },
    {
      "epoch": 0.016092981671881983,
      "grad_norm": 4.743135452270508,
      "learning_rate": 0.00018,
      "loss": 8.6214,
      "step": 9
    },
    {
      "epoch": 0.017881090746535537,
      "eval_loss": 1.9574334621429443,
      "eval_runtime": 22.3329,
      "eval_samples_per_second": 10.567,
      "eval_steps_per_second": 5.284,
      "step": 10
    },
    {
      "epoch": 0.021457308895842648,
      "grad_norm": 7.875356674194336,
      "learning_rate": 0.00019510565162951537,
      "loss": 7.8164,
      "step": 12
    },
    {
      "epoch": 0.02682163611980331,
      "grad_norm": 6.834690570831299,
      "learning_rate": 0.00017071067811865476,
      "loss": 7.7133,
      "step": 15
    },
    {
      "epoch": 0.02682163611980331,
      "eval_loss": 1.80838942527771,
      "eval_runtime": 22.3548,
      "eval_samples_per_second": 10.557,
      "eval_steps_per_second": 5.279,
      "step": 15
    },
    {
      "epoch": 0.03218596334376397,
      "grad_norm": 7.706214904785156,
      "learning_rate": 0.00013090169943749476,
      "loss": 7.4598,
      "step": 18
    },
    {
      "epoch": 0.035762181493071074,
      "eval_loss": 1.6834994554519653,
      "eval_runtime": 22.3956,
      "eval_samples_per_second": 10.538,
      "eval_steps_per_second": 5.269,
      "step": 20
    },
    {
      "epoch": 0.03755029056772463,
      "grad_norm": 4.207357883453369,
      "learning_rate": 8.435655349597689e-05,
      "loss": 6.9531,
      "step": 21
    },
    {
      "epoch": 0.042914617791685296,
      "grad_norm": 6.524333953857422,
      "learning_rate": 4.12214747707527e-05,
      "loss": 6.6744,
      "step": 24
    },
    {
      "epoch": 0.044702726866338846,
      "eval_loss": 1.6509045362472534,
      "eval_runtime": 22.4185,
      "eval_samples_per_second": 10.527,
      "eval_steps_per_second": 5.264,
      "step": 25
    },
    {
      "epoch": 0.048278945015645953,
      "grad_norm": 5.440735816955566,
      "learning_rate": 1.0899347581163221e-05,
      "loss": 6.4247,
      "step": 27
    },
    {
      "epoch": 0.05364327223960662,
      "grad_norm": 6.279931545257568,
      "learning_rate": 0.0,
      "loss": 6.1608,
      "step": 30
    },
    {
      "epoch": 0.05364327223960662,
      "eval_loss": 1.6414285898208618,
      "eval_runtime": 22.4499,
      "eval_samples_per_second": 10.512,
      "eval_steps_per_second": 5.256,
      "step": 30
    }
  ],
  "logging_steps": 3,
  "max_steps": 30,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8811139795968000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}