|
{ |
|
"best_metric": 0.8627253174781799, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-288", |
|
"epoch": 3.096774193548387, |
|
"eval_steps": 96, |
|
"global_step": 576, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005376344086021506, |
|
"eval_loss": 2.6265158653259277, |
|
"eval_runtime": 6.7666, |
|
"eval_samples_per_second": 23.202, |
|
"eval_steps_per_second": 0.739, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.5161290322580645, |
|
"grad_norm": 1.9268544912338257, |
|
"learning_rate": 0.000384, |
|
"loss": 2.8198, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5161290322580645, |
|
"eval_loss": 1.0718849897384644, |
|
"eval_runtime": 6.8186, |
|
"eval_samples_per_second": 23.025, |
|
"eval_steps_per_second": 0.733, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.032258064516129, |
|
"grad_norm": 1.5161734819412231, |
|
"learning_rate": 0.00039997559251058097, |
|
"loss": 1.8322, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.032258064516129, |
|
"eval_loss": 0.9115006923675537, |
|
"eval_runtime": 6.8277, |
|
"eval_samples_per_second": 22.995, |
|
"eval_steps_per_second": 0.732, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.5483870967741935, |
|
"grad_norm": 1.7862235307693481, |
|
"learning_rate": 0.00039989808570615163, |
|
"loss": 1.4055, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.5483870967741935, |
|
"eval_loss": 0.8627253174781799, |
|
"eval_runtime": 6.8612, |
|
"eval_samples_per_second": 22.882, |
|
"eval_steps_per_second": 0.729, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.064516129032258, |
|
"grad_norm": 1.742639422416687, |
|
"learning_rate": 0.0003997674540450395, |
|
"loss": 1.2677, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.064516129032258, |
|
"eval_loss": 0.8888482451438904, |
|
"eval_runtime": 6.8571, |
|
"eval_samples_per_second": 22.896, |
|
"eval_steps_per_second": 0.729, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.5806451612903225, |
|
"grad_norm": 2.217721939086914, |
|
"learning_rate": 0.00039958373224387655, |
|
"loss": 0.9139, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.5806451612903225, |
|
"eval_loss": 0.8767105340957642, |
|
"eval_runtime": 6.9026, |
|
"eval_samples_per_second": 22.745, |
|
"eval_steps_per_second": 0.724, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.096774193548387, |
|
"grad_norm": 2.5925180912017822, |
|
"learning_rate": 0.0003993469691285149, |
|
"loss": 0.9038, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 3.096774193548387, |
|
"eval_loss": 0.99139404296875, |
|
"eval_runtime": 7.0307, |
|
"eval_samples_per_second": 22.331, |
|
"eval_steps_per_second": 0.711, |
|
"step": 576 |
|
} |
|
], |
|
"logging_steps": 96, |
|
"max_steps": 18600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 96, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8750975648989184e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|