{ "best_metric": 0.8627253174781799, "best_model_checkpoint": "miner_id_24/checkpoint-288", "epoch": 3.096774193548387, "eval_steps": 96, "global_step": 576, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005376344086021506, "eval_loss": 2.6265158653259277, "eval_runtime": 6.7666, "eval_samples_per_second": 23.202, "eval_steps_per_second": 0.739, "step": 1 }, { "epoch": 0.5161290322580645, "grad_norm": 1.9268544912338257, "learning_rate": 0.000384, "loss": 2.8198, "step": 96 }, { "epoch": 0.5161290322580645, "eval_loss": 1.0718849897384644, "eval_runtime": 6.8186, "eval_samples_per_second": 23.025, "eval_steps_per_second": 0.733, "step": 96 }, { "epoch": 1.032258064516129, "grad_norm": 1.5161734819412231, "learning_rate": 0.00039997559251058097, "loss": 1.8322, "step": 192 }, { "epoch": 1.032258064516129, "eval_loss": 0.9115006923675537, "eval_runtime": 6.8277, "eval_samples_per_second": 22.995, "eval_steps_per_second": 0.732, "step": 192 }, { "epoch": 1.5483870967741935, "grad_norm": 1.7862235307693481, "learning_rate": 0.00039989808570615163, "loss": 1.4055, "step": 288 }, { "epoch": 1.5483870967741935, "eval_loss": 0.8627253174781799, "eval_runtime": 6.8612, "eval_samples_per_second": 22.882, "eval_steps_per_second": 0.729, "step": 288 }, { "epoch": 2.064516129032258, "grad_norm": 1.742639422416687, "learning_rate": 0.0003997674540450395, "loss": 1.2677, "step": 384 }, { "epoch": 2.064516129032258, "eval_loss": 0.8888482451438904, "eval_runtime": 6.8571, "eval_samples_per_second": 22.896, "eval_steps_per_second": 0.729, "step": 384 }, { "epoch": 2.5806451612903225, "grad_norm": 2.217721939086914, "learning_rate": 0.00039958373224387655, "loss": 0.9139, "step": 480 }, { "epoch": 2.5806451612903225, "eval_loss": 0.8767105340957642, "eval_runtime": 6.9026, "eval_samples_per_second": 22.745, "eval_steps_per_second": 0.724, "step": 480 }, { "epoch": 3.096774193548387, "grad_norm": 2.5925180912017822, "learning_rate": 0.0003993469691285149, "loss": 0.9038, "step": 576 }, { "epoch": 3.096774193548387, "eval_loss": 0.99139404296875, "eval_runtime": 7.0307, "eval_samples_per_second": 22.331, "eval_steps_per_second": 0.711, "step": 576 } ], "logging_steps": 96, "max_steps": 18600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 96, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8750975648989184e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }