{ "best_metric": 11.5, "best_model_checkpoint": "miner_id_24/checkpoint-300", "epoch": 3.239959500506244, "eval_steps": 300, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0026999662504218697, "eval_loss": 11.5, "eval_runtime": 3.5463, "eval_samples_per_second": 703.548, "eval_steps_per_second": 43.989, "step": 1 }, { "epoch": 0.809989875126561, "grad_norm": 0.0007467871182598174, "learning_rate": 0.00039997100677598455, "loss": 92.0, "step": 300 }, { "epoch": 0.809989875126561, "eval_loss": 11.5, "eval_runtime": 3.6707, "eval_samples_per_second": 679.708, "eval_steps_per_second": 42.499, "step": 300 }, { "epoch": 1.6199797502531217, "grad_norm": 0.0008554400410503149, "learning_rate": 0.0003998188153347523, "loss": 92.0, "step": 600 }, { "epoch": 1.6199797502531217, "eval_loss": 11.5, "eval_runtime": 3.5931, "eval_samples_per_second": 694.379, "eval_steps_per_second": 43.416, "step": 600 }, { "epoch": 2.429969625379683, "grad_norm": 0.0010360981104895473, "learning_rate": 0.0003995362765176636, "loss": 92.0, "step": 900 }, { "epoch": 2.429969625379683, "eval_loss": 11.5, "eval_runtime": 3.4612, "eval_samples_per_second": 720.85, "eval_steps_per_second": 45.071, "step": 900 }, { "epoch": 3.239959500506244, "grad_norm": 0.001791034359484911, "learning_rate": 0.00039912357463265427, "loss": 92.0, "step": 1200 }, { "epoch": 3.239959500506244, "eval_loss": 11.5, "eval_runtime": 3.5767, "eval_samples_per_second": 697.574, "eval_steps_per_second": 43.616, "step": 1200 } ], "logging_steps": 300, "max_steps": 37000, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 300, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 382496853270528.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }