{ "best_metric": 1.1180486679077148, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.03682563063892469, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003682563063892469, "eval_loss": 2.228027582168579, "eval_runtime": 105.1707, "eval_samples_per_second": 10.878, "eval_steps_per_second": 2.719, "step": 1 }, { "epoch": 0.0036825630638924692, "grad_norm": 3.64420485496521, "learning_rate": 4.0600000000000004e-05, "loss": 4.2977, "step": 10 }, { "epoch": 0.0073651261277849385, "grad_norm": 1.8111019134521484, "learning_rate": 8.120000000000001e-05, "loss": 3.6581, "step": 20 }, { "epoch": 0.011047689191677408, "grad_norm": 2.067847728729248, "learning_rate": 0.00012179999999999999, "loss": 3.1829, "step": 30 }, { "epoch": 0.014730252255569877, "grad_norm": 1.6672817468643188, "learning_rate": 0.00016240000000000002, "loss": 2.8307, "step": 40 }, { "epoch": 0.018412815319462345, "grad_norm": 1.8722939491271973, "learning_rate": 0.000203, "loss": 2.597, "step": 50 }, { "epoch": 0.018412815319462345, "eval_loss": 1.3078378438949585, "eval_runtime": 104.9597, "eval_samples_per_second": 10.899, "eval_steps_per_second": 2.725, "step": 50 }, { "epoch": 0.022095378383354815, "grad_norm": 1.3702597618103027, "learning_rate": 0.00020275275110137215, "loss": 2.5237, "step": 60 }, { "epoch": 0.025777941447247283, "grad_norm": 1.518210768699646, "learning_rate": 0.00020201220897726938, "loss": 2.4878, "step": 70 }, { "epoch": 0.029460504511139754, "grad_norm": 1.3466520309448242, "learning_rate": 0.00020078198147448128, "loss": 2.3046, "step": 80 }, { "epoch": 0.03314306757503222, "grad_norm": 1.2383614778518677, "learning_rate": 0.00019906806213773937, "loss": 2.3387, "step": 90 }, { "epoch": 0.03682563063892469, "grad_norm": 1.4980638027191162, "learning_rate": 0.0001968788010097697, "loss": 2.0506, "step": 100 }, { "epoch": 0.03682563063892469, "eval_loss": 1.1180486679077148, "eval_runtime": 105.0543, "eval_samples_per_second": 10.89, "eval_steps_per_second": 2.722, "step": 100 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.026877886444339e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }