{ "best_metric": 3.9591970443725586, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.050352467270896276, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002014098690835851, "grad_norm": 5.243093490600586, "learning_rate": 2e-05, "loss": 19.8411, "step": 1 }, { "epoch": 0.002014098690835851, "eval_loss": 5.059567928314209, "eval_runtime": 9.9585, "eval_samples_per_second": 21.088, "eval_steps_per_second": 10.544, "step": 1 }, { "epoch": 0.004028197381671702, "grad_norm": 4.998810291290283, "learning_rate": 4e-05, "loss": 21.7359, "step": 2 }, { "epoch": 0.006042296072507553, "grad_norm": 6.04394006729126, "learning_rate": 6e-05, "loss": 24.2109, "step": 3 }, { "epoch": 0.008056394763343404, "grad_norm": 6.116606712341309, "learning_rate": 8e-05, "loss": 22.4319, "step": 4 }, { "epoch": 0.010070493454179255, "grad_norm": 4.266777515411377, "learning_rate": 0.0001, "loss": 19.2349, "step": 5 }, { "epoch": 0.010070493454179255, "eval_loss": 4.971032619476318, "eval_runtime": 9.9491, "eval_samples_per_second": 21.107, "eval_steps_per_second": 10.554, "step": 5 }, { "epoch": 0.012084592145015106, "grad_norm": 4.383306503295898, "learning_rate": 0.00012, "loss": 12.7878, "step": 6 }, { "epoch": 0.014098690835850957, "grad_norm": 4.937915325164795, "learning_rate": 0.00014, "loss": 19.7955, "step": 7 }, { "epoch": 0.016112789526686808, "grad_norm": 3.5370934009552, "learning_rate": 0.00016, "loss": 15.5616, "step": 8 }, { "epoch": 0.01812688821752266, "grad_norm": 8.180222511291504, "learning_rate": 0.00018, "loss": 26.651, "step": 9 }, { "epoch": 0.02014098690835851, "grad_norm": 8.891468048095703, "learning_rate": 0.0002, "loss": 26.4958, "step": 10 }, { "epoch": 0.02014098690835851, "eval_loss": 4.756030082702637, "eval_runtime": 9.965, "eval_samples_per_second": 21.074, "eval_steps_per_second": 10.537, "step": 10 }, { "epoch": 0.022155085599194362, "grad_norm": 6.933834552764893, "learning_rate": 0.00019781476007338058, "loss": 18.7005, "step": 11 }, { "epoch": 0.02416918429003021, "grad_norm": 6.866347312927246, "learning_rate": 0.0001913545457642601, "loss": 21.0418, "step": 12 }, { "epoch": 0.026183282980866064, "grad_norm": 5.648362636566162, "learning_rate": 0.00018090169943749476, "loss": 16.3807, "step": 13 }, { "epoch": 0.028197381671701913, "grad_norm": 8.93856430053711, "learning_rate": 0.00016691306063588583, "loss": 20.5595, "step": 14 }, { "epoch": 0.030211480362537766, "grad_norm": 6.642890930175781, "learning_rate": 0.00015000000000000001, "loss": 16.273, "step": 15 }, { "epoch": 0.030211480362537766, "eval_loss": 4.319993019104004, "eval_runtime": 10.0192, "eval_samples_per_second": 20.96, "eval_steps_per_second": 10.48, "step": 15 }, { "epoch": 0.032225579053373615, "grad_norm": 6.6722331047058105, "learning_rate": 0.00013090169943749476, "loss": 17.0076, "step": 16 }, { "epoch": 0.03423967774420947, "grad_norm": 8.032441139221191, "learning_rate": 0.00011045284632676536, "loss": 16.5987, "step": 17 }, { "epoch": 0.03625377643504532, "grad_norm": 8.647908210754395, "learning_rate": 8.954715367323468e-05, "loss": 17.4807, "step": 18 }, { "epoch": 0.038267875125881166, "grad_norm": 9.85554313659668, "learning_rate": 6.909830056250527e-05, "loss": 18.2034, "step": 19 }, { "epoch": 0.04028197381671702, "grad_norm": 6.346075057983398, "learning_rate": 5.000000000000002e-05, "loss": 13.1005, "step": 20 }, { "epoch": 0.04028197381671702, "eval_loss": 4.020318508148193, "eval_runtime": 10.0603, "eval_samples_per_second": 20.874, "eval_steps_per_second": 10.437, "step": 20 }, { "epoch": 0.04229607250755287, "grad_norm": 13.50845718383789, "learning_rate": 3.308693936411421e-05, "loss": 23.3169, "step": 21 }, { "epoch": 0.044310171198388724, "grad_norm": 6.7406439781188965, "learning_rate": 1.9098300562505266e-05, "loss": 14.8342, "step": 22 }, { "epoch": 0.04632426988922457, "grad_norm": 7.017367839813232, "learning_rate": 8.645454235739903e-06, "loss": 14.3364, "step": 23 }, { "epoch": 0.04833836858006042, "grad_norm": 7.411367893218994, "learning_rate": 2.1852399266194314e-06, "loss": 13.5709, "step": 24 }, { "epoch": 0.050352467270896276, "grad_norm": 7.053526878356934, "learning_rate": 0.0, "loss": 16.6839, "step": 25 }, { "epoch": 0.050352467270896276, "eval_loss": 3.9591970443725586, "eval_runtime": 10.0358, "eval_samples_per_second": 20.925, "eval_steps_per_second": 10.463, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 54117928009728.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }