{ "best_metric": 0.8129812479019165, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.0011708916339792752, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.6835665359171006e-05, "grad_norm": 0.7221629619598389, "learning_rate": 2e-05, "loss": 0.876, "step": 1 }, { "epoch": 4.6835665359171006e-05, "eval_loss": 1.0366460084915161, "eval_runtime": 2844.3805, "eval_samples_per_second": 3.161, "eval_steps_per_second": 1.58, "step": 1 }, { "epoch": 9.367133071834201e-05, "grad_norm": 0.8914138078689575, "learning_rate": 4e-05, "loss": 0.9882, "step": 2 }, { "epoch": 0.00014050699607751302, "grad_norm": 0.9784770607948303, "learning_rate": 6e-05, "loss": 1.2942, "step": 3 }, { "epoch": 0.00018734266143668403, "grad_norm": 0.7786074280738831, "learning_rate": 8e-05, "loss": 0.9422, "step": 4 }, { "epoch": 0.00023417832679585503, "grad_norm": 0.6185960173606873, "learning_rate": 0.0001, "loss": 0.8609, "step": 5 }, { "epoch": 0.00023417832679585503, "eval_loss": 0.9876631498336792, "eval_runtime": 2857.2233, "eval_samples_per_second": 3.146, "eval_steps_per_second": 1.573, "step": 5 }, { "epoch": 0.00028101399215502604, "grad_norm": 0.6685774326324463, "learning_rate": 0.00012, "loss": 0.8235, "step": 6 }, { "epoch": 0.00032784965751419707, "grad_norm": 0.6721071004867554, "learning_rate": 0.00014, "loss": 0.989, "step": 7 }, { "epoch": 0.00037468532287336805, "grad_norm": 0.8013759255409241, "learning_rate": 0.00016, "loss": 0.8728, "step": 8 }, { "epoch": 0.0004215209882325391, "grad_norm": 0.6883695721626282, "learning_rate": 0.00018, "loss": 0.774, "step": 9 }, { "epoch": 0.00046835665359171006, "grad_norm": 0.5397657155990601, "learning_rate": 0.0002, "loss": 0.7173, "step": 10 }, { "epoch": 0.00046835665359171006, "eval_loss": 0.8521348237991333, "eval_runtime": 2828.8096, "eval_samples_per_second": 3.178, "eval_steps_per_second": 1.589, "step": 10 }, { "epoch": 0.000515192318950881, "grad_norm": 0.6538496017456055, "learning_rate": 0.00019781476007338058, "loss": 0.8828, "step": 11 }, { "epoch": 0.0005620279843100521, "grad_norm": 0.7199555039405823, "learning_rate": 0.0001913545457642601, "loss": 0.6093, "step": 12 }, { "epoch": 0.0006088636496692231, "grad_norm": 0.8212117552757263, "learning_rate": 0.00018090169943749476, "loss": 0.842, "step": 13 }, { "epoch": 0.0006556993150283941, "grad_norm": 0.6258594989776611, "learning_rate": 0.00016691306063588583, "loss": 0.7928, "step": 14 }, { "epoch": 0.0007025349803875652, "grad_norm": 0.8337199687957764, "learning_rate": 0.00015000000000000001, "loss": 0.8851, "step": 15 }, { "epoch": 0.0007025349803875652, "eval_loss": 0.8297687768936157, "eval_runtime": 2839.0047, "eval_samples_per_second": 3.167, "eval_steps_per_second": 1.583, "step": 15 }, { "epoch": 0.0007493706457467361, "grad_norm": 0.5545913577079773, "learning_rate": 0.00013090169943749476, "loss": 0.7313, "step": 16 }, { "epoch": 0.0007962063111059071, "grad_norm": 0.6206797361373901, "learning_rate": 0.00011045284632676536, "loss": 0.8542, "step": 17 }, { "epoch": 0.0008430419764650782, "grad_norm": 0.6755259037017822, "learning_rate": 8.954715367323468e-05, "loss": 0.8265, "step": 18 }, { "epoch": 0.0008898776418242492, "grad_norm": 0.46088144183158875, "learning_rate": 6.909830056250527e-05, "loss": 0.6002, "step": 19 }, { "epoch": 0.0009367133071834201, "grad_norm": 0.6171578764915466, "learning_rate": 5.000000000000002e-05, "loss": 0.9655, "step": 20 }, { "epoch": 0.0009367133071834201, "eval_loss": 0.815974235534668, "eval_runtime": 2833.2714, "eval_samples_per_second": 3.173, "eval_steps_per_second": 1.587, "step": 20 }, { "epoch": 0.0009835489725425913, "grad_norm": 0.47180765867233276, "learning_rate": 3.308693936411421e-05, "loss": 0.7465, "step": 21 }, { "epoch": 0.001030384637901762, "grad_norm": 0.4130145311355591, "learning_rate": 1.9098300562505266e-05, "loss": 0.5707, "step": 22 }, { "epoch": 0.0010772203032609331, "grad_norm": 0.730938732624054, "learning_rate": 8.645454235739903e-06, "loss": 0.8911, "step": 23 }, { "epoch": 0.0011240559686201042, "grad_norm": 0.5450481176376343, "learning_rate": 2.1852399266194314e-06, "loss": 0.7751, "step": 24 }, { "epoch": 0.0011708916339792752, "grad_norm": 0.48658469319343567, "learning_rate": 0.0, "loss": 0.8835, "step": 25 }, { "epoch": 0.0011708916339792752, "eval_loss": 0.8129812479019165, "eval_runtime": 2839.9874, "eval_samples_per_second": 3.166, "eval_steps_per_second": 1.583, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5918605594066944.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }