{ "best_metric": 1.9018210172653198, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.0057185337679419, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022874135071767598, "grad_norm": 3.1676807403564453, "learning_rate": 2e-05, "loss": 15.7884, "step": 1 }, { "epoch": 0.00022874135071767598, "eval_loss": 4.049134254455566, "eval_runtime": 317.0162, "eval_samples_per_second": 5.807, "eval_steps_per_second": 2.905, "step": 1 }, { "epoch": 0.00045748270143535196, "grad_norm": 3.3963818550109863, "learning_rate": 4e-05, "loss": 15.5728, "step": 2 }, { "epoch": 0.0006862240521530279, "grad_norm": 3.2129127979278564, "learning_rate": 6e-05, "loss": 14.4822, "step": 3 }, { "epoch": 0.0009149654028707039, "grad_norm": 3.7225141525268555, "learning_rate": 8e-05, "loss": 17.6575, "step": 4 }, { "epoch": 0.00114370675358838, "grad_norm": 3.091374397277832, "learning_rate": 0.0001, "loss": 15.4111, "step": 5 }, { "epoch": 0.00114370675358838, "eval_loss": 4.0190510749816895, "eval_runtime": 316.0493, "eval_samples_per_second": 5.825, "eval_steps_per_second": 2.914, "step": 5 }, { "epoch": 0.0013724481043060558, "grad_norm": 3.7836978435516357, "learning_rate": 0.00012, "loss": 15.3633, "step": 6 }, { "epoch": 0.0016011894550237318, "grad_norm": 4.331509590148926, "learning_rate": 0.00014, "loss": 14.4719, "step": 7 }, { "epoch": 0.0018299308057414079, "grad_norm": 6.673212051391602, "learning_rate": 0.00016, "loss": 19.2312, "step": 8 }, { "epoch": 0.002058672156459084, "grad_norm": 6.4779863357543945, "learning_rate": 0.00018, "loss": 16.3462, "step": 9 }, { "epoch": 0.00228741350717676, "grad_norm": 6.56136417388916, "learning_rate": 0.0002, "loss": 12.3944, "step": 10 }, { "epoch": 0.00228741350717676, "eval_loss": 3.267272472381592, "eval_runtime": 316.0973, "eval_samples_per_second": 5.824, "eval_steps_per_second": 2.914, "step": 10 }, { "epoch": 0.0025161548578944357, "grad_norm": 7.548186779022217, "learning_rate": 0.00019781476007338058, "loss": 14.214, "step": 11 }, { "epoch": 0.0027448962086121117, "grad_norm": 10.005125999450684, "learning_rate": 0.0001913545457642601, "loss": 14.8289, "step": 12 }, { "epoch": 0.0029736375593297877, "grad_norm": 7.77647590637207, "learning_rate": 0.00018090169943749476, "loss": 10.9902, "step": 13 }, { "epoch": 0.0032023789100474637, "grad_norm": 9.317489624023438, "learning_rate": 0.00016691306063588583, "loss": 11.2201, "step": 14 }, { "epoch": 0.0034311202607651397, "grad_norm": 8.553632736206055, "learning_rate": 0.00015000000000000001, "loss": 9.4567, "step": 15 }, { "epoch": 0.0034311202607651397, "eval_loss": 2.2402334213256836, "eval_runtime": 317.5791, "eval_samples_per_second": 5.797, "eval_steps_per_second": 2.9, "step": 15 }, { "epoch": 0.0036598616114828157, "grad_norm": 7.3252387046813965, "learning_rate": 0.00013090169943749476, "loss": 6.2007, "step": 16 }, { "epoch": 0.0038886029622004917, "grad_norm": 9.066070556640625, "learning_rate": 0.00011045284632676536, "loss": 7.0014, "step": 17 }, { "epoch": 0.004117344312918168, "grad_norm": 13.760567665100098, "learning_rate": 8.954715367323468e-05, "loss": 9.2674, "step": 18 }, { "epoch": 0.004346085663635844, "grad_norm": 9.562538146972656, "learning_rate": 6.909830056250527e-05, "loss": 8.6528, "step": 19 }, { "epoch": 0.00457482701435352, "grad_norm": 12.553031921386719, "learning_rate": 5.000000000000002e-05, "loss": 9.9563, "step": 20 }, { "epoch": 0.00457482701435352, "eval_loss": 1.9547957181930542, "eval_runtime": 317.0485, "eval_samples_per_second": 5.807, "eval_steps_per_second": 2.905, "step": 20 }, { "epoch": 0.004803568365071196, "grad_norm": 10.91247844696045, "learning_rate": 3.308693936411421e-05, "loss": 6.3325, "step": 21 }, { "epoch": 0.005032309715788871, "grad_norm": 10.488737106323242, "learning_rate": 1.9098300562505266e-05, "loss": 8.6445, "step": 22 }, { "epoch": 0.005261051066506548, "grad_norm": 9.67850399017334, "learning_rate": 8.645454235739903e-06, "loss": 6.8417, "step": 23 }, { "epoch": 0.005489792417224223, "grad_norm": 9.22018814086914, "learning_rate": 2.1852399266194314e-06, "loss": 5.6596, "step": 24 }, { "epoch": 0.0057185337679419, "grad_norm": 10.53501033782959, "learning_rate": 0.0, "loss": 7.4384, "step": 25 }, { "epoch": 0.0057185337679419, "eval_loss": 1.9018210172653198, "eval_runtime": 317.3453, "eval_samples_per_second": 5.801, "eval_steps_per_second": 2.902, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8493418414080000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }