{ "best_metric": 2.153864622116089, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.04828002414001207, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00024140012070006034, "eval_loss": 2.989262819290161, "eval_runtime": 100.8385, "eval_samples_per_second": 17.305, "eval_steps_per_second": 4.334, "step": 1 }, { "epoch": 0.0024140012070006035, "grad_norm": 5.14186954498291, "learning_rate": 4.36e-05, "loss": 2.1011, "step": 10 }, { "epoch": 0.004828002414001207, "grad_norm": 7.176346778869629, "learning_rate": 8.72e-05, "loss": 2.2516, "step": 20 }, { "epoch": 0.00724200362100181, "grad_norm": 6.447731971740723, "learning_rate": 0.0001308, "loss": 2.0652, "step": 30 }, { "epoch": 0.009656004828002414, "grad_norm": 6.7839179039001465, "learning_rate": 0.0001744, "loss": 2.0828, "step": 40 }, { "epoch": 0.012070006035003017, "grad_norm": 7.772573947906494, "learning_rate": 0.000218, "loss": 2.3877, "step": 50 }, { "epoch": 0.012070006035003017, "eval_loss": 2.153864622116089, "eval_runtime": 100.9581, "eval_samples_per_second": 17.284, "eval_steps_per_second": 4.329, "step": 50 }, { "epoch": 0.01448400724200362, "grad_norm": 3.764359712600708, "learning_rate": 0.00021773448147832086, "loss": 1.971, "step": 60 }, { "epoch": 0.016898008449004225, "grad_norm": 5.581189155578613, "learning_rate": 0.0002169392194928312, "loss": 1.9339, "step": 70 }, { "epoch": 0.01931200965600483, "grad_norm": 5.161837100982666, "learning_rate": 0.00021561808847998484, "loss": 2.2624, "step": 80 }, { "epoch": 0.02172601086300543, "grad_norm": 4.681901454925537, "learning_rate": 0.00021377752485727676, "loss": 1.9126, "step": 90 }, { "epoch": 0.024140012070006035, "grad_norm": 13.321852684020996, "learning_rate": 0.00021142649566566402, "loss": 2.2267, "step": 100 }, { "epoch": 0.024140012070006035, "eval_loss": 2.305248737335205, "eval_runtime": 100.7966, "eval_samples_per_second": 17.312, "eval_steps_per_second": 4.335, "step": 100 }, { "epoch": 0.026554013277006638, "grad_norm": 4.219131946563721, "learning_rate": 0.0002085764548830435, "loss": 1.8166, "step": 110 }, { "epoch": 0.02896801448400724, "grad_norm": 3.56708025932312, "learning_rate": 0.00020524128762162305, "loss": 2.1941, "step": 120 }, { "epoch": 0.03138201569100785, "grad_norm": 6.397162437438965, "learning_rate": 0.00020143724248105043, "loss": 2.0798, "step": 130 }, { "epoch": 0.03379601689800845, "grad_norm": 5.533897876739502, "learning_rate": 0.0001971828523868693, "loss": 1.9979, "step": 140 }, { "epoch": 0.036210018105009054, "grad_norm": 7.768519878387451, "learning_rate": 0.0001924988442999686, "loss": 2.1741, "step": 150 }, { "epoch": 0.036210018105009054, "eval_loss": 2.3202924728393555, "eval_runtime": 100.8543, "eval_samples_per_second": 17.302, "eval_steps_per_second": 4.333, "step": 150 }, { "epoch": 0.03862401931200966, "grad_norm": 4.070739269256592, "learning_rate": 0.00018740803823691298, "loss": 2.1066, "step": 160 }, { "epoch": 0.04103802051901026, "grad_norm": 4.031443119049072, "learning_rate": 0.00018193523609311556, "loss": 2.1219, "step": 170 }, { "epoch": 0.04345202172601086, "grad_norm": 5.493770599365234, "learning_rate": 0.00017610710081049675, "loss": 1.9506, "step": 180 }, { "epoch": 0.045866022933011466, "grad_norm": 4.385707378387451, "learning_rate": 0.00016995202647831142, "loss": 2.0755, "step": 190 }, { "epoch": 0.04828002414001207, "grad_norm": 8.468510627746582, "learning_rate": 0.00016350000000000002, "loss": 2.2808, "step": 200 }, { "epoch": 0.04828002414001207, "eval_loss": 2.17218017578125, "eval_runtime": 100.7675, "eval_samples_per_second": 17.317, "eval_steps_per_second": 4.337, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.32624144891904e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }