{ "best_metric": 2.5573222637176514, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.03305238803503553, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016526194017517766, "eval_loss": 3.1149251461029053, "eval_runtime": 43.6347, "eval_samples_per_second": 58.394, "eval_steps_per_second": 14.598, "step": 1 }, { "epoch": 0.0016526194017517765, "grad_norm": 2.4044153690338135, "learning_rate": 4.36e-05, "loss": 2.4216, "step": 10 }, { "epoch": 0.003305238803503553, "grad_norm": 3.2215139865875244, "learning_rate": 8.72e-05, "loss": 2.2895, "step": 20 }, { "epoch": 0.00495785820525533, "grad_norm": 4.437511444091797, "learning_rate": 0.0001308, "loss": 2.2952, "step": 30 }, { "epoch": 0.006610477607007106, "grad_norm": 6.676159381866455, "learning_rate": 0.0001744, "loss": 2.2616, "step": 40 }, { "epoch": 0.008263097008758883, "grad_norm": 22.480907440185547, "learning_rate": 0.000218, "loss": 2.7042, "step": 50 }, { "epoch": 0.008263097008758883, "eval_loss": 2.7512803077697754, "eval_runtime": 43.5634, "eval_samples_per_second": 58.489, "eval_steps_per_second": 14.622, "step": 50 }, { "epoch": 0.00991571641051066, "grad_norm": 2.332223415374756, "learning_rate": 0.00021773448147832086, "loss": 2.3331, "step": 60 }, { "epoch": 0.011568335812262435, "grad_norm": 2.4621620178222656, "learning_rate": 0.0002169392194928312, "loss": 2.3293, "step": 70 }, { "epoch": 0.013220955214014212, "grad_norm": 3.450859546661377, "learning_rate": 0.00021561808847998484, "loss": 2.1836, "step": 80 }, { "epoch": 0.014873574615765989, "grad_norm": 7.603415489196777, "learning_rate": 0.00021377752485727676, "loss": 2.2165, "step": 90 }, { "epoch": 0.016526194017517766, "grad_norm": 19.09664535522461, "learning_rate": 0.00021142649566566402, "loss": 3.0109, "step": 100 }, { "epoch": 0.016526194017517766, "eval_loss": 2.7991316318511963, "eval_runtime": 43.5731, "eval_samples_per_second": 58.476, "eval_steps_per_second": 14.619, "step": 100 }, { "epoch": 0.01817881341926954, "grad_norm": 2.182637929916382, "learning_rate": 0.0002085764548830435, "loss": 2.4546, "step": 110 }, { "epoch": 0.01983143282102132, "grad_norm": 2.9767863750457764, "learning_rate": 0.00020524128762162305, "loss": 2.252, "step": 120 }, { "epoch": 0.021484052222773095, "grad_norm": 4.451344966888428, "learning_rate": 0.00020143724248105043, "loss": 2.1921, "step": 130 }, { "epoch": 0.02313667162452487, "grad_norm": 5.990928649902344, "learning_rate": 0.0001971828523868693, "loss": 2.0887, "step": 140 }, { "epoch": 0.02478929102627665, "grad_norm": 19.66733741760254, "learning_rate": 0.0001924988442999686, "loss": 2.5636, "step": 150 }, { "epoch": 0.02478929102627665, "eval_loss": 2.7373805046081543, "eval_runtime": 43.5877, "eval_samples_per_second": 58.457, "eval_steps_per_second": 14.614, "step": 150 }, { "epoch": 0.026441910428028424, "grad_norm": 2.0548884868621826, "learning_rate": 0.00018740803823691298, "loss": 2.4033, "step": 160 }, { "epoch": 0.028094529829780203, "grad_norm": 2.9234230518341064, "learning_rate": 0.00018193523609311556, "loss": 2.2948, "step": 170 }, { "epoch": 0.029747149231531978, "grad_norm": 3.7016618251800537, "learning_rate": 0.00017610710081049675, "loss": 2.3234, "step": 180 }, { "epoch": 0.03139976863328375, "grad_norm": 5.965432643890381, "learning_rate": 0.00016995202647831142, "loss": 2.2832, "step": 190 }, { "epoch": 0.03305238803503553, "grad_norm": 17.460437774658203, "learning_rate": 0.00016350000000000002, "loss": 3.0321, "step": 200 }, { "epoch": 0.03305238803503553, "eval_loss": 2.5573222637176514, "eval_runtime": 43.5955, "eval_samples_per_second": 58.446, "eval_steps_per_second": 14.612, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5042364292792320.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }