{ "best_metric": 0.5773412585258484, "best_model_checkpoint": "miner_id_24/checkpoint-880", "epoch": 1.0362694300518134, "eval_steps": 40, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010362694300518134, "eval_loss": 0.742955207824707, "eval_runtime": 20.981, "eval_samples_per_second": 9.723, "eval_steps_per_second": 0.334, "step": 1 }, { "epoch": 0.04145077720207254, "eval_loss": 0.6771278977394104, "eval_runtime": 21.0227, "eval_samples_per_second": 9.704, "eval_steps_per_second": 0.333, "step": 40 }, { "epoch": 0.08290155440414508, "eval_loss": 0.6422113180160522, "eval_runtime": 20.9905, "eval_samples_per_second": 9.719, "eval_steps_per_second": 0.333, "step": 80 }, { "epoch": 0.10362694300518134, "grad_norm": 0.38452568650245667, "learning_rate": 0.00019999999999999998, "loss": 1.3417, "step": 100 }, { "epoch": 0.12435233160621761, "eval_loss": 0.6342259645462036, "eval_runtime": 21.0605, "eval_samples_per_second": 9.686, "eval_steps_per_second": 0.332, "step": 120 }, { "epoch": 0.16580310880829016, "eval_loss": 0.6262779831886292, "eval_runtime": 21.1055, "eval_samples_per_second": 9.666, "eval_steps_per_second": 0.332, "step": 160 }, { "epoch": 0.20725388601036268, "grad_norm": 0.41497528553009033, "learning_rate": 0.0002999992001464462, "loss": 1.2346, "step": 200 }, { "epoch": 0.20725388601036268, "eval_loss": 0.6233254075050354, "eval_runtime": 21.0436, "eval_samples_per_second": 9.694, "eval_steps_per_second": 0.333, "step": 200 }, { "epoch": 0.24870466321243523, "eval_loss": 0.619152307510376, "eval_runtime": 21.0068, "eval_samples_per_second": 9.711, "eval_steps_per_second": 0.333, "step": 240 }, { "epoch": 0.29015544041450775, "eval_loss": 0.6167010068893433, "eval_runtime": 21.0018, "eval_samples_per_second": 9.713, "eval_steps_per_second": 0.333, "step": 280 }, { "epoch": 0.31088082901554404, "grad_norm": 0.6404178738594055, "learning_rate": 0.0002999928013691971, "loss": 1.2428, "step": 300 }, { "epoch": 0.3316062176165803, "eval_loss": 0.6130599975585938, "eval_runtime": 21.0415, "eval_samples_per_second": 9.695, "eval_steps_per_second": 0.333, "step": 320 }, { "epoch": 0.37305699481865284, "eval_loss": 0.611344039440155, "eval_runtime": 21.0579, "eval_samples_per_second": 9.688, "eval_steps_per_second": 0.332, "step": 360 }, { "epoch": 0.41450777202072536, "grad_norm": 0.5714089870452881, "learning_rate": 0.00029998000408766265, "loss": 1.1918, "step": 400 }, { "epoch": 0.41450777202072536, "eval_loss": 0.605933666229248, "eval_runtime": 21.0463, "eval_samples_per_second": 9.693, "eval_steps_per_second": 0.333, "step": 400 }, { "epoch": 0.45595854922279794, "eval_loss": 0.6033980846405029, "eval_runtime": 21.0016, "eval_samples_per_second": 9.714, "eval_steps_per_second": 0.333, "step": 440 }, { "epoch": 0.49740932642487046, "eval_loss": 0.6009693741798401, "eval_runtime": 20.9877, "eval_samples_per_second": 9.72, "eval_steps_per_second": 0.334, "step": 480 }, { "epoch": 0.5181347150259067, "grad_norm": 0.7262830138206482, "learning_rate": 0.00029996080884775884, "loss": 1.2162, "step": 500 }, { "epoch": 0.538860103626943, "eval_loss": 0.6007779836654663, "eval_runtime": 21.0336, "eval_samples_per_second": 9.699, "eval_steps_per_second": 0.333, "step": 520 }, { "epoch": 0.5803108808290155, "eval_loss": 0.5946472883224487, "eval_runtime": 20.9844, "eval_samples_per_second": 9.721, "eval_steps_per_second": 0.334, "step": 560 }, { "epoch": 0.6217616580310881, "grad_norm": 0.95758455991745, "learning_rate": 0.0002999352164683305, "loss": 1.1867, "step": 600 }, { "epoch": 0.6217616580310881, "eval_loss": 0.5968028903007507, "eval_runtime": 21.0097, "eval_samples_per_second": 9.71, "eval_steps_per_second": 0.333, "step": 600 }, { "epoch": 0.6632124352331606, "eval_loss": 0.5911960005760193, "eval_runtime": 20.987, "eval_samples_per_second": 9.72, "eval_steps_per_second": 0.334, "step": 640 }, { "epoch": 0.7046632124352331, "eval_loss": 0.5886518955230713, "eval_runtime": 21.0007, "eval_samples_per_second": 9.714, "eval_steps_per_second": 0.333, "step": 680 }, { "epoch": 0.7253886010362695, "grad_norm": 0.5711341500282288, "learning_rate": 0.00029990322804111623, "loss": 1.1763, "step": 700 }, { "epoch": 0.7461139896373057, "eval_loss": 0.5855283141136169, "eval_runtime": 21.0019, "eval_samples_per_second": 9.713, "eval_steps_per_second": 0.333, "step": 720 }, { "epoch": 0.7875647668393783, "eval_loss": 0.5841448903083801, "eval_runtime": 20.994, "eval_samples_per_second": 9.717, "eval_steps_per_second": 0.333, "step": 760 }, { "epoch": 0.8290155440414507, "grad_norm": 0.7626607418060303, "learning_rate": 0.00029986484493070223, "loss": 1.1783, "step": 800 }, { "epoch": 0.8290155440414507, "eval_loss": 0.5830583572387695, "eval_runtime": 21.0001, "eval_samples_per_second": 9.714, "eval_steps_per_second": 0.333, "step": 800 }, { "epoch": 0.8704663212435233, "eval_loss": 0.5801699757575989, "eval_runtime": 21.0085, "eval_samples_per_second": 9.71, "eval_steps_per_second": 0.333, "step": 840 }, { "epoch": 0.9119170984455959, "eval_loss": 0.5773412585258484, "eval_runtime": 21.0062, "eval_samples_per_second": 9.711, "eval_steps_per_second": 0.333, "step": 880 }, { "epoch": 0.9326424870466321, "grad_norm": 0.8522987961769104, "learning_rate": 0.00029982006877446357, "loss": 1.1443, "step": 900 }, { "epoch": 0.9533678756476683, "eval_loss": 0.5795757174491882, "eval_runtime": 21.0572, "eval_samples_per_second": 9.688, "eval_steps_per_second": 0.332, "step": 920 }, { "epoch": 0.9948186528497409, "eval_loss": 0.5778769254684448, "eval_runtime": 20.9986, "eval_samples_per_second": 9.715, "eval_steps_per_second": 0.333, "step": 960 }, { "epoch": 1.0362694300518134, "grad_norm": 0.8339794278144836, "learning_rate": 0.000299768901482495, "loss": 1.0328, "step": 1000 }, { "epoch": 1.0362694300518134, "eval_loss": 0.5930935740470886, "eval_runtime": 21.0358, "eval_samples_per_second": 9.698, "eval_steps_per_second": 0.333, "step": 1000 } ], "logging_steps": 100, "max_steps": 48250, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 40, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.258924645482496e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }