{ "best_metric": 0.3432886600494385, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.011273957158962795, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003757985719654265, "eval_loss": 0.5640259981155396, "eval_runtime": 256.133, "eval_samples_per_second": 4.377, "eval_steps_per_second": 2.19, "step": 1 }, { "epoch": 0.0011273957158962795, "grad_norm": 0.699572741985321, "learning_rate": 0.00012, "loss": 0.4837, "step": 3 }, { "epoch": 0.0018789928598271326, "eval_loss": 0.4370155930519104, "eval_runtime": 257.9937, "eval_samples_per_second": 4.345, "eval_steps_per_second": 2.174, "step": 5 }, { "epoch": 0.002254791431792559, "grad_norm": 0.46814200282096863, "learning_rate": 0.0001992114701314478, "loss": 0.4558, "step": 6 }, { "epoch": 0.0033821871476888386, "grad_norm": 0.3834305703639984, "learning_rate": 0.00018763066800438636, "loss": 0.3557, "step": 9 }, { "epoch": 0.003757985719654265, "eval_loss": 0.372182160615921, "eval_runtime": 258.0673, "eval_samples_per_second": 4.344, "eval_steps_per_second": 2.174, "step": 10 }, { "epoch": 0.004509582863585118, "grad_norm": 0.4100714325904846, "learning_rate": 0.000163742398974869, "loss": 0.4093, "step": 12 }, { "epoch": 0.005636978579481398, "grad_norm": 0.3543630540370941, "learning_rate": 0.00013090169943749476, "loss": 0.3956, "step": 15 }, { "epoch": 0.005636978579481398, "eval_loss": 0.35663875937461853, "eval_runtime": 258.0391, "eval_samples_per_second": 4.344, "eval_steps_per_second": 2.174, "step": 15 }, { "epoch": 0.006764374295377677, "grad_norm": 0.3837605118751526, "learning_rate": 9.372094804706867e-05, "loss": 0.3204, "step": 18 }, { "epoch": 0.00751597143930853, "eval_loss": 0.3486693203449249, "eval_runtime": 258.2364, "eval_samples_per_second": 4.341, "eval_steps_per_second": 2.172, "step": 20 }, { "epoch": 0.007891770011273957, "grad_norm": 0.37864017486572266, "learning_rate": 5.7422070843492734e-05, "loss": 0.3019, "step": 21 }, { "epoch": 0.009019165727170236, "grad_norm": 0.3736952543258667, "learning_rate": 2.7103137257858868e-05, "loss": 0.3343, "step": 24 }, { "epoch": 0.009394964299135663, "eval_loss": 0.34419161081314087, "eval_runtime": 258.8527, "eval_samples_per_second": 4.331, "eval_steps_per_second": 2.167, "step": 25 }, { "epoch": 0.010146561443066516, "grad_norm": 0.3935553729534149, "learning_rate": 7.022351411174866e-06, "loss": 0.3589, "step": 27 }, { "epoch": 0.011273957158962795, "grad_norm": 0.38622140884399414, "learning_rate": 0.0, "loss": 0.3078, "step": 30 }, { "epoch": 0.011273957158962795, "eval_loss": 0.3432886600494385, "eval_runtime": 258.1492, "eval_samples_per_second": 4.342, "eval_steps_per_second": 2.173, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4001413214437376e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }