{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.004265984109209193, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.1329920546045965e-05, "eval_loss": 0.7577171921730042, "eval_runtime": 1240.3631, "eval_samples_per_second": 15.915, "eval_steps_per_second": 7.957, "step": 1 }, { "epoch": 0.00021329920546045967, "grad_norm": 1.315788984298706, "learning_rate": 0.00019967573081342103, "loss": 2.8729, "step": 10 }, { "epoch": 0.00042659841092091935, "grad_norm": 1.1967780590057373, "learning_rate": 0.0001970941817426052, "loss": 1.8038, "step": 20 }, { "epoch": 0.0006398976163813789, "grad_norm": 1.2115387916564941, "learning_rate": 0.00019199794436588243, "loss": 1.6148, "step": 30 }, { "epoch": 0.0008531968218418387, "grad_norm": 0.935818076133728, "learning_rate": 0.0001845190085543795, "loss": 1.3865, "step": 40 }, { "epoch": 0.0010664960273022983, "grad_norm": 1.0215239524841309, "learning_rate": 0.00017485107481711012, "loss": 1.3748, "step": 50 }, { "epoch": 0.0010664960273022983, "eval_loss": 0.3277254104614258, "eval_runtime": 1241.5957, "eval_samples_per_second": 15.899, "eval_steps_per_second": 7.949, "step": 50 }, { "epoch": 0.0012797952327627579, "grad_norm": 1.1998786926269531, "learning_rate": 0.00016324453755953773, "loss": 1.2868, "step": 60 }, { "epoch": 0.0014930944382232176, "grad_norm": 1.1020232439041138, "learning_rate": 0.00015000000000000001, "loss": 1.2541, "step": 70 }, { "epoch": 0.0017063936436836774, "grad_norm": 1.032950758934021, "learning_rate": 0.00013546048870425356, "loss": 1.2934, "step": 80 }, { "epoch": 0.001919692849144137, "grad_norm": 1.2034289836883545, "learning_rate": 0.00012000256937760445, "loss": 1.1281, "step": 90 }, { "epoch": 0.0021329920546045967, "grad_norm": 1.1966689825057983, "learning_rate": 0.00010402659401094152, "loss": 1.2666, "step": 100 }, { "epoch": 0.0021329920546045967, "eval_loss": 0.2983585298061371, "eval_runtime": 1241.2395, "eval_samples_per_second": 15.903, "eval_steps_per_second": 7.952, "step": 100 }, { "epoch": 0.002346291260065056, "grad_norm": 1.0272051095962524, "learning_rate": 8.79463319744677e-05, "loss": 1.2709, "step": 110 }, { "epoch": 0.0025595904655255157, "grad_norm": 1.6282625198364258, "learning_rate": 7.217825360835473e-05, "loss": 1.1521, "step": 120 }, { "epoch": 0.0027728896709859757, "grad_norm": 0.982958197593689, "learning_rate": 5.713074385969457e-05, "loss": 1.1276, "step": 130 }, { "epoch": 0.0029861888764464353, "grad_norm": 1.0101426839828491, "learning_rate": 4.3193525326884435e-05, "loss": 1.0648, "step": 140 }, { "epoch": 0.003199488081906895, "grad_norm": 1.2995356321334839, "learning_rate": 3.072756464904006e-05, "loss": 1.1229, "step": 150 }, { "epoch": 0.003199488081906895, "eval_loss": 0.2859799861907959, "eval_runtime": 1241.8568, "eval_samples_per_second": 15.896, "eval_steps_per_second": 7.948, "step": 150 }, { "epoch": 0.0034127872873673548, "grad_norm": 1.258757472038269, "learning_rate": 2.0055723659649904e-05, "loss": 1.0815, "step": 160 }, { "epoch": 0.0036260864928278143, "grad_norm": 1.325924038887024, "learning_rate": 1.1454397434679021e-05, "loss": 1.0969, "step": 170 }, { "epoch": 0.003839385698288274, "grad_norm": 1.1691293716430664, "learning_rate": 5.146355805285452e-06, "loss": 1.0415, "step": 180 }, { "epoch": 0.004052684903748734, "grad_norm": 1.2433751821517944, "learning_rate": 1.2949737362087156e-06, "loss": 1.1115, "step": 190 }, { "epoch": 0.004265984109209193, "grad_norm": 1.129075050354004, "learning_rate": 0.0, "loss": 1.1193, "step": 200 }, { "epoch": 0.004265984109209193, "eval_loss": 0.28365498781204224, "eval_runtime": 1241.6171, "eval_samples_per_second": 15.899, "eval_steps_per_second": 7.949, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.3218937061376e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }