{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.987755102040816, "eval_steps": 500, "global_step": 366, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08163265306122448, "grad_norm": 0.4708639085292816, "learning_rate": 2.702702702702703e-05, "loss": 0.5385, "step": 10 }, { "epoch": 0.16326530612244897, "grad_norm": 0.4191964268684387, "learning_rate": 5.405405405405406e-05, "loss": 0.4991, "step": 20 }, { "epoch": 0.24489795918367346, "grad_norm": 0.40369561314582825, "learning_rate": 8.108108108108109e-05, "loss": 0.3058, "step": 30 }, { "epoch": 0.32653061224489793, "grad_norm": 0.6080173850059509, "learning_rate": 9.997948550797227e-05, "loss": 0.223, "step": 40 }, { "epoch": 0.40816326530612246, "grad_norm": 0.3516406714916229, "learning_rate": 9.961525153583327e-05, "loss": 0.1966, "step": 50 }, { "epoch": 0.4897959183673469, "grad_norm": 0.3611922264099121, "learning_rate": 9.879896064123961e-05, "loss": 0.1635, "step": 60 }, { "epoch": 0.5714285714285714, "grad_norm": 0.3673703372478485, "learning_rate": 9.753805025397779e-05, "loss": 0.1547, "step": 70 }, { "epoch": 0.6530612244897959, "grad_norm": 0.31258246302604675, "learning_rate": 9.584400884284545e-05, "loss": 0.141, "step": 80 }, { "epoch": 0.7346938775510204, "grad_norm": 0.40288954973220825, "learning_rate": 9.373227124134888e-05, "loss": 0.1343, "step": 90 }, { "epoch": 0.8163265306122449, "grad_norm": 0.36042073369026184, "learning_rate": 9.122207801708802e-05, "loss": 0.1552, "step": 100 }, { "epoch": 0.8979591836734694, "grad_norm": 0.33795708417892456, "learning_rate": 8.833630016614976e-05, "loss": 0.1341, "step": 110 }, { "epoch": 0.9795918367346939, "grad_norm": 0.2802504003047943, "learning_rate": 8.510123072976239e-05, "loss": 0.1361, "step": 120 }, { "epoch": 1.0612244897959184, "grad_norm": 0.3562999367713928, "learning_rate": 8.154634523184388e-05, "loss": 0.1554, "step": 130 }, { "epoch": 1.1428571428571428, "grad_norm": 0.3551533818244934, "learning_rate": 7.770403312015721e-05, "loss": 0.1222, "step": 140 }, { "epoch": 1.2244897959183674, "grad_norm": 0.3744591474533081, "learning_rate": 7.360930265797935e-05, "loss": 0.1268, "step": 150 }, { "epoch": 1.306122448979592, "grad_norm": 0.3726373016834259, "learning_rate": 6.929946195508932e-05, "loss": 0.0923, "step": 160 }, { "epoch": 1.3877551020408163, "grad_norm": 0.45406657457351685, "learning_rate": 6.481377904428171e-05, "loss": 0.1236, "step": 170 }, { "epoch": 1.469387755102041, "grad_norm": 0.26440876722335815, "learning_rate": 6.019312410053286e-05, "loss": 0.1059, "step": 180 }, { "epoch": 1.5510204081632653, "grad_norm": 0.3584797978401184, "learning_rate": 5.547959706265068e-05, "loss": 0.13, "step": 190 }, { "epoch": 1.6326530612244898, "grad_norm": 0.2686370611190796, "learning_rate": 5.0716144050239375e-05, "loss": 0.1132, "step": 200 }, { "epoch": 1.7142857142857144, "grad_norm": 0.29573601484298706, "learning_rate": 4.594616607090028e-05, "loss": 0.1093, "step": 210 }, { "epoch": 1.7959183673469388, "grad_norm": 0.33462539315223694, "learning_rate": 4.121312358283463e-05, "loss": 0.1014, "step": 220 }, { "epoch": 1.8775510204081631, "grad_norm": 0.40894749760627747, "learning_rate": 3.656014051577713e-05, "loss": 0.1038, "step": 230 }, { "epoch": 1.9591836734693877, "grad_norm": 0.30836549401283264, "learning_rate": 3.202961135812437e-05, "loss": 0.1088, "step": 240 }, { "epoch": 2.0408163265306123, "grad_norm": 0.3176974654197693, "learning_rate": 2.7662814890184818e-05, "loss": 0.1055, "step": 250 }, { "epoch": 2.122448979591837, "grad_norm": 0.2630854845046997, "learning_rate": 2.3499538082923606e-05, "loss": 0.0942, "step": 260 }, { "epoch": 2.204081632653061, "grad_norm": 0.3280630111694336, "learning_rate": 1.9577713588953795e-05, "loss": 0.0954, "step": 270 }, { "epoch": 2.2857142857142856, "grad_norm": 0.2787545621395111, "learning_rate": 1.5933074128684332e-05, "loss": 0.0855, "step": 280 }, { "epoch": 2.36734693877551, "grad_norm": 0.3742433190345764, "learning_rate": 1.2598826920598772e-05, "loss": 0.1161, "step": 290 }, { "epoch": 2.4489795918367347, "grad_norm": 0.2906797528266907, "learning_rate": 9.605351122011309e-06, "loss": 0.0914, "step": 300 }, { "epoch": 2.5306122448979593, "grad_norm": 0.3414059579372406, "learning_rate": 6.979921036993042e-06, "loss": 0.0971, "step": 310 }, { "epoch": 2.612244897959184, "grad_norm": 0.2982410490512848, "learning_rate": 4.746457613389904e-06, "loss": 0.0855, "step": 320 }, { "epoch": 2.693877551020408, "grad_norm": 0.3430004417896271, "learning_rate": 2.925310493105099e-06, "loss": 0.0928, "step": 330 }, { "epoch": 2.7755102040816326, "grad_norm": 0.4764673709869385, "learning_rate": 1.5330726014397668e-06, "loss": 0.1081, "step": 340 }, { "epoch": 2.857142857142857, "grad_norm": 0.3845561742782593, "learning_rate": 5.824289648152126e-07, "loss": 0.0895, "step": 350 }, { "epoch": 2.938775510204082, "grad_norm": 0.3539472818374634, "learning_rate": 8.204113433559201e-08, "loss": 0.0974, "step": 360 } ], "logging_steps": 10, "max_steps": 366, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9841219894640640.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }