{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.05851755526657997, "eval_steps": 9, "global_step": 90, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006501950585175553, "eval_loss": 0.9398170113563538, "eval_runtime": 167.5557, "eval_samples_per_second": 15.464, "eval_steps_per_second": 1.934, "step": 1 }, { "epoch": 0.0019505851755526658, "grad_norm": 1.2298109531402588, "learning_rate": 1.5e-05, "loss": 3.8388, "step": 3 }, { "epoch": 0.0039011703511053317, "grad_norm": 1.150593638420105, "learning_rate": 3e-05, "loss": 3.8144, "step": 6 }, { "epoch": 0.005851755526657998, "grad_norm": 1.1785922050476074, "learning_rate": 4.5e-05, "loss": 3.754, "step": 9 }, { "epoch": 0.005851755526657998, "eval_loss": 0.9156110882759094, "eval_runtime": 170.2657, "eval_samples_per_second": 15.217, "eval_steps_per_second": 1.903, "step": 9 }, { "epoch": 0.007802340702210663, "grad_norm": 1.2483290433883667, "learning_rate": 4.993910125649561e-05, "loss": 3.6203, "step": 12 }, { "epoch": 0.00975292587776333, "grad_norm": 1.0634453296661377, "learning_rate": 4.962019382530521e-05, "loss": 3.4936, "step": 15 }, { "epoch": 0.011703511053315995, "grad_norm": 1.0305792093276978, "learning_rate": 4.9031542398457974e-05, "loss": 3.1803, "step": 18 }, { "epoch": 0.011703511053315995, "eval_loss": 0.8215236663818359, "eval_runtime": 170.1654, "eval_samples_per_second": 15.226, "eval_steps_per_second": 1.904, "step": 18 }, { "epoch": 0.013654096228868661, "grad_norm": 1.1813019514083862, "learning_rate": 4.817959636416969e-05, "loss": 3.3118, "step": 21 }, { "epoch": 0.015604681404421327, "grad_norm": 1.077471137046814, "learning_rate": 4.707368982147318e-05, "loss": 3.3702, "step": 24 }, { "epoch": 0.017555266579973992, "grad_norm": 0.9821540713310242, "learning_rate": 4.572593931387604e-05, "loss": 3.0259, "step": 27 }, { "epoch": 0.017555266579973992, "eval_loss": 0.7803987860679626, "eval_runtime": 170.2152, "eval_samples_per_second": 15.222, "eval_steps_per_second": 1.903, "step": 27 }, { "epoch": 0.01950585175552666, "grad_norm": 1.0215818881988525, "learning_rate": 4.415111107797445e-05, "loss": 3.1122, "step": 30 }, { "epoch": 0.021456436931079324, "grad_norm": 0.985200822353363, "learning_rate": 4.2366459261474933e-05, "loss": 3.1104, "step": 33 }, { "epoch": 0.02340702210663199, "grad_norm": 0.9306589961051941, "learning_rate": 4.039153688314145e-05, "loss": 2.9517, "step": 36 }, { "epoch": 0.02340702210663199, "eval_loss": 0.7560431957244873, "eval_runtime": 170.1454, "eval_samples_per_second": 15.228, "eval_steps_per_second": 1.904, "step": 36 }, { "epoch": 0.025357607282184655, "grad_norm": 0.9917410612106323, "learning_rate": 3.824798160583012e-05, "loss": 3.134, "step": 39 }, { "epoch": 0.027308192457737322, "grad_norm": 1.0550307035446167, "learning_rate": 3.5959278669726935e-05, "loss": 2.9545, "step": 42 }, { "epoch": 0.029258777633289986, "grad_norm": 1.0387388467788696, "learning_rate": 3.355050358314172e-05, "loss": 2.982, "step": 45 }, { "epoch": 0.029258777633289986, "eval_loss": 0.7399918437004089, "eval_runtime": 170.3061, "eval_samples_per_second": 15.214, "eval_steps_per_second": 1.902, "step": 45 }, { "epoch": 0.031209362808842653, "grad_norm": 1.1577284336090088, "learning_rate": 3.104804738999169e-05, "loss": 2.8839, "step": 48 }, { "epoch": 0.03315994798439532, "grad_norm": 1.0641475915908813, "learning_rate": 2.8479327524001636e-05, "loss": 2.9474, "step": 51 }, { "epoch": 0.035110533159947985, "grad_norm": 1.7697352170944214, "learning_rate": 2.587248741756253e-05, "loss": 2.8995, "step": 54 }, { "epoch": 0.035110533159947985, "eval_loss": 0.729458749294281, "eval_runtime": 170.3686, "eval_samples_per_second": 15.208, "eval_steps_per_second": 1.902, "step": 54 }, { "epoch": 0.03706111833550065, "grad_norm": 1.0338619947433472, "learning_rate": 2.3256088156396868e-05, "loss": 2.8834, "step": 57 }, { "epoch": 0.03901170351105332, "grad_norm": 1.1941914558410645, "learning_rate": 2.0658795558326743e-05, "loss": 2.7783, "step": 60 }, { "epoch": 0.04096228868660598, "grad_norm": 1.12138032913208, "learning_rate": 1.8109066104575023e-05, "loss": 2.8563, "step": 63 }, { "epoch": 0.04096228868660598, "eval_loss": 0.7231634855270386, "eval_runtime": 170.3411, "eval_samples_per_second": 15.211, "eval_steps_per_second": 1.902, "step": 63 }, { "epoch": 0.04291287386215865, "grad_norm": 1.3698627948760986, "learning_rate": 1.56348351646022e-05, "loss": 2.8257, "step": 66 }, { "epoch": 0.044863459037711315, "grad_norm": 1.0287761688232422, "learning_rate": 1.3263210930352737e-05, "loss": 2.8939, "step": 69 }, { "epoch": 0.04681404421326398, "grad_norm": 1.206251621246338, "learning_rate": 1.1020177413231334e-05, "loss": 2.8268, "step": 72 }, { "epoch": 0.04681404421326398, "eval_loss": 0.7189695239067078, "eval_runtime": 170.4241, "eval_samples_per_second": 15.203, "eval_steps_per_second": 1.901, "step": 72 }, { "epoch": 0.04876462938881664, "grad_norm": 1.061952829360962, "learning_rate": 8.930309757836517e-06, "loss": 2.8395, "step": 75 }, { "epoch": 0.05071521456436931, "grad_norm": 1.1924004554748535, "learning_rate": 7.016504991533726e-06, "loss": 2.9183, "step": 78 }, { "epoch": 0.05266579973992198, "grad_norm": 1.1760205030441284, "learning_rate": 5.299731159831953e-06, "loss": 3.0306, "step": 81 }, { "epoch": 0.05266579973992198, "eval_loss": 0.7165172696113586, "eval_runtime": 170.3914, "eval_samples_per_second": 15.206, "eval_steps_per_second": 1.902, "step": 81 }, { "epoch": 0.054616384915474644, "grad_norm": 1.2741774320602417, "learning_rate": 3.798797596089351e-06, "loss": 2.8466, "step": 84 }, { "epoch": 0.056566970091027305, "grad_norm": 1.1168195009231567, "learning_rate": 2.5301488425208296e-06, "loss": 2.9594, "step": 87 }, { "epoch": 0.05851755526657997, "grad_norm": 1.4195281267166138, "learning_rate": 1.5076844803522922e-06, "loss": 2.8093, "step": 90 }, { "epoch": 0.05851755526657997, "eval_loss": 0.7156469821929932, "eval_runtime": 170.2698, "eval_samples_per_second": 15.217, "eval_steps_per_second": 1.903, "step": 90 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1794920544351027e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }