{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.5, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 0.18797126412391663, "learning_rate": 9.949748743718594e-05, "loss": 0.6604, "step": 10 }, { "epoch": 0.1, "grad_norm": 0.15986427664756775, "learning_rate": 9.84924623115578e-05, "loss": 0.6109, "step": 20 }, { "epoch": 0.15, "grad_norm": 0.14691925048828125, "learning_rate": 9.748743718592965e-05, "loss": 0.6119, "step": 30 }, { "epoch": 0.2, "grad_norm": 0.12482919543981552, "learning_rate": 9.64824120603015e-05, "loss": 0.6518, "step": 40 }, { "epoch": 0.25, "grad_norm": 0.1446041613817215, "learning_rate": 9.547738693467337e-05, "loss": 0.5964, "step": 50 }, { "epoch": 0.3, "grad_norm": 0.11769542098045349, "learning_rate": 9.447236180904523e-05, "loss": 0.5714, "step": 60 }, { "epoch": 0.35, "grad_norm": 0.1492375284433365, "learning_rate": 9.34673366834171e-05, "loss": 0.6074, "step": 70 }, { "epoch": 0.4, "grad_norm": 0.16033588349819183, "learning_rate": 9.246231155778895e-05, "loss": 0.5441, "step": 80 }, { "epoch": 0.45, "grad_norm": 0.12141509354114532, "learning_rate": 9.14572864321608e-05, "loss": 0.5898, "step": 90 }, { "epoch": 0.5, "grad_norm": 0.14080357551574707, "learning_rate": 9.045226130653267e-05, "loss": 0.5599, "step": 100 }, { "epoch": 0.55, "grad_norm": 0.14213934540748596, "learning_rate": 8.944723618090453e-05, "loss": 0.5713, "step": 110 }, { "epoch": 0.6, "grad_norm": 0.1625017374753952, "learning_rate": 8.84422110552764e-05, "loss": 0.543, "step": 120 }, { "epoch": 0.65, "grad_norm": 0.16236303746700287, "learning_rate": 8.743718592964825e-05, "loss": 0.5461, "step": 130 }, { "epoch": 0.7, "grad_norm": 0.14767853915691376, "learning_rate": 8.64321608040201e-05, "loss": 0.615, "step": 140 }, { "epoch": 0.75, "grad_norm": 0.1415889859199524, "learning_rate": 8.542713567839196e-05, "loss": 0.5717, "step": 150 }, { "epoch": 0.8, "grad_norm": 0.14877167344093323, "learning_rate": 8.442211055276383e-05, "loss": 0.5413, "step": 160 }, { "epoch": 0.85, "grad_norm": 0.15324772894382477, "learning_rate": 8.341708542713568e-05, "loss": 0.5564, "step": 170 }, { "epoch": 0.9, "grad_norm": 0.14583168923854828, "learning_rate": 8.241206030150754e-05, "loss": 0.549, "step": 180 }, { "epoch": 0.95, "grad_norm": 0.17882034182548523, "learning_rate": 8.14070351758794e-05, "loss": 0.5495, "step": 190 }, { "epoch": 1.0, "grad_norm": 0.13743259012699127, "learning_rate": 8.040201005025126e-05, "loss": 0.5545, "step": 200 }, { "epoch": 1.05, "grad_norm": 0.1426534354686737, "learning_rate": 7.939698492462313e-05, "loss": 0.5137, "step": 210 }, { "epoch": 1.1, "grad_norm": 0.16195817291736603, "learning_rate": 7.839195979899498e-05, "loss": 0.5241, "step": 220 }, { "epoch": 1.15, "grad_norm": 0.17723768949508667, "learning_rate": 7.738693467336684e-05, "loss": 0.5201, "step": 230 }, { "epoch": 1.2, "grad_norm": 0.1717960685491562, "learning_rate": 7.638190954773869e-05, "loss": 0.4599, "step": 240 }, { "epoch": 1.25, "grad_norm": 0.17444512248039246, "learning_rate": 7.537688442211056e-05, "loss": 0.5151, "step": 250 }, { "epoch": 1.3, "grad_norm": 0.19015884399414062, "learning_rate": 7.437185929648241e-05, "loss": 0.5413, "step": 260 }, { "epoch": 1.35, "grad_norm": 0.1704825609922409, "learning_rate": 7.336683417085427e-05, "loss": 0.5352, "step": 270 }, { "epoch": 1.4, "grad_norm": 0.18583402037620544, "learning_rate": 7.236180904522614e-05, "loss": 0.4971, "step": 280 }, { "epoch": 1.45, "grad_norm": 0.1627408117055893, "learning_rate": 7.135678391959799e-05, "loss": 0.4678, "step": 290 }, { "epoch": 1.5, "grad_norm": 0.167761892080307, "learning_rate": 7.035175879396985e-05, "loss": 0.5322, "step": 300 }, { "epoch": 1.55, "grad_norm": 0.18980734050273895, "learning_rate": 6.93467336683417e-05, "loss": 0.5084, "step": 310 }, { "epoch": 1.6, "grad_norm": 0.18042020499706268, "learning_rate": 6.834170854271357e-05, "loss": 0.5538, "step": 320 }, { "epoch": 1.65, "grad_norm": 0.2088419497013092, "learning_rate": 6.733668341708544e-05, "loss": 0.4912, "step": 330 }, { "epoch": 1.7, "grad_norm": 0.15689465403556824, "learning_rate": 6.633165829145729e-05, "loss": 0.5273, "step": 340 }, { "epoch": 1.75, "grad_norm": 0.18145041167736053, "learning_rate": 6.532663316582915e-05, "loss": 0.5564, "step": 350 }, { "epoch": 1.8, "grad_norm": 0.1750493198633194, "learning_rate": 6.4321608040201e-05, "loss": 0.4913, "step": 360 }, { "epoch": 1.85, "grad_norm": 0.19720715284347534, "learning_rate": 6.331658291457287e-05, "loss": 0.5309, "step": 370 }, { "epoch": 1.9, "grad_norm": 0.1915196031332016, "learning_rate": 6.231155778894473e-05, "loss": 0.5385, "step": 380 }, { "epoch": 1.95, "grad_norm": 0.18675631284713745, "learning_rate": 6.130653266331658e-05, "loss": 0.5007, "step": 390 }, { "epoch": 2.0, "grad_norm": 0.19217941164970398, "learning_rate": 6.030150753768844e-05, "loss": 0.4999, "step": 400 }, { "epoch": 2.05, "grad_norm": 0.1979852020740509, "learning_rate": 5.929648241206031e-05, "loss": 0.4839, "step": 410 }, { "epoch": 2.1, "grad_norm": 0.2420262098312378, "learning_rate": 5.829145728643216e-05, "loss": 0.4727, "step": 420 }, { "epoch": 2.15, "grad_norm": 0.21145766973495483, "learning_rate": 5.728643216080403e-05, "loss": 0.4942, "step": 430 }, { "epoch": 2.2, "grad_norm": 0.23028914630413055, "learning_rate": 5.628140703517588e-05, "loss": 0.4984, "step": 440 }, { "epoch": 2.25, "grad_norm": 0.19866284728050232, "learning_rate": 5.527638190954774e-05, "loss": 0.4909, "step": 450 }, { "epoch": 2.3, "grad_norm": 0.18788225948810577, "learning_rate": 5.4271356783919604e-05, "loss": 0.4889, "step": 460 }, { "epoch": 2.35, "grad_norm": 0.20647984743118286, "learning_rate": 5.3266331658291455e-05, "loss": 0.4844, "step": 470 }, { "epoch": 2.4, "grad_norm": 0.2238943725824356, "learning_rate": 5.226130653266332e-05, "loss": 0.4531, "step": 480 }, { "epoch": 2.45, "grad_norm": 0.2138652801513672, "learning_rate": 5.125628140703518e-05, "loss": 0.4631, "step": 490 }, { "epoch": 2.5, "grad_norm": 0.2267480194568634, "learning_rate": 5.0251256281407036e-05, "loss": 0.4617, "step": 500 } ], "logging_steps": 10, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.069113651395789e+17, "train_batch_size": 10, "trial_name": null, "trial_params": null }