{ "best_metric": 1.1666896343231201, "best_model_checkpoint": "./results/checkpoint-2500", "epoch": 1.0, "eval_steps": 4, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9951999999999997e-07, "loss": 2.6285, "step": 4 }, { "epoch": 0.0, "eval_loss": 2.4697508811950684, "eval_runtime": 0.485, "eval_samples_per_second": 8.248, "eval_steps_per_second": 2.062, "step": 4 }, { "epoch": 0.0, "learning_rate": 2.9904e-07, "loss": 2.6222, "step": 8 }, { "epoch": 0.0, "eval_loss": 2.465975284576416, "eval_runtime": 0.6323, "eval_samples_per_second": 6.326, "eval_steps_per_second": 1.582, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.9856e-07, "loss": 2.6536, "step": 12 }, { "epoch": 0.0, "eval_loss": 2.460374116897583, "eval_runtime": 0.6478, "eval_samples_per_second": 6.175, "eval_steps_per_second": 1.544, "step": 12 }, { "epoch": 0.01, "learning_rate": 2.9808e-07, "loss": 2.6785, "step": 16 }, { "epoch": 0.01, "eval_loss": 2.4556970596313477, "eval_runtime": 0.6653, "eval_samples_per_second": 6.012, "eval_steps_per_second": 1.503, "step": 16 }, { "epoch": 0.01, "learning_rate": 2.9759999999999996e-07, "loss": 2.6085, "step": 20 }, { "epoch": 0.01, "eval_loss": 2.4514715671539307, "eval_runtime": 0.5241, "eval_samples_per_second": 7.632, "eval_steps_per_second": 1.908, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.9711999999999995e-07, "loss": 2.5907, "step": 24 }, { "epoch": 0.01, "eval_loss": 2.4462974071502686, "eval_runtime": 0.4689, "eval_samples_per_second": 8.53, "eval_steps_per_second": 2.133, "step": 24 }, { "epoch": 0.01, "learning_rate": 2.9664e-07, "loss": 2.5942, "step": 28 }, { "epoch": 0.01, "eval_loss": 2.4415194988250732, "eval_runtime": 0.4829, "eval_samples_per_second": 8.284, "eval_steps_per_second": 2.071, "step": 28 }, { "epoch": 0.01, "learning_rate": 2.9615999999999997e-07, "loss": 2.6101, "step": 32 }, { "epoch": 0.01, "eval_loss": 2.437161922454834, "eval_runtime": 0.4715, "eval_samples_per_second": 8.483, "eval_steps_per_second": 2.121, "step": 32 }, { "epoch": 0.01, "learning_rate": 2.9568e-07, "loss": 2.5827, "step": 36 }, { "epoch": 0.01, "eval_loss": 2.432689666748047, "eval_runtime": 0.4938, "eval_samples_per_second": 8.1, "eval_steps_per_second": 2.025, "step": 36 }, { "epoch": 0.02, "learning_rate": 2.952e-07, "loss": 2.5729, "step": 40 }, { "epoch": 0.02, "eval_loss": 2.4281153678894043, "eval_runtime": 0.5021, "eval_samples_per_second": 7.966, "eval_steps_per_second": 1.991, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.9472e-07, "loss": 2.5856, "step": 44 }, { "epoch": 0.02, "eval_loss": 2.423053741455078, "eval_runtime": 0.593, "eval_samples_per_second": 6.746, "eval_steps_per_second": 1.686, "step": 44 }, { "epoch": 0.02, "learning_rate": 2.9423999999999997e-07, "loss": 2.589, "step": 48 }, { "epoch": 0.02, "eval_loss": 2.418571949005127, "eval_runtime": 0.6933, "eval_samples_per_second": 5.77, "eval_steps_per_second": 1.442, "step": 48 }, { "epoch": 0.02, "learning_rate": 2.9375999999999995e-07, "loss": 2.6483, "step": 52 }, { "epoch": 0.02, "eval_loss": 2.414531946182251, "eval_runtime": 0.7167, "eval_samples_per_second": 5.581, "eval_steps_per_second": 1.395, "step": 52 }, { "epoch": 0.02, "learning_rate": 2.9328e-07, "loss": 2.517, "step": 56 }, { "epoch": 0.02, "eval_loss": 2.409538745880127, "eval_runtime": 0.4826, "eval_samples_per_second": 8.289, "eval_steps_per_second": 2.072, "step": 56 }, { "epoch": 0.02, "learning_rate": 2.928e-07, "loss": 2.5987, "step": 60 }, { "epoch": 0.02, "eval_loss": 2.4050426483154297, "eval_runtime": 0.4757, "eval_samples_per_second": 8.409, "eval_steps_per_second": 2.102, "step": 60 }, { "epoch": 0.03, "learning_rate": 2.9232e-07, "loss": 2.5489, "step": 64 }, { "epoch": 0.03, "eval_loss": 2.400360107421875, "eval_runtime": 0.4945, "eval_samples_per_second": 8.089, "eval_steps_per_second": 2.022, "step": 64 }, { "epoch": 0.03, "learning_rate": 2.9184e-07, "loss": 2.5063, "step": 68 }, { "epoch": 0.03, "eval_loss": 2.396500587463379, "eval_runtime": 0.5, "eval_samples_per_second": 8.001, "eval_steps_per_second": 2.0, "step": 68 }, { "epoch": 0.03, "learning_rate": 2.9136e-07, "loss": 2.5867, "step": 72 }, { "epoch": 0.03, "eval_loss": 2.3916146755218506, "eval_runtime": 0.4602, "eval_samples_per_second": 8.693, "eval_steps_per_second": 2.173, "step": 72 }, { "epoch": 0.03, "learning_rate": 2.9087999999999997e-07, "loss": 2.544, "step": 76 }, { "epoch": 0.03, "eval_loss": 2.3873047828674316, "eval_runtime": 0.4731, "eval_samples_per_second": 8.456, "eval_steps_per_second": 2.114, "step": 76 }, { "epoch": 0.03, "learning_rate": 2.9039999999999995e-07, "loss": 2.5596, "step": 80 }, { "epoch": 0.03, "eval_loss": 2.382803440093994, "eval_runtime": 0.6092, "eval_samples_per_second": 6.566, "eval_steps_per_second": 1.642, "step": 80 }, { "epoch": 0.03, "learning_rate": 2.8992e-07, "loss": 2.5744, "step": 84 }, { "epoch": 0.03, "eval_loss": 2.3786380290985107, "eval_runtime": 0.7212, "eval_samples_per_second": 5.546, "eval_steps_per_second": 1.387, "step": 84 }, { "epoch": 0.04, "learning_rate": 2.8944e-07, "loss": 2.5588, "step": 88 }, { "epoch": 0.04, "eval_loss": 2.374176502227783, "eval_runtime": 0.6826, "eval_samples_per_second": 5.86, "eval_steps_per_second": 1.465, "step": 88 }, { "epoch": 0.04, "learning_rate": 2.8895999999999996e-07, "loss": 2.5579, "step": 92 }, { "epoch": 0.04, "eval_loss": 2.3702104091644287, "eval_runtime": 0.4896, "eval_samples_per_second": 8.169, "eval_steps_per_second": 2.042, "step": 92 }, { "epoch": 0.04, "learning_rate": 2.8848e-07, "loss": 2.5245, "step": 96 }, { "epoch": 0.04, "eval_loss": 2.3660218715667725, "eval_runtime": 0.4764, "eval_samples_per_second": 8.397, "eval_steps_per_second": 2.099, "step": 96 }, { "epoch": 0.04, "learning_rate": 2.88e-07, "loss": 2.5132, "step": 100 }, { "epoch": 0.04, "eval_loss": 2.36110520362854, "eval_runtime": 0.4799, "eval_samples_per_second": 8.335, "eval_steps_per_second": 2.084, "step": 100 }, { "epoch": 0.04, "learning_rate": 2.8751999999999997e-07, "loss": 2.5037, "step": 104 }, { "epoch": 0.04, "eval_loss": 2.3570125102996826, "eval_runtime": 0.4722, "eval_samples_per_second": 8.47, "eval_steps_per_second": 2.118, "step": 104 }, { "epoch": 0.04, "learning_rate": 2.8704e-07, "loss": 2.4727, "step": 108 }, { "epoch": 0.04, "eval_loss": 2.3530666828155518, "eval_runtime": 0.467, "eval_samples_per_second": 8.565, "eval_steps_per_second": 2.141, "step": 108 }, { "epoch": 0.04, "learning_rate": 2.8656e-07, "loss": 2.4709, "step": 112 }, { "epoch": 0.04, "eval_loss": 2.348759412765503, "eval_runtime": 0.501, "eval_samples_per_second": 7.984, "eval_steps_per_second": 1.996, "step": 112 }, { "epoch": 0.05, "learning_rate": 2.8608e-07, "loss": 2.4711, "step": 116 }, { "epoch": 0.05, "eval_loss": 2.344454050064087, "eval_runtime": 0.6607, "eval_samples_per_second": 6.054, "eval_steps_per_second": 1.513, "step": 116 }, { "epoch": 0.05, "learning_rate": 2.8559999999999996e-07, "loss": 2.5445, "step": 120 }, { "epoch": 0.05, "eval_loss": 2.3402156829833984, "eval_runtime": 0.704, "eval_samples_per_second": 5.682, "eval_steps_per_second": 1.42, "step": 120 }, { "epoch": 0.05, "learning_rate": 2.8512e-07, "loss": 2.4994, "step": 124 }, { "epoch": 0.05, "eval_loss": 2.3362019062042236, "eval_runtime": 0.6849, "eval_samples_per_second": 5.84, "eval_steps_per_second": 1.46, "step": 124 }, { "epoch": 0.05, "learning_rate": 2.8464e-07, "loss": 2.5036, "step": 128 }, { "epoch": 0.05, "eval_loss": 2.3319339752197266, "eval_runtime": 0.4864, "eval_samples_per_second": 8.223, "eval_steps_per_second": 2.056, "step": 128 }, { "epoch": 0.05, "learning_rate": 2.8416e-07, "loss": 2.5525, "step": 132 }, { "epoch": 0.05, "eval_loss": 2.3276522159576416, "eval_runtime": 0.4783, "eval_samples_per_second": 8.364, "eval_steps_per_second": 2.091, "step": 132 }, { "epoch": 0.05, "learning_rate": 2.8368e-07, "loss": 2.5245, "step": 136 }, { "epoch": 0.05, "eval_loss": 2.3241090774536133, "eval_runtime": 0.4805, "eval_samples_per_second": 8.324, "eval_steps_per_second": 2.081, "step": 136 }, { "epoch": 0.06, "learning_rate": 2.832e-07, "loss": 2.4946, "step": 140 }, { "epoch": 0.06, "eval_loss": 2.3198165893554688, "eval_runtime": 0.473, "eval_samples_per_second": 8.457, "eval_steps_per_second": 2.114, "step": 140 }, { "epoch": 0.06, "learning_rate": 2.8272e-07, "loss": 2.5142, "step": 144 }, { "epoch": 0.06, "eval_loss": 2.3152613639831543, "eval_runtime": 0.4858, "eval_samples_per_second": 8.234, "eval_steps_per_second": 2.058, "step": 144 }, { "epoch": 0.06, "learning_rate": 2.8223999999999997e-07, "loss": 2.4639, "step": 148 }, { "epoch": 0.06, "eval_loss": 2.3112645149230957, "eval_runtime": 0.488, "eval_samples_per_second": 8.196, "eval_steps_per_second": 2.049, "step": 148 }, { "epoch": 0.06, "learning_rate": 2.8176e-07, "loss": 2.4796, "step": 152 }, { "epoch": 0.06, "eval_loss": 2.307020902633667, "eval_runtime": 0.6163, "eval_samples_per_second": 6.49, "eval_steps_per_second": 1.623, "step": 152 }, { "epoch": 0.06, "learning_rate": 2.8128e-07, "loss": 2.4529, "step": 156 }, { "epoch": 0.06, "eval_loss": 2.303062915802002, "eval_runtime": 0.6764, "eval_samples_per_second": 5.913, "eval_steps_per_second": 1.478, "step": 156 }, { "epoch": 0.06, "learning_rate": 2.808e-07, "loss": 2.4823, "step": 160 }, { "epoch": 0.06, "eval_loss": 2.2993311882019043, "eval_runtime": 0.6854, "eval_samples_per_second": 5.836, "eval_steps_per_second": 1.459, "step": 160 }, { "epoch": 0.07, "learning_rate": 2.8032e-07, "loss": 2.4439, "step": 164 }, { "epoch": 0.07, "eval_loss": 2.2947850227355957, "eval_runtime": 0.4745, "eval_samples_per_second": 8.429, "eval_steps_per_second": 2.107, "step": 164 }, { "epoch": 0.07, "learning_rate": 2.7984e-07, "loss": 2.4652, "step": 168 }, { "epoch": 0.07, "eval_loss": 2.2908992767333984, "eval_runtime": 0.4759, "eval_samples_per_second": 8.406, "eval_steps_per_second": 2.101, "step": 168 }, { "epoch": 0.07, "learning_rate": 2.7936e-07, "loss": 2.4574, "step": 172 }, { "epoch": 0.07, "eval_loss": 2.2867026329040527, "eval_runtime": 0.4973, "eval_samples_per_second": 8.043, "eval_steps_per_second": 2.011, "step": 172 }, { "epoch": 0.07, "learning_rate": 2.7887999999999997e-07, "loss": 2.4557, "step": 176 }, { "epoch": 0.07, "eval_loss": 2.283027172088623, "eval_runtime": 0.4719, "eval_samples_per_second": 8.477, "eval_steps_per_second": 2.119, "step": 176 }, { "epoch": 0.07, "learning_rate": 2.784e-07, "loss": 2.4462, "step": 180 }, { "epoch": 0.07, "eval_loss": 2.2787420749664307, "eval_runtime": 0.472, "eval_samples_per_second": 8.474, "eval_steps_per_second": 2.119, "step": 180 }, { "epoch": 0.07, "learning_rate": 2.7792e-07, "loss": 2.3962, "step": 184 }, { "epoch": 0.07, "eval_loss": 2.2745461463928223, "eval_runtime": 0.6328, "eval_samples_per_second": 6.322, "eval_steps_per_second": 1.58, "step": 184 }, { "epoch": 0.08, "learning_rate": 2.7744e-07, "loss": 2.3666, "step": 188 }, { "epoch": 0.08, "eval_loss": 2.2705912590026855, "eval_runtime": 0.6375, "eval_samples_per_second": 6.274, "eval_steps_per_second": 1.569, "step": 188 }, { "epoch": 0.08, "learning_rate": 2.7696e-07, "loss": 2.5024, "step": 192 }, { "epoch": 0.08, "eval_loss": 2.266995906829834, "eval_runtime": 0.6984, "eval_samples_per_second": 5.727, "eval_steps_per_second": 1.432, "step": 192 }, { "epoch": 0.08, "learning_rate": 2.7648e-07, "loss": 2.4419, "step": 196 }, { "epoch": 0.08, "eval_loss": 2.2626519203186035, "eval_runtime": 0.7334, "eval_samples_per_second": 5.454, "eval_steps_per_second": 1.363, "step": 196 }, { "epoch": 0.08, "learning_rate": 2.76e-07, "loss": 2.4246, "step": 200 }, { "epoch": 0.08, "eval_loss": 2.2583603858947754, "eval_runtime": 0.48, "eval_samples_per_second": 8.333, "eval_steps_per_second": 2.083, "step": 200 }, { "epoch": 0.08, "learning_rate": 2.7551999999999997e-07, "loss": 2.3853, "step": 204 }, { "epoch": 0.08, "eval_loss": 2.2551512718200684, "eval_runtime": 0.4939, "eval_samples_per_second": 8.098, "eval_steps_per_second": 2.025, "step": 204 }, { "epoch": 0.08, "learning_rate": 2.7503999999999995e-07, "loss": 2.4032, "step": 208 }, { "epoch": 0.08, "eval_loss": 2.251105785369873, "eval_runtime": 0.46, "eval_samples_per_second": 8.695, "eval_steps_per_second": 2.174, "step": 208 }, { "epoch": 0.08, "learning_rate": 2.7456e-07, "loss": 2.4444, "step": 212 }, { "epoch": 0.08, "eval_loss": 2.247025489807129, "eval_runtime": 0.4948, "eval_samples_per_second": 8.084, "eval_steps_per_second": 2.021, "step": 212 }, { "epoch": 0.09, "learning_rate": 2.7408e-07, "loss": 2.2932, "step": 216 }, { "epoch": 0.09, "eval_loss": 2.242764472961426, "eval_runtime": 0.4897, "eval_samples_per_second": 8.168, "eval_steps_per_second": 2.042, "step": 216 }, { "epoch": 0.09, "learning_rate": 2.736e-07, "loss": 2.3929, "step": 220 }, { "epoch": 0.09, "eval_loss": 2.2391483783721924, "eval_runtime": 0.6128, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 220 }, { "epoch": 0.09, "learning_rate": 2.7312e-07, "loss": 2.4112, "step": 224 }, { "epoch": 0.09, "eval_loss": 2.234977960586548, "eval_runtime": 0.648, "eval_samples_per_second": 6.172, "eval_steps_per_second": 1.543, "step": 224 }, { "epoch": 0.09, "learning_rate": 2.7264e-07, "loss": 2.4191, "step": 228 }, { "epoch": 0.09, "eval_loss": 2.231099843978882, "eval_runtime": 0.6862, "eval_samples_per_second": 5.829, "eval_steps_per_second": 1.457, "step": 228 }, { "epoch": 0.09, "learning_rate": 2.7215999999999997e-07, "loss": 2.4408, "step": 232 }, { "epoch": 0.09, "eval_loss": 2.2272462844848633, "eval_runtime": 0.7076, "eval_samples_per_second": 5.653, "eval_steps_per_second": 1.413, "step": 232 }, { "epoch": 0.09, "learning_rate": 2.7167999999999996e-07, "loss": 2.3884, "step": 236 }, { "epoch": 0.09, "eval_loss": 2.223376750946045, "eval_runtime": 0.5169, "eval_samples_per_second": 7.738, "eval_steps_per_second": 1.935, "step": 236 }, { "epoch": 0.1, "learning_rate": 2.712e-07, "loss": 2.3689, "step": 240 }, { "epoch": 0.1, "eval_loss": 2.2195653915405273, "eval_runtime": 0.4793, "eval_samples_per_second": 8.346, "eval_steps_per_second": 2.086, "step": 240 }, { "epoch": 0.1, "learning_rate": 2.7072e-07, "loss": 2.3689, "step": 244 }, { "epoch": 0.1, "eval_loss": 2.2153775691986084, "eval_runtime": 0.4771, "eval_samples_per_second": 8.384, "eval_steps_per_second": 2.096, "step": 244 }, { "epoch": 0.1, "learning_rate": 2.7024e-07, "loss": 2.3249, "step": 248 }, { "epoch": 0.1, "eval_loss": 2.211355209350586, "eval_runtime": 0.4778, "eval_samples_per_second": 8.372, "eval_steps_per_second": 2.093, "step": 248 }, { "epoch": 0.1, "learning_rate": 2.6976e-07, "loss": 2.4286, "step": 252 }, { "epoch": 0.1, "eval_loss": 2.207773208618164, "eval_runtime": 0.4873, "eval_samples_per_second": 8.209, "eval_steps_per_second": 2.052, "step": 252 }, { "epoch": 0.1, "learning_rate": 2.6928e-07, "loss": 2.3497, "step": 256 }, { "epoch": 0.1, "eval_loss": 2.203867197036743, "eval_runtime": 0.6281, "eval_samples_per_second": 6.368, "eval_steps_per_second": 1.592, "step": 256 }, { "epoch": 0.1, "learning_rate": 2.6879999999999997e-07, "loss": 2.284, "step": 260 }, { "epoch": 0.1, "eval_loss": 2.199937582015991, "eval_runtime": 0.6885, "eval_samples_per_second": 5.81, "eval_steps_per_second": 1.452, "step": 260 }, { "epoch": 0.11, "learning_rate": 2.6831999999999996e-07, "loss": 2.3333, "step": 264 }, { "epoch": 0.11, "eval_loss": 2.1958465576171875, "eval_runtime": 0.6799, "eval_samples_per_second": 5.883, "eval_steps_per_second": 1.471, "step": 264 }, { "epoch": 0.11, "learning_rate": 2.6784e-07, "loss": 2.3305, "step": 268 }, { "epoch": 0.11, "eval_loss": 2.192072868347168, "eval_runtime": 0.7165, "eval_samples_per_second": 5.583, "eval_steps_per_second": 1.396, "step": 268 }, { "epoch": 0.11, "learning_rate": 2.6736e-07, "loss": 2.3465, "step": 272 }, { "epoch": 0.11, "eval_loss": 2.1882476806640625, "eval_runtime": 0.485, "eval_samples_per_second": 8.247, "eval_steps_per_second": 2.062, "step": 272 }, { "epoch": 0.11, "learning_rate": 2.6687999999999997e-07, "loss": 2.3274, "step": 276 }, { "epoch": 0.11, "eval_loss": 2.1841320991516113, "eval_runtime": 0.4767, "eval_samples_per_second": 8.391, "eval_steps_per_second": 2.098, "step": 276 }, { "epoch": 0.11, "learning_rate": 2.664e-07, "loss": 2.3641, "step": 280 }, { "epoch": 0.11, "eval_loss": 2.1803271770477295, "eval_runtime": 0.5146, "eval_samples_per_second": 7.774, "eval_steps_per_second": 1.943, "step": 280 }, { "epoch": 0.11, "learning_rate": 2.6592e-07, "loss": 2.3089, "step": 284 }, { "epoch": 0.11, "eval_loss": 2.176274538040161, "eval_runtime": 0.488, "eval_samples_per_second": 8.196, "eval_steps_per_second": 2.049, "step": 284 }, { "epoch": 0.12, "learning_rate": 2.6543999999999997e-07, "loss": 2.2645, "step": 288 }, { "epoch": 0.12, "eval_loss": 2.1720588207244873, "eval_runtime": 0.4973, "eval_samples_per_second": 8.043, "eval_steps_per_second": 2.011, "step": 288 }, { "epoch": 0.12, "learning_rate": 2.6495999999999996e-07, "loss": 2.3439, "step": 292 }, { "epoch": 0.12, "eval_loss": 2.1687240600585938, "eval_runtime": 0.6283, "eval_samples_per_second": 6.366, "eval_steps_per_second": 1.592, "step": 292 }, { "epoch": 0.12, "learning_rate": 2.6448e-07, "loss": 2.3285, "step": 296 }, { "epoch": 0.12, "eval_loss": 2.1649253368377686, "eval_runtime": 0.6996, "eval_samples_per_second": 5.718, "eval_steps_per_second": 1.429, "step": 296 }, { "epoch": 0.12, "learning_rate": 2.64e-07, "loss": 2.3126, "step": 300 }, { "epoch": 0.12, "eval_loss": 2.160398483276367, "eval_runtime": 0.6904, "eval_samples_per_second": 5.794, "eval_steps_per_second": 1.448, "step": 300 }, { "epoch": 0.12, "learning_rate": 2.6351999999999997e-07, "loss": 2.3356, "step": 304 }, { "epoch": 0.12, "eval_loss": 2.1570284366607666, "eval_runtime": 0.4953, "eval_samples_per_second": 8.076, "eval_steps_per_second": 2.019, "step": 304 }, { "epoch": 0.12, "learning_rate": 2.6304e-07, "loss": 2.3396, "step": 308 }, { "epoch": 0.12, "eval_loss": 2.1527013778686523, "eval_runtime": 0.4977, "eval_samples_per_second": 8.037, "eval_steps_per_second": 2.009, "step": 308 }, { "epoch": 0.12, "learning_rate": 2.6256e-07, "loss": 2.2972, "step": 312 }, { "epoch": 0.12, "eval_loss": 2.148724317550659, "eval_runtime": 0.4939, "eval_samples_per_second": 8.099, "eval_steps_per_second": 2.025, "step": 312 }, { "epoch": 0.13, "learning_rate": 2.6208e-07, "loss": 2.3321, "step": 316 }, { "epoch": 0.13, "eval_loss": 2.1449663639068604, "eval_runtime": 0.4784, "eval_samples_per_second": 8.362, "eval_steps_per_second": 2.09, "step": 316 }, { "epoch": 0.13, "learning_rate": 2.616e-07, "loss": 2.3348, "step": 320 }, { "epoch": 0.13, "eval_loss": 2.1414906978607178, "eval_runtime": 0.4949, "eval_samples_per_second": 8.082, "eval_steps_per_second": 2.021, "step": 320 }, { "epoch": 0.13, "learning_rate": 2.6112e-07, "loss": 2.2728, "step": 324 }, { "epoch": 0.13, "eval_loss": 2.1374001502990723, "eval_runtime": 0.6321, "eval_samples_per_second": 6.328, "eval_steps_per_second": 1.582, "step": 324 }, { "epoch": 0.13, "learning_rate": 2.6064e-07, "loss": 2.287, "step": 328 }, { "epoch": 0.13, "eval_loss": 2.1333529949188232, "eval_runtime": 0.6547, "eval_samples_per_second": 6.109, "eval_steps_per_second": 1.527, "step": 328 }, { "epoch": 0.13, "learning_rate": 2.6015999999999997e-07, "loss": 2.2474, "step": 332 }, { "epoch": 0.13, "eval_loss": 2.1297547817230225, "eval_runtime": 0.7093, "eval_samples_per_second": 5.639, "eval_steps_per_second": 1.41, "step": 332 }, { "epoch": 0.13, "learning_rate": 2.5968e-07, "loss": 2.3214, "step": 336 }, { "epoch": 0.13, "eval_loss": 2.126392364501953, "eval_runtime": 0.6909, "eval_samples_per_second": 5.789, "eval_steps_per_second": 1.447, "step": 336 }, { "epoch": 0.14, "learning_rate": 2.592e-07, "loss": 2.2725, "step": 340 }, { "epoch": 0.14, "eval_loss": 2.122309923171997, "eval_runtime": 0.4823, "eval_samples_per_second": 8.293, "eval_steps_per_second": 2.073, "step": 340 }, { "epoch": 0.14, "learning_rate": 2.5872000000000003e-07, "loss": 2.3114, "step": 344 }, { "epoch": 0.14, "eval_loss": 2.118303060531616, "eval_runtime": 0.4954, "eval_samples_per_second": 8.075, "eval_steps_per_second": 2.019, "step": 344 }, { "epoch": 0.14, "learning_rate": 2.5824e-07, "loss": 2.2333, "step": 348 }, { "epoch": 0.14, "eval_loss": 2.114621162414551, "eval_runtime": 0.4856, "eval_samples_per_second": 8.238, "eval_steps_per_second": 2.059, "step": 348 }, { "epoch": 0.14, "learning_rate": 2.5776e-07, "loss": 2.2812, "step": 352 }, { "epoch": 0.14, "eval_loss": 2.11067795753479, "eval_runtime": 0.4778, "eval_samples_per_second": 8.372, "eval_steps_per_second": 2.093, "step": 352 }, { "epoch": 0.14, "learning_rate": 2.5728e-07, "loss": 2.2454, "step": 356 }, { "epoch": 0.14, "eval_loss": 2.106940746307373, "eval_runtime": 0.4945, "eval_samples_per_second": 8.089, "eval_steps_per_second": 2.022, "step": 356 }, { "epoch": 0.14, "learning_rate": 2.5679999999999997e-07, "loss": 2.2261, "step": 360 }, { "epoch": 0.14, "eval_loss": 2.1031668186187744, "eval_runtime": 0.6521, "eval_samples_per_second": 6.134, "eval_steps_per_second": 1.533, "step": 360 }, { "epoch": 0.15, "learning_rate": 2.5632e-07, "loss": 2.2841, "step": 364 }, { "epoch": 0.15, "eval_loss": 2.0989203453063965, "eval_runtime": 0.6249, "eval_samples_per_second": 6.401, "eval_steps_per_second": 1.6, "step": 364 }, { "epoch": 0.15, "learning_rate": 2.5584e-07, "loss": 2.2481, "step": 368 }, { "epoch": 0.15, "eval_loss": 2.095189332962036, "eval_runtime": 0.6855, "eval_samples_per_second": 5.835, "eval_steps_per_second": 1.459, "step": 368 }, { "epoch": 0.15, "learning_rate": 2.5536e-07, "loss": 2.278, "step": 372 }, { "epoch": 0.15, "eval_loss": 2.0912463665008545, "eval_runtime": 0.7393, "eval_samples_per_second": 5.411, "eval_steps_per_second": 1.353, "step": 372 }, { "epoch": 0.15, "learning_rate": 2.5488e-07, "loss": 2.2765, "step": 376 }, { "epoch": 0.15, "eval_loss": 2.087336301803589, "eval_runtime": 0.4793, "eval_samples_per_second": 8.345, "eval_steps_per_second": 2.086, "step": 376 }, { "epoch": 0.15, "learning_rate": 2.544e-07, "loss": 2.2232, "step": 380 }, { "epoch": 0.15, "eval_loss": 2.0833120346069336, "eval_runtime": 0.487, "eval_samples_per_second": 8.214, "eval_steps_per_second": 2.053, "step": 380 }, { "epoch": 0.15, "learning_rate": 2.5392e-07, "loss": 2.306, "step": 384 }, { "epoch": 0.15, "eval_loss": 2.079479932785034, "eval_runtime": 0.4722, "eval_samples_per_second": 8.471, "eval_steps_per_second": 2.118, "step": 384 }, { "epoch": 0.16, "learning_rate": 2.5343999999999997e-07, "loss": 2.2126, "step": 388 }, { "epoch": 0.16, "eval_loss": 2.0760295391082764, "eval_runtime": 0.4958, "eval_samples_per_second": 8.068, "eval_steps_per_second": 2.017, "step": 388 }, { "epoch": 0.16, "learning_rate": 2.5295999999999996e-07, "loss": 2.2557, "step": 392 }, { "epoch": 0.16, "eval_loss": 2.072136402130127, "eval_runtime": 0.469, "eval_samples_per_second": 8.529, "eval_steps_per_second": 2.132, "step": 392 }, { "epoch": 0.16, "learning_rate": 2.5248e-07, "loss": 2.1988, "step": 396 }, { "epoch": 0.16, "eval_loss": 2.0683670043945312, "eval_runtime": 0.6385, "eval_samples_per_second": 6.264, "eval_steps_per_second": 1.566, "step": 396 }, { "epoch": 0.16, "learning_rate": 2.52e-07, "loss": 2.1917, "step": 400 }, { "epoch": 0.16, "eval_loss": 2.0638906955718994, "eval_runtime": 0.6834, "eval_samples_per_second": 5.853, "eval_steps_per_second": 1.463, "step": 400 }, { "epoch": 0.16, "learning_rate": 2.5152e-07, "loss": 2.2479, "step": 404 }, { "epoch": 0.16, "eval_loss": 2.0599253177642822, "eval_runtime": 0.7261, "eval_samples_per_second": 5.509, "eval_steps_per_second": 1.377, "step": 404 }, { "epoch": 0.16, "learning_rate": 2.5104e-07, "loss": 2.1484, "step": 408 }, { "epoch": 0.16, "eval_loss": 2.055751085281372, "eval_runtime": 0.7367, "eval_samples_per_second": 5.429, "eval_steps_per_second": 1.357, "step": 408 }, { "epoch": 0.16, "learning_rate": 2.5056e-07, "loss": 2.1886, "step": 412 }, { "epoch": 0.16, "eval_loss": 2.052119016647339, "eval_runtime": 0.4808, "eval_samples_per_second": 8.319, "eval_steps_per_second": 2.08, "step": 412 }, { "epoch": 0.17, "learning_rate": 2.5007999999999997e-07, "loss": 2.2026, "step": 416 }, { "epoch": 0.17, "eval_loss": 2.0482354164123535, "eval_runtime": 0.4856, "eval_samples_per_second": 8.238, "eval_steps_per_second": 2.059, "step": 416 }, { "epoch": 0.17, "learning_rate": 2.4959999999999996e-07, "loss": 2.1572, "step": 420 }, { "epoch": 0.17, "eval_loss": 2.0441887378692627, "eval_runtime": 0.4779, "eval_samples_per_second": 8.37, "eval_steps_per_second": 2.093, "step": 420 }, { "epoch": 0.17, "learning_rate": 2.4912e-07, "loss": 2.1931, "step": 424 }, { "epoch": 0.17, "eval_loss": 2.0399935245513916, "eval_runtime": 0.4803, "eval_samples_per_second": 8.329, "eval_steps_per_second": 2.082, "step": 424 }, { "epoch": 0.17, "learning_rate": 2.4864e-07, "loss": 2.161, "step": 428 }, { "epoch": 0.17, "eval_loss": 2.03645920753479, "eval_runtime": 0.4924, "eval_samples_per_second": 8.123, "eval_steps_per_second": 2.031, "step": 428 }, { "epoch": 0.17, "learning_rate": 2.4816e-07, "loss": 2.1115, "step": 432 }, { "epoch": 0.17, "eval_loss": 2.032196044921875, "eval_runtime": 0.6345, "eval_samples_per_second": 6.304, "eval_steps_per_second": 1.576, "step": 432 }, { "epoch": 0.17, "learning_rate": 2.4768e-07, "loss": 2.173, "step": 436 }, { "epoch": 0.17, "eval_loss": 2.028397560119629, "eval_runtime": 0.6625, "eval_samples_per_second": 6.038, "eval_steps_per_second": 1.509, "step": 436 }, { "epoch": 0.18, "learning_rate": 2.472e-07, "loss": 2.1491, "step": 440 }, { "epoch": 0.18, "eval_loss": 2.0247464179992676, "eval_runtime": 0.6969, "eval_samples_per_second": 5.74, "eval_steps_per_second": 1.435, "step": 440 }, { "epoch": 0.18, "learning_rate": 2.4672e-07, "loss": 2.1716, "step": 444 }, { "epoch": 0.18, "eval_loss": 2.0203933715820312, "eval_runtime": 0.7311, "eval_samples_per_second": 5.471, "eval_steps_per_second": 1.368, "step": 444 }, { "epoch": 0.18, "learning_rate": 2.4623999999999996e-07, "loss": 2.2031, "step": 448 }, { "epoch": 0.18, "eval_loss": 2.016533374786377, "eval_runtime": 0.4875, "eval_samples_per_second": 8.206, "eval_steps_per_second": 2.051, "step": 448 }, { "epoch": 0.18, "learning_rate": 2.4576e-07, "loss": 2.1466, "step": 452 }, { "epoch": 0.18, "eval_loss": 2.012568473815918, "eval_runtime": 0.4897, "eval_samples_per_second": 8.168, "eval_steps_per_second": 2.042, "step": 452 }, { "epoch": 0.18, "learning_rate": 2.4528e-07, "loss": 2.1384, "step": 456 }, { "epoch": 0.18, "eval_loss": 2.0088417530059814, "eval_runtime": 0.4969, "eval_samples_per_second": 8.05, "eval_steps_per_second": 2.013, "step": 456 }, { "epoch": 0.18, "learning_rate": 2.4479999999999997e-07, "loss": 2.1824, "step": 460 }, { "epoch": 0.18, "eval_loss": 2.0047850608825684, "eval_runtime": 0.4897, "eval_samples_per_second": 8.168, "eval_steps_per_second": 2.042, "step": 460 }, { "epoch": 0.19, "learning_rate": 2.4432e-07, "loss": 2.1401, "step": 464 }, { "epoch": 0.19, "eval_loss": 2.0006463527679443, "eval_runtime": 0.4882, "eval_samples_per_second": 8.193, "eval_steps_per_second": 2.048, "step": 464 }, { "epoch": 0.19, "learning_rate": 2.4384e-07, "loss": 2.2086, "step": 468 }, { "epoch": 0.19, "eval_loss": 1.9969314336776733, "eval_runtime": 0.6612, "eval_samples_per_second": 6.049, "eval_steps_per_second": 1.512, "step": 468 }, { "epoch": 0.19, "learning_rate": 2.4336e-07, "loss": 2.1687, "step": 472 }, { "epoch": 0.19, "eval_loss": 1.9925954341888428, "eval_runtime": 0.6804, "eval_samples_per_second": 5.879, "eval_steps_per_second": 1.47, "step": 472 }, { "epoch": 0.19, "learning_rate": 2.4287999999999996e-07, "loss": 2.145, "step": 476 }, { "epoch": 0.19, "eval_loss": 1.9888066053390503, "eval_runtime": 0.6955, "eval_samples_per_second": 5.752, "eval_steps_per_second": 1.438, "step": 476 }, { "epoch": 0.19, "learning_rate": 2.424e-07, "loss": 2.2007, "step": 480 }, { "epoch": 0.19, "eval_loss": 1.9850127696990967, "eval_runtime": 0.7558, "eval_samples_per_second": 5.292, "eval_steps_per_second": 1.323, "step": 480 }, { "epoch": 0.19, "learning_rate": 2.4192e-07, "loss": 2.1367, "step": 484 }, { "epoch": 0.19, "eval_loss": 1.9808437824249268, "eval_runtime": 0.4706, "eval_samples_per_second": 8.499, "eval_steps_per_second": 2.125, "step": 484 }, { "epoch": 0.2, "learning_rate": 2.4143999999999997e-07, "loss": 2.1291, "step": 488 }, { "epoch": 0.2, "eval_loss": 1.9767786264419556, "eval_runtime": 0.4803, "eval_samples_per_second": 8.327, "eval_steps_per_second": 2.082, "step": 488 }, { "epoch": 0.2, "learning_rate": 2.4096e-07, "loss": 2.1124, "step": 492 }, { "epoch": 0.2, "eval_loss": 1.9728602170944214, "eval_runtime": 0.4802, "eval_samples_per_second": 8.33, "eval_steps_per_second": 2.082, "step": 492 }, { "epoch": 0.2, "learning_rate": 2.4048e-07, "loss": 2.0738, "step": 496 }, { "epoch": 0.2, "eval_loss": 1.968900203704834, "eval_runtime": 0.4884, "eval_samples_per_second": 8.189, "eval_steps_per_second": 2.047, "step": 496 }, { "epoch": 0.2, "learning_rate": 2.4e-07, "loss": 2.1048, "step": 500 }, { "epoch": 0.2, "eval_loss": 1.9646457433700562, "eval_runtime": 0.5026, "eval_samples_per_second": 7.959, "eval_steps_per_second": 1.99, "step": 500 }, { "epoch": 0.2, "learning_rate": 2.3951999999999996e-07, "loss": 2.0995, "step": 504 }, { "epoch": 0.2, "eval_loss": 1.9606600999832153, "eval_runtime": 0.7928, "eval_samples_per_second": 5.045, "eval_steps_per_second": 1.261, "step": 504 }, { "epoch": 0.2, "learning_rate": 2.3903999999999995e-07, "loss": 2.0816, "step": 508 }, { "epoch": 0.2, "eval_loss": 1.956822395324707, "eval_runtime": 0.5321, "eval_samples_per_second": 7.518, "eval_steps_per_second": 1.879, "step": 508 }, { "epoch": 0.2, "learning_rate": 2.3856e-07, "loss": 2.0969, "step": 512 }, { "epoch": 0.2, "eval_loss": 1.9526716470718384, "eval_runtime": 0.5174, "eval_samples_per_second": 7.732, "eval_steps_per_second": 1.933, "step": 512 }, { "epoch": 0.21, "learning_rate": 2.3807999999999997e-07, "loss": 2.1034, "step": 516 }, { "epoch": 0.21, "eval_loss": 1.948419451713562, "eval_runtime": 0.5393, "eval_samples_per_second": 7.418, "eval_steps_per_second": 1.854, "step": 516 }, { "epoch": 0.21, "learning_rate": 2.376e-07, "loss": 2.0654, "step": 520 }, { "epoch": 0.21, "eval_loss": 1.9442145824432373, "eval_runtime": 0.5372, "eval_samples_per_second": 7.446, "eval_steps_per_second": 1.861, "step": 520 }, { "epoch": 0.21, "learning_rate": 2.3712e-07, "loss": 2.1175, "step": 524 }, { "epoch": 0.21, "eval_loss": 1.9403698444366455, "eval_runtime": 0.5129, "eval_samples_per_second": 7.798, "eval_steps_per_second": 1.95, "step": 524 }, { "epoch": 0.21, "learning_rate": 2.3663999999999998e-07, "loss": 2.0829, "step": 528 }, { "epoch": 0.21, "eval_loss": 1.936263084411621, "eval_runtime": 0.7202, "eval_samples_per_second": 5.554, "eval_steps_per_second": 1.388, "step": 528 }, { "epoch": 0.21, "learning_rate": 2.3616e-07, "loss": 2.0973, "step": 532 }, { "epoch": 0.21, "eval_loss": 1.9322115182876587, "eval_runtime": 0.6884, "eval_samples_per_second": 5.81, "eval_steps_per_second": 1.453, "step": 532 }, { "epoch": 0.21, "learning_rate": 2.3567999999999998e-07, "loss": 2.0439, "step": 536 }, { "epoch": 0.21, "eval_loss": 1.927826166152954, "eval_runtime": 0.7779, "eval_samples_per_second": 5.142, "eval_steps_per_second": 1.286, "step": 536 }, { "epoch": 0.22, "learning_rate": 2.352e-07, "loss": 2.0791, "step": 540 }, { "epoch": 0.22, "eval_loss": 1.923945426940918, "eval_runtime": 0.7514, "eval_samples_per_second": 5.323, "eval_steps_per_second": 1.331, "step": 540 }, { "epoch": 0.22, "learning_rate": 2.3471999999999997e-07, "loss": 2.0988, "step": 544 }, { "epoch": 0.22, "eval_loss": 1.9202955961227417, "eval_runtime": 0.5194, "eval_samples_per_second": 7.701, "eval_steps_per_second": 1.925, "step": 544 }, { "epoch": 0.22, "learning_rate": 2.3424e-07, "loss": 2.0179, "step": 548 }, { "epoch": 0.22, "eval_loss": 1.916027307510376, "eval_runtime": 0.5072, "eval_samples_per_second": 7.887, "eval_steps_per_second": 1.972, "step": 548 }, { "epoch": 0.22, "learning_rate": 2.3376e-07, "loss": 2.0452, "step": 552 }, { "epoch": 0.22, "eval_loss": 1.911855697631836, "eval_runtime": 0.5112, "eval_samples_per_second": 7.825, "eval_steps_per_second": 1.956, "step": 552 }, { "epoch": 0.22, "learning_rate": 2.3327999999999998e-07, "loss": 1.9792, "step": 556 }, { "epoch": 0.22, "eval_loss": 1.907868504524231, "eval_runtime": 0.5368, "eval_samples_per_second": 7.452, "eval_steps_per_second": 1.863, "step": 556 }, { "epoch": 0.22, "learning_rate": 2.328e-07, "loss": 1.9862, "step": 560 }, { "epoch": 0.22, "eval_loss": 1.9032366275787354, "eval_runtime": 0.52, "eval_samples_per_second": 7.692, "eval_steps_per_second": 1.923, "step": 560 }, { "epoch": 0.23, "learning_rate": 2.3231999999999998e-07, "loss": 2.0176, "step": 564 }, { "epoch": 0.23, "eval_loss": 1.8994207382202148, "eval_runtime": 0.5141, "eval_samples_per_second": 7.78, "eval_steps_per_second": 1.945, "step": 564 }, { "epoch": 0.23, "learning_rate": 2.3184e-07, "loss": 2.0066, "step": 568 }, { "epoch": 0.23, "eval_loss": 1.8953509330749512, "eval_runtime": 0.7027, "eval_samples_per_second": 5.692, "eval_steps_per_second": 1.423, "step": 568 }, { "epoch": 0.23, "learning_rate": 2.3135999999999998e-07, "loss": 2.0333, "step": 572 }, { "epoch": 0.23, "eval_loss": 1.8914432525634766, "eval_runtime": 0.7279, "eval_samples_per_second": 5.495, "eval_steps_per_second": 1.374, "step": 572 }, { "epoch": 0.23, "learning_rate": 2.3088e-07, "loss": 2.0316, "step": 576 }, { "epoch": 0.23, "eval_loss": 1.8870800733566284, "eval_runtime": 0.7212, "eval_samples_per_second": 5.546, "eval_steps_per_second": 1.386, "step": 576 }, { "epoch": 0.23, "learning_rate": 2.304e-07, "loss": 2.0114, "step": 580 }, { "epoch": 0.23, "eval_loss": 1.8827916383743286, "eval_runtime": 0.6774, "eval_samples_per_second": 5.905, "eval_steps_per_second": 1.476, "step": 580 }, { "epoch": 0.23, "learning_rate": 2.2991999999999998e-07, "loss": 2.0093, "step": 584 }, { "epoch": 0.23, "eval_loss": 1.8788678646087646, "eval_runtime": 0.5185, "eval_samples_per_second": 7.715, "eval_steps_per_second": 1.929, "step": 584 }, { "epoch": 0.24, "learning_rate": 2.2944e-07, "loss": 1.9829, "step": 588 }, { "epoch": 0.24, "eval_loss": 1.8749186992645264, "eval_runtime": 0.5091, "eval_samples_per_second": 7.857, "eval_steps_per_second": 1.964, "step": 588 }, { "epoch": 0.24, "learning_rate": 2.2895999999999998e-07, "loss": 1.971, "step": 592 }, { "epoch": 0.24, "eval_loss": 1.8706499338150024, "eval_runtime": 0.5204, "eval_samples_per_second": 7.687, "eval_steps_per_second": 1.922, "step": 592 }, { "epoch": 0.24, "learning_rate": 2.2848000000000002e-07, "loss": 2.0188, "step": 596 }, { "epoch": 0.24, "eval_loss": 1.8667842149734497, "eval_runtime": 0.5224, "eval_samples_per_second": 7.657, "eval_steps_per_second": 1.914, "step": 596 }, { "epoch": 0.24, "learning_rate": 2.28e-07, "loss": 2.0081, "step": 600 }, { "epoch": 0.24, "eval_loss": 1.8627525568008423, "eval_runtime": 0.5196, "eval_samples_per_second": 7.699, "eval_steps_per_second": 1.925, "step": 600 }, { "epoch": 0.24, "learning_rate": 2.2752e-07, "loss": 2.0014, "step": 604 }, { "epoch": 0.24, "eval_loss": 1.8587167263031006, "eval_runtime": 0.7373, "eval_samples_per_second": 5.425, "eval_steps_per_second": 1.356, "step": 604 }, { "epoch": 0.24, "learning_rate": 2.2704e-07, "loss": 1.9741, "step": 608 }, { "epoch": 0.24, "eval_loss": 1.8543612957000732, "eval_runtime": 0.7492, "eval_samples_per_second": 5.339, "eval_steps_per_second": 1.335, "step": 608 }, { "epoch": 0.24, "learning_rate": 2.2655999999999999e-07, "loss": 1.9828, "step": 612 }, { "epoch": 0.24, "eval_loss": 1.8504937887191772, "eval_runtime": 0.7242, "eval_samples_per_second": 5.524, "eval_steps_per_second": 1.381, "step": 612 }, { "epoch": 0.25, "learning_rate": 2.2608e-07, "loss": 1.9481, "step": 616 }, { "epoch": 0.25, "eval_loss": 1.8463339805603027, "eval_runtime": 0.6997, "eval_samples_per_second": 5.716, "eval_steps_per_second": 1.429, "step": 616 }, { "epoch": 0.25, "learning_rate": 2.2559999999999998e-07, "loss": 1.9584, "step": 620 }, { "epoch": 0.25, "eval_loss": 1.8423882722854614, "eval_runtime": 0.5137, "eval_samples_per_second": 7.787, "eval_steps_per_second": 1.947, "step": 620 }, { "epoch": 0.25, "learning_rate": 2.2511999999999997e-07, "loss": 1.9449, "step": 624 }, { "epoch": 0.25, "eval_loss": 1.838066577911377, "eval_runtime": 0.5091, "eval_samples_per_second": 7.857, "eval_steps_per_second": 1.964, "step": 624 }, { "epoch": 0.25, "learning_rate": 2.2464e-07, "loss": 1.9753, "step": 628 }, { "epoch": 0.25, "eval_loss": 1.8342829942703247, "eval_runtime": 0.504, "eval_samples_per_second": 7.936, "eval_steps_per_second": 1.984, "step": 628 }, { "epoch": 0.25, "learning_rate": 2.2416e-07, "loss": 2.0055, "step": 632 }, { "epoch": 0.25, "eval_loss": 1.8300307989120483, "eval_runtime": 0.5201, "eval_samples_per_second": 7.691, "eval_steps_per_second": 1.923, "step": 632 }, { "epoch": 0.25, "learning_rate": 2.2368e-07, "loss": 1.98, "step": 636 }, { "epoch": 0.25, "eval_loss": 1.8260575532913208, "eval_runtime": 0.5267, "eval_samples_per_second": 7.594, "eval_steps_per_second": 1.898, "step": 636 }, { "epoch": 0.26, "learning_rate": 2.232e-07, "loss": 1.9757, "step": 640 }, { "epoch": 0.26, "eval_loss": 1.8222540616989136, "eval_runtime": 0.7574, "eval_samples_per_second": 5.281, "eval_steps_per_second": 1.32, "step": 640 }, { "epoch": 0.26, "learning_rate": 2.2271999999999997e-07, "loss": 1.9683, "step": 644 }, { "epoch": 0.26, "eval_loss": 1.818216323852539, "eval_runtime": 0.7304, "eval_samples_per_second": 5.476, "eval_steps_per_second": 1.369, "step": 644 }, { "epoch": 0.26, "learning_rate": 2.2223999999999998e-07, "loss": 1.926, "step": 648 }, { "epoch": 0.26, "eval_loss": 1.8140522241592407, "eval_runtime": 0.7453, "eval_samples_per_second": 5.367, "eval_steps_per_second": 1.342, "step": 648 }, { "epoch": 0.26, "learning_rate": 2.2175999999999997e-07, "loss": 1.9454, "step": 652 }, { "epoch": 0.26, "eval_loss": 1.8100805282592773, "eval_runtime": 0.6536, "eval_samples_per_second": 6.12, "eval_steps_per_second": 1.53, "step": 652 }, { "epoch": 0.26, "learning_rate": 2.2128e-07, "loss": 1.9352, "step": 656 }, { "epoch": 0.26, "eval_loss": 1.8059089183807373, "eval_runtime": 0.5193, "eval_samples_per_second": 7.702, "eval_steps_per_second": 1.926, "step": 656 }, { "epoch": 0.26, "learning_rate": 2.208e-07, "loss": 1.8816, "step": 660 }, { "epoch": 0.26, "eval_loss": 1.8020563125610352, "eval_runtime": 0.5265, "eval_samples_per_second": 7.597, "eval_steps_per_second": 1.899, "step": 660 }, { "epoch": 0.27, "learning_rate": 2.2032e-07, "loss": 1.9182, "step": 664 }, { "epoch": 0.27, "eval_loss": 1.7980492115020752, "eval_runtime": 0.5102, "eval_samples_per_second": 7.84, "eval_steps_per_second": 1.96, "step": 664 }, { "epoch": 0.27, "learning_rate": 2.1984e-07, "loss": 1.9659, "step": 668 }, { "epoch": 0.27, "eval_loss": 1.7941217422485352, "eval_runtime": 0.5988, "eval_samples_per_second": 6.681, "eval_steps_per_second": 1.67, "step": 668 }, { "epoch": 0.27, "learning_rate": 2.1935999999999997e-07, "loss": 1.8932, "step": 672 }, { "epoch": 0.27, "eval_loss": 1.7901490926742554, "eval_runtime": 0.5339, "eval_samples_per_second": 7.492, "eval_steps_per_second": 1.873, "step": 672 }, { "epoch": 0.27, "learning_rate": 2.1887999999999999e-07, "loss": 1.8608, "step": 676 }, { "epoch": 0.27, "eval_loss": 1.786109447479248, "eval_runtime": 0.7219, "eval_samples_per_second": 5.541, "eval_steps_per_second": 1.385, "step": 676 }, { "epoch": 0.27, "learning_rate": 2.184e-07, "loss": 1.941, "step": 680 }, { "epoch": 0.27, "eval_loss": 1.7824102640151978, "eval_runtime": 0.7619, "eval_samples_per_second": 5.25, "eval_steps_per_second": 1.313, "step": 680 }, { "epoch": 0.27, "learning_rate": 2.1792e-07, "loss": 1.8854, "step": 684 }, { "epoch": 0.27, "eval_loss": 1.77846097946167, "eval_runtime": 0.7601, "eval_samples_per_second": 5.262, "eval_steps_per_second": 1.316, "step": 684 }, { "epoch": 0.28, "learning_rate": 2.1744e-07, "loss": 1.8912, "step": 688 }, { "epoch": 0.28, "eval_loss": 1.7742952108383179, "eval_runtime": 0.59, "eval_samples_per_second": 6.78, "eval_steps_per_second": 1.695, "step": 688 }, { "epoch": 0.28, "learning_rate": 2.1695999999999998e-07, "loss": 1.8667, "step": 692 }, { "epoch": 0.28, "eval_loss": 1.770714521408081, "eval_runtime": 0.5262, "eval_samples_per_second": 7.601, "eval_steps_per_second": 1.9, "step": 692 }, { "epoch": 0.28, "learning_rate": 2.1648e-07, "loss": 1.912, "step": 696 }, { "epoch": 0.28, "eval_loss": 1.7666008472442627, "eval_runtime": 0.5272, "eval_samples_per_second": 7.587, "eval_steps_per_second": 1.897, "step": 696 }, { "epoch": 0.28, "learning_rate": 2.1599999999999998e-07, "loss": 1.9009, "step": 700 }, { "epoch": 0.28, "eval_loss": 1.7627824544906616, "eval_runtime": 0.5295, "eval_samples_per_second": 7.555, "eval_steps_per_second": 1.889, "step": 700 }, { "epoch": 0.28, "learning_rate": 2.1552000000000001e-07, "loss": 1.906, "step": 704 }, { "epoch": 0.28, "eval_loss": 1.75889253616333, "eval_runtime": 0.5589, "eval_samples_per_second": 7.157, "eval_steps_per_second": 1.789, "step": 704 }, { "epoch": 0.28, "learning_rate": 2.1504e-07, "loss": 1.8671, "step": 708 }, { "epoch": 0.28, "eval_loss": 1.7549973726272583, "eval_runtime": 0.687, "eval_samples_per_second": 5.822, "eval_steps_per_second": 1.456, "step": 708 }, { "epoch": 0.28, "learning_rate": 2.1455999999999998e-07, "loss": 1.8609, "step": 712 }, { "epoch": 0.28, "eval_loss": 1.7507662773132324, "eval_runtime": 0.7225, "eval_samples_per_second": 5.537, "eval_steps_per_second": 1.384, "step": 712 }, { "epoch": 0.29, "learning_rate": 2.1408e-07, "loss": 1.8485, "step": 716 }, { "epoch": 0.29, "eval_loss": 1.746917486190796, "eval_runtime": 0.7954, "eval_samples_per_second": 5.029, "eval_steps_per_second": 1.257, "step": 716 }, { "epoch": 0.29, "learning_rate": 2.1359999999999998e-07, "loss": 1.8334, "step": 720 }, { "epoch": 0.29, "eval_loss": 1.7430514097213745, "eval_runtime": 0.7433, "eval_samples_per_second": 5.381, "eval_steps_per_second": 1.345, "step": 720 }, { "epoch": 0.29, "learning_rate": 2.1312e-07, "loss": 1.8763, "step": 724 }, { "epoch": 0.29, "eval_loss": 1.7392196655273438, "eval_runtime": 0.5237, "eval_samples_per_second": 7.638, "eval_steps_per_second": 1.91, "step": 724 }, { "epoch": 0.29, "learning_rate": 2.1263999999999998e-07, "loss": 1.9005, "step": 728 }, { "epoch": 0.29, "eval_loss": 1.7355214357376099, "eval_runtime": 0.524, "eval_samples_per_second": 7.634, "eval_steps_per_second": 1.908, "step": 728 }, { "epoch": 0.29, "learning_rate": 2.1216000000000002e-07, "loss": 1.8669, "step": 732 }, { "epoch": 0.29, "eval_loss": 1.731513261795044, "eval_runtime": 0.5593, "eval_samples_per_second": 7.152, "eval_steps_per_second": 1.788, "step": 732 }, { "epoch": 0.29, "learning_rate": 2.1168e-07, "loss": 1.8984, "step": 736 }, { "epoch": 0.29, "eval_loss": 1.727636694908142, "eval_runtime": 0.5241, "eval_samples_per_second": 7.632, "eval_steps_per_second": 1.908, "step": 736 }, { "epoch": 0.3, "learning_rate": 2.1119999999999999e-07, "loss": 1.8074, "step": 740 }, { "epoch": 0.3, "eval_loss": 1.7240556478500366, "eval_runtime": 0.715, "eval_samples_per_second": 5.594, "eval_steps_per_second": 1.399, "step": 740 }, { "epoch": 0.3, "learning_rate": 2.1072e-07, "loss": 1.8614, "step": 744 }, { "epoch": 0.3, "eval_loss": 1.7201639413833618, "eval_runtime": 0.7611, "eval_samples_per_second": 5.256, "eval_steps_per_second": 1.314, "step": 744 }, { "epoch": 0.3, "learning_rate": 2.1023999999999998e-07, "loss": 1.8211, "step": 748 }, { "epoch": 0.3, "eval_loss": 1.7165008783340454, "eval_runtime": 0.7193, "eval_samples_per_second": 5.561, "eval_steps_per_second": 1.39, "step": 748 }, { "epoch": 0.3, "learning_rate": 2.0976e-07, "loss": 1.8553, "step": 752 }, { "epoch": 0.3, "eval_loss": 1.7123990058898926, "eval_runtime": 0.5463, "eval_samples_per_second": 7.323, "eval_steps_per_second": 1.831, "step": 752 }, { "epoch": 0.3, "learning_rate": 2.0927999999999998e-07, "loss": 1.7978, "step": 756 }, { "epoch": 0.3, "eval_loss": 1.7084720134735107, "eval_runtime": 0.574, "eval_samples_per_second": 6.968, "eval_steps_per_second": 1.742, "step": 756 }, { "epoch": 0.3, "learning_rate": 2.0879999999999996e-07, "loss": 1.8203, "step": 760 }, { "epoch": 0.3, "eval_loss": 1.7048146724700928, "eval_runtime": 0.5838, "eval_samples_per_second": 6.852, "eval_steps_per_second": 1.713, "step": 760 }, { "epoch": 0.31, "learning_rate": 2.0832e-07, "loss": 1.8192, "step": 764 }, { "epoch": 0.31, "eval_loss": 1.7010469436645508, "eval_runtime": 0.5225, "eval_samples_per_second": 7.656, "eval_steps_per_second": 1.914, "step": 764 }, { "epoch": 0.31, "learning_rate": 2.0784e-07, "loss": 1.8532, "step": 768 }, { "epoch": 0.31, "eval_loss": 1.6973625421524048, "eval_runtime": 0.525, "eval_samples_per_second": 7.619, "eval_steps_per_second": 1.905, "step": 768 }, { "epoch": 0.31, "learning_rate": 2.0736e-07, "loss": 1.8307, "step": 772 }, { "epoch": 0.31, "eval_loss": 1.6935136318206787, "eval_runtime": 0.7235, "eval_samples_per_second": 5.528, "eval_steps_per_second": 1.382, "step": 772 }, { "epoch": 0.31, "learning_rate": 2.0687999999999998e-07, "loss": 1.8207, "step": 776 }, { "epoch": 0.31, "eval_loss": 1.6895670890808105, "eval_runtime": 0.8289, "eval_samples_per_second": 4.826, "eval_steps_per_second": 1.206, "step": 776 }, { "epoch": 0.31, "learning_rate": 2.0639999999999997e-07, "loss": 1.7895, "step": 780 }, { "epoch": 0.31, "eval_loss": 1.6858075857162476, "eval_runtime": 0.7778, "eval_samples_per_second": 5.143, "eval_steps_per_second": 1.286, "step": 780 }, { "epoch": 0.31, "learning_rate": 2.0592e-07, "loss": 1.7976, "step": 784 }, { "epoch": 0.31, "eval_loss": 1.6820955276489258, "eval_runtime": 0.5265, "eval_samples_per_second": 7.597, "eval_steps_per_second": 1.899, "step": 784 }, { "epoch": 0.32, "learning_rate": 2.0544e-07, "loss": 1.814, "step": 788 }, { "epoch": 0.32, "eval_loss": 1.6785138845443726, "eval_runtime": 0.5179, "eval_samples_per_second": 7.724, "eval_steps_per_second": 1.931, "step": 788 }, { "epoch": 0.32, "learning_rate": 2.0496e-07, "loss": 1.7972, "step": 792 }, { "epoch": 0.32, "eval_loss": 1.674804449081421, "eval_runtime": 0.5304, "eval_samples_per_second": 7.541, "eval_steps_per_second": 1.885, "step": 792 }, { "epoch": 0.32, "learning_rate": 2.0448e-07, "loss": 1.8258, "step": 796 }, { "epoch": 0.32, "eval_loss": 1.6713837385177612, "eval_runtime": 0.5336, "eval_samples_per_second": 7.496, "eval_steps_per_second": 1.874, "step": 796 }, { "epoch": 0.32, "learning_rate": 2.04e-07, "loss": 1.79, "step": 800 }, { "epoch": 0.32, "eval_loss": 1.667376160621643, "eval_runtime": 0.7608, "eval_samples_per_second": 5.258, "eval_steps_per_second": 1.314, "step": 800 }, { "epoch": 0.32, "learning_rate": 2.0351999999999999e-07, "loss": 1.802, "step": 804 }, { "epoch": 0.32, "eval_loss": 1.6640408039093018, "eval_runtime": 0.7498, "eval_samples_per_second": 5.335, "eval_steps_per_second": 1.334, "step": 804 }, { "epoch": 0.32, "learning_rate": 2.0303999999999997e-07, "loss": 1.7784, "step": 808 }, { "epoch": 0.32, "eval_loss": 1.6603385210037231, "eval_runtime": 0.7501, "eval_samples_per_second": 5.333, "eval_steps_per_second": 1.333, "step": 808 }, { "epoch": 0.32, "learning_rate": 2.0256e-07, "loss": 1.7671, "step": 812 }, { "epoch": 0.32, "eval_loss": 1.6568516492843628, "eval_runtime": 0.5206, "eval_samples_per_second": 7.684, "eval_steps_per_second": 1.921, "step": 812 }, { "epoch": 0.33, "learning_rate": 2.0208e-07, "loss": 1.7618, "step": 816 }, { "epoch": 0.33, "eval_loss": 1.653469443321228, "eval_runtime": 0.5354, "eval_samples_per_second": 7.472, "eval_steps_per_second": 1.868, "step": 816 }, { "epoch": 0.33, "learning_rate": 2.016e-07, "loss": 1.8207, "step": 820 }, { "epoch": 0.33, "eval_loss": 1.6502578258514404, "eval_runtime": 0.523, "eval_samples_per_second": 7.648, "eval_steps_per_second": 1.912, "step": 820 }, { "epoch": 0.33, "learning_rate": 2.0112e-07, "loss": 1.7837, "step": 824 }, { "epoch": 0.33, "eval_loss": 1.6467454433441162, "eval_runtime": 0.5297, "eval_samples_per_second": 7.552, "eval_steps_per_second": 1.888, "step": 824 }, { "epoch": 0.33, "learning_rate": 2.0063999999999998e-07, "loss": 1.8066, "step": 828 }, { "epoch": 0.33, "eval_loss": 1.6439214944839478, "eval_runtime": 0.522, "eval_samples_per_second": 7.663, "eval_steps_per_second": 1.916, "step": 828 }, { "epoch": 0.33, "learning_rate": 2.0016e-07, "loss": 1.7814, "step": 832 }, { "epoch": 0.33, "eval_loss": 1.6407381296157837, "eval_runtime": 0.5382, "eval_samples_per_second": 7.432, "eval_steps_per_second": 1.858, "step": 832 }, { "epoch": 0.33, "learning_rate": 1.9967999999999997e-07, "loss": 1.7244, "step": 836 }, { "epoch": 0.33, "eval_loss": 1.6372514963150024, "eval_runtime": 0.7157, "eval_samples_per_second": 5.589, "eval_steps_per_second": 1.397, "step": 836 }, { "epoch": 0.34, "learning_rate": 1.992e-07, "loss": 1.7195, "step": 840 }, { "epoch": 0.34, "eval_loss": 1.634232997894287, "eval_runtime": 0.7254, "eval_samples_per_second": 5.514, "eval_steps_per_second": 1.379, "step": 840 }, { "epoch": 0.34, "learning_rate": 1.9872e-07, "loss": 1.7524, "step": 844 }, { "epoch": 0.34, "eval_loss": 1.6310441493988037, "eval_runtime": 0.7839, "eval_samples_per_second": 5.103, "eval_steps_per_second": 1.276, "step": 844 }, { "epoch": 0.34, "learning_rate": 1.9824e-07, "loss": 1.7644, "step": 848 }, { "epoch": 0.34, "eval_loss": 1.6279191970825195, "eval_runtime": 0.5253, "eval_samples_per_second": 7.615, "eval_steps_per_second": 1.904, "step": 848 }, { "epoch": 0.34, "learning_rate": 1.9776e-07, "loss": 1.7171, "step": 852 }, { "epoch": 0.34, "eval_loss": 1.6244579553604126, "eval_runtime": 0.5359, "eval_samples_per_second": 7.464, "eval_steps_per_second": 1.866, "step": 852 }, { "epoch": 0.34, "learning_rate": 1.9727999999999998e-07, "loss": 1.7418, "step": 856 }, { "epoch": 0.34, "eval_loss": 1.6212078332901, "eval_runtime": 0.5379, "eval_samples_per_second": 7.436, "eval_steps_per_second": 1.859, "step": 856 }, { "epoch": 0.34, "learning_rate": 1.968e-07, "loss": 1.7337, "step": 860 }, { "epoch": 0.34, "eval_loss": 1.6180227994918823, "eval_runtime": 0.5259, "eval_samples_per_second": 7.606, "eval_steps_per_second": 1.902, "step": 860 }, { "epoch": 0.35, "learning_rate": 1.9631999999999997e-07, "loss": 1.7441, "step": 864 }, { "epoch": 0.35, "eval_loss": 1.61477530002594, "eval_runtime": 0.5216, "eval_samples_per_second": 7.669, "eval_steps_per_second": 1.917, "step": 864 }, { "epoch": 0.35, "learning_rate": 1.9584e-07, "loss": 1.694, "step": 868 }, { "epoch": 0.35, "eval_loss": 1.611538052558899, "eval_runtime": 0.6803, "eval_samples_per_second": 5.88, "eval_steps_per_second": 1.47, "step": 868 }, { "epoch": 0.35, "learning_rate": 1.9536e-07, "loss": 1.7601, "step": 872 }, { "epoch": 0.35, "eval_loss": 1.6083098649978638, "eval_runtime": 0.716, "eval_samples_per_second": 5.586, "eval_steps_per_second": 1.397, "step": 872 }, { "epoch": 0.35, "learning_rate": 1.9487999999999998e-07, "loss": 1.7081, "step": 876 }, { "epoch": 0.35, "eval_loss": 1.6050214767456055, "eval_runtime": 0.7622, "eval_samples_per_second": 5.248, "eval_steps_per_second": 1.312, "step": 876 }, { "epoch": 0.35, "learning_rate": 1.944e-07, "loss": 1.7101, "step": 880 }, { "epoch": 0.35, "eval_loss": 1.6019953489303589, "eval_runtime": 0.7766, "eval_samples_per_second": 5.151, "eval_steps_per_second": 1.288, "step": 880 }, { "epoch": 0.35, "learning_rate": 1.9391999999999998e-07, "loss": 1.7271, "step": 884 }, { "epoch": 0.35, "eval_loss": 1.5990221500396729, "eval_runtime": 0.5153, "eval_samples_per_second": 7.763, "eval_steps_per_second": 1.941, "step": 884 }, { "epoch": 0.36, "learning_rate": 1.9344e-07, "loss": 1.7402, "step": 888 }, { "epoch": 0.36, "eval_loss": 1.5954092741012573, "eval_runtime": 0.5168, "eval_samples_per_second": 7.74, "eval_steps_per_second": 1.935, "step": 888 }, { "epoch": 0.36, "learning_rate": 1.9296e-07, "loss": 1.7125, "step": 892 }, { "epoch": 0.36, "eval_loss": 1.5921534299850464, "eval_runtime": 0.5424, "eval_samples_per_second": 7.375, "eval_steps_per_second": 1.844, "step": 892 }, { "epoch": 0.36, "learning_rate": 1.9248e-07, "loss": 1.6949, "step": 896 }, { "epoch": 0.36, "eval_loss": 1.5888370275497437, "eval_runtime": 0.5307, "eval_samples_per_second": 7.537, "eval_steps_per_second": 1.884, "step": 896 }, { "epoch": 0.36, "learning_rate": 1.92e-07, "loss": 1.7145, "step": 900 }, { "epoch": 0.36, "eval_loss": 1.5858186483383179, "eval_runtime": 0.511, "eval_samples_per_second": 7.828, "eval_steps_per_second": 1.957, "step": 900 }, { "epoch": 0.36, "learning_rate": 1.9151999999999998e-07, "loss": 1.6665, "step": 904 }, { "epoch": 0.36, "eval_loss": 1.5824443101882935, "eval_runtime": 0.6907, "eval_samples_per_second": 5.791, "eval_steps_per_second": 1.448, "step": 904 }, { "epoch": 0.36, "learning_rate": 1.9104e-07, "loss": 1.6929, "step": 908 }, { "epoch": 0.36, "eval_loss": 1.5796196460723877, "eval_runtime": 0.7487, "eval_samples_per_second": 5.342, "eval_steps_per_second": 1.336, "step": 908 }, { "epoch": 0.36, "learning_rate": 1.9055999999999998e-07, "loss": 1.7068, "step": 912 }, { "epoch": 0.36, "eval_loss": 1.5765777826309204, "eval_runtime": 0.7477, "eval_samples_per_second": 5.35, "eval_steps_per_second": 1.337, "step": 912 }, { "epoch": 0.37, "learning_rate": 1.9008000000000002e-07, "loss": 1.6877, "step": 916 }, { "epoch": 0.37, "eval_loss": 1.57340669631958, "eval_runtime": 0.753, "eval_samples_per_second": 5.312, "eval_steps_per_second": 1.328, "step": 916 }, { "epoch": 0.37, "learning_rate": 1.896e-07, "loss": 1.6718, "step": 920 }, { "epoch": 0.37, "eval_loss": 1.5706267356872559, "eval_runtime": 0.514, "eval_samples_per_second": 7.782, "eval_steps_per_second": 1.945, "step": 920 }, { "epoch": 0.37, "learning_rate": 1.8912e-07, "loss": 1.6886, "step": 924 }, { "epoch": 0.37, "eval_loss": 1.5676339864730835, "eval_runtime": 0.5222, "eval_samples_per_second": 7.66, "eval_steps_per_second": 1.915, "step": 924 }, { "epoch": 0.37, "learning_rate": 1.8864e-07, "loss": 1.7459, "step": 928 }, { "epoch": 0.37, "eval_loss": 1.5645827054977417, "eval_runtime": 0.5299, "eval_samples_per_second": 7.548, "eval_steps_per_second": 1.887, "step": 928 }, { "epoch": 0.37, "learning_rate": 1.8815999999999999e-07, "loss": 1.6596, "step": 932 }, { "epoch": 0.37, "eval_loss": 1.5616861581802368, "eval_runtime": 0.5303, "eval_samples_per_second": 7.543, "eval_steps_per_second": 1.886, "step": 932 }, { "epoch": 0.37, "learning_rate": 1.8768e-07, "loss": 1.6689, "step": 936 }, { "epoch": 0.37, "eval_loss": 1.5588451623916626, "eval_runtime": 0.5236, "eval_samples_per_second": 7.639, "eval_steps_per_second": 1.91, "step": 936 }, { "epoch": 0.38, "learning_rate": 1.8719999999999998e-07, "loss": 1.6744, "step": 940 }, { "epoch": 0.38, "eval_loss": 1.5560673475265503, "eval_runtime": 0.7233, "eval_samples_per_second": 5.53, "eval_steps_per_second": 1.383, "step": 940 }, { "epoch": 0.38, "learning_rate": 1.8671999999999997e-07, "loss": 1.7009, "step": 944 }, { "epoch": 0.38, "eval_loss": 1.5533243417739868, "eval_runtime": 0.6983, "eval_samples_per_second": 5.728, "eval_steps_per_second": 1.432, "step": 944 }, { "epoch": 0.38, "learning_rate": 1.8624e-07, "loss": 1.6651, "step": 948 }, { "epoch": 0.38, "eval_loss": 1.55048668384552, "eval_runtime": 0.7511, "eval_samples_per_second": 5.325, "eval_steps_per_second": 1.331, "step": 948 }, { "epoch": 0.38, "learning_rate": 1.8576e-07, "loss": 1.6821, "step": 952 }, { "epoch": 0.38, "eval_loss": 1.547943353652954, "eval_runtime": 0.532, "eval_samples_per_second": 7.519, "eval_steps_per_second": 1.88, "step": 952 }, { "epoch": 0.38, "learning_rate": 1.8528e-07, "loss": 1.6453, "step": 956 }, { "epoch": 0.38, "eval_loss": 1.5453405380249023, "eval_runtime": 0.5463, "eval_samples_per_second": 7.322, "eval_steps_per_second": 1.831, "step": 956 }, { "epoch": 0.38, "learning_rate": 1.848e-07, "loss": 1.6624, "step": 960 }, { "epoch": 0.38, "eval_loss": 1.542648196220398, "eval_runtime": 0.5288, "eval_samples_per_second": 7.564, "eval_steps_per_second": 1.891, "step": 960 }, { "epoch": 0.39, "learning_rate": 1.8431999999999997e-07, "loss": 1.6453, "step": 964 }, { "epoch": 0.39, "eval_loss": 1.5402462482452393, "eval_runtime": 0.5242, "eval_samples_per_second": 7.63, "eval_steps_per_second": 1.908, "step": 964 }, { "epoch": 0.39, "learning_rate": 1.8383999999999998e-07, "loss": 1.6451, "step": 968 }, { "epoch": 0.39, "eval_loss": 1.5377165079116821, "eval_runtime": 0.5169, "eval_samples_per_second": 7.738, "eval_steps_per_second": 1.935, "step": 968 }, { "epoch": 0.39, "learning_rate": 1.8335999999999997e-07, "loss": 1.6627, "step": 972 }, { "epoch": 0.39, "eval_loss": 1.5353412628173828, "eval_runtime": 0.6797, "eval_samples_per_second": 5.885, "eval_steps_per_second": 1.471, "step": 972 }, { "epoch": 0.39, "learning_rate": 1.8288e-07, "loss": 1.6423, "step": 976 }, { "epoch": 0.39, "eval_loss": 1.5325669050216675, "eval_runtime": 0.7175, "eval_samples_per_second": 5.575, "eval_steps_per_second": 1.394, "step": 976 }, { "epoch": 0.39, "learning_rate": 1.824e-07, "loss": 1.652, "step": 980 }, { "epoch": 0.39, "eval_loss": 1.530207872390747, "eval_runtime": 0.8099, "eval_samples_per_second": 4.939, "eval_steps_per_second": 1.235, "step": 980 }, { "epoch": 0.39, "learning_rate": 1.8192e-07, "loss": 1.6414, "step": 984 }, { "epoch": 0.39, "eval_loss": 1.5278236865997314, "eval_runtime": 0.7814, "eval_samples_per_second": 5.119, "eval_steps_per_second": 1.28, "step": 984 }, { "epoch": 0.4, "learning_rate": 1.8144e-07, "loss": 1.6107, "step": 988 }, { "epoch": 0.4, "eval_loss": 1.5253430604934692, "eval_runtime": 0.5386, "eval_samples_per_second": 7.427, "eval_steps_per_second": 1.857, "step": 988 }, { "epoch": 0.4, "learning_rate": 1.8095999999999997e-07, "loss": 1.6599, "step": 992 }, { "epoch": 0.4, "eval_loss": 1.5225120782852173, "eval_runtime": 0.5302, "eval_samples_per_second": 7.544, "eval_steps_per_second": 1.886, "step": 992 }, { "epoch": 0.4, "learning_rate": 1.8048e-07, "loss": 1.6326, "step": 996 }, { "epoch": 0.4, "eval_loss": 1.5201939344406128, "eval_runtime": 0.533, "eval_samples_per_second": 7.505, "eval_steps_per_second": 1.876, "step": 996 }, { "epoch": 0.4, "learning_rate": 1.8e-07, "loss": 1.6324, "step": 1000 }, { "epoch": 0.4, "eval_loss": 1.5175316333770752, "eval_runtime": 0.5316, "eval_samples_per_second": 7.525, "eval_steps_per_second": 1.881, "step": 1000 }, { "epoch": 0.4, "learning_rate": 1.7952e-07, "loss": 1.5907, "step": 1004 }, { "epoch": 0.4, "eval_loss": 1.5149424076080322, "eval_runtime": 0.7298, "eval_samples_per_second": 5.481, "eval_steps_per_second": 1.37, "step": 1004 }, { "epoch": 0.4, "learning_rate": 1.7904e-07, "loss": 1.6465, "step": 1008 }, { "epoch": 0.4, "eval_loss": 1.5124318599700928, "eval_runtime": 0.7308, "eval_samples_per_second": 5.473, "eval_steps_per_second": 1.368, "step": 1008 }, { "epoch": 0.4, "learning_rate": 1.7855999999999998e-07, "loss": 1.6148, "step": 1012 }, { "epoch": 0.4, "eval_loss": 1.510151743888855, "eval_runtime": 0.7345, "eval_samples_per_second": 5.446, "eval_steps_per_second": 1.361, "step": 1012 }, { "epoch": 0.41, "learning_rate": 1.7808e-07, "loss": 1.6064, "step": 1016 }, { "epoch": 0.41, "eval_loss": 1.5073630809783936, "eval_runtime": 0.5414, "eval_samples_per_second": 7.388, "eval_steps_per_second": 1.847, "step": 1016 }, { "epoch": 0.41, "learning_rate": 1.7759999999999998e-07, "loss": 1.6342, "step": 1020 }, { "epoch": 0.41, "eval_loss": 1.5052520036697388, "eval_runtime": 0.516, "eval_samples_per_second": 7.751, "eval_steps_per_second": 1.938, "step": 1020 }, { "epoch": 0.41, "learning_rate": 1.7712000000000001e-07, "loss": 1.605, "step": 1024 }, { "epoch": 0.41, "eval_loss": 1.5025243759155273, "eval_runtime": 0.5373, "eval_samples_per_second": 7.445, "eval_steps_per_second": 1.861, "step": 1024 }, { "epoch": 0.41, "learning_rate": 1.7664e-07, "loss": 1.6121, "step": 1028 }, { "epoch": 0.41, "eval_loss": 1.500252604484558, "eval_runtime": 0.5476, "eval_samples_per_second": 7.304, "eval_steps_per_second": 1.826, "step": 1028 }, { "epoch": 0.41, "learning_rate": 1.7616e-07, "loss": 1.617, "step": 1032 }, { "epoch": 0.41, "eval_loss": 1.4977892637252808, "eval_runtime": 0.5255, "eval_samples_per_second": 7.612, "eval_steps_per_second": 1.903, "step": 1032 }, { "epoch": 0.41, "learning_rate": 1.7568e-07, "loss": 1.5897, "step": 1036 }, { "epoch": 0.41, "eval_loss": 1.4954513311386108, "eval_runtime": 0.7255, "eval_samples_per_second": 5.513, "eval_steps_per_second": 1.378, "step": 1036 }, { "epoch": 0.42, "learning_rate": 1.7519999999999998e-07, "loss": 1.6022, "step": 1040 }, { "epoch": 0.42, "eval_loss": 1.4929691553115845, "eval_runtime": 0.6954, "eval_samples_per_second": 5.752, "eval_steps_per_second": 1.438, "step": 1040 }, { "epoch": 0.42, "learning_rate": 1.7472e-07, "loss": 1.5748, "step": 1044 }, { "epoch": 0.42, "eval_loss": 1.4902769327163696, "eval_runtime": 0.8026, "eval_samples_per_second": 4.984, "eval_steps_per_second": 1.246, "step": 1044 }, { "epoch": 0.42, "learning_rate": 1.7423999999999998e-07, "loss": 1.5974, "step": 1048 }, { "epoch": 0.42, "eval_loss": 1.4878779649734497, "eval_runtime": 0.7804, "eval_samples_per_second": 5.125, "eval_steps_per_second": 1.281, "step": 1048 }, { "epoch": 0.42, "learning_rate": 1.7376000000000002e-07, "loss": 1.6126, "step": 1052 }, { "epoch": 0.42, "eval_loss": 1.48554527759552, "eval_runtime": 0.5423, "eval_samples_per_second": 7.376, "eval_steps_per_second": 1.844, "step": 1052 }, { "epoch": 0.42, "learning_rate": 1.7328e-07, "loss": 1.6189, "step": 1056 }, { "epoch": 0.42, "eval_loss": 1.4827589988708496, "eval_runtime": 0.5326, "eval_samples_per_second": 7.511, "eval_steps_per_second": 1.878, "step": 1056 }, { "epoch": 0.42, "learning_rate": 1.7279999999999999e-07, "loss": 1.5916, "step": 1060 }, { "epoch": 0.42, "eval_loss": 1.4803836345672607, "eval_runtime": 0.5273, "eval_samples_per_second": 7.585, "eval_steps_per_second": 1.896, "step": 1060 }, { "epoch": 0.43, "learning_rate": 1.7232e-07, "loss": 1.5938, "step": 1064 }, { "epoch": 0.43, "eval_loss": 1.4778516292572021, "eval_runtime": 0.5436, "eval_samples_per_second": 7.358, "eval_steps_per_second": 1.839, "step": 1064 }, { "epoch": 0.43, "learning_rate": 1.7183999999999998e-07, "loss": 1.6026, "step": 1068 }, { "epoch": 0.43, "eval_loss": 1.475649118423462, "eval_runtime": 0.5298, "eval_samples_per_second": 7.549, "eval_steps_per_second": 1.887, "step": 1068 }, { "epoch": 0.43, "learning_rate": 1.7136e-07, "loss": 1.5687, "step": 1072 }, { "epoch": 0.43, "eval_loss": 1.473489761352539, "eval_runtime": 0.7191, "eval_samples_per_second": 5.562, "eval_steps_per_second": 1.391, "step": 1072 }, { "epoch": 0.43, "learning_rate": 1.7087999999999998e-07, "loss": 1.5413, "step": 1076 }, { "epoch": 0.43, "eval_loss": 1.4712145328521729, "eval_runtime": 0.7022, "eval_samples_per_second": 5.696, "eval_steps_per_second": 1.424, "step": 1076 }, { "epoch": 0.43, "learning_rate": 1.7039999999999996e-07, "loss": 1.5778, "step": 1080 }, { "epoch": 0.43, "eval_loss": 1.4688694477081299, "eval_runtime": 0.7707, "eval_samples_per_second": 5.19, "eval_steps_per_second": 1.298, "step": 1080 }, { "epoch": 0.43, "learning_rate": 1.6992e-07, "loss": 1.5731, "step": 1084 }, { "epoch": 0.43, "eval_loss": 1.4664225578308105, "eval_runtime": 0.5386, "eval_samples_per_second": 7.427, "eval_steps_per_second": 1.857, "step": 1084 }, { "epoch": 0.44, "learning_rate": 1.6944e-07, "loss": 1.5625, "step": 1088 }, { "epoch": 0.44, "eval_loss": 1.464247465133667, "eval_runtime": 0.5484, "eval_samples_per_second": 7.294, "eval_steps_per_second": 1.823, "step": 1088 }, { "epoch": 0.44, "learning_rate": 1.6896e-07, "loss": 1.55, "step": 1092 }, { "epoch": 0.44, "eval_loss": 1.4620987176895142, "eval_runtime": 0.5342, "eval_samples_per_second": 7.488, "eval_steps_per_second": 1.872, "step": 1092 }, { "epoch": 0.44, "learning_rate": 1.6847999999999998e-07, "loss": 1.5852, "step": 1096 }, { "epoch": 0.44, "eval_loss": 1.459930419921875, "eval_runtime": 0.5332, "eval_samples_per_second": 7.501, "eval_steps_per_second": 1.875, "step": 1096 }, { "epoch": 0.44, "learning_rate": 1.68e-07, "loss": 1.5614, "step": 1100 }, { "epoch": 0.44, "eval_loss": 1.4578797817230225, "eval_runtime": 0.5504, "eval_samples_per_second": 7.268, "eval_steps_per_second": 1.817, "step": 1100 }, { "epoch": 0.44, "learning_rate": 1.6752e-07, "loss": 1.5619, "step": 1104 }, { "epoch": 0.44, "eval_loss": 1.4559952020645142, "eval_runtime": 0.7448, "eval_samples_per_second": 5.37, "eval_steps_per_second": 1.343, "step": 1104 }, { "epoch": 0.44, "learning_rate": 1.6704e-07, "loss": 1.5658, "step": 1108 }, { "epoch": 0.44, "eval_loss": 1.454249382019043, "eval_runtime": 0.773, "eval_samples_per_second": 5.174, "eval_steps_per_second": 1.294, "step": 1108 }, { "epoch": 0.44, "learning_rate": 1.6656e-07, "loss": 1.5699, "step": 1112 }, { "epoch": 0.44, "eval_loss": 1.4521348476409912, "eval_runtime": 0.8287, "eval_samples_per_second": 4.827, "eval_steps_per_second": 1.207, "step": 1112 }, { "epoch": 0.45, "learning_rate": 1.6608e-07, "loss": 1.5738, "step": 1116 }, { "epoch": 0.45, "eval_loss": 1.450175404548645, "eval_runtime": 0.5398, "eval_samples_per_second": 7.41, "eval_steps_per_second": 1.852, "step": 1116 }, { "epoch": 0.45, "learning_rate": 1.656e-07, "loss": 1.5823, "step": 1120 }, { "epoch": 0.45, "eval_loss": 1.4481428861618042, "eval_runtime": 0.5592, "eval_samples_per_second": 7.153, "eval_steps_per_second": 1.788, "step": 1120 }, { "epoch": 0.45, "learning_rate": 1.6511999999999999e-07, "loss": 1.5425, "step": 1124 }, { "epoch": 0.45, "eval_loss": 1.4458932876586914, "eval_runtime": 0.5511, "eval_samples_per_second": 7.259, "eval_steps_per_second": 1.815, "step": 1124 }, { "epoch": 0.45, "learning_rate": 1.6463999999999997e-07, "loss": 1.5604, "step": 1128 }, { "epoch": 0.45, "eval_loss": 1.4438304901123047, "eval_runtime": 0.5355, "eval_samples_per_second": 7.47, "eval_steps_per_second": 1.867, "step": 1128 }, { "epoch": 0.45, "learning_rate": 1.6416e-07, "loss": 1.5562, "step": 1132 }, { "epoch": 0.45, "eval_loss": 1.442002773284912, "eval_runtime": 0.5332, "eval_samples_per_second": 7.502, "eval_steps_per_second": 1.876, "step": 1132 }, { "epoch": 0.45, "learning_rate": 1.6368e-07, "loss": 1.555, "step": 1136 }, { "epoch": 0.45, "eval_loss": 1.4399393796920776, "eval_runtime": 0.7524, "eval_samples_per_second": 5.316, "eval_steps_per_second": 1.329, "step": 1136 }, { "epoch": 0.46, "learning_rate": 1.632e-07, "loss": 1.5158, "step": 1140 }, { "epoch": 0.46, "eval_loss": 1.437983512878418, "eval_runtime": 0.7269, "eval_samples_per_second": 5.503, "eval_steps_per_second": 1.376, "step": 1140 }, { "epoch": 0.46, "learning_rate": 1.6272e-07, "loss": 1.5272, "step": 1144 }, { "epoch": 0.46, "eval_loss": 1.435863733291626, "eval_runtime": 0.7356, "eval_samples_per_second": 5.438, "eval_steps_per_second": 1.359, "step": 1144 }, { "epoch": 0.46, "learning_rate": 1.6223999999999998e-07, "loss": 1.5467, "step": 1148 }, { "epoch": 0.46, "eval_loss": 1.4338979721069336, "eval_runtime": 0.5695, "eval_samples_per_second": 7.023, "eval_steps_per_second": 1.756, "step": 1148 }, { "epoch": 0.46, "learning_rate": 1.6176e-07, "loss": 1.5399, "step": 1152 }, { "epoch": 0.46, "eval_loss": 1.4317151308059692, "eval_runtime": 0.5215, "eval_samples_per_second": 7.669, "eval_steps_per_second": 1.917, "step": 1152 }, { "epoch": 0.46, "learning_rate": 1.6127999999999997e-07, "loss": 1.5221, "step": 1156 }, { "epoch": 0.46, "eval_loss": 1.4296718835830688, "eval_runtime": 0.5471, "eval_samples_per_second": 7.311, "eval_steps_per_second": 1.828, "step": 1156 }, { "epoch": 0.46, "learning_rate": 1.608e-07, "loss": 1.5022, "step": 1160 }, { "epoch": 0.46, "eval_loss": 1.4277141094207764, "eval_runtime": 0.5395, "eval_samples_per_second": 7.414, "eval_steps_per_second": 1.853, "step": 1160 }, { "epoch": 0.47, "learning_rate": 1.6032e-07, "loss": 1.5385, "step": 1164 }, { "epoch": 0.47, "eval_loss": 1.4257354736328125, "eval_runtime": 0.5342, "eval_samples_per_second": 7.487, "eval_steps_per_second": 1.872, "step": 1164 }, { "epoch": 0.47, "learning_rate": 1.5984e-07, "loss": 1.5042, "step": 1168 }, { "epoch": 0.47, "eval_loss": 1.4236301183700562, "eval_runtime": 0.6434, "eval_samples_per_second": 6.217, "eval_steps_per_second": 1.554, "step": 1168 }, { "epoch": 0.47, "learning_rate": 1.5936e-07, "loss": 1.5007, "step": 1172 }, { "epoch": 0.47, "eval_loss": 1.421656608581543, "eval_runtime": 0.7224, "eval_samples_per_second": 5.537, "eval_steps_per_second": 1.384, "step": 1172 }, { "epoch": 0.47, "learning_rate": 1.5887999999999998e-07, "loss": 1.5323, "step": 1176 }, { "epoch": 0.47, "eval_loss": 1.4196075201034546, "eval_runtime": 0.7946, "eval_samples_per_second": 5.034, "eval_steps_per_second": 1.259, "step": 1176 }, { "epoch": 0.47, "learning_rate": 1.584e-07, "loss": 1.5269, "step": 1180 }, { "epoch": 0.47, "eval_loss": 1.4174154996871948, "eval_runtime": 0.82, "eval_samples_per_second": 4.878, "eval_steps_per_second": 1.22, "step": 1180 }, { "epoch": 0.47, "learning_rate": 1.5791999999999997e-07, "loss": 1.5379, "step": 1184 }, { "epoch": 0.47, "eval_loss": 1.4156051874160767, "eval_runtime": 0.5319, "eval_samples_per_second": 7.52, "eval_steps_per_second": 1.88, "step": 1184 }, { "epoch": 0.48, "learning_rate": 1.5744e-07, "loss": 1.522, "step": 1188 }, { "epoch": 0.48, "eval_loss": 1.4136687517166138, "eval_runtime": 0.5286, "eval_samples_per_second": 7.567, "eval_steps_per_second": 1.892, "step": 1188 }, { "epoch": 0.48, "learning_rate": 1.5696e-07, "loss": 1.506, "step": 1192 }, { "epoch": 0.48, "eval_loss": 1.4115678071975708, "eval_runtime": 0.553, "eval_samples_per_second": 7.233, "eval_steps_per_second": 1.808, "step": 1192 }, { "epoch": 0.48, "learning_rate": 1.5647999999999998e-07, "loss": 1.4986, "step": 1196 }, { "epoch": 0.48, "eval_loss": 1.409631371498108, "eval_runtime": 0.5273, "eval_samples_per_second": 7.585, "eval_steps_per_second": 1.896, "step": 1196 }, { "epoch": 0.48, "learning_rate": 1.56e-07, "loss": 1.4918, "step": 1200 }, { "epoch": 0.48, "eval_loss": 1.407455563545227, "eval_runtime": 0.5269, "eval_samples_per_second": 7.592, "eval_steps_per_second": 1.898, "step": 1200 }, { "epoch": 0.48, "learning_rate": 1.5551999999999998e-07, "loss": 1.5124, "step": 1204 }, { "epoch": 0.48, "eval_loss": 1.4056380987167358, "eval_runtime": 0.7536, "eval_samples_per_second": 5.308, "eval_steps_per_second": 1.327, "step": 1204 }, { "epoch": 0.48, "learning_rate": 1.5504000000000002e-07, "loss": 1.4926, "step": 1208 }, { "epoch": 0.48, "eval_loss": 1.403800368309021, "eval_runtime": 0.7248, "eval_samples_per_second": 5.519, "eval_steps_per_second": 1.38, "step": 1208 }, { "epoch": 0.48, "learning_rate": 1.5456e-07, "loss": 1.5053, "step": 1212 }, { "epoch": 0.48, "eval_loss": 1.40152907371521, "eval_runtime": 0.7447, "eval_samples_per_second": 5.371, "eval_steps_per_second": 1.343, "step": 1212 }, { "epoch": 0.49, "learning_rate": 1.5408e-07, "loss": 1.5043, "step": 1216 }, { "epoch": 0.49, "eval_loss": 1.3996310234069824, "eval_runtime": 0.738, "eval_samples_per_second": 5.42, "eval_steps_per_second": 1.355, "step": 1216 }, { "epoch": 0.49, "learning_rate": 1.536e-07, "loss": 1.5068, "step": 1220 }, { "epoch": 0.49, "eval_loss": 1.3975541591644287, "eval_runtime": 0.5275, "eval_samples_per_second": 7.583, "eval_steps_per_second": 1.896, "step": 1220 }, { "epoch": 0.49, "learning_rate": 1.5311999999999998e-07, "loss": 1.5039, "step": 1224 }, { "epoch": 0.49, "eval_loss": 1.3954721689224243, "eval_runtime": 0.5317, "eval_samples_per_second": 7.523, "eval_steps_per_second": 1.881, "step": 1224 }, { "epoch": 0.49, "learning_rate": 1.5264e-07, "loss": 1.4772, "step": 1228 }, { "epoch": 0.49, "eval_loss": 1.3933203220367432, "eval_runtime": 0.5283, "eval_samples_per_second": 7.571, "eval_steps_per_second": 1.893, "step": 1228 }, { "epoch": 0.49, "learning_rate": 1.5215999999999998e-07, "loss": 1.4873, "step": 1232 }, { "epoch": 0.49, "eval_loss": 1.3916254043579102, "eval_runtime": 0.5344, "eval_samples_per_second": 7.485, "eval_steps_per_second": 1.871, "step": 1232 }, { "epoch": 0.49, "learning_rate": 1.5168000000000002e-07, "loss": 1.4977, "step": 1236 }, { "epoch": 0.49, "eval_loss": 1.3896205425262451, "eval_runtime": 0.5249, "eval_samples_per_second": 7.62, "eval_steps_per_second": 1.905, "step": 1236 }, { "epoch": 0.5, "learning_rate": 1.512e-07, "loss": 1.5016, "step": 1240 }, { "epoch": 0.5, "eval_loss": 1.3873213529586792, "eval_runtime": 0.7136, "eval_samples_per_second": 5.605, "eval_steps_per_second": 1.401, "step": 1240 }, { "epoch": 0.5, "learning_rate": 1.5072e-07, "loss": 1.495, "step": 1244 }, { "epoch": 0.5, "eval_loss": 1.3854175806045532, "eval_runtime": 0.7372, "eval_samples_per_second": 5.426, "eval_steps_per_second": 1.357, "step": 1244 }, { "epoch": 0.5, "learning_rate": 1.5024e-07, "loss": 1.4803, "step": 1248 }, { "epoch": 0.5, "eval_loss": 1.3834645748138428, "eval_runtime": 0.7836, "eval_samples_per_second": 5.104, "eval_steps_per_second": 1.276, "step": 1248 }, { "epoch": 0.5, "learning_rate": 1.4975999999999999e-07, "loss": 1.4842, "step": 1252 }, { "epoch": 0.5, "eval_loss": 1.381633996963501, "eval_runtime": 0.5401, "eval_samples_per_second": 7.405, "eval_steps_per_second": 1.851, "step": 1252 }, { "epoch": 0.5, "learning_rate": 1.4928e-07, "loss": 1.4762, "step": 1256 }, { "epoch": 0.5, "eval_loss": 1.379853367805481, "eval_runtime": 0.5233, "eval_samples_per_second": 7.644, "eval_steps_per_second": 1.911, "step": 1256 }, { "epoch": 0.5, "learning_rate": 1.4879999999999998e-07, "loss": 1.4859, "step": 1260 }, { "epoch": 0.5, "eval_loss": 1.3780815601348877, "eval_runtime": 0.5276, "eval_samples_per_second": 7.582, "eval_steps_per_second": 1.895, "step": 1260 }, { "epoch": 0.51, "learning_rate": 1.4832e-07, "loss": 1.4948, "step": 1264 }, { "epoch": 0.51, "eval_loss": 1.3763624429702759, "eval_runtime": 0.5355, "eval_samples_per_second": 7.469, "eval_steps_per_second": 1.867, "step": 1264 }, { "epoch": 0.51, "learning_rate": 1.4784e-07, "loss": 1.4851, "step": 1268 }, { "epoch": 0.51, "eval_loss": 1.374289631843567, "eval_runtime": 0.5235, "eval_samples_per_second": 7.64, "eval_steps_per_second": 1.91, "step": 1268 }, { "epoch": 0.51, "learning_rate": 1.4736e-07, "loss": 1.4749, "step": 1272 }, { "epoch": 0.51, "eval_loss": 1.3724154233932495, "eval_runtime": 0.6384, "eval_samples_per_second": 6.266, "eval_steps_per_second": 1.566, "step": 1272 }, { "epoch": 0.51, "learning_rate": 1.4687999999999998e-07, "loss": 1.4594, "step": 1276 }, { "epoch": 0.51, "eval_loss": 1.3709261417388916, "eval_runtime": 0.7249, "eval_samples_per_second": 5.518, "eval_steps_per_second": 1.379, "step": 1276 }, { "epoch": 0.51, "learning_rate": 1.464e-07, "loss": 1.4517, "step": 1280 }, { "epoch": 0.51, "eval_loss": 1.3691537380218506, "eval_runtime": 0.7303, "eval_samples_per_second": 5.477, "eval_steps_per_second": 1.369, "step": 1280 }, { "epoch": 0.51, "learning_rate": 1.4592e-07, "loss": 1.4239, "step": 1284 }, { "epoch": 0.51, "eval_loss": 1.3673396110534668, "eval_runtime": 0.7929, "eval_samples_per_second": 5.044, "eval_steps_per_second": 1.261, "step": 1284 }, { "epoch": 0.52, "learning_rate": 1.4543999999999998e-07, "loss": 1.4775, "step": 1288 }, { "epoch": 0.52, "eval_loss": 1.3657190799713135, "eval_runtime": 0.5509, "eval_samples_per_second": 7.261, "eval_steps_per_second": 1.815, "step": 1288 }, { "epoch": 0.52, "learning_rate": 1.4496e-07, "loss": 1.4483, "step": 1292 }, { "epoch": 0.52, "eval_loss": 1.3642776012420654, "eval_runtime": 0.5236, "eval_samples_per_second": 7.639, "eval_steps_per_second": 1.91, "step": 1292 }, { "epoch": 0.52, "learning_rate": 1.4447999999999998e-07, "loss": 1.4688, "step": 1296 }, { "epoch": 0.52, "eval_loss": 1.3624374866485596, "eval_runtime": 0.5281, "eval_samples_per_second": 7.574, "eval_steps_per_second": 1.893, "step": 1296 }, { "epoch": 0.52, "learning_rate": 1.44e-07, "loss": 1.4566, "step": 1300 }, { "epoch": 0.52, "eval_loss": 1.3608499765396118, "eval_runtime": 0.5346, "eval_samples_per_second": 7.482, "eval_steps_per_second": 1.871, "step": 1300 }, { "epoch": 0.52, "learning_rate": 1.4352e-07, "loss": 1.4592, "step": 1304 }, { "epoch": 0.52, "eval_loss": 1.3591777086257935, "eval_runtime": 0.543, "eval_samples_per_second": 7.367, "eval_steps_per_second": 1.842, "step": 1304 }, { "epoch": 0.52, "learning_rate": 1.4304e-07, "loss": 1.4505, "step": 1308 }, { "epoch": 0.52, "eval_loss": 1.357291340827942, "eval_runtime": 0.7548, "eval_samples_per_second": 5.299, "eval_steps_per_second": 1.325, "step": 1308 }, { "epoch": 0.52, "learning_rate": 1.4256e-07, "loss": 1.4304, "step": 1312 }, { "epoch": 0.52, "eval_loss": 1.3557498455047607, "eval_runtime": 0.7262, "eval_samples_per_second": 5.508, "eval_steps_per_second": 1.377, "step": 1312 }, { "epoch": 0.53, "learning_rate": 1.4208e-07, "loss": 1.4691, "step": 1316 }, { "epoch": 0.53, "eval_loss": 1.3540558815002441, "eval_runtime": 0.7121, "eval_samples_per_second": 5.617, "eval_steps_per_second": 1.404, "step": 1316 }, { "epoch": 0.53, "learning_rate": 1.416e-07, "loss": 1.4423, "step": 1320 }, { "epoch": 0.53, "eval_loss": 1.3522251844406128, "eval_runtime": 0.7774, "eval_samples_per_second": 5.145, "eval_steps_per_second": 1.286, "step": 1320 }, { "epoch": 0.53, "learning_rate": 1.4111999999999998e-07, "loss": 1.4301, "step": 1324 }, { "epoch": 0.53, "eval_loss": 1.3508257865905762, "eval_runtime": 0.5433, "eval_samples_per_second": 7.362, "eval_steps_per_second": 1.841, "step": 1324 }, { "epoch": 0.53, "learning_rate": 1.4064e-07, "loss": 1.4422, "step": 1328 }, { "epoch": 0.53, "eval_loss": 1.3490896224975586, "eval_runtime": 0.5369, "eval_samples_per_second": 7.451, "eval_steps_per_second": 1.863, "step": 1328 }, { "epoch": 0.53, "learning_rate": 1.4016e-07, "loss": 1.4577, "step": 1332 }, { "epoch": 0.53, "eval_loss": 1.347461223602295, "eval_runtime": 0.5223, "eval_samples_per_second": 7.658, "eval_steps_per_second": 1.915, "step": 1332 }, { "epoch": 0.53, "learning_rate": 1.3968e-07, "loss": 1.4541, "step": 1336 }, { "epoch": 0.53, "eval_loss": 1.3457545042037964, "eval_runtime": 0.5399, "eval_samples_per_second": 7.409, "eval_steps_per_second": 1.852, "step": 1336 }, { "epoch": 0.54, "learning_rate": 1.392e-07, "loss": 1.4246, "step": 1340 }, { "epoch": 0.54, "eval_loss": 1.343980073928833, "eval_runtime": 0.5481, "eval_samples_per_second": 7.297, "eval_steps_per_second": 1.824, "step": 1340 }, { "epoch": 0.54, "learning_rate": 1.3872e-07, "loss": 1.4507, "step": 1344 }, { "epoch": 0.54, "eval_loss": 1.3423739671707153, "eval_runtime": 0.7414, "eval_samples_per_second": 5.395, "eval_steps_per_second": 1.349, "step": 1344 }, { "epoch": 0.54, "learning_rate": 1.3824e-07, "loss": 1.4312, "step": 1348 }, { "epoch": 0.54, "eval_loss": 1.3408253192901611, "eval_runtime": 0.7783, "eval_samples_per_second": 5.139, "eval_steps_per_second": 1.285, "step": 1348 }, { "epoch": 0.54, "learning_rate": 1.3775999999999998e-07, "loss": 1.4394, "step": 1352 }, { "epoch": 0.54, "eval_loss": 1.339220404624939, "eval_runtime": 0.771, "eval_samples_per_second": 5.188, "eval_steps_per_second": 1.297, "step": 1352 }, { "epoch": 0.54, "learning_rate": 1.3728e-07, "loss": 1.4271, "step": 1356 }, { "epoch": 0.54, "eval_loss": 1.3373547792434692, "eval_runtime": 0.5264, "eval_samples_per_second": 7.599, "eval_steps_per_second": 1.9, "step": 1356 }, { "epoch": 0.54, "learning_rate": 1.368e-07, "loss": 1.4081, "step": 1360 }, { "epoch": 0.54, "eval_loss": 1.3356679677963257, "eval_runtime": 0.5397, "eval_samples_per_second": 7.412, "eval_steps_per_second": 1.853, "step": 1360 }, { "epoch": 0.55, "learning_rate": 1.3632e-07, "loss": 1.4314, "step": 1364 }, { "epoch": 0.55, "eval_loss": 1.333927035331726, "eval_runtime": 0.5418, "eval_samples_per_second": 7.382, "eval_steps_per_second": 1.846, "step": 1364 }, { "epoch": 0.55, "learning_rate": 1.3583999999999998e-07, "loss": 1.4359, "step": 1368 }, { "epoch": 0.55, "eval_loss": 1.3325647115707397, "eval_runtime": 0.5464, "eval_samples_per_second": 7.321, "eval_steps_per_second": 1.83, "step": 1368 }, { "epoch": 0.55, "learning_rate": 1.3536e-07, "loss": 1.4381, "step": 1372 }, { "epoch": 0.55, "eval_loss": 1.3307887315750122, "eval_runtime": 0.5493, "eval_samples_per_second": 7.282, "eval_steps_per_second": 1.82, "step": 1372 }, { "epoch": 0.55, "learning_rate": 1.3488e-07, "loss": 1.4219, "step": 1376 }, { "epoch": 0.55, "eval_loss": 1.3293663263320923, "eval_runtime": 0.5921, "eval_samples_per_second": 6.755, "eval_steps_per_second": 1.689, "step": 1376 }, { "epoch": 0.55, "learning_rate": 1.3439999999999999e-07, "loss": 1.4669, "step": 1380 }, { "epoch": 0.55, "eval_loss": 1.3278565406799316, "eval_runtime": 0.7788, "eval_samples_per_second": 5.136, "eval_steps_per_second": 1.284, "step": 1380 }, { "epoch": 0.55, "learning_rate": 1.3392e-07, "loss": 1.4163, "step": 1384 }, { "epoch": 0.55, "eval_loss": 1.3260074853897095, "eval_runtime": 0.8128, "eval_samples_per_second": 4.921, "eval_steps_per_second": 1.23, "step": 1384 }, { "epoch": 0.56, "learning_rate": 1.3343999999999998e-07, "loss": 1.4153, "step": 1388 }, { "epoch": 0.56, "eval_loss": 1.3242360353469849, "eval_runtime": 0.8002, "eval_samples_per_second": 4.999, "eval_steps_per_second": 1.25, "step": 1388 }, { "epoch": 0.56, "learning_rate": 1.3296e-07, "loss": 1.4506, "step": 1392 }, { "epoch": 0.56, "eval_loss": 1.3229784965515137, "eval_runtime": 0.5395, "eval_samples_per_second": 7.414, "eval_steps_per_second": 1.854, "step": 1392 }, { "epoch": 0.56, "learning_rate": 1.3247999999999998e-07, "loss": 1.4229, "step": 1396 }, { "epoch": 0.56, "eval_loss": 1.3213036060333252, "eval_runtime": 0.5374, "eval_samples_per_second": 7.444, "eval_steps_per_second": 1.861, "step": 1396 }, { "epoch": 0.56, "learning_rate": 1.32e-07, "loss": 1.4218, "step": 1400 }, { "epoch": 0.56, "eval_loss": 1.3196250200271606, "eval_runtime": 0.5404, "eval_samples_per_second": 7.402, "eval_steps_per_second": 1.851, "step": 1400 }, { "epoch": 0.56, "learning_rate": 1.3152e-07, "loss": 1.4185, "step": 1404 }, { "epoch": 0.56, "eval_loss": 1.3180840015411377, "eval_runtime": 0.5573, "eval_samples_per_second": 7.177, "eval_steps_per_second": 1.794, "step": 1404 }, { "epoch": 0.56, "learning_rate": 1.3104e-07, "loss": 1.4283, "step": 1408 }, { "epoch": 0.56, "eval_loss": 1.316424012184143, "eval_runtime": 0.5204, "eval_samples_per_second": 7.686, "eval_steps_per_second": 1.922, "step": 1408 }, { "epoch": 0.56, "learning_rate": 1.3056e-07, "loss": 1.4202, "step": 1412 }, { "epoch": 0.56, "eval_loss": 1.3148062229156494, "eval_runtime": 0.7628, "eval_samples_per_second": 5.244, "eval_steps_per_second": 1.311, "step": 1412 }, { "epoch": 0.57, "learning_rate": 1.3007999999999998e-07, "loss": 1.3736, "step": 1416 }, { "epoch": 0.57, "eval_loss": 1.3131170272827148, "eval_runtime": 0.7763, "eval_samples_per_second": 5.153, "eval_steps_per_second": 1.288, "step": 1416 }, { "epoch": 0.57, "learning_rate": 1.296e-07, "loss": 1.4332, "step": 1420 }, { "epoch": 0.57, "eval_loss": 1.311560869216919, "eval_runtime": 0.7312, "eval_samples_per_second": 5.471, "eval_steps_per_second": 1.368, "step": 1420 }, { "epoch": 0.57, "learning_rate": 1.2912e-07, "loss": 1.4287, "step": 1424 }, { "epoch": 0.57, "eval_loss": 1.309916615486145, "eval_runtime": 0.6738, "eval_samples_per_second": 5.936, "eval_steps_per_second": 1.484, "step": 1424 }, { "epoch": 0.57, "learning_rate": 1.2864e-07, "loss": 1.4175, "step": 1428 }, { "epoch": 0.57, "eval_loss": 1.3080803155899048, "eval_runtime": 0.5396, "eval_samples_per_second": 7.412, "eval_steps_per_second": 1.853, "step": 1428 }, { "epoch": 0.57, "learning_rate": 1.2816e-07, "loss": 1.4152, "step": 1432 }, { "epoch": 0.57, "eval_loss": 1.3066335916519165, "eval_runtime": 0.5523, "eval_samples_per_second": 7.243, "eval_steps_per_second": 1.811, "step": 1432 }, { "epoch": 0.57, "learning_rate": 1.2768e-07, "loss": 1.4036, "step": 1436 }, { "epoch": 0.57, "eval_loss": 1.3054327964782715, "eval_runtime": 0.5404, "eval_samples_per_second": 7.402, "eval_steps_per_second": 1.851, "step": 1436 }, { "epoch": 0.58, "learning_rate": 1.272e-07, "loss": 1.4033, "step": 1440 }, { "epoch": 0.58, "eval_loss": 1.3037904500961304, "eval_runtime": 0.5534, "eval_samples_per_second": 7.228, "eval_steps_per_second": 1.807, "step": 1440 }, { "epoch": 0.58, "learning_rate": 1.2671999999999999e-07, "loss": 1.4095, "step": 1444 }, { "epoch": 0.58, "eval_loss": 1.302278757095337, "eval_runtime": 0.7546, "eval_samples_per_second": 5.301, "eval_steps_per_second": 1.325, "step": 1444 }, { "epoch": 0.58, "learning_rate": 1.2624e-07, "loss": 1.4129, "step": 1448 }, { "epoch": 0.58, "eval_loss": 1.3008112907409668, "eval_runtime": 0.7157, "eval_samples_per_second": 5.589, "eval_steps_per_second": 1.397, "step": 1448 }, { "epoch": 0.58, "learning_rate": 1.2576e-07, "loss": 1.3838, "step": 1452 }, { "epoch": 0.58, "eval_loss": 1.2994916439056396, "eval_runtime": 0.7773, "eval_samples_per_second": 5.146, "eval_steps_per_second": 1.286, "step": 1452 }, { "epoch": 0.58, "learning_rate": 1.2528e-07, "loss": 1.3939, "step": 1456 }, { "epoch": 0.58, "eval_loss": 1.2979990243911743, "eval_runtime": 0.8203, "eval_samples_per_second": 4.876, "eval_steps_per_second": 1.219, "step": 1456 }, { "epoch": 0.58, "learning_rate": 1.2479999999999998e-07, "loss": 1.4023, "step": 1460 }, { "epoch": 0.58, "eval_loss": 1.2964202165603638, "eval_runtime": 0.5392, "eval_samples_per_second": 7.419, "eval_steps_per_second": 1.855, "step": 1460 }, { "epoch": 0.59, "learning_rate": 1.2432e-07, "loss": 1.3751, "step": 1464 }, { "epoch": 0.59, "eval_loss": 1.2952665090560913, "eval_runtime": 0.533, "eval_samples_per_second": 7.505, "eval_steps_per_second": 1.876, "step": 1464 }, { "epoch": 0.59, "learning_rate": 1.2384e-07, "loss": 1.3657, "step": 1468 }, { "epoch": 0.59, "eval_loss": 1.2935295104980469, "eval_runtime": 0.5428, "eval_samples_per_second": 7.369, "eval_steps_per_second": 1.842, "step": 1468 }, { "epoch": 0.59, "learning_rate": 1.2336e-07, "loss": 1.375, "step": 1472 }, { "epoch": 0.59, "eval_loss": 1.292738914489746, "eval_runtime": 0.5365, "eval_samples_per_second": 7.456, "eval_steps_per_second": 1.864, "step": 1472 }, { "epoch": 0.59, "learning_rate": 1.2288e-07, "loss": 1.3846, "step": 1476 }, { "epoch": 0.59, "eval_loss": 1.291104793548584, "eval_runtime": 0.5462, "eval_samples_per_second": 7.323, "eval_steps_per_second": 1.831, "step": 1476 }, { "epoch": 0.59, "learning_rate": 1.2239999999999998e-07, "loss": 1.4192, "step": 1480 }, { "epoch": 0.59, "eval_loss": 1.2900675535202026, "eval_runtime": 0.7504, "eval_samples_per_second": 5.33, "eval_steps_per_second": 1.333, "step": 1480 }, { "epoch": 0.59, "learning_rate": 1.2192e-07, "loss": 1.3629, "step": 1484 }, { "epoch": 0.59, "eval_loss": 1.2886391878128052, "eval_runtime": 0.7924, "eval_samples_per_second": 5.048, "eval_steps_per_second": 1.262, "step": 1484 }, { "epoch": 0.6, "learning_rate": 1.2143999999999998e-07, "loss": 1.3947, "step": 1488 }, { "epoch": 0.6, "eval_loss": 1.287713646888733, "eval_runtime": 0.7522, "eval_samples_per_second": 5.318, "eval_steps_per_second": 1.329, "step": 1488 }, { "epoch": 0.6, "learning_rate": 1.2096e-07, "loss": 1.3485, "step": 1492 }, { "epoch": 0.6, "eval_loss": 1.2862787246704102, "eval_runtime": 0.5402, "eval_samples_per_second": 7.404, "eval_steps_per_second": 1.851, "step": 1492 }, { "epoch": 0.6, "learning_rate": 1.2048e-07, "loss": 1.405, "step": 1496 }, { "epoch": 0.6, "eval_loss": 1.2850462198257446, "eval_runtime": 0.5452, "eval_samples_per_second": 7.337, "eval_steps_per_second": 1.834, "step": 1496 }, { "epoch": 0.6, "learning_rate": 1.2e-07, "loss": 1.3758, "step": 1500 }, { "epoch": 0.6, "eval_loss": 1.2840522527694702, "eval_runtime": 0.541, "eval_samples_per_second": 7.394, "eval_steps_per_second": 1.849, "step": 1500 }, { "epoch": 0.6, "learning_rate": 1.1951999999999997e-07, "loss": 1.3832, "step": 1504 }, { "epoch": 0.6, "eval_loss": 1.282808542251587, "eval_runtime": 0.5449, "eval_samples_per_second": 7.34, "eval_steps_per_second": 1.835, "step": 1504 }, { "epoch": 0.6, "learning_rate": 1.1903999999999999e-07, "loss": 1.3314, "step": 1508 }, { "epoch": 0.6, "eval_loss": 1.2814455032348633, "eval_runtime": 0.5367, "eval_samples_per_second": 7.453, "eval_steps_per_second": 1.863, "step": 1508 }, { "epoch": 0.6, "learning_rate": 1.1856e-07, "loss": 1.3458, "step": 1512 }, { "epoch": 0.6, "eval_loss": 1.2800332307815552, "eval_runtime": 0.5306, "eval_samples_per_second": 7.538, "eval_steps_per_second": 1.884, "step": 1512 }, { "epoch": 0.61, "learning_rate": 1.1808e-07, "loss": 1.357, "step": 1516 }, { "epoch": 0.61, "eval_loss": 1.2792009115219116, "eval_runtime": 0.545, "eval_samples_per_second": 7.34, "eval_steps_per_second": 1.835, "step": 1516 }, { "epoch": 0.61, "learning_rate": 1.176e-07, "loss": 1.3808, "step": 1520 }, { "epoch": 0.61, "eval_loss": 1.2777525186538696, "eval_runtime": 0.5523, "eval_samples_per_second": 7.243, "eval_steps_per_second": 1.811, "step": 1520 }, { "epoch": 0.61, "learning_rate": 1.1712e-07, "loss": 1.3692, "step": 1524 }, { "epoch": 0.61, "eval_loss": 1.2765049934387207, "eval_runtime": 0.752, "eval_samples_per_second": 5.319, "eval_steps_per_second": 1.33, "step": 1524 }, { "epoch": 0.61, "learning_rate": 1.1663999999999999e-07, "loss": 1.3763, "step": 1528 }, { "epoch": 0.61, "eval_loss": 1.2754418849945068, "eval_runtime": 0.7731, "eval_samples_per_second": 5.174, "eval_steps_per_second": 1.293, "step": 1528 }, { "epoch": 0.61, "learning_rate": 1.1615999999999999e-07, "loss": 1.3505, "step": 1532 }, { "epoch": 0.61, "eval_loss": 1.2741388082504272, "eval_runtime": 0.798, "eval_samples_per_second": 5.012, "eval_steps_per_second": 1.253, "step": 1532 }, { "epoch": 0.61, "learning_rate": 1.1567999999999999e-07, "loss": 1.3579, "step": 1536 }, { "epoch": 0.61, "eval_loss": 1.273074746131897, "eval_runtime": 0.5381, "eval_samples_per_second": 7.433, "eval_steps_per_second": 1.858, "step": 1536 }, { "epoch": 0.62, "learning_rate": 1.152e-07, "loss": 1.3567, "step": 1540 }, { "epoch": 0.62, "eval_loss": 1.2716789245605469, "eval_runtime": 0.5423, "eval_samples_per_second": 7.376, "eval_steps_per_second": 1.844, "step": 1540 }, { "epoch": 0.62, "learning_rate": 1.1472e-07, "loss": 1.3608, "step": 1544 }, { "epoch": 0.62, "eval_loss": 1.270503044128418, "eval_runtime": 0.538, "eval_samples_per_second": 7.435, "eval_steps_per_second": 1.859, "step": 1544 }, { "epoch": 0.62, "learning_rate": 1.1424000000000001e-07, "loss": 1.3895, "step": 1548 }, { "epoch": 0.62, "eval_loss": 1.269209384918213, "eval_runtime": 0.5287, "eval_samples_per_second": 7.566, "eval_steps_per_second": 1.892, "step": 1548 }, { "epoch": 0.62, "learning_rate": 1.1376e-07, "loss": 1.3614, "step": 1552 }, { "epoch": 0.62, "eval_loss": 1.2682329416275024, "eval_runtime": 0.5516, "eval_samples_per_second": 7.252, "eval_steps_per_second": 1.813, "step": 1552 }, { "epoch": 0.62, "learning_rate": 1.1327999999999999e-07, "loss": 1.3643, "step": 1556 }, { "epoch": 0.62, "eval_loss": 1.2671722173690796, "eval_runtime": 0.7443, "eval_samples_per_second": 5.374, "eval_steps_per_second": 1.344, "step": 1556 }, { "epoch": 0.62, "learning_rate": 1.1279999999999999e-07, "loss": 1.3537, "step": 1560 }, { "epoch": 0.62, "eval_loss": 1.2658395767211914, "eval_runtime": 0.7724, "eval_samples_per_second": 5.179, "eval_steps_per_second": 1.295, "step": 1560 }, { "epoch": 0.63, "learning_rate": 1.1232e-07, "loss": 1.3564, "step": 1564 }, { "epoch": 0.63, "eval_loss": 1.2647554874420166, "eval_runtime": 0.7818, "eval_samples_per_second": 5.116, "eval_steps_per_second": 1.279, "step": 1564 }, { "epoch": 0.63, "learning_rate": 1.1184e-07, "loss": 1.3433, "step": 1568 }, { "epoch": 0.63, "eval_loss": 1.263366937637329, "eval_runtime": 0.7501, "eval_samples_per_second": 5.333, "eval_steps_per_second": 1.333, "step": 1568 }, { "epoch": 0.63, "learning_rate": 1.1135999999999999e-07, "loss": 1.3805, "step": 1572 }, { "epoch": 0.63, "eval_loss": 1.2624878883361816, "eval_runtime": 0.5467, "eval_samples_per_second": 7.316, "eval_steps_per_second": 1.829, "step": 1572 }, { "epoch": 0.63, "learning_rate": 1.1087999999999998e-07, "loss": 1.3309, "step": 1576 }, { "epoch": 0.63, "eval_loss": 1.2612154483795166, "eval_runtime": 0.5298, "eval_samples_per_second": 7.55, "eval_steps_per_second": 1.888, "step": 1576 }, { "epoch": 0.63, "learning_rate": 1.104e-07, "loss": 1.3408, "step": 1580 }, { "epoch": 0.63, "eval_loss": 1.260028600692749, "eval_runtime": 0.5364, "eval_samples_per_second": 7.457, "eval_steps_per_second": 1.864, "step": 1580 }, { "epoch": 0.63, "learning_rate": 1.0992e-07, "loss": 1.3505, "step": 1584 }, { "epoch": 0.63, "eval_loss": 1.2590596675872803, "eval_runtime": 0.5419, "eval_samples_per_second": 7.381, "eval_steps_per_second": 1.845, "step": 1584 }, { "epoch": 0.64, "learning_rate": 1.0943999999999999e-07, "loss": 1.355, "step": 1588 }, { "epoch": 0.64, "eval_loss": 1.258219838142395, "eval_runtime": 0.5356, "eval_samples_per_second": 7.469, "eval_steps_per_second": 1.867, "step": 1588 }, { "epoch": 0.64, "learning_rate": 1.0896e-07, "loss": 1.3426, "step": 1592 }, { "epoch": 0.64, "eval_loss": 1.2570216655731201, "eval_runtime": 0.7481, "eval_samples_per_second": 5.347, "eval_steps_per_second": 1.337, "step": 1592 }, { "epoch": 0.64, "learning_rate": 1.0847999999999999e-07, "loss": 1.3476, "step": 1596 }, { "epoch": 0.64, "eval_loss": 1.2555859088897705, "eval_runtime": 0.7344, "eval_samples_per_second": 5.447, "eval_steps_per_second": 1.362, "step": 1596 }, { "epoch": 0.64, "learning_rate": 1.0799999999999999e-07, "loss": 1.3448, "step": 1600 }, { "epoch": 0.64, "eval_loss": 1.2546257972717285, "eval_runtime": 0.8107, "eval_samples_per_second": 4.934, "eval_steps_per_second": 1.233, "step": 1600 }, { "epoch": 0.64, "learning_rate": 1.0752e-07, "loss": 1.3702, "step": 1604 }, { "epoch": 0.64, "eval_loss": 1.2532060146331787, "eval_runtime": 0.5269, "eval_samples_per_second": 7.592, "eval_steps_per_second": 1.898, "step": 1604 }, { "epoch": 0.64, "learning_rate": 1.0704e-07, "loss": 1.3198, "step": 1608 }, { "epoch": 0.64, "eval_loss": 1.2521923780441284, "eval_runtime": 0.5439, "eval_samples_per_second": 7.354, "eval_steps_per_second": 1.839, "step": 1608 }, { "epoch": 0.64, "learning_rate": 1.0656e-07, "loss": 1.3565, "step": 1612 }, { "epoch": 0.64, "eval_loss": 1.2510970830917358, "eval_runtime": 0.548, "eval_samples_per_second": 7.3, "eval_steps_per_second": 1.825, "step": 1612 }, { "epoch": 0.65, "learning_rate": 1.0608000000000001e-07, "loss": 1.3496, "step": 1616 }, { "epoch": 0.65, "eval_loss": 1.2498608827590942, "eval_runtime": 0.5393, "eval_samples_per_second": 7.417, "eval_steps_per_second": 1.854, "step": 1616 }, { "epoch": 0.65, "learning_rate": 1.0559999999999999e-07, "loss": 1.3346, "step": 1620 }, { "epoch": 0.65, "eval_loss": 1.2489620447158813, "eval_runtime": 0.5325, "eval_samples_per_second": 7.512, "eval_steps_per_second": 1.878, "step": 1620 }, { "epoch": 0.65, "learning_rate": 1.0511999999999999e-07, "loss": 1.3097, "step": 1624 }, { "epoch": 0.65, "eval_loss": 1.2476825714111328, "eval_runtime": 0.7695, "eval_samples_per_second": 5.198, "eval_steps_per_second": 1.3, "step": 1624 }, { "epoch": 0.65, "learning_rate": 1.0463999999999999e-07, "loss": 1.3224, "step": 1628 }, { "epoch": 0.65, "eval_loss": 1.2467668056488037, "eval_runtime": 0.7586, "eval_samples_per_second": 5.273, "eval_steps_per_second": 1.318, "step": 1628 }, { "epoch": 0.65, "learning_rate": 1.0416e-07, "loss": 1.321, "step": 1632 }, { "epoch": 0.65, "eval_loss": 1.2455267906188965, "eval_runtime": 0.7554, "eval_samples_per_second": 5.295, "eval_steps_per_second": 1.324, "step": 1632 }, { "epoch": 0.65, "learning_rate": 1.0368e-07, "loss": 1.3069, "step": 1636 }, { "epoch": 0.65, "eval_loss": 1.2445768117904663, "eval_runtime": 0.7823, "eval_samples_per_second": 5.113, "eval_steps_per_second": 1.278, "step": 1636 }, { "epoch": 0.66, "learning_rate": 1.0319999999999998e-07, "loss": 1.3358, "step": 1640 }, { "epoch": 0.66, "eval_loss": 1.243558406829834, "eval_runtime": 0.5542, "eval_samples_per_second": 7.218, "eval_steps_per_second": 1.804, "step": 1640 }, { "epoch": 0.66, "learning_rate": 1.0272e-07, "loss": 1.3413, "step": 1644 }, { "epoch": 0.66, "eval_loss": 1.242706060409546, "eval_runtime": 0.5488, "eval_samples_per_second": 7.289, "eval_steps_per_second": 1.822, "step": 1644 }, { "epoch": 0.66, "learning_rate": 1.0224e-07, "loss": 1.3328, "step": 1648 }, { "epoch": 0.66, "eval_loss": 1.2416179180145264, "eval_runtime": 0.5351, "eval_samples_per_second": 7.475, "eval_steps_per_second": 1.869, "step": 1648 }, { "epoch": 0.66, "learning_rate": 1.0175999999999999e-07, "loss": 1.341, "step": 1652 }, { "epoch": 0.66, "eval_loss": 1.2406481504440308, "eval_runtime": 0.5426, "eval_samples_per_second": 7.372, "eval_steps_per_second": 1.843, "step": 1652 }, { "epoch": 0.66, "learning_rate": 1.0128e-07, "loss": 1.3022, "step": 1656 }, { "epoch": 0.66, "eval_loss": 1.2396165132522583, "eval_runtime": 0.5368, "eval_samples_per_second": 7.451, "eval_steps_per_second": 1.863, "step": 1656 }, { "epoch": 0.66, "learning_rate": 1.008e-07, "loss": 1.3309, "step": 1660 }, { "epoch": 0.66, "eval_loss": 1.2385637760162354, "eval_runtime": 0.731, "eval_samples_per_second": 5.472, "eval_steps_per_second": 1.368, "step": 1660 }, { "epoch": 0.67, "learning_rate": 1.0031999999999999e-07, "loss": 1.3099, "step": 1664 }, { "epoch": 0.67, "eval_loss": 1.237720012664795, "eval_runtime": 0.7217, "eval_samples_per_second": 5.543, "eval_steps_per_second": 1.386, "step": 1664 }, { "epoch": 0.67, "learning_rate": 9.983999999999999e-08, "loss": 1.2979, "step": 1668 }, { "epoch": 0.67, "eval_loss": 1.2368155717849731, "eval_runtime": 0.7961, "eval_samples_per_second": 5.024, "eval_steps_per_second": 1.256, "step": 1668 }, { "epoch": 0.67, "learning_rate": 9.936e-08, "loss": 1.3219, "step": 1672 }, { "epoch": 0.67, "eval_loss": 1.2358148097991943, "eval_runtime": 0.6349, "eval_samples_per_second": 6.3, "eval_steps_per_second": 1.575, "step": 1672 }, { "epoch": 0.67, "learning_rate": 9.888e-08, "loss": 1.328, "step": 1676 }, { "epoch": 0.67, "eval_loss": 1.2349071502685547, "eval_runtime": 0.5583, "eval_samples_per_second": 7.165, "eval_steps_per_second": 1.791, "step": 1676 }, { "epoch": 0.67, "learning_rate": 9.84e-08, "loss": 1.3161, "step": 1680 }, { "epoch": 0.67, "eval_loss": 1.233930230140686, "eval_runtime": 0.5369, "eval_samples_per_second": 7.45, "eval_steps_per_second": 1.862, "step": 1680 }, { "epoch": 0.67, "learning_rate": 9.792e-08, "loss": 1.3435, "step": 1684 }, { "epoch": 0.67, "eval_loss": 1.2328994274139404, "eval_runtime": 0.5546, "eval_samples_per_second": 7.213, "eval_steps_per_second": 1.803, "step": 1684 }, { "epoch": 0.68, "learning_rate": 9.743999999999999e-08, "loss": 1.3191, "step": 1688 }, { "epoch": 0.68, "eval_loss": 1.2323206663131714, "eval_runtime": 0.5626, "eval_samples_per_second": 7.11, "eval_steps_per_second": 1.777, "step": 1688 }, { "epoch": 0.68, "learning_rate": 9.695999999999999e-08, "loss": 1.3427, "step": 1692 }, { "epoch": 0.68, "eval_loss": 1.2313920259475708, "eval_runtime": 0.7579, "eval_samples_per_second": 5.278, "eval_steps_per_second": 1.319, "step": 1692 }, { "epoch": 0.68, "learning_rate": 9.648e-08, "loss": 1.3157, "step": 1696 }, { "epoch": 0.68, "eval_loss": 1.2306554317474365, "eval_runtime": 0.7565, "eval_samples_per_second": 5.287, "eval_steps_per_second": 1.322, "step": 1696 }, { "epoch": 0.68, "learning_rate": 9.6e-08, "loss": 1.31, "step": 1700 }, { "epoch": 0.68, "eval_loss": 1.229779839515686, "eval_runtime": 0.8359, "eval_samples_per_second": 4.785, "eval_steps_per_second": 1.196, "step": 1700 }, { "epoch": 0.68, "learning_rate": 9.552e-08, "loss": 1.3418, "step": 1704 }, { "epoch": 0.68, "eval_loss": 1.2287366390228271, "eval_runtime": 0.8, "eval_samples_per_second": 5.0, "eval_steps_per_second": 1.25, "step": 1704 }, { "epoch": 0.68, "learning_rate": 9.504000000000001e-08, "loss": 1.3051, "step": 1708 }, { "epoch": 0.68, "eval_loss": 1.2279709577560425, "eval_runtime": 0.5523, "eval_samples_per_second": 7.243, "eval_steps_per_second": 1.811, "step": 1708 }, { "epoch": 0.68, "learning_rate": 9.456e-08, "loss": 1.3453, "step": 1712 }, { "epoch": 0.68, "eval_loss": 1.227084755897522, "eval_runtime": 0.5304, "eval_samples_per_second": 7.542, "eval_steps_per_second": 1.885, "step": 1712 }, { "epoch": 0.69, "learning_rate": 9.407999999999999e-08, "loss": 1.3146, "step": 1716 }, { "epoch": 0.69, "eval_loss": 1.226120114326477, "eval_runtime": 0.536, "eval_samples_per_second": 7.462, "eval_steps_per_second": 1.866, "step": 1716 }, { "epoch": 0.69, "learning_rate": 9.359999999999999e-08, "loss": 1.2961, "step": 1720 }, { "epoch": 0.69, "eval_loss": 1.2254786491394043, "eval_runtime": 0.5464, "eval_samples_per_second": 7.32, "eval_steps_per_second": 1.83, "step": 1720 }, { "epoch": 0.69, "learning_rate": 9.312e-08, "loss": 1.2989, "step": 1724 }, { "epoch": 0.69, "eval_loss": 1.2248467206954956, "eval_runtime": 0.5328, "eval_samples_per_second": 7.507, "eval_steps_per_second": 1.877, "step": 1724 }, { "epoch": 0.69, "learning_rate": 9.264e-08, "loss": 1.314, "step": 1728 }, { "epoch": 0.69, "eval_loss": 1.2238779067993164, "eval_runtime": 0.7698, "eval_samples_per_second": 5.196, "eval_steps_per_second": 1.299, "step": 1728 }, { "epoch": 0.69, "learning_rate": 9.215999999999999e-08, "loss": 1.3137, "step": 1732 }, { "epoch": 0.69, "eval_loss": 1.2231634855270386, "eval_runtime": 0.7516, "eval_samples_per_second": 5.322, "eval_steps_per_second": 1.33, "step": 1732 }, { "epoch": 0.69, "learning_rate": 9.167999999999998e-08, "loss": 1.323, "step": 1736 }, { "epoch": 0.69, "eval_loss": 1.222124457359314, "eval_runtime": 0.8004, "eval_samples_per_second": 4.997, "eval_steps_per_second": 1.249, "step": 1736 }, { "epoch": 0.7, "learning_rate": 9.12e-08, "loss": 1.3194, "step": 1740 }, { "epoch": 0.7, "eval_loss": 1.2216134071350098, "eval_runtime": 0.5409, "eval_samples_per_second": 7.394, "eval_steps_per_second": 1.849, "step": 1740 }, { "epoch": 0.7, "learning_rate": 9.072e-08, "loss": 1.2857, "step": 1744 }, { "epoch": 0.7, "eval_loss": 1.220569372177124, "eval_runtime": 0.5422, "eval_samples_per_second": 7.377, "eval_steps_per_second": 1.844, "step": 1744 }, { "epoch": 0.7, "learning_rate": 9.024e-08, "loss": 1.3101, "step": 1748 }, { "epoch": 0.7, "eval_loss": 1.2198562622070312, "eval_runtime": 0.5528, "eval_samples_per_second": 7.236, "eval_steps_per_second": 1.809, "step": 1748 }, { "epoch": 0.7, "learning_rate": 8.976e-08, "loss": 1.2962, "step": 1752 }, { "epoch": 0.7, "eval_loss": 1.2193635702133179, "eval_runtime": 0.5512, "eval_samples_per_second": 7.257, "eval_steps_per_second": 1.814, "step": 1752 }, { "epoch": 0.7, "learning_rate": 8.927999999999999e-08, "loss": 1.2927, "step": 1756 }, { "epoch": 0.7, "eval_loss": 1.218583345413208, "eval_runtime": 0.5559, "eval_samples_per_second": 7.195, "eval_steps_per_second": 1.799, "step": 1756 }, { "epoch": 0.7, "learning_rate": 8.879999999999999e-08, "loss": 1.2728, "step": 1760 }, { "epoch": 0.7, "eval_loss": 1.2178785800933838, "eval_runtime": 0.7605, "eval_samples_per_second": 5.26, "eval_steps_per_second": 1.315, "step": 1760 }, { "epoch": 0.71, "learning_rate": 8.832e-08, "loss": 1.2903, "step": 1764 }, { "epoch": 0.71, "eval_loss": 1.2169758081436157, "eval_runtime": 0.7602, "eval_samples_per_second": 5.262, "eval_steps_per_second": 1.315, "step": 1764 }, { "epoch": 0.71, "learning_rate": 8.784e-08, "loss": 1.3108, "step": 1768 }, { "epoch": 0.71, "eval_loss": 1.2163543701171875, "eval_runtime": 0.7924, "eval_samples_per_second": 5.048, "eval_steps_per_second": 1.262, "step": 1768 }, { "epoch": 0.71, "learning_rate": 8.736e-08, "loss": 1.2899, "step": 1772 }, { "epoch": 0.71, "eval_loss": 1.2156285047531128, "eval_runtime": 0.5511, "eval_samples_per_second": 7.259, "eval_steps_per_second": 1.815, "step": 1772 }, { "epoch": 0.71, "learning_rate": 8.688000000000001e-08, "loss": 1.2996, "step": 1776 }, { "epoch": 0.71, "eval_loss": 1.2148629426956177, "eval_runtime": 0.5465, "eval_samples_per_second": 7.32, "eval_steps_per_second": 1.83, "step": 1776 }, { "epoch": 0.71, "learning_rate": 8.639999999999999e-08, "loss": 1.2865, "step": 1780 }, { "epoch": 0.71, "eval_loss": 1.2141329050064087, "eval_runtime": 0.5496, "eval_samples_per_second": 7.278, "eval_steps_per_second": 1.819, "step": 1780 }, { "epoch": 0.71, "learning_rate": 8.591999999999999e-08, "loss": 1.3004, "step": 1784 }, { "epoch": 0.71, "eval_loss": 1.2135276794433594, "eval_runtime": 0.8113, "eval_samples_per_second": 4.93, "eval_steps_per_second": 1.233, "step": 1784 }, { "epoch": 0.72, "learning_rate": 8.543999999999999e-08, "loss": 1.2916, "step": 1788 }, { "epoch": 0.72, "eval_loss": 1.2127509117126465, "eval_runtime": 1.0008, "eval_samples_per_second": 3.997, "eval_steps_per_second": 0.999, "step": 1788 }, { "epoch": 0.72, "learning_rate": 8.496e-08, "loss": 1.2975, "step": 1792 }, { "epoch": 0.72, "eval_loss": 1.2119174003601074, "eval_runtime": 0.9106, "eval_samples_per_second": 4.393, "eval_steps_per_second": 1.098, "step": 1792 }, { "epoch": 0.72, "learning_rate": 8.448e-08, "loss": 1.3071, "step": 1796 }, { "epoch": 0.72, "eval_loss": 1.2113782167434692, "eval_runtime": 0.8267, "eval_samples_per_second": 4.838, "eval_steps_per_second": 1.21, "step": 1796 }, { "epoch": 0.72, "learning_rate": 8.4e-08, "loss": 1.2793, "step": 1800 }, { "epoch": 0.72, "eval_loss": 1.2105748653411865, "eval_runtime": 0.9729, "eval_samples_per_second": 4.111, "eval_steps_per_second": 1.028, "step": 1800 }, { "epoch": 0.72, "learning_rate": 8.352e-08, "loss": 1.2755, "step": 1804 }, { "epoch": 0.72, "eval_loss": 1.2097927331924438, "eval_runtime": 1.0248, "eval_samples_per_second": 3.903, "eval_steps_per_second": 0.976, "step": 1804 }, { "epoch": 0.72, "learning_rate": 8.304e-08, "loss": 1.2968, "step": 1808 }, { "epoch": 0.72, "eval_loss": 1.2092970609664917, "eval_runtime": 0.8732, "eval_samples_per_second": 4.581, "eval_steps_per_second": 1.145, "step": 1808 }, { "epoch": 0.72, "learning_rate": 8.255999999999999e-08, "loss": 1.3226, "step": 1812 }, { "epoch": 0.72, "eval_loss": 1.2085187435150146, "eval_runtime": 0.994, "eval_samples_per_second": 4.024, "eval_steps_per_second": 1.006, "step": 1812 }, { "epoch": 0.73, "learning_rate": 8.208e-08, "loss": 1.3117, "step": 1816 }, { "epoch": 0.73, "eval_loss": 1.2078773975372314, "eval_runtime": 0.9321, "eval_samples_per_second": 4.291, "eval_steps_per_second": 1.073, "step": 1816 }, { "epoch": 0.73, "learning_rate": 8.16e-08, "loss": 1.2957, "step": 1820 }, { "epoch": 0.73, "eval_loss": 1.207309603691101, "eval_runtime": 0.8433, "eval_samples_per_second": 4.743, "eval_steps_per_second": 1.186, "step": 1820 }, { "epoch": 0.73, "learning_rate": 8.111999999999999e-08, "loss": 1.2885, "step": 1824 }, { "epoch": 0.73, "eval_loss": 1.206638216972351, "eval_runtime": 1.0078, "eval_samples_per_second": 3.969, "eval_steps_per_second": 0.992, "step": 1824 }, { "epoch": 0.73, "learning_rate": 8.063999999999999e-08, "loss": 1.2731, "step": 1828 }, { "epoch": 0.73, "eval_loss": 1.2057451009750366, "eval_runtime": 0.9696, "eval_samples_per_second": 4.125, "eval_steps_per_second": 1.031, "step": 1828 }, { "epoch": 0.73, "learning_rate": 8.016e-08, "loss": 1.2821, "step": 1832 }, { "epoch": 0.73, "eval_loss": 1.2051252126693726, "eval_runtime": 0.8398, "eval_samples_per_second": 4.763, "eval_steps_per_second": 1.191, "step": 1832 }, { "epoch": 0.73, "learning_rate": 7.968e-08, "loss": 1.2944, "step": 1836 }, { "epoch": 0.73, "eval_loss": 1.2045457363128662, "eval_runtime": 0.8549, "eval_samples_per_second": 4.679, "eval_steps_per_second": 1.17, "step": 1836 }, { "epoch": 0.74, "learning_rate": 7.92e-08, "loss": 1.2768, "step": 1840 }, { "epoch": 0.74, "eval_loss": 1.2040053606033325, "eval_runtime": 0.9035, "eval_samples_per_second": 4.427, "eval_steps_per_second": 1.107, "step": 1840 }, { "epoch": 0.74, "learning_rate": 7.872e-08, "loss": 1.2917, "step": 1844 }, { "epoch": 0.74, "eval_loss": 1.2034111022949219, "eval_runtime": 0.8657, "eval_samples_per_second": 4.621, "eval_steps_per_second": 1.155, "step": 1844 }, { "epoch": 0.74, "learning_rate": 7.823999999999999e-08, "loss": 1.2935, "step": 1848 }, { "epoch": 0.74, "eval_loss": 1.2027428150177002, "eval_runtime": 0.8157, "eval_samples_per_second": 4.903, "eval_steps_per_second": 1.226, "step": 1848 }, { "epoch": 0.74, "learning_rate": 7.775999999999999e-08, "loss": 1.268, "step": 1852 }, { "epoch": 0.74, "eval_loss": 1.2022608518600464, "eval_runtime": 0.8551, "eval_samples_per_second": 4.678, "eval_steps_per_second": 1.169, "step": 1852 }, { "epoch": 0.74, "learning_rate": 7.728e-08, "loss": 1.2761, "step": 1856 }, { "epoch": 0.74, "eval_loss": 1.2016183137893677, "eval_runtime": 0.8348, "eval_samples_per_second": 4.791, "eval_steps_per_second": 1.198, "step": 1856 }, { "epoch": 0.74, "learning_rate": 7.68e-08, "loss": 1.3107, "step": 1860 }, { "epoch": 0.74, "eval_loss": 1.201023817062378, "eval_runtime": 0.8161, "eval_samples_per_second": 4.901, "eval_steps_per_second": 1.225, "step": 1860 }, { "epoch": 0.75, "learning_rate": 7.632e-08, "loss": 1.2752, "step": 1864 }, { "epoch": 0.75, "eval_loss": 1.2003718614578247, "eval_runtime": 1.0033, "eval_samples_per_second": 3.987, "eval_steps_per_second": 0.997, "step": 1864 }, { "epoch": 0.75, "learning_rate": 7.584000000000001e-08, "loss": 1.2661, "step": 1868 }, { "epoch": 0.75, "eval_loss": 1.1996574401855469, "eval_runtime": 0.8381, "eval_samples_per_second": 4.773, "eval_steps_per_second": 1.193, "step": 1868 }, { "epoch": 0.75, "learning_rate": 7.536e-08, "loss": 1.2985, "step": 1872 }, { "epoch": 0.75, "eval_loss": 1.199149250984192, "eval_runtime": 0.8342, "eval_samples_per_second": 4.795, "eval_steps_per_second": 1.199, "step": 1872 }, { "epoch": 0.75, "learning_rate": 7.487999999999999e-08, "loss": 1.2801, "step": 1876 }, { "epoch": 0.75, "eval_loss": 1.1985127925872803, "eval_runtime": 0.9225, "eval_samples_per_second": 4.336, "eval_steps_per_second": 1.084, "step": 1876 }, { "epoch": 0.75, "learning_rate": 7.439999999999999e-08, "loss": 1.2775, "step": 1880 }, { "epoch": 0.75, "eval_loss": 1.1979784965515137, "eval_runtime": 0.92, "eval_samples_per_second": 4.348, "eval_steps_per_second": 1.087, "step": 1880 }, { "epoch": 0.75, "learning_rate": 7.392e-08, "loss": 1.2741, "step": 1884 }, { "epoch": 0.75, "eval_loss": 1.1976195573806763, "eval_runtime": 0.8775, "eval_samples_per_second": 4.558, "eval_steps_per_second": 1.14, "step": 1884 }, { "epoch": 0.76, "learning_rate": 7.343999999999999e-08, "loss": 1.2747, "step": 1888 }, { "epoch": 0.76, "eval_loss": 1.197205662727356, "eval_runtime": 0.8885, "eval_samples_per_second": 4.502, "eval_steps_per_second": 1.125, "step": 1888 }, { "epoch": 0.76, "learning_rate": 7.296e-08, "loss": 1.2772, "step": 1892 }, { "epoch": 0.76, "eval_loss": 1.1963942050933838, "eval_runtime": 0.9218, "eval_samples_per_second": 4.339, "eval_steps_per_second": 1.085, "step": 1892 }, { "epoch": 0.76, "learning_rate": 7.248e-08, "loss": 1.2953, "step": 1896 }, { "epoch": 0.76, "eval_loss": 1.1961135864257812, "eval_runtime": 0.8111, "eval_samples_per_second": 4.932, "eval_steps_per_second": 1.233, "step": 1896 }, { "epoch": 0.76, "learning_rate": 7.2e-08, "loss": 1.3052, "step": 1900 }, { "epoch": 0.76, "eval_loss": 1.1957508325576782, "eval_runtime": 0.9897, "eval_samples_per_second": 4.042, "eval_steps_per_second": 1.01, "step": 1900 }, { "epoch": 0.76, "learning_rate": 7.152e-08, "loss": 1.2505, "step": 1904 }, { "epoch": 0.76, "eval_loss": 1.1951295137405396, "eval_runtime": 0.9069, "eval_samples_per_second": 4.41, "eval_steps_per_second": 1.103, "step": 1904 }, { "epoch": 0.76, "learning_rate": 7.104e-08, "loss": 1.3088, "step": 1908 }, { "epoch": 0.76, "eval_loss": 1.1944581270217896, "eval_runtime": 0.7895, "eval_samples_per_second": 5.066, "eval_steps_per_second": 1.267, "step": 1908 }, { "epoch": 0.76, "learning_rate": 7.055999999999999e-08, "loss": 1.2705, "step": 1912 }, { "epoch": 0.76, "eval_loss": 1.1939201354980469, "eval_runtime": 0.912, "eval_samples_per_second": 4.386, "eval_steps_per_second": 1.097, "step": 1912 }, { "epoch": 0.77, "learning_rate": 7.008e-08, "loss": 1.2606, "step": 1916 }, { "epoch": 0.77, "eval_loss": 1.1934112310409546, "eval_runtime": 0.948, "eval_samples_per_second": 4.219, "eval_steps_per_second": 1.055, "step": 1916 }, { "epoch": 0.77, "learning_rate": 6.96e-08, "loss": 1.2729, "step": 1920 }, { "epoch": 0.77, "eval_loss": 1.1931556463241577, "eval_runtime": 0.8456, "eval_samples_per_second": 4.73, "eval_steps_per_second": 1.183, "step": 1920 }, { "epoch": 0.77, "learning_rate": 6.912e-08, "loss": 1.2642, "step": 1924 }, { "epoch": 0.77, "eval_loss": 1.1926673650741577, "eval_runtime": 0.8112, "eval_samples_per_second": 4.931, "eval_steps_per_second": 1.233, "step": 1924 }, { "epoch": 0.77, "learning_rate": 6.864e-08, "loss": 1.2903, "step": 1928 }, { "epoch": 0.77, "eval_loss": 1.1919676065444946, "eval_runtime": 0.865, "eval_samples_per_second": 4.624, "eval_steps_per_second": 1.156, "step": 1928 }, { "epoch": 0.77, "learning_rate": 6.816e-08, "loss": 1.2688, "step": 1932 }, { "epoch": 0.77, "eval_loss": 1.191756010055542, "eval_runtime": 0.8924, "eval_samples_per_second": 4.482, "eval_steps_per_second": 1.121, "step": 1932 }, { "epoch": 0.77, "learning_rate": 6.768e-08, "loss": 1.2677, "step": 1936 }, { "epoch": 0.77, "eval_loss": 1.1909488439559937, "eval_runtime": 0.8423, "eval_samples_per_second": 4.749, "eval_steps_per_second": 1.187, "step": 1936 }, { "epoch": 0.78, "learning_rate": 6.719999999999999e-08, "loss": 1.2747, "step": 1940 }, { "epoch": 0.78, "eval_loss": 1.1905359029769897, "eval_runtime": 0.8982, "eval_samples_per_second": 4.453, "eval_steps_per_second": 1.113, "step": 1940 }, { "epoch": 0.78, "learning_rate": 6.671999999999999e-08, "loss": 1.2512, "step": 1944 }, { "epoch": 0.78, "eval_loss": 1.19022798538208, "eval_runtime": 0.8246, "eval_samples_per_second": 4.851, "eval_steps_per_second": 1.213, "step": 1944 }, { "epoch": 0.78, "learning_rate": 6.623999999999999e-08, "loss": 1.2651, "step": 1948 }, { "epoch": 0.78, "eval_loss": 1.1898229122161865, "eval_runtime": 0.8116, "eval_samples_per_second": 4.928, "eval_steps_per_second": 1.232, "step": 1948 }, { "epoch": 0.78, "learning_rate": 6.576e-08, "loss": 1.2655, "step": 1952 }, { "epoch": 0.78, "eval_loss": 1.189262866973877, "eval_runtime": 1.0243, "eval_samples_per_second": 3.905, "eval_steps_per_second": 0.976, "step": 1952 }, { "epoch": 0.78, "learning_rate": 6.528e-08, "loss": 1.2617, "step": 1956 }, { "epoch": 0.78, "eval_loss": 1.1888002157211304, "eval_runtime": 0.8812, "eval_samples_per_second": 4.539, "eval_steps_per_second": 1.135, "step": 1956 }, { "epoch": 0.78, "learning_rate": 6.48e-08, "loss": 1.2764, "step": 1960 }, { "epoch": 0.78, "eval_loss": 1.1885006427764893, "eval_runtime": 0.8185, "eval_samples_per_second": 4.887, "eval_steps_per_second": 1.222, "step": 1960 }, { "epoch": 0.79, "learning_rate": 6.432e-08, "loss": 1.2531, "step": 1964 }, { "epoch": 0.79, "eval_loss": 1.188267469406128, "eval_runtime": 0.9375, "eval_samples_per_second": 4.267, "eval_steps_per_second": 1.067, "step": 1964 }, { "epoch": 0.79, "learning_rate": 6.384e-08, "loss": 1.2911, "step": 1968 }, { "epoch": 0.79, "eval_loss": 1.1874159574508667, "eval_runtime": 0.9562, "eval_samples_per_second": 4.183, "eval_steps_per_second": 1.046, "step": 1968 }, { "epoch": 0.79, "learning_rate": 6.335999999999999e-08, "loss": 1.2616, "step": 1972 }, { "epoch": 0.79, "eval_loss": 1.1871256828308105, "eval_runtime": 0.8052, "eval_samples_per_second": 4.968, "eval_steps_per_second": 1.242, "step": 1972 }, { "epoch": 0.79, "learning_rate": 6.288e-08, "loss": 1.2537, "step": 1976 }, { "epoch": 0.79, "eval_loss": 1.1868540048599243, "eval_runtime": 1.0118, "eval_samples_per_second": 3.953, "eval_steps_per_second": 0.988, "step": 1976 }, { "epoch": 0.79, "learning_rate": 6.239999999999999e-08, "loss": 1.2548, "step": 1980 }, { "epoch": 0.79, "eval_loss": 1.1864162683486938, "eval_runtime": 1.0123, "eval_samples_per_second": 3.951, "eval_steps_per_second": 0.988, "step": 1980 }, { "epoch": 0.79, "learning_rate": 6.192e-08, "loss": 1.2722, "step": 1984 }, { "epoch": 0.79, "eval_loss": 1.1861025094985962, "eval_runtime": 0.8118, "eval_samples_per_second": 4.927, "eval_steps_per_second": 1.232, "step": 1984 }, { "epoch": 0.8, "learning_rate": 6.144e-08, "loss": 1.2717, "step": 1988 }, { "epoch": 0.8, "eval_loss": 1.1857701539993286, "eval_runtime": 0.9863, "eval_samples_per_second": 4.056, "eval_steps_per_second": 1.014, "step": 1988 }, { "epoch": 0.8, "learning_rate": 6.096e-08, "loss": 1.281, "step": 1992 }, { "epoch": 0.8, "eval_loss": 1.1854195594787598, "eval_runtime": 0.8058, "eval_samples_per_second": 4.964, "eval_steps_per_second": 1.241, "step": 1992 }, { "epoch": 0.8, "learning_rate": 6.048e-08, "loss": 1.2766, "step": 1996 }, { "epoch": 0.8, "eval_loss": 1.1849565505981445, "eval_runtime": 0.6952, "eval_samples_per_second": 5.754, "eval_steps_per_second": 1.438, "step": 1996 }, { "epoch": 0.8, "learning_rate": 6e-08, "loss": 1.2962, "step": 2000 }, { "epoch": 0.8, "eval_loss": 1.1846204996109009, "eval_runtime": 0.5591, "eval_samples_per_second": 7.154, "eval_steps_per_second": 1.789, "step": 2000 }, { "epoch": 0.8, "learning_rate": 5.951999999999999e-08, "loss": 1.2732, "step": 2004 }, { "epoch": 0.8, "eval_loss": 1.1842423677444458, "eval_runtime": 0.6989, "eval_samples_per_second": 5.724, "eval_steps_per_second": 1.431, "step": 2004 }, { "epoch": 0.8, "learning_rate": 5.904e-08, "loss": 1.2588, "step": 2008 }, { "epoch": 0.8, "eval_loss": 1.1838123798370361, "eval_runtime": 0.533, "eval_samples_per_second": 7.505, "eval_steps_per_second": 1.876, "step": 2008 }, { "epoch": 0.8, "learning_rate": 5.856e-08, "loss": 1.2651, "step": 2012 }, { "epoch": 0.8, "eval_loss": 1.1836127042770386, "eval_runtime": 0.5522, "eval_samples_per_second": 7.243, "eval_steps_per_second": 1.811, "step": 2012 }, { "epoch": 0.81, "learning_rate": 5.8079999999999995e-08, "loss": 1.2755, "step": 2016 }, { "epoch": 0.81, "eval_loss": 1.1833029985427856, "eval_runtime": 0.5378, "eval_samples_per_second": 7.438, "eval_steps_per_second": 1.859, "step": 2016 }, { "epoch": 0.81, "learning_rate": 5.76e-08, "loss": 1.2513, "step": 2020 }, { "epoch": 0.81, "eval_loss": 1.1827694177627563, "eval_runtime": 0.5697, "eval_samples_per_second": 7.021, "eval_steps_per_second": 1.755, "step": 2020 }, { "epoch": 0.81, "learning_rate": 5.7120000000000005e-08, "loss": 1.2645, "step": 2024 }, { "epoch": 0.81, "eval_loss": 1.1824350357055664, "eval_runtime": 0.7918, "eval_samples_per_second": 5.052, "eval_steps_per_second": 1.263, "step": 2024 }, { "epoch": 0.81, "learning_rate": 5.6639999999999996e-08, "loss": 1.2593, "step": 2028 }, { "epoch": 0.81, "eval_loss": 1.181903600692749, "eval_runtime": 0.7394, "eval_samples_per_second": 5.41, "eval_steps_per_second": 1.353, "step": 2028 }, { "epoch": 0.81, "learning_rate": 5.616e-08, "loss": 1.2947, "step": 2032 }, { "epoch": 0.81, "eval_loss": 1.1818093061447144, "eval_runtime": 0.8202, "eval_samples_per_second": 4.877, "eval_steps_per_second": 1.219, "step": 2032 }, { "epoch": 0.81, "learning_rate": 5.567999999999999e-08, "loss": 1.2599, "step": 2036 }, { "epoch": 0.81, "eval_loss": 1.1813161373138428, "eval_runtime": 0.8494, "eval_samples_per_second": 4.709, "eval_steps_per_second": 1.177, "step": 2036 }, { "epoch": 0.82, "learning_rate": 5.52e-08, "loss": 1.2094, "step": 2040 }, { "epoch": 0.82, "eval_loss": 1.1808911561965942, "eval_runtime": 0.5448, "eval_samples_per_second": 7.342, "eval_steps_per_second": 1.835, "step": 2040 }, { "epoch": 0.82, "learning_rate": 5.4719999999999996e-08, "loss": 1.2707, "step": 2044 }, { "epoch": 0.82, "eval_loss": 1.180704116821289, "eval_runtime": 0.5486, "eval_samples_per_second": 7.291, "eval_steps_per_second": 1.823, "step": 2044 }, { "epoch": 0.82, "learning_rate": 5.4239999999999995e-08, "loss": 1.2653, "step": 2048 }, { "epoch": 0.82, "eval_loss": 1.1802550554275513, "eval_runtime": 0.5729, "eval_samples_per_second": 6.982, "eval_steps_per_second": 1.745, "step": 2048 }, { "epoch": 0.82, "learning_rate": 5.376e-08, "loss": 1.2637, "step": 2052 }, { "epoch": 0.82, "eval_loss": 1.1799267530441284, "eval_runtime": 0.55, "eval_samples_per_second": 7.273, "eval_steps_per_second": 1.818, "step": 2052 }, { "epoch": 0.82, "learning_rate": 5.328e-08, "loss": 1.2708, "step": 2056 }, { "epoch": 0.82, "eval_loss": 1.1799120903015137, "eval_runtime": 0.5421, "eval_samples_per_second": 7.379, "eval_steps_per_second": 1.845, "step": 2056 }, { "epoch": 0.82, "learning_rate": 5.2799999999999996e-08, "loss": 1.283, "step": 2060 }, { "epoch": 0.82, "eval_loss": 1.1794401407241821, "eval_runtime": 0.7544, "eval_samples_per_second": 5.302, "eval_steps_per_second": 1.325, "step": 2060 }, { "epoch": 0.83, "learning_rate": 5.2319999999999995e-08, "loss": 1.2853, "step": 2064 }, { "epoch": 0.83, "eval_loss": 1.1792134046554565, "eval_runtime": 0.7805, "eval_samples_per_second": 5.125, "eval_steps_per_second": 1.281, "step": 2064 }, { "epoch": 0.83, "learning_rate": 5.184e-08, "loss": 1.2617, "step": 2068 }, { "epoch": 0.83, "eval_loss": 1.1792347431182861, "eval_runtime": 0.7985, "eval_samples_per_second": 5.01, "eval_steps_per_second": 1.252, "step": 2068 }, { "epoch": 0.83, "learning_rate": 5.136e-08, "loss": 1.2476, "step": 2072 }, { "epoch": 0.83, "eval_loss": 1.178797721862793, "eval_runtime": 0.8348, "eval_samples_per_second": 4.792, "eval_steps_per_second": 1.198, "step": 2072 }, { "epoch": 0.83, "learning_rate": 5.0879999999999996e-08, "loss": 1.2355, "step": 2076 }, { "epoch": 0.83, "eval_loss": 1.1785314083099365, "eval_runtime": 0.5603, "eval_samples_per_second": 7.139, "eval_steps_per_second": 1.785, "step": 2076 }, { "epoch": 0.83, "learning_rate": 5.04e-08, "loss": 1.2348, "step": 2080 }, { "epoch": 0.83, "eval_loss": 1.1781315803527832, "eval_runtime": 0.5683, "eval_samples_per_second": 7.038, "eval_steps_per_second": 1.76, "step": 2080 }, { "epoch": 0.83, "learning_rate": 4.991999999999999e-08, "loss": 1.2468, "step": 2084 }, { "epoch": 0.83, "eval_loss": 1.1779569387435913, "eval_runtime": 0.5524, "eval_samples_per_second": 7.241, "eval_steps_per_second": 1.81, "step": 2084 }, { "epoch": 0.84, "learning_rate": 4.944e-08, "loss": 1.2715, "step": 2088 }, { "epoch": 0.84, "eval_loss": 1.177626609802246, "eval_runtime": 0.5397, "eval_samples_per_second": 7.412, "eval_steps_per_second": 1.853, "step": 2088 }, { "epoch": 0.84, "learning_rate": 4.896e-08, "loss": 1.2502, "step": 2092 }, { "epoch": 0.84, "eval_loss": 1.1772172451019287, "eval_runtime": 0.5501, "eval_samples_per_second": 7.271, "eval_steps_per_second": 1.818, "step": 2092 }, { "epoch": 0.84, "learning_rate": 4.8479999999999995e-08, "loss": 1.284, "step": 2096 }, { "epoch": 0.84, "eval_loss": 1.177082896232605, "eval_runtime": 0.7726, "eval_samples_per_second": 5.177, "eval_steps_per_second": 1.294, "step": 2096 }, { "epoch": 0.84, "learning_rate": 4.8e-08, "loss": 1.2417, "step": 2100 }, { "epoch": 0.84, "eval_loss": 1.1768074035644531, "eval_runtime": 0.8241, "eval_samples_per_second": 4.854, "eval_steps_per_second": 1.213, "step": 2100 }, { "epoch": 0.84, "learning_rate": 4.7520000000000005e-08, "loss": 1.2516, "step": 2104 }, { "epoch": 0.84, "eval_loss": 1.1765819787979126, "eval_runtime": 0.7686, "eval_samples_per_second": 5.204, "eval_steps_per_second": 1.301, "step": 2104 }, { "epoch": 0.84, "learning_rate": 4.7039999999999996e-08, "loss": 1.2748, "step": 2108 }, { "epoch": 0.84, "eval_loss": 1.1763153076171875, "eval_runtime": 0.5397, "eval_samples_per_second": 7.412, "eval_steps_per_second": 1.853, "step": 2108 }, { "epoch": 0.84, "learning_rate": 4.656e-08, "loss": 1.2744, "step": 2112 }, { "epoch": 0.84, "eval_loss": 1.1761606931686401, "eval_runtime": 0.559, "eval_samples_per_second": 7.156, "eval_steps_per_second": 1.789, "step": 2112 }, { "epoch": 0.85, "learning_rate": 4.607999999999999e-08, "loss": 1.2551, "step": 2116 }, { "epoch": 0.85, "eval_loss": 1.1757404804229736, "eval_runtime": 0.5541, "eval_samples_per_second": 7.219, "eval_steps_per_second": 1.805, "step": 2116 }, { "epoch": 0.85, "learning_rate": 4.56e-08, "loss": 1.2687, "step": 2120 }, { "epoch": 0.85, "eval_loss": 1.175508737564087, "eval_runtime": 0.5519, "eval_samples_per_second": 7.247, "eval_steps_per_second": 1.812, "step": 2120 }, { "epoch": 0.85, "learning_rate": 4.512e-08, "loss": 1.2575, "step": 2124 }, { "epoch": 0.85, "eval_loss": 1.1754050254821777, "eval_runtime": 0.5573, "eval_samples_per_second": 7.178, "eval_steps_per_second": 1.794, "step": 2124 }, { "epoch": 0.85, "learning_rate": 4.4639999999999995e-08, "loss": 1.2501, "step": 2128 }, { "epoch": 0.85, "eval_loss": 1.1749186515808105, "eval_runtime": 0.8148, "eval_samples_per_second": 4.909, "eval_steps_per_second": 1.227, "step": 2128 }, { "epoch": 0.85, "learning_rate": 4.416e-08, "loss": 1.2728, "step": 2132 }, { "epoch": 0.85, "eval_loss": 1.1750520467758179, "eval_runtime": 0.8081, "eval_samples_per_second": 4.95, "eval_steps_per_second": 1.237, "step": 2132 }, { "epoch": 0.85, "learning_rate": 4.368e-08, "loss": 1.2411, "step": 2136 }, { "epoch": 0.85, "eval_loss": 1.174950122833252, "eval_runtime": 0.7994, "eval_samples_per_second": 5.004, "eval_steps_per_second": 1.251, "step": 2136 }, { "epoch": 0.86, "learning_rate": 4.3199999999999996e-08, "loss": 1.2505, "step": 2140 }, { "epoch": 0.86, "eval_loss": 1.1743714809417725, "eval_runtime": 0.8793, "eval_samples_per_second": 4.549, "eval_steps_per_second": 1.137, "step": 2140 }, { "epoch": 0.86, "learning_rate": 4.2719999999999995e-08, "loss": 1.2363, "step": 2144 }, { "epoch": 0.86, "eval_loss": 1.1743040084838867, "eval_runtime": 0.5548, "eval_samples_per_second": 7.209, "eval_steps_per_second": 1.802, "step": 2144 }, { "epoch": 0.86, "learning_rate": 4.224e-08, "loss": 1.2408, "step": 2148 }, { "epoch": 0.86, "eval_loss": 1.1740877628326416, "eval_runtime": 0.5539, "eval_samples_per_second": 7.221, "eval_steps_per_second": 1.805, "step": 2148 }, { "epoch": 0.86, "learning_rate": 4.176e-08, "loss": 1.25, "step": 2152 }, { "epoch": 0.86, "eval_loss": 1.1735849380493164, "eval_runtime": 0.5552, "eval_samples_per_second": 7.204, "eval_steps_per_second": 1.801, "step": 2152 }, { "epoch": 0.86, "learning_rate": 4.1279999999999996e-08, "loss": 1.2729, "step": 2156 }, { "epoch": 0.86, "eval_loss": 1.1735070943832397, "eval_runtime": 0.5396, "eval_samples_per_second": 7.413, "eval_steps_per_second": 1.853, "step": 2156 }, { "epoch": 0.86, "learning_rate": 4.08e-08, "loss": 1.2467, "step": 2160 }, { "epoch": 0.86, "eval_loss": 1.1735484600067139, "eval_runtime": 0.5593, "eval_samples_per_second": 7.152, "eval_steps_per_second": 1.788, "step": 2160 }, { "epoch": 0.87, "learning_rate": 4.031999999999999e-08, "loss": 1.2377, "step": 2164 }, { "epoch": 0.87, "eval_loss": 1.1734050512313843, "eval_runtime": 0.7724, "eval_samples_per_second": 5.179, "eval_steps_per_second": 1.295, "step": 2164 }, { "epoch": 0.87, "learning_rate": 3.984e-08, "loss": 1.2876, "step": 2168 }, { "epoch": 0.87, "eval_loss": 1.172935962677002, "eval_runtime": 0.7737, "eval_samples_per_second": 5.17, "eval_steps_per_second": 1.292, "step": 2168 }, { "epoch": 0.87, "learning_rate": 3.936e-08, "loss": 1.255, "step": 2172 }, { "epoch": 0.87, "eval_loss": 1.172789216041565, "eval_runtime": 0.8022, "eval_samples_per_second": 4.986, "eval_steps_per_second": 1.247, "step": 2172 }, { "epoch": 0.87, "learning_rate": 3.8879999999999995e-08, "loss": 1.2471, "step": 2176 }, { "epoch": 0.87, "eval_loss": 1.1724050045013428, "eval_runtime": 0.5443, "eval_samples_per_second": 7.349, "eval_steps_per_second": 1.837, "step": 2176 }, { "epoch": 0.87, "learning_rate": 3.84e-08, "loss": 1.2641, "step": 2180 }, { "epoch": 0.87, "eval_loss": 1.1726024150848389, "eval_runtime": 0.5537, "eval_samples_per_second": 7.224, "eval_steps_per_second": 1.806, "step": 2180 }, { "epoch": 0.87, "learning_rate": 3.7920000000000005e-08, "loss": 1.2594, "step": 2184 }, { "epoch": 0.87, "eval_loss": 1.1722073554992676, "eval_runtime": 0.5672, "eval_samples_per_second": 7.053, "eval_steps_per_second": 1.763, "step": 2184 }, { "epoch": 0.88, "learning_rate": 3.7439999999999996e-08, "loss": 1.2803, "step": 2188 }, { "epoch": 0.88, "eval_loss": 1.1718605756759644, "eval_runtime": 0.5653, "eval_samples_per_second": 7.076, "eval_steps_per_second": 1.769, "step": 2188 }, { "epoch": 0.88, "learning_rate": 3.696e-08, "loss": 1.2404, "step": 2192 }, { "epoch": 0.88, "eval_loss": 1.1717666387557983, "eval_runtime": 0.5424, "eval_samples_per_second": 7.375, "eval_steps_per_second": 1.844, "step": 2192 }, { "epoch": 0.88, "learning_rate": 3.648e-08, "loss": 1.2538, "step": 2196 }, { "epoch": 0.88, "eval_loss": 1.1718065738677979, "eval_runtime": 0.8245, "eval_samples_per_second": 4.852, "eval_steps_per_second": 1.213, "step": 2196 }, { "epoch": 0.88, "learning_rate": 3.6e-08, "loss": 1.2423, "step": 2200 }, { "epoch": 0.88, "eval_loss": 1.1715894937515259, "eval_runtime": 0.7792, "eval_samples_per_second": 5.134, "eval_steps_per_second": 1.283, "step": 2200 }, { "epoch": 0.88, "learning_rate": 3.552e-08, "loss": 1.2344, "step": 2204 }, { "epoch": 0.88, "eval_loss": 1.1713180541992188, "eval_runtime": 0.8087, "eval_samples_per_second": 4.946, "eval_steps_per_second": 1.237, "step": 2204 }, { "epoch": 0.88, "learning_rate": 3.504e-08, "loss": 1.2646, "step": 2208 }, { "epoch": 0.88, "eval_loss": 1.1710913181304932, "eval_runtime": 0.7178, "eval_samples_per_second": 5.572, "eval_steps_per_second": 1.393, "step": 2208 }, { "epoch": 0.88, "learning_rate": 3.456e-08, "loss": 1.2501, "step": 2212 }, { "epoch": 0.88, "eval_loss": 1.1711719036102295, "eval_runtime": 0.5726, "eval_samples_per_second": 6.985, "eval_steps_per_second": 1.746, "step": 2212 }, { "epoch": 0.89, "learning_rate": 3.408e-08, "loss": 1.2455, "step": 2216 }, { "epoch": 0.89, "eval_loss": 1.1708979606628418, "eval_runtime": 0.5581, "eval_samples_per_second": 7.168, "eval_steps_per_second": 1.792, "step": 2216 }, { "epoch": 0.89, "learning_rate": 3.3599999999999996e-08, "loss": 1.2588, "step": 2220 }, { "epoch": 0.89, "eval_loss": 1.170513391494751, "eval_runtime": 0.5523, "eval_samples_per_second": 7.242, "eval_steps_per_second": 1.811, "step": 2220 }, { "epoch": 0.89, "learning_rate": 3.3119999999999995e-08, "loss": 1.2629, "step": 2224 }, { "epoch": 0.89, "eval_loss": 1.1705873012542725, "eval_runtime": 0.5461, "eval_samples_per_second": 7.325, "eval_steps_per_second": 1.831, "step": 2224 }, { "epoch": 0.89, "learning_rate": 3.264e-08, "loss": 1.2572, "step": 2228 }, { "epoch": 0.89, "eval_loss": 1.1705511808395386, "eval_runtime": 0.6444, "eval_samples_per_second": 6.208, "eval_steps_per_second": 1.552, "step": 2228 }, { "epoch": 0.89, "learning_rate": 3.216e-08, "loss": 1.2625, "step": 2232 }, { "epoch": 0.89, "eval_loss": 1.1704365015029907, "eval_runtime": 0.7974, "eval_samples_per_second": 5.016, "eval_steps_per_second": 1.254, "step": 2232 }, { "epoch": 0.89, "learning_rate": 3.1679999999999996e-08, "loss": 1.2479, "step": 2236 }, { "epoch": 0.89, "eval_loss": 1.16998291015625, "eval_runtime": 0.783, "eval_samples_per_second": 5.108, "eval_steps_per_second": 1.277, "step": 2236 }, { "epoch": 0.9, "learning_rate": 3.1199999999999995e-08, "loss": 1.2698, "step": 2240 }, { "epoch": 0.9, "eval_loss": 1.1701174974441528, "eval_runtime": 0.778, "eval_samples_per_second": 5.141, "eval_steps_per_second": 1.285, "step": 2240 }, { "epoch": 0.9, "learning_rate": 3.072e-08, "loss": 1.2619, "step": 2244 }, { "epoch": 0.9, "eval_loss": 1.1698575019836426, "eval_runtime": 0.5528, "eval_samples_per_second": 7.235, "eval_steps_per_second": 1.809, "step": 2244 }, { "epoch": 0.9, "learning_rate": 3.024e-08, "loss": 1.2455, "step": 2248 }, { "epoch": 0.9, "eval_loss": 1.1696205139160156, "eval_runtime": 0.5491, "eval_samples_per_second": 7.285, "eval_steps_per_second": 1.821, "step": 2248 }, { "epoch": 0.9, "learning_rate": 2.9759999999999996e-08, "loss": 1.2523, "step": 2252 }, { "epoch": 0.9, "eval_loss": 1.1695911884307861, "eval_runtime": 0.5404, "eval_samples_per_second": 7.402, "eval_steps_per_second": 1.85, "step": 2252 }, { "epoch": 0.9, "learning_rate": 2.928e-08, "loss": 1.2695, "step": 2256 }, { "epoch": 0.9, "eval_loss": 1.1692249774932861, "eval_runtime": 0.55, "eval_samples_per_second": 7.273, "eval_steps_per_second": 1.818, "step": 2256 }, { "epoch": 0.9, "learning_rate": 2.88e-08, "loss": 1.258, "step": 2260 }, { "epoch": 0.9, "eval_loss": 1.1691182851791382, "eval_runtime": 0.5535, "eval_samples_per_second": 7.227, "eval_steps_per_second": 1.807, "step": 2260 }, { "epoch": 0.91, "learning_rate": 2.8319999999999998e-08, "loss": 1.2393, "step": 2264 }, { "epoch": 0.91, "eval_loss": 1.169084906578064, "eval_runtime": 0.7449, "eval_samples_per_second": 5.37, "eval_steps_per_second": 1.342, "step": 2264 }, { "epoch": 0.91, "learning_rate": 2.7839999999999997e-08, "loss": 1.2567, "step": 2268 }, { "epoch": 0.91, "eval_loss": 1.1689860820770264, "eval_runtime": 0.7442, "eval_samples_per_second": 5.375, "eval_steps_per_second": 1.344, "step": 2268 }, { "epoch": 0.91, "learning_rate": 2.7359999999999998e-08, "loss": 1.2373, "step": 2272 }, { "epoch": 0.91, "eval_loss": 1.1687620878219604, "eval_runtime": 0.7861, "eval_samples_per_second": 5.089, "eval_steps_per_second": 1.272, "step": 2272 }, { "epoch": 0.91, "learning_rate": 2.688e-08, "loss": 1.2797, "step": 2276 }, { "epoch": 0.91, "eval_loss": 1.1686581373214722, "eval_runtime": 0.8296, "eval_samples_per_second": 4.822, "eval_steps_per_second": 1.205, "step": 2276 }, { "epoch": 0.91, "learning_rate": 2.6399999999999998e-08, "loss": 1.2646, "step": 2280 }, { "epoch": 0.91, "eval_loss": 1.1686054468154907, "eval_runtime": 0.5581, "eval_samples_per_second": 7.167, "eval_steps_per_second": 1.792, "step": 2280 }, { "epoch": 0.91, "learning_rate": 2.592e-08, "loss": 1.2502, "step": 2284 }, { "epoch": 0.91, "eval_loss": 1.16860032081604, "eval_runtime": 0.5609, "eval_samples_per_second": 7.131, "eval_steps_per_second": 1.783, "step": 2284 }, { "epoch": 0.92, "learning_rate": 2.5439999999999998e-08, "loss": 1.2317, "step": 2288 }, { "epoch": 0.92, "eval_loss": 1.1684176921844482, "eval_runtime": 0.5432, "eval_samples_per_second": 7.363, "eval_steps_per_second": 1.841, "step": 2288 }, { "epoch": 0.92, "learning_rate": 2.4959999999999997e-08, "loss": 1.2443, "step": 2292 }, { "epoch": 0.92, "eval_loss": 1.168290138244629, "eval_runtime": 0.5516, "eval_samples_per_second": 7.251, "eval_steps_per_second": 1.813, "step": 2292 }, { "epoch": 0.92, "learning_rate": 2.448e-08, "loss": 1.2563, "step": 2296 }, { "epoch": 0.92, "eval_loss": 1.1683218479156494, "eval_runtime": 0.5823, "eval_samples_per_second": 6.87, "eval_steps_per_second": 1.717, "step": 2296 }, { "epoch": 0.92, "learning_rate": 2.4e-08, "loss": 1.2485, "step": 2300 }, { "epoch": 0.92, "eval_loss": 1.168270230293274, "eval_runtime": 0.8027, "eval_samples_per_second": 4.983, "eval_steps_per_second": 1.246, "step": 2300 }, { "epoch": 0.92, "learning_rate": 2.3519999999999998e-08, "loss": 1.2542, "step": 2304 }, { "epoch": 0.92, "eval_loss": 1.1681807041168213, "eval_runtime": 0.8267, "eval_samples_per_second": 4.839, "eval_steps_per_second": 1.21, "step": 2304 }, { "epoch": 0.92, "learning_rate": 2.3039999999999997e-08, "loss": 1.2381, "step": 2308 }, { "epoch": 0.92, "eval_loss": 1.1679259538650513, "eval_runtime": 0.8423, "eval_samples_per_second": 4.749, "eval_steps_per_second": 1.187, "step": 2308 }, { "epoch": 0.92, "learning_rate": 2.256e-08, "loss": 1.2244, "step": 2312 }, { "epoch": 0.92, "eval_loss": 1.167891502380371, "eval_runtime": 0.5627, "eval_samples_per_second": 7.109, "eval_steps_per_second": 1.777, "step": 2312 }, { "epoch": 0.93, "learning_rate": 2.208e-08, "loss": 1.2725, "step": 2316 }, { "epoch": 0.93, "eval_loss": 1.1678441762924194, "eval_runtime": 0.5993, "eval_samples_per_second": 6.675, "eval_steps_per_second": 1.669, "step": 2316 }, { "epoch": 0.93, "learning_rate": 2.1599999999999998e-08, "loss": 1.2441, "step": 2320 }, { "epoch": 0.93, "eval_loss": 1.1677234172821045, "eval_runtime": 0.671, "eval_samples_per_second": 5.961, "eval_steps_per_second": 1.49, "step": 2320 }, { "epoch": 0.93, "learning_rate": 2.112e-08, "loss": 1.2441, "step": 2324 }, { "epoch": 0.93, "eval_loss": 1.1677547693252563, "eval_runtime": 0.5853, "eval_samples_per_second": 6.834, "eval_steps_per_second": 1.708, "step": 2324 }, { "epoch": 0.93, "learning_rate": 2.0639999999999998e-08, "loss": 1.2546, "step": 2328 }, { "epoch": 0.93, "eval_loss": 1.1675907373428345, "eval_runtime": 1.0373, "eval_samples_per_second": 3.856, "eval_steps_per_second": 0.964, "step": 2328 }, { "epoch": 0.93, "learning_rate": 2.0159999999999997e-08, "loss": 1.2279, "step": 2332 }, { "epoch": 0.93, "eval_loss": 1.1674381494522095, "eval_runtime": 1.2312, "eval_samples_per_second": 3.249, "eval_steps_per_second": 0.812, "step": 2332 }, { "epoch": 0.93, "learning_rate": 1.968e-08, "loss": 1.2635, "step": 2336 }, { "epoch": 0.93, "eval_loss": 1.1675413846969604, "eval_runtime": 1.8946, "eval_samples_per_second": 2.111, "eval_steps_per_second": 0.528, "step": 2336 }, { "epoch": 0.94, "learning_rate": 1.92e-08, "loss": 1.2572, "step": 2340 }, { "epoch": 0.94, "eval_loss": 1.1673345565795898, "eval_runtime": 1.8538, "eval_samples_per_second": 2.158, "eval_steps_per_second": 0.539, "step": 2340 }, { "epoch": 0.94, "learning_rate": 1.8719999999999998e-08, "loss": 1.2421, "step": 2344 }, { "epoch": 0.94, "eval_loss": 1.1672680377960205, "eval_runtime": 0.5759, "eval_samples_per_second": 6.945, "eval_steps_per_second": 1.736, "step": 2344 }, { "epoch": 0.94, "learning_rate": 1.824e-08, "loss": 1.2022, "step": 2348 }, { "epoch": 0.94, "eval_loss": 1.1671053171157837, "eval_runtime": 0.8746, "eval_samples_per_second": 4.574, "eval_steps_per_second": 1.143, "step": 2348 }, { "epoch": 0.94, "learning_rate": 1.776e-08, "loss": 1.2307, "step": 2352 }, { "epoch": 0.94, "eval_loss": 1.167036771774292, "eval_runtime": 0.5771, "eval_samples_per_second": 6.931, "eval_steps_per_second": 1.733, "step": 2352 }, { "epoch": 0.94, "learning_rate": 1.728e-08, "loss": 1.2525, "step": 2356 }, { "epoch": 0.94, "eval_loss": 1.1670310497283936, "eval_runtime": 0.5992, "eval_samples_per_second": 6.675, "eval_steps_per_second": 1.669, "step": 2356 }, { "epoch": 0.94, "learning_rate": 1.6799999999999998e-08, "loss": 1.2353, "step": 2360 }, { "epoch": 0.94, "eval_loss": 1.1670902967453003, "eval_runtime": 1.3469, "eval_samples_per_second": 2.97, "eval_steps_per_second": 0.742, "step": 2360 }, { "epoch": 0.95, "learning_rate": 1.632e-08, "loss": 1.25, "step": 2364 }, { "epoch": 0.95, "eval_loss": 1.1672464609146118, "eval_runtime": 0.8027, "eval_samples_per_second": 4.983, "eval_steps_per_second": 1.246, "step": 2364 }, { "epoch": 0.95, "learning_rate": 1.5839999999999998e-08, "loss": 1.2493, "step": 2368 }, { "epoch": 0.95, "eval_loss": 1.1669551134109497, "eval_runtime": 0.7946, "eval_samples_per_second": 5.034, "eval_steps_per_second": 1.258, "step": 2368 }, { "epoch": 0.95, "learning_rate": 1.536e-08, "loss": 1.2453, "step": 2372 }, { "epoch": 0.95, "eval_loss": 1.167033076286316, "eval_runtime": 0.7944, "eval_samples_per_second": 5.035, "eval_steps_per_second": 1.259, "step": 2372 }, { "epoch": 0.95, "learning_rate": 1.4879999999999998e-08, "loss": 1.2714, "step": 2376 }, { "epoch": 0.95, "eval_loss": 1.1669402122497559, "eval_runtime": 0.6166, "eval_samples_per_second": 6.487, "eval_steps_per_second": 1.622, "step": 2376 }, { "epoch": 0.95, "learning_rate": 1.44e-08, "loss": 1.2435, "step": 2380 }, { "epoch": 0.95, "eval_loss": 1.166806936264038, "eval_runtime": 0.5575, "eval_samples_per_second": 7.175, "eval_steps_per_second": 1.794, "step": 2380 }, { "epoch": 0.95, "learning_rate": 1.3919999999999998e-08, "loss": 1.2518, "step": 2384 }, { "epoch": 0.95, "eval_loss": 1.1668052673339844, "eval_runtime": 0.562, "eval_samples_per_second": 7.117, "eval_steps_per_second": 1.779, "step": 2384 }, { "epoch": 0.96, "learning_rate": 1.344e-08, "loss": 1.2594, "step": 2388 }, { "epoch": 0.96, "eval_loss": 1.166872262954712, "eval_runtime": 0.5759, "eval_samples_per_second": 6.946, "eval_steps_per_second": 1.736, "step": 2388 }, { "epoch": 0.96, "learning_rate": 1.296e-08, "loss": 1.2149, "step": 2392 }, { "epoch": 0.96, "eval_loss": 1.1669771671295166, "eval_runtime": 0.777, "eval_samples_per_second": 5.148, "eval_steps_per_second": 1.287, "step": 2392 }, { "epoch": 0.96, "learning_rate": 1.2479999999999998e-08, "loss": 1.2676, "step": 2396 }, { "epoch": 0.96, "eval_loss": 1.1666895151138306, "eval_runtime": 0.8143, "eval_samples_per_second": 4.912, "eval_steps_per_second": 1.228, "step": 2396 }, { "epoch": 0.96, "learning_rate": 1.2e-08, "loss": 1.2337, "step": 2400 }, { "epoch": 0.96, "eval_loss": 1.1668546199798584, "eval_runtime": 0.8514, "eval_samples_per_second": 4.698, "eval_steps_per_second": 1.175, "step": 2400 }, { "epoch": 0.96, "learning_rate": 1.1519999999999998e-08, "loss": 1.2329, "step": 2404 }, { "epoch": 0.96, "eval_loss": 1.1665568351745605, "eval_runtime": 0.5814, "eval_samples_per_second": 6.88, "eval_steps_per_second": 1.72, "step": 2404 }, { "epoch": 0.96, "learning_rate": 1.104e-08, "loss": 1.269, "step": 2408 }, { "epoch": 0.96, "eval_loss": 1.1666715145111084, "eval_runtime": 0.5762, "eval_samples_per_second": 6.942, "eval_steps_per_second": 1.735, "step": 2408 }, { "epoch": 0.96, "learning_rate": 1.056e-08, "loss": 1.2298, "step": 2412 }, { "epoch": 0.96, "eval_loss": 1.1665081977844238, "eval_runtime": 0.6241, "eval_samples_per_second": 6.409, "eval_steps_per_second": 1.602, "step": 2412 }, { "epoch": 0.97, "learning_rate": 1.0079999999999998e-08, "loss": 1.2481, "step": 2416 }, { "epoch": 0.97, "eval_loss": 1.1666505336761475, "eval_runtime": 0.5768, "eval_samples_per_second": 6.934, "eval_steps_per_second": 1.734, "step": 2416 }, { "epoch": 0.97, "learning_rate": 9.6e-09, "loss": 1.2674, "step": 2420 }, { "epoch": 0.97, "eval_loss": 1.1667877435684204, "eval_runtime": 0.555, "eval_samples_per_second": 7.207, "eval_steps_per_second": 1.802, "step": 2420 }, { "epoch": 0.97, "learning_rate": 9.12e-09, "loss": 1.2482, "step": 2424 }, { "epoch": 0.97, "eval_loss": 1.1666263341903687, "eval_runtime": 0.7907, "eval_samples_per_second": 5.058, "eval_steps_per_second": 1.265, "step": 2424 }, { "epoch": 0.97, "learning_rate": 8.64e-09, "loss": 1.2604, "step": 2428 }, { "epoch": 0.97, "eval_loss": 1.1666624546051025, "eval_runtime": 0.9212, "eval_samples_per_second": 4.342, "eval_steps_per_second": 1.086, "step": 2428 }, { "epoch": 0.97, "learning_rate": 8.16e-09, "loss": 1.2471, "step": 2432 }, { "epoch": 0.97, "eval_loss": 1.1666383743286133, "eval_runtime": 0.9451, "eval_samples_per_second": 4.232, "eval_steps_per_second": 1.058, "step": 2432 }, { "epoch": 0.97, "learning_rate": 7.68e-09, "loss": 1.2069, "step": 2436 }, { "epoch": 0.97, "eval_loss": 1.1664601564407349, "eval_runtime": 0.566, "eval_samples_per_second": 7.067, "eval_steps_per_second": 1.767, "step": 2436 }, { "epoch": 0.98, "learning_rate": 7.2e-09, "loss": 1.2734, "step": 2440 }, { "epoch": 0.98, "eval_loss": 1.1667420864105225, "eval_runtime": 0.5729, "eval_samples_per_second": 6.982, "eval_steps_per_second": 1.746, "step": 2440 }, { "epoch": 0.98, "learning_rate": 6.72e-09, "loss": 1.239, "step": 2444 }, { "epoch": 0.98, "eval_loss": 1.1666040420532227, "eval_runtime": 0.5792, "eval_samples_per_second": 6.906, "eval_steps_per_second": 1.727, "step": 2444 }, { "epoch": 0.98, "learning_rate": 6.239999999999999e-09, "loss": 1.2245, "step": 2448 }, { "epoch": 0.98, "eval_loss": 1.1663960218429565, "eval_runtime": 0.6104, "eval_samples_per_second": 6.553, "eval_steps_per_second": 1.638, "step": 2448 }, { "epoch": 0.98, "learning_rate": 5.759999999999999e-09, "loss": 1.244, "step": 2452 }, { "epoch": 0.98, "eval_loss": 1.1664263010025024, "eval_runtime": 0.5997, "eval_samples_per_second": 6.669, "eval_steps_per_second": 1.667, "step": 2452 }, { "epoch": 0.98, "learning_rate": 5.28e-09, "loss": 1.2458, "step": 2456 }, { "epoch": 0.98, "eval_loss": 1.1665791273117065, "eval_runtime": 0.8211, "eval_samples_per_second": 4.872, "eval_steps_per_second": 1.218, "step": 2456 }, { "epoch": 0.98, "learning_rate": 4.8e-09, "loss": 1.2566, "step": 2460 }, { "epoch": 0.98, "eval_loss": 1.1664774417877197, "eval_runtime": 0.8342, "eval_samples_per_second": 4.795, "eval_steps_per_second": 1.199, "step": 2460 }, { "epoch": 0.99, "learning_rate": 4.32e-09, "loss": 1.2582, "step": 2464 }, { "epoch": 0.99, "eval_loss": 1.1667250394821167, "eval_runtime": 0.8696, "eval_samples_per_second": 4.6, "eval_steps_per_second": 1.15, "step": 2464 }, { "epoch": 0.99, "learning_rate": 3.84e-09, "loss": 1.2312, "step": 2468 }, { "epoch": 0.99, "eval_loss": 1.1667333841323853, "eval_runtime": 0.5961, "eval_samples_per_second": 6.71, "eval_steps_per_second": 1.678, "step": 2468 }, { "epoch": 0.99, "learning_rate": 3.36e-09, "loss": 1.229, "step": 2472 }, { "epoch": 0.99, "eval_loss": 1.1666063070297241, "eval_runtime": 0.5936, "eval_samples_per_second": 6.739, "eval_steps_per_second": 1.685, "step": 2472 }, { "epoch": 0.99, "learning_rate": 2.8799999999999996e-09, "loss": 1.2186, "step": 2476 }, { "epoch": 0.99, "eval_loss": 1.1666913032531738, "eval_runtime": 0.5904, "eval_samples_per_second": 6.775, "eval_steps_per_second": 1.694, "step": 2476 }, { "epoch": 0.99, "learning_rate": 2.4e-09, "loss": 1.2423, "step": 2480 }, { "epoch": 0.99, "eval_loss": 1.1665089130401611, "eval_runtime": 0.5708, "eval_samples_per_second": 7.007, "eval_steps_per_second": 1.752, "step": 2480 }, { "epoch": 0.99, "learning_rate": 1.92e-09, "loss": 1.2465, "step": 2484 }, { "epoch": 0.99, "eval_loss": 1.166528344154358, "eval_runtime": 0.6394, "eval_samples_per_second": 6.256, "eval_steps_per_second": 1.564, "step": 2484 }, { "epoch": 1.0, "learning_rate": 1.4399999999999998e-09, "loss": 1.2332, "step": 2488 }, { "epoch": 1.0, "eval_loss": 1.1663110256195068, "eval_runtime": 0.8509, "eval_samples_per_second": 4.701, "eval_steps_per_second": 1.175, "step": 2488 }, { "epoch": 1.0, "learning_rate": 9.6e-10, "loss": 1.2466, "step": 2492 }, { "epoch": 1.0, "eval_loss": 1.1665472984313965, "eval_runtime": 0.8456, "eval_samples_per_second": 4.73, "eval_steps_per_second": 1.183, "step": 2492 }, { "epoch": 1.0, "learning_rate": 4.8e-10, "loss": 1.2308, "step": 2496 }, { "epoch": 1.0, "eval_loss": 1.1665329933166504, "eval_runtime": 0.8802, "eval_samples_per_second": 4.544, "eval_steps_per_second": 1.136, "step": 2496 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.2435, "step": 2500 }, { "epoch": 1.0, "eval_loss": 1.1666896343231201, "eval_runtime": 0.5837, "eval_samples_per_second": 6.853, "eval_steps_per_second": 1.713, "step": 2500 } ], "logging_steps": 4, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 3.178022043648e+17, "trial_name": null, "trial_params": null }