{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.2084432717678, "eval_steps": 1000, "global_step": 9400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.06, "learning_rate": 4.946808510638298e-05, "loss": 0.0232, "step": 100 }, { "epoch": 2.11, "learning_rate": 4.893617021276596e-05, "loss": 0.018, "step": 200 }, { "epoch": 3.17, "learning_rate": 4.840425531914894e-05, "loss": 0.0271, "step": 300 }, { "epoch": 4.22, "learning_rate": 4.787234042553192e-05, "loss": 0.0241, "step": 400 }, { "epoch": 5.28, "learning_rate": 4.734042553191489e-05, "loss": 0.025, "step": 500 }, { "epoch": 6.33, "learning_rate": 4.680851063829788e-05, "loss": 0.0183, "step": 600 }, { "epoch": 7.39, "learning_rate": 4.627659574468085e-05, "loss": 0.0216, "step": 700 }, { "epoch": 8.44, "learning_rate": 4.575e-05, "loss": 0.0239, "step": 800 }, { "epoch": 9.5, "learning_rate": 4.521808510638298e-05, "loss": 0.025, "step": 900 }, { "epoch": 10.55, "learning_rate": 4.468617021276596e-05, "loss": 0.0213, "step": 1000 }, { "epoch": 10.55, "eval_accuracy": 0.9459571242332458, "eval_loss": 0.2103370875120163, "eval_runtime": 49.3555, "eval_samples_per_second": 49.113, "eval_steps_per_second": 6.139, "step": 1000 }, { "epoch": 11.61, "learning_rate": 4.415425531914894e-05, "loss": 0.0226, "step": 1100 }, { "epoch": 12.66, "learning_rate": 4.362234042553192e-05, "loss": 0.0163, "step": 1200 }, { "epoch": 13.72, "learning_rate": 4.30904255319149e-05, "loss": 0.0196, "step": 1300 }, { "epoch": 14.78, "learning_rate": 4.2558510638297876e-05, "loss": 0.0184, "step": 1400 }, { "epoch": 15.83, "learning_rate": 4.2026595744680855e-05, "loss": 0.0209, "step": 1500 }, { "epoch": 16.89, "learning_rate": 4.1494680851063834e-05, "loss": 0.0186, "step": 1600 }, { "epoch": 17.94, "learning_rate": 4.096276595744681e-05, "loss": 0.0193, "step": 1700 }, { "epoch": 19.0, "learning_rate": 4.0430851063829786e-05, "loss": 0.0198, "step": 1800 }, { "epoch": 20.05, "learning_rate": 3.989893617021277e-05, "loss": 0.0206, "step": 1900 }, { "epoch": 21.11, "learning_rate": 3.9367021276595744e-05, "loss": 0.0192, "step": 2000 }, { "epoch": 21.11, "eval_accuracy": 0.948019802570343, "eval_loss": 0.19352349638938904, "eval_runtime": 49.5323, "eval_samples_per_second": 48.938, "eval_steps_per_second": 6.117, "step": 2000 }, { "epoch": 22.16, "learning_rate": 3.8835106382978724e-05, "loss": 0.015, "step": 2100 }, { "epoch": 23.22, "learning_rate": 3.83031914893617e-05, "loss": 0.0197, "step": 2200 }, { "epoch": 24.27, "learning_rate": 3.777127659574468e-05, "loss": 0.0159, "step": 2300 }, { "epoch": 25.33, "learning_rate": 3.723936170212766e-05, "loss": 0.0219, "step": 2400 }, { "epoch": 26.39, "learning_rate": 3.670744680851064e-05, "loss": 0.0145, "step": 2500 }, { "epoch": 27.44, "learning_rate": 3.617553191489362e-05, "loss": 0.0153, "step": 2600 }, { "epoch": 28.5, "learning_rate": 3.56436170212766e-05, "loss": 0.0214, "step": 2700 }, { "epoch": 29.55, "learning_rate": 3.511170212765957e-05, "loss": 0.0196, "step": 2800 }, { "epoch": 30.61, "learning_rate": 3.457978723404256e-05, "loss": 0.0147, "step": 2900 }, { "epoch": 31.66, "learning_rate": 3.4047872340425536e-05, "loss": 0.0196, "step": 3000 }, { "epoch": 31.66, "eval_accuracy": 0.9278053045272827, "eval_loss": 0.2776908278465271, "eval_runtime": 49.5681, "eval_samples_per_second": 48.902, "eval_steps_per_second": 6.113, "step": 3000 }, { "epoch": 32.72, "learning_rate": 3.351595744680851e-05, "loss": 0.0196, "step": 3100 }, { "epoch": 33.77, "learning_rate": 3.298404255319149e-05, "loss": 0.0134, "step": 3200 }, { "epoch": 34.83, "learning_rate": 3.2452127659574474e-05, "loss": 0.015, "step": 3300 }, { "epoch": 35.88, "learning_rate": 3.1920212765957446e-05, "loss": 0.0203, "step": 3400 }, { "epoch": 36.94, "learning_rate": 3.1388297872340426e-05, "loss": 0.0155, "step": 3500 }, { "epoch": 37.99, "learning_rate": 3.0861702127659576e-05, "loss": 0.0123, "step": 3600 }, { "epoch": 39.05, "learning_rate": 3.0329787234042556e-05, "loss": 0.0166, "step": 3700 }, { "epoch": 40.11, "learning_rate": 2.979787234042553e-05, "loss": 0.013, "step": 3800 }, { "epoch": 41.16, "learning_rate": 2.926595744680851e-05, "loss": 0.0176, "step": 3900 }, { "epoch": 42.22, "learning_rate": 2.8734042553191493e-05, "loss": 0.014, "step": 4000 }, { "epoch": 42.22, "eval_accuracy": 0.948019802570343, "eval_loss": 0.19270524382591248, "eval_runtime": 49.5193, "eval_samples_per_second": 48.951, "eval_steps_per_second": 6.119, "step": 4000 }, { "epoch": 43.27, "learning_rate": 2.820212765957447e-05, "loss": 0.0154, "step": 4100 }, { "epoch": 44.33, "learning_rate": 2.7670212765957448e-05, "loss": 0.0129, "step": 4200 }, { "epoch": 45.38, "learning_rate": 2.713829787234043e-05, "loss": 0.0121, "step": 4300 }, { "epoch": 46.44, "learning_rate": 2.6606382978723403e-05, "loss": 0.0151, "step": 4400 }, { "epoch": 47.49, "learning_rate": 2.607978723404255e-05, "loss": 0.0169, "step": 4500 }, { "epoch": 48.55, "learning_rate": 2.5547872340425533e-05, "loss": 0.0084, "step": 4600 }, { "epoch": 49.6, "learning_rate": 2.5015957446808512e-05, "loss": 0.0147, "step": 4700 }, { "epoch": 50.66, "learning_rate": 2.448404255319149e-05, "loss": 0.0108, "step": 4800 }, { "epoch": 51.72, "learning_rate": 2.395212765957447e-05, "loss": 0.0113, "step": 4900 }, { "epoch": 52.77, "learning_rate": 2.3420212765957447e-05, "loss": 0.0141, "step": 5000 }, { "epoch": 52.77, "eval_accuracy": 0.9438943862915039, "eval_loss": 0.2183828353881836, "eval_runtime": 50.0576, "eval_samples_per_second": 48.424, "eval_steps_per_second": 6.053, "step": 5000 }, { "epoch": 53.83, "learning_rate": 2.288829787234043e-05, "loss": 0.011, "step": 5100 }, { "epoch": 54.88, "learning_rate": 2.2356382978723405e-05, "loss": 0.014, "step": 5200 }, { "epoch": 55.94, "learning_rate": 2.1824468085106384e-05, "loss": 0.0101, "step": 5300 }, { "epoch": 56.99, "learning_rate": 2.1292553191489363e-05, "loss": 0.0148, "step": 5400 }, { "epoch": 58.05, "learning_rate": 2.0760638297872343e-05, "loss": 0.0105, "step": 5500 }, { "epoch": 59.1, "learning_rate": 2.0228723404255322e-05, "loss": 0.0113, "step": 5600 }, { "epoch": 60.16, "learning_rate": 1.9696808510638298e-05, "loss": 0.0089, "step": 5700 }, { "epoch": 61.21, "learning_rate": 1.9164893617021277e-05, "loss": 0.0111, "step": 5800 }, { "epoch": 62.27, "learning_rate": 1.8632978723404256e-05, "loss": 0.0118, "step": 5900 }, { "epoch": 63.32, "learning_rate": 1.8101063829787235e-05, "loss": 0.0106, "step": 6000 }, { "epoch": 63.32, "eval_accuracy": 0.9348185062408447, "eval_loss": 0.24008634686470032, "eval_runtime": 49.5139, "eval_samples_per_second": 48.956, "eval_steps_per_second": 6.119, "step": 6000 }, { "epoch": 64.38, "learning_rate": 1.7569148936170214e-05, "loss": 0.0104, "step": 6100 }, { "epoch": 65.44, "learning_rate": 1.7037234042553194e-05, "loss": 0.0099, "step": 6200 }, { "epoch": 66.49, "learning_rate": 1.6505319148936173e-05, "loss": 0.0105, "step": 6300 }, { "epoch": 67.55, "learning_rate": 1.597340425531915e-05, "loss": 0.0095, "step": 6400 }, { "epoch": 68.6, "learning_rate": 1.5441489361702128e-05, "loss": 0.0088, "step": 6500 }, { "epoch": 69.66, "learning_rate": 1.4909574468085109e-05, "loss": 0.0106, "step": 6600 }, { "epoch": 70.71, "learning_rate": 1.4377659574468086e-05, "loss": 0.0081, "step": 6700 }, { "epoch": 71.77, "learning_rate": 1.3845744680851064e-05, "loss": 0.0096, "step": 6800 }, { "epoch": 72.82, "learning_rate": 1.3313829787234045e-05, "loss": 0.0074, "step": 6900 }, { "epoch": 73.88, "learning_rate": 1.2781914893617022e-05, "loss": 0.0112, "step": 7000 }, { "epoch": 73.88, "eval_accuracy": 0.9492574334144592, "eval_loss": 0.22060245275497437, "eval_runtime": 49.9232, "eval_samples_per_second": 48.555, "eval_steps_per_second": 6.069, "step": 7000 }, { "epoch": 74.93, "learning_rate": 1.225e-05, "loss": 0.0097, "step": 7100 }, { "epoch": 75.99, "learning_rate": 1.171808510638298e-05, "loss": 0.0115, "step": 7200 }, { "epoch": 77.04, "learning_rate": 1.1186170212765958e-05, "loss": 0.0103, "step": 7300 }, { "epoch": 78.1, "learning_rate": 1.0654255319148937e-05, "loss": 0.0091, "step": 7400 }, { "epoch": 79.16, "learning_rate": 1.0122340425531915e-05, "loss": 0.0085, "step": 7500 }, { "epoch": 80.21, "learning_rate": 9.590425531914894e-06, "loss": 0.0074, "step": 7600 }, { "epoch": 81.27, "learning_rate": 9.058510638297871e-06, "loss": 0.0102, "step": 7700 }, { "epoch": 82.32, "learning_rate": 8.52659574468085e-06, "loss": 0.0072, "step": 7800 }, { "epoch": 83.38, "learning_rate": 7.994680851063832e-06, "loss": 0.0076, "step": 7900 }, { "epoch": 84.43, "learning_rate": 7.462765957446809e-06, "loss": 0.0085, "step": 8000 }, { "epoch": 84.43, "eval_accuracy": 0.9525577425956726, "eval_loss": 0.19072547554969788, "eval_runtime": 50.1001, "eval_samples_per_second": 48.383, "eval_steps_per_second": 6.048, "step": 8000 }, { "epoch": 85.49, "learning_rate": 6.930851063829788e-06, "loss": 0.0082, "step": 8100 }, { "epoch": 86.54, "learning_rate": 6.398936170212766e-06, "loss": 0.0068, "step": 8200 }, { "epoch": 87.6, "learning_rate": 5.867021276595745e-06, "loss": 0.0093, "step": 8300 }, { "epoch": 88.65, "learning_rate": 5.335106382978724e-06, "loss": 0.0112, "step": 8400 }, { "epoch": 89.71, "learning_rate": 4.8031914893617025e-06, "loss": 0.0066, "step": 8500 }, { "epoch": 90.77, "learning_rate": 4.271276595744681e-06, "loss": 0.007, "step": 8600 }, { "epoch": 91.82, "learning_rate": 3.7393617021276596e-06, "loss": 0.0079, "step": 8700 }, { "epoch": 92.88, "learning_rate": 3.2074468085106384e-06, "loss": 0.0075, "step": 8800 }, { "epoch": 93.93, "learning_rate": 2.6808510638297874e-06, "loss": 0.008, "step": 8900 }, { "epoch": 94.99, "learning_rate": 2.148936170212766e-06, "loss": 0.0079, "step": 9000 }, { "epoch": 94.99, "eval_accuracy": 0.9484323263168335, "eval_loss": 0.20523911714553833, "eval_runtime": 49.881, "eval_samples_per_second": 48.596, "eval_steps_per_second": 6.074, "step": 9000 }, { "epoch": 96.04, "learning_rate": 1.6170212765957448e-06, "loss": 0.0061, "step": 9100 }, { "epoch": 97.1, "learning_rate": 1.0851063829787236e-06, "loss": 0.0088, "step": 9200 }, { "epoch": 98.15, "learning_rate": 5.531914893617021e-07, "loss": 0.0077, "step": 9300 }, { "epoch": 99.21, "learning_rate": 2.1276595744680853e-08, "loss": 0.0077, "step": 9400 }, { "epoch": 99.21, "step": 9400, "total_flos": 3.653391792237703e+19, "train_loss": 0.013827496963612577, "train_runtime": 35671.7984, "train_samples_per_second": 33.976, "train_steps_per_second": 0.264 }, { "epoch": 99.21, "eval_accuracy": 0.9484323263168335, "eval_loss": 0.2045244723558426, "eval_runtime": 49.9991, "eval_samples_per_second": 48.481, "eval_steps_per_second": 6.06, "step": 9400 } ], "logging_steps": 100, "max_steps": 9400, "num_train_epochs": 100, "save_steps": 2000, "total_flos": 3.653391792237703e+19, "trial_name": null, "trial_params": null }