{ "best_metric": 74.36995381137226, "best_model_checkpoint": "/home/jcanete/ft-data/all_results/tar/albeto_base_6/epochs_2_bs_16_lr_5e-5/checkpoint-8700", "epoch": 2.0, "global_step": 10970, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_exact_match": 40.719016083254495, "eval_f1": 58.77945049839472, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.773473108477667e-05, "loss": 2.5693, "step": 500 }, { "epoch": 0.11, "eval_exact_match": 45.44938505203406, "eval_f1": 63.936960472857905, "step": 600 }, { "epoch": 0.16, "eval_exact_match": 48.42005676442763, "eval_f1": 66.7942849285666, "step": 900 }, { "epoch": 0.18, "learning_rate": 4.545578851412944e-05, "loss": 1.9654, "step": 1000 }, { "epoch": 0.22, "eval_exact_match": 49.725638599810786, "eval_f1": 68.05815355034528, "step": 1200 }, { "epoch": 0.27, "learning_rate": 4.3176845943482223e-05, "loss": 1.8622, "step": 1500 }, { "epoch": 0.27, "eval_exact_match": 50.37842951750237, "eval_f1": 68.12485071659977, "step": 1500 }, { "epoch": 0.33, "eval_exact_match": 51.59886471144749, "eval_f1": 69.3844758904365, "step": 1800 }, { "epoch": 0.36, "learning_rate": 4.0897903372835004e-05, "loss": 1.8064, "step": 2000 }, { "epoch": 0.38, "eval_exact_match": 52.92336802270577, "eval_f1": 71.05206739179368, "step": 2100 }, { "epoch": 0.44, "eval_exact_match": 52.535477767265846, "eval_f1": 70.11029061313148, "step": 2400 }, { "epoch": 0.46, "learning_rate": 3.8618960802187785e-05, "loss": 1.7133, "step": 2500 }, { "epoch": 0.49, "eval_exact_match": 52.904446546830656, "eval_f1": 70.98616483675112, "step": 2700 }, { "epoch": 0.55, "learning_rate": 3.6340018231540566e-05, "loss": 1.7045, "step": 3000 }, { "epoch": 0.55, "eval_exact_match": 54.276253547776726, "eval_f1": 71.66909074175824, "step": 3000 }, { "epoch": 0.6, "eval_exact_match": 54.46546830652791, "eval_f1": 71.54479654097987, "step": 3300 }, { "epoch": 0.64, "learning_rate": 3.406107566089335e-05, "loss": 1.6732, "step": 3500 }, { "epoch": 0.66, "eval_exact_match": 55.0236518448439, "eval_f1": 72.43165471889594, "step": 3600 }, { "epoch": 0.71, "eval_exact_match": 55.21286660359508, "eval_f1": 72.64020525017258, "step": 3900 }, { "epoch": 0.73, "learning_rate": 3.178213309024613e-05, "loss": 1.6218, "step": 4000 }, { "epoch": 0.77, "eval_exact_match": 55.581835383159884, "eval_f1": 73.12194401332194, "step": 4200 }, { "epoch": 0.82, "learning_rate": 2.9503190519598906e-05, "loss": 1.6245, "step": 4500 }, { "epoch": 0.82, "eval_exact_match": 55.65752128666036, "eval_f1": 72.86127958084641, "step": 4500 }, { "epoch": 0.88, "eval_exact_match": 55.96026490066225, "eval_f1": 73.4084850665819, "step": 4800 }, { "epoch": 0.91, "learning_rate": 2.7224247948951686e-05, "loss": 1.6021, "step": 5000 }, { "epoch": 0.93, "eval_exact_match": 56.40491958372753, "eval_f1": 73.57999833438744, "step": 5100 }, { "epoch": 0.98, "eval_exact_match": 56.34815515610217, "eval_f1": 73.92632868373076, "step": 5400 }, { "epoch": 1.0, "learning_rate": 2.4949863263445765e-05, "loss": 1.5458, "step": 5500 }, { "epoch": 1.04, "eval_exact_match": 56.7360454115421, "eval_f1": 73.96946840195473, "step": 5700 }, { "epoch": 1.09, "learning_rate": 2.2670920692798542e-05, "loss": 1.2682, "step": 6000 }, { "epoch": 1.09, "eval_exact_match": 56.71712393566698, "eval_f1": 73.83807242592808, "step": 6000 }, { "epoch": 1.15, "eval_exact_match": 56.61305581835383, "eval_f1": 73.47868870604212, "step": 6300 }, { "epoch": 1.19, "learning_rate": 2.0391978122151323e-05, "loss": 1.2702, "step": 6500 }, { "epoch": 1.2, "eval_exact_match": 56.08325449385052, "eval_f1": 73.87945111610915, "step": 6600 }, { "epoch": 1.26, "eval_exact_match": 56.395458845789975, "eval_f1": 73.26116080395376, "step": 6900 }, { "epoch": 1.28, "learning_rate": 1.8113035551504104e-05, "loss": 1.2553, "step": 7000 }, { "epoch": 1.31, "eval_exact_match": 56.50898770104068, "eval_f1": 73.92339178593369, "step": 7200 }, { "epoch": 1.37, "learning_rate": 1.583409298085688e-05, "loss": 1.2378, "step": 7500 }, { "epoch": 1.37, "eval_exact_match": 56.471144749290445, "eval_f1": 74.01594585064186, "step": 7500 }, { "epoch": 1.42, "eval_exact_match": 56.61305581835383, "eval_f1": 74.11650132906428, "step": 7800 }, { "epoch": 1.46, "learning_rate": 1.3555150410209664e-05, "loss": 1.3004, "step": 8000 }, { "epoch": 1.48, "eval_exact_match": 56.64143803216651, "eval_f1": 74.03120203258294, "step": 8100 }, { "epoch": 1.53, "eval_exact_match": 56.84011352885525, "eval_f1": 74.3235867027388, "step": 8400 }, { "epoch": 1.55, "learning_rate": 1.1276207839562443e-05, "loss": 1.2482, "step": 8500 }, { "epoch": 1.59, "eval_exact_match": 57.28476821192053, "eval_f1": 74.36995381137226, "step": 8700 }, { "epoch": 1.64, "learning_rate": 8.997265268915224e-06, "loss": 1.2677, "step": 9000 }, { "epoch": 1.64, "eval_exact_match": 57.17123935666982, "eval_f1": 74.31360595646503, "step": 9000 }, { "epoch": 1.7, "eval_exact_match": 56.9914853358562, "eval_f1": 74.27345402299513, "step": 9300 }, { "epoch": 1.73, "learning_rate": 6.7183226982680034e-06, "loss": 1.257, "step": 9500 }, { "epoch": 1.75, "eval_exact_match": 56.80227057710501, "eval_f1": 74.05651657911156, "step": 9600 }, { "epoch": 1.8, "eval_exact_match": 57.05771050141911, "eval_f1": 74.23683269095346, "step": 9900 }, { "epoch": 1.82, "learning_rate": 4.448495897903373e-06, "loss": 1.2459, "step": 10000 }, { "epoch": 1.86, "eval_exact_match": 57.237464522232735, "eval_f1": 74.29842936518452, "step": 10200 }, { "epoch": 1.91, "learning_rate": 2.169553327256153e-06, "loss": 1.2465, "step": 10500 }, { "epoch": 1.91, "eval_exact_match": 57.17123935666982, "eval_f1": 74.35402701072998, "step": 10500 }, { "epoch": 1.97, "eval_exact_match": 57.142857142857146, "eval_f1": 74.2724134993522, "step": 10800 }, { "epoch": 2.0, "step": 10970, "total_flos": 2544773915225664.0, "train_loss": 1.524520064486953, "train_runtime": 1743.0774, "train_samples_per_second": 100.681, "train_steps_per_second": 6.293 } ], "max_steps": 10970, "num_train_epochs": 2, "total_flos": 2544773915225664.0, "trial_name": null, "trial_params": null }