{ "best_metric": 0.628333568572998, "best_model_checkpoint": "bert_uncased_L-4_H-256_A-4_stsb/checkpoint-253", "epoch": 16.0, "eval_steps": 500, "global_step": 368, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 23.512914657592773, "learning_rate": 4.9e-05, "loss": 5.5773, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.35940682306384497, "eval_loss": 2.741238594055176, "eval_pearson": 0.3844718888057109, "eval_runtime": 0.5362, "eval_samples_per_second": 2797.44, "eval_spearmanr": 0.334341757321979, "eval_steps_per_second": 11.19, "step": 23 }, { "epoch": 2.0, "grad_norm": 15.94455623626709, "learning_rate": 4.8e-05, "loss": 2.5793, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.7641849595072145, "eval_loss": 1.9157546758651733, "eval_pearson": 0.7726894274450853, "eval_runtime": 0.5382, "eval_samples_per_second": 2787.176, "eval_spearmanr": 0.7556804915693437, "eval_steps_per_second": 11.149, "step": 46 }, { "epoch": 3.0, "grad_norm": 10.711121559143066, "learning_rate": 4.7e-05, "loss": 1.5767, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.7589673053156418, "eval_loss": 0.9540784358978271, "eval_pearson": 0.7705953786726122, "eval_runtime": 0.537, "eval_samples_per_second": 2793.104, "eval_spearmanr": 0.7473392319586716, "eval_steps_per_second": 11.172, "step": 69 }, { "epoch": 4.0, "grad_norm": 7.894664764404297, "learning_rate": 4.600000000000001e-05, "loss": 0.9474, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.8101368624163152, "eval_loss": 0.7628255486488342, "eval_pearson": 0.8132575634904194, "eval_runtime": 0.538, "eval_samples_per_second": 2787.885, "eval_spearmanr": 0.8070161613422111, "eval_steps_per_second": 11.152, "step": 92 }, { "epoch": 5.0, "grad_norm": 7.180455684661865, "learning_rate": 4.5e-05, "loss": 0.7258, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.8405981737206667, "eval_loss": 0.6784510016441345, "eval_pearson": 0.8382918173715312, "eval_runtime": 0.5415, "eval_samples_per_second": 2770.208, "eval_spearmanr": 0.8429045300698023, "eval_steps_per_second": 11.081, "step": 115 }, { "epoch": 6.0, "grad_norm": 8.510337829589844, "learning_rate": 4.4000000000000006e-05, "loss": 0.6162, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.8437314098589771, "eval_loss": 0.6755565404891968, "eval_pearson": 0.8435713950894774, "eval_runtime": 0.5333, "eval_samples_per_second": 2812.417, "eval_spearmanr": 0.8438914246284768, "eval_steps_per_second": 11.25, "step": 138 }, { "epoch": 7.0, "grad_norm": 4.887516498565674, "learning_rate": 4.3e-05, "loss": 0.5455, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.849162211616322, "eval_loss": 0.6391196250915527, "eval_pearson": 0.8479729963672471, "eval_runtime": 0.5344, "eval_samples_per_second": 2806.816, "eval_spearmanr": 0.850351426865397, "eval_steps_per_second": 11.227, "step": 161 }, { "epoch": 8.0, "grad_norm": 8.618912696838379, "learning_rate": 4.2e-05, "loss": 0.4912, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.8466254837460676, "eval_loss": 0.6581634879112244, "eval_pearson": 0.8460862349631103, "eval_runtime": 0.539, "eval_samples_per_second": 2783.12, "eval_spearmanr": 0.8471647325290248, "eval_steps_per_second": 11.132, "step": 184 }, { "epoch": 9.0, "grad_norm": 6.421316146850586, "learning_rate": 4.1e-05, "loss": 0.4443, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.8476912767928249, "eval_loss": 0.6560913324356079, "eval_pearson": 0.8471629476366933, "eval_runtime": 0.5436, "eval_samples_per_second": 2759.539, "eval_spearmanr": 0.8482196059489565, "eval_steps_per_second": 11.038, "step": 207 }, { "epoch": 10.0, "grad_norm": 6.827481269836426, "learning_rate": 4e-05, "loss": 0.3995, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.8503318094261108, "eval_loss": 0.6429010629653931, "eval_pearson": 0.850358057284851, "eval_runtime": 0.5319, "eval_samples_per_second": 2820.144, "eval_spearmanr": 0.8503055615673704, "eval_steps_per_second": 11.281, "step": 230 }, { "epoch": 11.0, "grad_norm": 6.088059902191162, "learning_rate": 3.9000000000000006e-05, "loss": 0.3689, "step": 253 }, { "epoch": 11.0, "eval_combined_score": 0.8543458921269349, "eval_loss": 0.628333568572998, "eval_pearson": 0.8545298128890402, "eval_runtime": 0.5445, "eval_samples_per_second": 2754.594, "eval_spearmanr": 0.8541619713648296, "eval_steps_per_second": 11.018, "step": 253 }, { "epoch": 12.0, "grad_norm": 4.50377893447876, "learning_rate": 3.8e-05, "loss": 0.3418, "step": 276 }, { "epoch": 12.0, "eval_combined_score": 0.8520085335126549, "eval_loss": 0.6592078804969788, "eval_pearson": 0.8519775414066034, "eval_runtime": 0.5357, "eval_samples_per_second": 2800.12, "eval_spearmanr": 0.8520395256187064, "eval_steps_per_second": 11.2, "step": 276 }, { "epoch": 13.0, "grad_norm": 4.240983486175537, "learning_rate": 3.7e-05, "loss": 0.3302, "step": 299 }, { "epoch": 13.0, "eval_combined_score": 0.8527115311132238, "eval_loss": 0.650736391544342, "eval_pearson": 0.8524420075355381, "eval_runtime": 0.5456, "eval_samples_per_second": 2749.213, "eval_spearmanr": 0.8529810546909096, "eval_steps_per_second": 10.997, "step": 299 }, { "epoch": 14.0, "grad_norm": 7.435579776763916, "learning_rate": 3.6e-05, "loss": 0.319, "step": 322 }, { "epoch": 14.0, "eval_combined_score": 0.8527060192476493, "eval_loss": 0.648432195186615, "eval_pearson": 0.8527872374234446, "eval_runtime": 0.5301, "eval_samples_per_second": 2829.767, "eval_spearmanr": 0.852624801071854, "eval_steps_per_second": 11.319, "step": 322 }, { "epoch": 15.0, "grad_norm": 5.8321027755737305, "learning_rate": 3.5e-05, "loss": 0.2863, "step": 345 }, { "epoch": 15.0, "eval_combined_score": 0.8526379883202206, "eval_loss": 0.639711320400238, "eval_pearson": 0.8525586066146398, "eval_runtime": 0.5532, "eval_samples_per_second": 2711.331, "eval_spearmanr": 0.8527173700258016, "eval_steps_per_second": 10.845, "step": 345 }, { "epoch": 16.0, "grad_norm": 4.939488887786865, "learning_rate": 3.4000000000000007e-05, "loss": 0.2774, "step": 368 }, { "epoch": 16.0, "eval_combined_score": 0.855685351715838, "eval_loss": 0.6379477977752686, "eval_pearson": 0.8558522598630551, "eval_runtime": 0.5326, "eval_samples_per_second": 2816.114, "eval_spearmanr": 0.855518443568621, "eval_steps_per_second": 11.264, "step": 368 }, { "epoch": 16.0, "step": 368, "total_flos": 455736856141824.0, "train_loss": 0.9891756526801897, "train_runtime": 56.9981, "train_samples_per_second": 5043.151, "train_steps_per_second": 20.176 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 455736856141824.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }