|
{ |
|
"best_metric": 0.3108769357204437, |
|
"best_model_checkpoint": "./checkpoint-xlm-v-base/checkpoint-15000", |
|
"epoch": 0.6640694173897644, |
|
"eval_steps": 1000, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 46.0612678527832, |
|
"learning_rate": 1.9873510587163855e-05, |
|
"loss": 0.943, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_LOC_f1": 0.6007898672649095, |
|
"eval_ORG_f1": 0.5205864729691723, |
|
"eval_PER_f1": 0.6537866457692849, |
|
"eval_loss": 0.5755352973937988, |
|
"eval_overall_accuracy": 0.813493507822672, |
|
"eval_overall_f1": 0.5959796923883924, |
|
"eval_overall_precision": 0.571592844668358, |
|
"eval_overall_recall": 0.622540194436182, |
|
"eval_runtime": 909.7639, |
|
"eval_samples_per_second": 72.217, |
|
"eval_steps_per_second": 0.282, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.543514251708984, |
|
"learning_rate": 1.974702117432771e-05, |
|
"loss": 0.5825, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_LOC_f1": 0.6761863812260971, |
|
"eval_ORG_f1": 0.573783382357224, |
|
"eval_PER_f1": 0.7231398018028106, |
|
"eval_loss": 0.5157074332237244, |
|
"eval_overall_accuracy": 0.844698117312631, |
|
"eval_overall_f1": 0.664661446599853, |
|
"eval_overall_precision": 0.690302943433134, |
|
"eval_overall_recall": 0.640856646367237, |
|
"eval_runtime": 884.4485, |
|
"eval_samples_per_second": 74.284, |
|
"eval_steps_per_second": 0.291, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 30.680952072143555, |
|
"learning_rate": 1.9620531761491565e-05, |
|
"loss": 0.5153, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_LOC_f1": 0.6448332585613877, |
|
"eval_ORG_f1": 0.5780655943179445, |
|
"eval_PER_f1": 0.749264457627936, |
|
"eval_loss": 0.4841216504573822, |
|
"eval_overall_accuracy": 0.8415159100197845, |
|
"eval_overall_f1": 0.6599932560127353, |
|
"eval_overall_precision": 0.6612013701212998, |
|
"eval_overall_recall": 0.6587895486638383, |
|
"eval_runtime": 887.5751, |
|
"eval_samples_per_second": 74.022, |
|
"eval_steps_per_second": 0.29, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 14.460062026977539, |
|
"learning_rate": 1.949404234865542e-05, |
|
"loss": 0.4744, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_LOC_f1": 0.7135048963789569, |
|
"eval_ORG_f1": 0.6044746860257756, |
|
"eval_PER_f1": 0.7562344421814889, |
|
"eval_loss": 0.4284209907054901, |
|
"eval_overall_accuracy": 0.8650805108611886, |
|
"eval_overall_f1": 0.6945276906141412, |
|
"eval_overall_precision": 0.6951840204528166, |
|
"eval_overall_recall": 0.6938725989010129, |
|
"eval_runtime": 884.4505, |
|
"eval_samples_per_second": 74.283, |
|
"eval_steps_per_second": 0.291, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.3655357360839844, |
|
"learning_rate": 1.9367552935819272e-05, |
|
"loss": 0.4385, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_LOC_f1": 0.7295629535257298, |
|
"eval_ORG_f1": 0.6200475216886777, |
|
"eval_PER_f1": 0.7408250910983861, |
|
"eval_loss": 0.4239448308944702, |
|
"eval_overall_accuracy": 0.8635053351958905, |
|
"eval_overall_f1": 0.7025319684063077, |
|
"eval_overall_precision": 0.7043199823770524, |
|
"eval_overall_recall": 0.7007530096904989, |
|
"eval_runtime": 907.5971, |
|
"eval_samples_per_second": 72.389, |
|
"eval_steps_per_second": 0.283, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 9.867854118347168, |
|
"learning_rate": 1.924106352298313e-05, |
|
"loss": 0.4279, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_LOC_f1": 0.7370099725835874, |
|
"eval_ORG_f1": 0.6403813434199981, |
|
"eval_PER_f1": 0.7776639577500056, |
|
"eval_loss": 0.38233184814453125, |
|
"eval_overall_accuracy": 0.8816618781055326, |
|
"eval_overall_f1": 0.7238370468534203, |
|
"eval_overall_precision": 0.7385234418271267, |
|
"eval_overall_recall": 0.7097233746105797, |
|
"eval_runtime": 948.2573, |
|
"eval_samples_per_second": 69.285, |
|
"eval_steps_per_second": 0.271, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 25.877347946166992, |
|
"learning_rate": 1.9114574110146982e-05, |
|
"loss": 0.4099, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_LOC_f1": 0.7575684397708062, |
|
"eval_ORG_f1": 0.630874803840732, |
|
"eval_PER_f1": 0.7838224767358626, |
|
"eval_loss": 0.38040244579315186, |
|
"eval_overall_accuracy": 0.883136741379065, |
|
"eval_overall_f1": 0.7288917006049582, |
|
"eval_overall_precision": 0.7364569017865703, |
|
"eval_overall_recall": 0.7214803450381201, |
|
"eval_runtime": 907.1265, |
|
"eval_samples_per_second": 72.427, |
|
"eval_steps_per_second": 0.283, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 30.637121200561523, |
|
"learning_rate": 1.8988084697310836e-05, |
|
"loss": 0.3874, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_LOC_f1": 0.7474734456723695, |
|
"eval_ORG_f1": 0.6407748343462335, |
|
"eval_PER_f1": 0.7854664027017585, |
|
"eval_loss": 0.37021398544311523, |
|
"eval_overall_accuracy": 0.8872800498308584, |
|
"eval_overall_f1": 0.7295473133392094, |
|
"eval_overall_precision": 0.74022719948437, |
|
"eval_overall_recall": 0.7191712196878376, |
|
"eval_runtime": 884.9913, |
|
"eval_samples_per_second": 74.238, |
|
"eval_steps_per_second": 0.29, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 20.109619140625, |
|
"learning_rate": 1.8861595284474693e-05, |
|
"loss": 0.3841, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_LOC_f1": 0.7684967782745274, |
|
"eval_ORG_f1": 0.655326947582435, |
|
"eval_PER_f1": 0.789217873159736, |
|
"eval_loss": 0.3808096945285797, |
|
"eval_overall_accuracy": 0.8879015799879489, |
|
"eval_overall_f1": 0.743771496693436, |
|
"eval_overall_precision": 0.76602787456446, |
|
"eval_overall_recall": 0.7227718897255663, |
|
"eval_runtime": 885.0879, |
|
"eval_samples_per_second": 74.23, |
|
"eval_steps_per_second": 0.29, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 10.265982627868652, |
|
"learning_rate": 1.8735105871638546e-05, |
|
"loss": 0.3764, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_LOC_f1": 0.7831821749367751, |
|
"eval_ORG_f1": 0.6622161847467495, |
|
"eval_PER_f1": 0.7948864849077164, |
|
"eval_loss": 0.34247785806655884, |
|
"eval_overall_accuracy": 0.8911041208737209, |
|
"eval_overall_f1": 0.7509862429761675, |
|
"eval_overall_precision": 0.7436605881991772, |
|
"eval_overall_recall": 0.7584576608168825, |
|
"eval_runtime": 885.3889, |
|
"eval_samples_per_second": 74.205, |
|
"eval_steps_per_second": 0.29, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.401586055755615, |
|
"learning_rate": 1.86086164588024e-05, |
|
"loss": 0.3564, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_LOC_f1": 0.789610444706057, |
|
"eval_ORG_f1": 0.6701892389389907, |
|
"eval_PER_f1": 0.8054954166474735, |
|
"eval_loss": 0.35062676668167114, |
|
"eval_overall_accuracy": 0.8928101093201735, |
|
"eval_overall_f1": 0.7598466310260445, |
|
"eval_overall_precision": 0.7565214692509428, |
|
"eval_overall_recall": 0.7632011522144121, |
|
"eval_runtime": 885.0018, |
|
"eval_samples_per_second": 74.237, |
|
"eval_steps_per_second": 0.29, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.0011909008026123, |
|
"learning_rate": 1.8482127045966253e-05, |
|
"loss": 0.3484, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_LOC_f1": 0.7712024123633622, |
|
"eval_ORG_f1": 0.6809512535185331, |
|
"eval_PER_f1": 0.8100081183474653, |
|
"eval_loss": 0.37064051628112793, |
|
"eval_overall_accuracy": 0.8851477337194005, |
|
"eval_overall_f1": 0.758193057536852, |
|
"eval_overall_precision": 0.7375241450255696, |
|
"eval_overall_recall": 0.7800538534996947, |
|
"eval_runtime": 950.4008, |
|
"eval_samples_per_second": 69.129, |
|
"eval_steps_per_second": 0.27, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 13.891754150390625, |
|
"learning_rate": 1.8355637633130106e-05, |
|
"loss": 0.3563, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_LOC_f1": 0.7934942596408595, |
|
"eval_ORG_f1": 0.6686377545091862, |
|
"eval_PER_f1": 0.8152987398240509, |
|
"eval_loss": 0.3389296531677246, |
|
"eval_overall_accuracy": 0.8935766857734662, |
|
"eval_overall_f1": 0.7638085016673694, |
|
"eval_overall_precision": 0.7483700877298401, |
|
"eval_overall_recall": 0.7798973026284891, |
|
"eval_runtime": 970.0425, |
|
"eval_samples_per_second": 67.729, |
|
"eval_steps_per_second": 0.265, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 19.86951446533203, |
|
"learning_rate": 1.8229148220293963e-05, |
|
"loss": 0.3396, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_LOC_f1": 0.7943446440452429, |
|
"eval_ORG_f1": 0.6860717813631874, |
|
"eval_PER_f1": 0.808841180333809, |
|
"eval_loss": 0.33801111578941345, |
|
"eval_overall_accuracy": 0.8965501348456104, |
|
"eval_overall_f1": 0.767065352823492, |
|
"eval_overall_precision": 0.770362767931157, |
|
"eval_overall_recall": 0.7637960455249934, |
|
"eval_runtime": 892.9487, |
|
"eval_samples_per_second": 73.576, |
|
"eval_steps_per_second": 0.288, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 6.008892059326172, |
|
"learning_rate": 1.8102658807457817e-05, |
|
"loss": 0.3513, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_LOC_f1": 0.796750172086523, |
|
"eval_ORG_f1": 0.6869723599718148, |
|
"eval_PER_f1": 0.8222321051851345, |
|
"eval_loss": 0.3108769357204437, |
|
"eval_overall_accuracy": 0.900560597156957, |
|
"eval_overall_f1": 0.773478314631055, |
|
"eval_overall_precision": 0.7637860773210824, |
|
"eval_overall_recall": 0.7834196972306151, |
|
"eval_runtime": 885.3039, |
|
"eval_samples_per_second": 74.212, |
|
"eval_steps_per_second": 0.29, |
|
"step": 15000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 158116, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 500, |
|
"total_flos": 4767734068578096.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|