{ "best_metric": 0.8125, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-p3x3m6lq/checkpoint-400", "epoch": 1.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.025, "grad_norm": 2.7488832473754883, "learning_rate": 3.0592064018837074e-05, "loss": 0.6298, "step": 10 }, { "epoch": 0.05, "grad_norm": 4.7462663650512695, "learning_rate": 3.0399661100479608e-05, "loss": 0.4571, "step": 20 }, { "epoch": 0.075, "grad_norm": 2.3427932262420654, "learning_rate": 3.020725818212214e-05, "loss": 0.5418, "step": 30 }, { "epoch": 0.1, "grad_norm": 8.313847541809082, "learning_rate": 3.0014855263764674e-05, "loss": 0.7154, "step": 40 }, { "epoch": 0.125, "grad_norm": 5.623049736022949, "learning_rate": 2.982245234540721e-05, "loss": 0.5042, "step": 50 }, { "epoch": 0.15, "grad_norm": 8.3486328125, "learning_rate": 2.9630049427049746e-05, "loss": 0.4505, "step": 60 }, { "epoch": 0.175, "grad_norm": 8.559453964233398, "learning_rate": 2.943764650869228e-05, "loss": 0.6013, "step": 70 }, { "epoch": 0.2, "grad_norm": 4.411623477935791, "learning_rate": 2.924524359033481e-05, "loss": 0.4038, "step": 80 }, { "epoch": 0.225, "grad_norm": 13.413679122924805, "learning_rate": 2.9052840671977346e-05, "loss": 0.5139, "step": 90 }, { "epoch": 0.25, "grad_norm": 58.22578048706055, "learning_rate": 2.886043775361988e-05, "loss": 0.4629, "step": 100 }, { "epoch": 0.275, "grad_norm": 23.204715728759766, "learning_rate": 2.8668034835262415e-05, "loss": 0.4164, "step": 110 }, { "epoch": 0.3, "grad_norm": 10.875760078430176, "learning_rate": 2.847563191690495e-05, "loss": 0.4867, "step": 120 }, { "epoch": 0.325, "grad_norm": 0.31249693036079407, "learning_rate": 2.828322899854748e-05, "loss": 0.354, "step": 130 }, { "epoch": 0.35, "grad_norm": 2.2056236267089844, "learning_rate": 2.8090826080190015e-05, "loss": 0.3876, "step": 140 }, { "epoch": 0.375, "grad_norm": 25.380603790283203, "learning_rate": 2.789842316183255e-05, "loss": 0.5685, "step": 150 }, { "epoch": 0.4, "grad_norm": 24.43907356262207, "learning_rate": 2.7706020243475087e-05, "loss": 0.4975, "step": 160 }, { "epoch": 0.425, "grad_norm": 10.648700714111328, "learning_rate": 2.7513617325117622e-05, "loss": 0.4734, "step": 170 }, { "epoch": 0.45, "grad_norm": 18.05457878112793, "learning_rate": 2.7321214406760153e-05, "loss": 0.445, "step": 180 }, { "epoch": 0.475, "grad_norm": 0.766493558883667, "learning_rate": 2.7128811488402687e-05, "loss": 0.4758, "step": 190 }, { "epoch": 0.5, "grad_norm": 14.576363563537598, "learning_rate": 2.6936408570045222e-05, "loss": 0.8975, "step": 200 }, { "epoch": 0.525, "grad_norm": 8.905998229980469, "learning_rate": 2.6744005651687756e-05, "loss": 0.3158, "step": 210 }, { "epoch": 0.55, "grad_norm": 0.797244668006897, "learning_rate": 2.655160273333029e-05, "loss": 0.1047, "step": 220 }, { "epoch": 0.575, "grad_norm": 5.661525726318359, "learning_rate": 2.6359199814972822e-05, "loss": 0.4476, "step": 230 }, { "epoch": 0.6, "grad_norm": 20.741662979125977, "learning_rate": 2.6166796896615356e-05, "loss": 0.5761, "step": 240 }, { "epoch": 0.625, "grad_norm": 25.319080352783203, "learning_rate": 2.5974393978257894e-05, "loss": 0.497, "step": 250 }, { "epoch": 0.65, "grad_norm": 19.864778518676758, "learning_rate": 2.578199105990043e-05, "loss": 0.3253, "step": 260 }, { "epoch": 0.675, "grad_norm": 6.358182907104492, "learning_rate": 2.5589588141542963e-05, "loss": 0.3098, "step": 270 }, { "epoch": 0.7, "grad_norm": 60.80244827270508, "learning_rate": 2.5397185223185494e-05, "loss": 0.4264, "step": 280 }, { "epoch": 0.725, "grad_norm": 30.098163604736328, "learning_rate": 2.520478230482803e-05, "loss": 0.6623, "step": 290 }, { "epoch": 0.75, "grad_norm": 6.609621524810791, "learning_rate": 2.5012379386470563e-05, "loss": 0.4582, "step": 300 }, { "epoch": 0.775, "grad_norm": 2.0694580078125, "learning_rate": 2.4819976468113098e-05, "loss": 0.8613, "step": 310 }, { "epoch": 0.8, "grad_norm": 0.5483848452568054, "learning_rate": 2.4627573549755632e-05, "loss": 0.3662, "step": 320 }, { "epoch": 0.825, "grad_norm": 9.217021942138672, "learning_rate": 2.4435170631398163e-05, "loss": 0.4583, "step": 330 }, { "epoch": 0.85, "grad_norm": 1.3683093786239624, "learning_rate": 2.4242767713040698e-05, "loss": 0.4021, "step": 340 }, { "epoch": 0.875, "grad_norm": 6.494662284851074, "learning_rate": 2.4050364794683232e-05, "loss": 0.3119, "step": 350 }, { "epoch": 0.9, "grad_norm": 0.22819127142429352, "learning_rate": 2.385796187632577e-05, "loss": 0.5917, "step": 360 }, { "epoch": 0.925, "grad_norm": 5.446119785308838, "learning_rate": 2.3665558957968304e-05, "loss": 0.1389, "step": 370 }, { "epoch": 0.95, "grad_norm": 0.17951630055904388, "learning_rate": 2.3473156039610836e-05, "loss": 0.4694, "step": 380 }, { "epoch": 0.975, "grad_norm": 4.937567710876465, "learning_rate": 2.328075312125337e-05, "loss": 0.3972, "step": 390 }, { "epoch": 1.0, "grad_norm": 0.4045691192150116, "learning_rate": 2.3088350202895905e-05, "loss": 0.1727, "step": 400 }, { "epoch": 1.0, "eval_accuracy": 0.895, "eval_f1": 0.8125, "eval_loss": 0.4174700975418091, "eval_precision": 0.900990099009901, "eval_recall": 0.7398373983739838, "eval_runtime": 1.5257, "eval_samples_per_second": 262.167, "eval_steps_per_second": 16.385, "step": 400 } ], "logging_steps": 10, "max_steps": 1600, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 211815370450944.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 3.078446693719454e-05, "metric": "eval/loss", "num_train_epochs": 4, "per_device_train_batch_size": 4, "seed": 37 } }