{ "best_metric": 0.8005753739930955, "best_model_checkpoint": "results/facebook/wav2vec2-large-960h-lv60-self/42/_retain/checkpoint-30000", "epoch": 75.80543272267846, "eval_steps": 400, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.010739102969046, "grad_norm": 3.2389800548553467, "learning_rate": 6.666666666666667e-05, "loss": 4.0919, "step": 400 }, { "epoch": 1.010739102969046, "eval_accuracy": 0.10586881472957423, "eval_f1_macro": 0.008128718856806105, "eval_loss": 3.68546199798584, "eval_runtime": 133.4265, "eval_samples_per_second": 65.129, "eval_steps_per_second": 2.039, "step": 400 }, { "epoch": 2.021478205938092, "grad_norm": 5.276159286499023, "learning_rate": 0.00013333333333333334, "loss": 2.9391, "step": 800 }, { "epoch": 2.021478205938092, "eval_accuracy": 0.5268124280782509, "eval_f1_macro": 0.2773414885221941, "eval_loss": 1.907711386680603, "eval_runtime": 132.9453, "eval_samples_per_second": 65.365, "eval_steps_per_second": 2.046, "step": 800 }, { "epoch": 3.0322173089071383, "grad_norm": 5.944188117980957, "learning_rate": 0.0002, "loss": 1.583, "step": 1200 }, { "epoch": 3.0322173089071383, "eval_accuracy": 0.6894131185270426, "eval_f1_macro": 0.48707209156248815, "eval_loss": 1.2798452377319336, "eval_runtime": 133.055, "eval_samples_per_second": 65.311, "eval_steps_per_second": 2.044, "step": 1200 }, { "epoch": 4.042956411876184, "grad_norm": 6.609740257263184, "learning_rate": 0.0002666666666666667, "loss": 1.0089, "step": 1600 }, { "epoch": 4.042956411876184, "eval_accuracy": 0.7447640966628308, "eval_f1_macro": 0.5630866427455141, "eval_loss": 1.1743698120117188, "eval_runtime": 132.7655, "eval_samples_per_second": 65.454, "eval_steps_per_second": 2.049, "step": 1600 }, { "epoch": 5.053695514845231, "grad_norm": 9.530195236206055, "learning_rate": 0.0003333333333333333, "loss": 0.7348, "step": 2000 }, { "epoch": 5.053695514845231, "eval_accuracy": 0.7604142692750288, "eval_f1_macro": 0.5961285021365654, "eval_loss": 1.1527246236801147, "eval_runtime": 127.8747, "eval_samples_per_second": 67.957, "eval_steps_per_second": 2.127, "step": 2000 }, { "epoch": 6.0644346178142765, "grad_norm": 6.680343151092529, "learning_rate": 0.0004, "loss": 0.5957, "step": 2400 }, { "epoch": 6.0644346178142765, "eval_accuracy": 0.7676639815880322, "eval_f1_macro": 0.6054951189790404, "eval_loss": 1.198480248451233, "eval_runtime": 128.28, "eval_samples_per_second": 67.742, "eval_steps_per_second": 2.12, "step": 2400 }, { "epoch": 7.075173720783323, "grad_norm": 3.774092435836792, "learning_rate": 0.00046666666666666666, "loss": 0.521, "step": 2800 }, { "epoch": 7.075173720783323, "eval_accuracy": 0.7630609896432681, "eval_f1_macro": 0.5903658522565237, "eval_loss": 1.1921718120574951, "eval_runtime": 128.2033, "eval_samples_per_second": 67.783, "eval_steps_per_second": 2.122, "step": 2800 }, { "epoch": 8.085912823752368, "grad_norm": 3.719675302505493, "learning_rate": 0.0004962962962962963, "loss": 0.4667, "step": 3200 }, { "epoch": 8.085912823752368, "eval_accuracy": 0.7619102416570771, "eval_f1_macro": 0.6061718024259425, "eval_loss": 1.2508888244628906, "eval_runtime": 109.5839, "eval_samples_per_second": 79.3, "eval_steps_per_second": 2.482, "step": 3200 }, { "epoch": 9.096651926721416, "grad_norm": 3.703678607940674, "learning_rate": 0.0004888888888888889, "loss": 0.3861, "step": 3600 }, { "epoch": 9.096651926721416, "eval_accuracy": 0.7640966628308401, "eval_f1_macro": 0.5910106640214171, "eval_loss": 1.2851234674453735, "eval_runtime": 109.2588, "eval_samples_per_second": 79.536, "eval_steps_per_second": 2.49, "step": 3600 }, { "epoch": 10.107391029690461, "grad_norm": 5.4869585037231445, "learning_rate": 0.00048148148148148144, "loss": 0.32, "step": 4000 }, { "epoch": 10.107391029690461, "eval_accuracy": 0.7590333716915996, "eval_f1_macro": 0.5804751345832923, "eval_loss": 1.4432213306427002, "eval_runtime": 109.3455, "eval_samples_per_second": 79.473, "eval_steps_per_second": 2.488, "step": 4000 }, { "epoch": 11.118130132659507, "grad_norm": 2.1531548500061035, "learning_rate": 0.0004740740740740741, "loss": 0.2828, "step": 4400 }, { "epoch": 11.118130132659507, "eval_accuracy": 0.7590333716915996, "eval_f1_macro": 0.6021086310983942, "eval_loss": 1.3173363208770752, "eval_runtime": 109.3574, "eval_samples_per_second": 79.464, "eval_steps_per_second": 2.487, "step": 4400 }, { "epoch": 12.128869235628553, "grad_norm": 2.9061076641082764, "learning_rate": 0.00046666666666666666, "loss": 0.2367, "step": 4800 }, { "epoch": 12.128869235628553, "eval_accuracy": 0.7543153049482163, "eval_f1_macro": 0.6092446104843484, "eval_loss": 1.4384377002716064, "eval_runtime": 109.3136, "eval_samples_per_second": 79.496, "eval_steps_per_second": 2.488, "step": 4800 }, { "epoch": 13.139608338597599, "grad_norm": 2.8866333961486816, "learning_rate": 0.00045925925925925925, "loss": 0.2187, "step": 5200 }, { "epoch": 13.139608338597599, "eval_accuracy": 0.7654775604142693, "eval_f1_macro": 0.5880603922791815, "eval_loss": 1.4380950927734375, "eval_runtime": 109.4554, "eval_samples_per_second": 79.393, "eval_steps_per_second": 2.485, "step": 5200 }, { "epoch": 14.150347441566646, "grad_norm": 1.7574183940887451, "learning_rate": 0.00045185185185185183, "loss": 0.1847, "step": 5600 }, { "epoch": 14.150347441566646, "eval_accuracy": 0.7730724971231301, "eval_f1_macro": 0.5690127519635726, "eval_loss": 1.4231289625167847, "eval_runtime": 109.3887, "eval_samples_per_second": 79.441, "eval_steps_per_second": 2.487, "step": 5600 }, { "epoch": 15.161086544535692, "grad_norm": 1.8373284339904785, "learning_rate": 0.0004444444444444444, "loss": 0.1701, "step": 6000 }, { "epoch": 15.161086544535692, "eval_accuracy": 0.7680092059838896, "eval_f1_macro": 0.5878361109327175, "eval_loss": 1.5120900869369507, "eval_runtime": 109.6944, "eval_samples_per_second": 79.22, "eval_steps_per_second": 2.48, "step": 6000 }, { "epoch": 16.171825647504736, "grad_norm": 2.9617397785186768, "learning_rate": 0.00043703703703703705, "loss": 0.1504, "step": 6400 }, { "epoch": 16.171825647504736, "eval_accuracy": 0.7609896432681242, "eval_f1_macro": 0.6017434401264726, "eval_loss": 1.5701994895935059, "eval_runtime": 108.7867, "eval_samples_per_second": 79.881, "eval_steps_per_second": 2.5, "step": 6400 }, { "epoch": 17.182564750473784, "grad_norm": 1.9067094326019287, "learning_rate": 0.00042962962962962963, "loss": 0.1416, "step": 6800 }, { "epoch": 17.182564750473784, "eval_accuracy": 0.7680092059838896, "eval_f1_macro": 0.5846132297229183, "eval_loss": 1.6262372732162476, "eval_runtime": 109.3355, "eval_samples_per_second": 79.48, "eval_steps_per_second": 2.488, "step": 6800 }, { "epoch": 18.19330385344283, "grad_norm": 1.788485050201416, "learning_rate": 0.0004222222222222222, "loss": 0.1345, "step": 7200 }, { "epoch": 18.19330385344283, "eval_accuracy": 0.7582278481012659, "eval_f1_macro": 0.606730101292868, "eval_loss": 1.6317014694213867, "eval_runtime": 109.1193, "eval_samples_per_second": 79.638, "eval_steps_per_second": 2.493, "step": 7200 }, { "epoch": 19.204042956411875, "grad_norm": 3.0378000736236572, "learning_rate": 0.0004148148148148148, "loss": 0.1226, "step": 7600 }, { "epoch": 19.204042956411875, "eval_accuracy": 0.7739930955120828, "eval_f1_macro": 0.6193094447560485, "eval_loss": 1.486433982849121, "eval_runtime": 109.0558, "eval_samples_per_second": 79.684, "eval_steps_per_second": 2.494, "step": 7600 }, { "epoch": 20.214782059380923, "grad_norm": 3.1991524696350098, "learning_rate": 0.0004074074074074074, "loss": 0.114, "step": 8000 }, { "epoch": 20.214782059380923, "eval_accuracy": 0.774108170310702, "eval_f1_macro": 0.6157091732739274, "eval_loss": 1.5931099653244019, "eval_runtime": 109.0943, "eval_samples_per_second": 79.656, "eval_steps_per_second": 2.493, "step": 8000 }, { "epoch": 21.225521162349967, "grad_norm": 2.1036899089813232, "learning_rate": 0.0004, "loss": 0.1064, "step": 8400 }, { "epoch": 21.225521162349967, "eval_accuracy": 0.7730724971231301, "eval_f1_macro": 0.6020232192562277, "eval_loss": 1.7101207971572876, "eval_runtime": 108.899, "eval_samples_per_second": 79.799, "eval_steps_per_second": 2.498, "step": 8400 }, { "epoch": 22.236260265319014, "grad_norm": 2.786360025405884, "learning_rate": 0.0003925925925925926, "loss": 0.1009, "step": 8800 }, { "epoch": 22.236260265319014, "eval_accuracy": 0.7655926352128883, "eval_f1_macro": 0.5794753743607411, "eval_loss": 1.6664392948150635, "eval_runtime": 109.2502, "eval_samples_per_second": 79.542, "eval_steps_per_second": 2.49, "step": 8800 }, { "epoch": 23.246999368288062, "grad_norm": 1.0751720666885376, "learning_rate": 0.0003851851851851852, "loss": 0.0941, "step": 9200 }, { "epoch": 23.246999368288062, "eval_accuracy": 0.7772151898734178, "eval_f1_macro": 0.5717636011134882, "eval_loss": 1.5253993272781372, "eval_runtime": 109.0143, "eval_samples_per_second": 79.714, "eval_steps_per_second": 2.495, "step": 9200 }, { "epoch": 24.257738471257106, "grad_norm": 1.744019865989685, "learning_rate": 0.00037777777777777777, "loss": 0.0861, "step": 9600 }, { "epoch": 24.257738471257106, "eval_accuracy": 0.777445339470656, "eval_f1_macro": 0.625140306336925, "eval_loss": 1.6324084997177124, "eval_runtime": 108.6336, "eval_samples_per_second": 79.994, "eval_steps_per_second": 2.504, "step": 9600 }, { "epoch": 25.268477574226154, "grad_norm": 1.838752269744873, "learning_rate": 0.00037037037037037035, "loss": 0.0807, "step": 10000 }, { "epoch": 25.268477574226154, "eval_accuracy": 0.7728423475258919, "eval_f1_macro": 0.5870939911644882, "eval_loss": 1.7057673931121826, "eval_runtime": 108.6842, "eval_samples_per_second": 79.956, "eval_steps_per_second": 2.503, "step": 10000 }, { "epoch": 26.279216677195198, "grad_norm": 2.3391871452331543, "learning_rate": 0.000362962962962963, "loss": 0.0739, "step": 10400 }, { "epoch": 26.279216677195198, "eval_accuracy": 0.774108170310702, "eval_f1_macro": 0.6190123341706849, "eval_loss": 1.6950148344039917, "eval_runtime": 108.9167, "eval_samples_per_second": 79.786, "eval_steps_per_second": 2.497, "step": 10400 }, { "epoch": 27.289955780164245, "grad_norm": 1.3197505474090576, "learning_rate": 0.00035555555555555557, "loss": 0.0685, "step": 10800 }, { "epoch": 27.289955780164245, "eval_accuracy": 0.7652474108170311, "eval_f1_macro": 0.5984200620053731, "eval_loss": 1.8148038387298584, "eval_runtime": 108.998, "eval_samples_per_second": 79.726, "eval_steps_per_second": 2.495, "step": 10800 }, { "epoch": 28.300694883133293, "grad_norm": 0.8027063608169556, "learning_rate": 0.00034814814814814816, "loss": 0.0692, "step": 11200 }, { "epoch": 28.300694883133293, "eval_accuracy": 0.776409666283084, "eval_f1_macro": 0.6002766778970904, "eval_loss": 1.6219606399536133, "eval_runtime": 108.9613, "eval_samples_per_second": 79.753, "eval_steps_per_second": 2.496, "step": 11200 }, { "epoch": 29.311433986102337, "grad_norm": 0.8713662028312683, "learning_rate": 0.00034074074074074074, "loss": 0.0662, "step": 11600 }, { "epoch": 29.311433986102337, "eval_accuracy": 0.7794016110471806, "eval_f1_macro": 0.6123819840203646, "eval_loss": 1.6953762769699097, "eval_runtime": 109.1585, "eval_samples_per_second": 79.609, "eval_steps_per_second": 2.492, "step": 11600 }, { "epoch": 30.322173089071384, "grad_norm": 0.9094525575637817, "learning_rate": 0.0003333333333333333, "loss": 0.0639, "step": 12000 }, { "epoch": 30.322173089071384, "eval_accuracy": 0.7785960874568469, "eval_f1_macro": 0.5900178041075752, "eval_loss": 1.7562154531478882, "eval_runtime": 108.917, "eval_samples_per_second": 79.786, "eval_steps_per_second": 2.497, "step": 12000 }, { "epoch": 31.33291219204043, "grad_norm": 2.3824515342712402, "learning_rate": 0.00032592592592592596, "loss": 0.0613, "step": 12400 }, { "epoch": 31.33291219204043, "eval_accuracy": 0.7708860759493671, "eval_f1_macro": 0.5886611331241638, "eval_loss": 1.7263332605361938, "eval_runtime": 109.2037, "eval_samples_per_second": 79.576, "eval_steps_per_second": 2.491, "step": 12400 }, { "epoch": 32.34365129500947, "grad_norm": 1.1265066862106323, "learning_rate": 0.00031851851851851854, "loss": 0.0562, "step": 12800 }, { "epoch": 32.34365129500947, "eval_accuracy": 0.777445339470656, "eval_f1_macro": 0.6069323146272442, "eval_loss": 1.595489263534546, "eval_runtime": 110.1086, "eval_samples_per_second": 78.922, "eval_steps_per_second": 2.47, "step": 12800 }, { "epoch": 33.35439039797852, "grad_norm": 0.765870988368988, "learning_rate": 0.0003111111111111111, "loss": 0.0482, "step": 13200 }, { "epoch": 33.35439039797852, "eval_accuracy": 0.7858457997698504, "eval_f1_macro": 0.6152260699722518, "eval_loss": 1.6528053283691406, "eval_runtime": 109.0363, "eval_samples_per_second": 79.698, "eval_steps_per_second": 2.495, "step": 13200 }, { "epoch": 34.36512950094757, "grad_norm": 2.386359930038452, "learning_rate": 0.0003037037037037037, "loss": 0.0516, "step": 13600 }, { "epoch": 34.36512950094757, "eval_accuracy": 0.7713463751438435, "eval_f1_macro": 0.5894778786253475, "eval_loss": 1.65277099609375, "eval_runtime": 109.1673, "eval_samples_per_second": 79.603, "eval_steps_per_second": 2.492, "step": 13600 }, { "epoch": 35.375868603916615, "grad_norm": 1.8987774848937988, "learning_rate": 0.0002962962962962963, "loss": 0.0447, "step": 14000 }, { "epoch": 35.375868603916615, "eval_accuracy": 0.7799769850402761, "eval_f1_macro": 0.6297477374058172, "eval_loss": 1.813390851020813, "eval_runtime": 109.6977, "eval_samples_per_second": 79.218, "eval_steps_per_second": 2.48, "step": 14000 }, { "epoch": 36.38660770688566, "grad_norm": 1.353411078453064, "learning_rate": 0.0002888888888888889, "loss": 0.047, "step": 14400 }, { "epoch": 36.38660770688566, "eval_accuracy": 0.7795166858457998, "eval_f1_macro": 0.5795862617467612, "eval_loss": 1.663203477859497, "eval_runtime": 109.0323, "eval_samples_per_second": 79.701, "eval_steps_per_second": 2.495, "step": 14400 }, { "epoch": 37.3973468098547, "grad_norm": 1.1114296913146973, "learning_rate": 0.0002814814814814815, "loss": 0.0436, "step": 14800 }, { "epoch": 37.3973468098547, "eval_accuracy": 0.784234752589183, "eval_f1_macro": 0.5995152264247978, "eval_loss": 1.783818006515503, "eval_runtime": 109.4106, "eval_samples_per_second": 79.426, "eval_steps_per_second": 2.486, "step": 14800 }, { "epoch": 38.40808591282375, "grad_norm": 1.3422303199768066, "learning_rate": 0.0002740740740740741, "loss": 0.0422, "step": 15200 }, { "epoch": 38.40808591282375, "eval_accuracy": 0.7838895281933257, "eval_f1_macro": 0.6189287691248615, "eval_loss": 1.7172709703445435, "eval_runtime": 108.6629, "eval_samples_per_second": 79.972, "eval_steps_per_second": 2.503, "step": 15200 }, { "epoch": 39.4188250157928, "grad_norm": 1.8279023170471191, "learning_rate": 0.0002666666666666667, "loss": 0.0377, "step": 15600 }, { "epoch": 39.4188250157928, "eval_accuracy": 0.7834292289988493, "eval_f1_macro": 0.5814739153081228, "eval_loss": 1.7523770332336426, "eval_runtime": 108.9839, "eval_samples_per_second": 79.737, "eval_steps_per_second": 2.496, "step": 15600 }, { "epoch": 40.429564118761846, "grad_norm": 2.154459238052368, "learning_rate": 0.00025925925925925926, "loss": 0.0359, "step": 16000 }, { "epoch": 40.429564118761846, "eval_accuracy": 0.7886075949367088, "eval_f1_macro": 0.6293741181702724, "eval_loss": 1.623598337173462, "eval_runtime": 108.8195, "eval_samples_per_second": 79.857, "eval_steps_per_second": 2.5, "step": 16000 }, { "epoch": 41.44030322173089, "grad_norm": 0.8551483154296875, "learning_rate": 0.00025185185185185185, "loss": 0.0344, "step": 16400 }, { "epoch": 41.44030322173089, "eval_accuracy": 0.7815880322209436, "eval_f1_macro": 0.6087804648227756, "eval_loss": 1.7353272438049316, "eval_runtime": 109.2273, "eval_samples_per_second": 79.559, "eval_steps_per_second": 2.49, "step": 16400 }, { "epoch": 42.451042324699934, "grad_norm": 0.5178919434547424, "learning_rate": 0.00024444444444444443, "loss": 0.033, "step": 16800 }, { "epoch": 42.451042324699934, "eval_accuracy": 0.7820483314154201, "eval_f1_macro": 0.6001569016578011, "eval_loss": 1.727620244026184, "eval_runtime": 109.4385, "eval_samples_per_second": 79.405, "eval_steps_per_second": 2.485, "step": 16800 }, { "epoch": 43.46178142766898, "grad_norm": 0.4940205514431, "learning_rate": 0.00023703703703703704, "loss": 0.0325, "step": 17200 }, { "epoch": 43.46178142766898, "eval_accuracy": 0.7783659378596087, "eval_f1_macro": 0.6283289368126677, "eval_loss": 1.7798371315002441, "eval_runtime": 109.2576, "eval_samples_per_second": 79.537, "eval_steps_per_second": 2.49, "step": 17200 }, { "epoch": 44.47252053063803, "grad_norm": 0.8661497235298157, "learning_rate": 0.00022962962962962962, "loss": 0.0302, "step": 17600 }, { "epoch": 44.47252053063803, "eval_accuracy": 0.7828538550057538, "eval_f1_macro": 0.6164776778280789, "eval_loss": 1.7507109642028809, "eval_runtime": 109.1869, "eval_samples_per_second": 79.588, "eval_steps_per_second": 2.491, "step": 17600 }, { "epoch": 45.48325963360708, "grad_norm": 0.015332411043345928, "learning_rate": 0.0002222222222222222, "loss": 0.0268, "step": 18000 }, { "epoch": 45.48325963360708, "eval_accuracy": 0.7826237054085156, "eval_f1_macro": 0.6031617249417177, "eval_loss": 1.7825220823287964, "eval_runtime": 109.3518, "eval_samples_per_second": 79.468, "eval_steps_per_second": 2.487, "step": 18000 }, { "epoch": 46.493998736576124, "grad_norm": 0.5325392484664917, "learning_rate": 0.00021481481481481482, "loss": 0.0287, "step": 18400 }, { "epoch": 46.493998736576124, "eval_accuracy": 0.7882623705408516, "eval_f1_macro": 0.6256320010133759, "eval_loss": 1.6932624578475952, "eval_runtime": 108.513, "eval_samples_per_second": 80.083, "eval_steps_per_second": 2.507, "step": 18400 }, { "epoch": 47.504737839545164, "grad_norm": 0.5086055994033813, "learning_rate": 0.0002074074074074074, "loss": 0.0252, "step": 18800 }, { "epoch": 47.504737839545164, "eval_accuracy": 0.7856156501726121, "eval_f1_macro": 0.6143416230351354, "eval_loss": 1.7501070499420166, "eval_runtime": 109.2365, "eval_samples_per_second": 79.552, "eval_steps_per_second": 2.49, "step": 18800 }, { "epoch": 48.51547694251421, "grad_norm": 1.229317545890808, "learning_rate": 0.0002, "loss": 0.0283, "step": 19200 }, { "epoch": 48.51547694251421, "eval_accuracy": 0.7843498273878021, "eval_f1_macro": 0.6189575264715401, "eval_loss": 1.9032423496246338, "eval_runtime": 108.2906, "eval_samples_per_second": 80.247, "eval_steps_per_second": 2.512, "step": 19200 }, { "epoch": 49.52621604548326, "grad_norm": 0.05275914818048477, "learning_rate": 0.0001925925925925926, "loss": 0.024, "step": 19600 }, { "epoch": 49.52621604548326, "eval_accuracy": 0.7874568469505179, "eval_f1_macro": 0.6393370936978522, "eval_loss": 1.8691409826278687, "eval_runtime": 108.1545, "eval_samples_per_second": 80.348, "eval_steps_per_second": 2.515, "step": 19600 }, { "epoch": 50.53695514845231, "grad_norm": 0.9653208255767822, "learning_rate": 0.00018518518518518518, "loss": 0.0229, "step": 20000 }, { "epoch": 50.53695514845231, "eval_accuracy": 0.786536248561565, "eval_f1_macro": 0.6026385719720891, "eval_loss": 1.7541390657424927, "eval_runtime": 107.9085, "eval_samples_per_second": 80.531, "eval_steps_per_second": 2.521, "step": 20000 }, { "epoch": 51.547694251421355, "grad_norm": 0.4658529758453369, "learning_rate": 0.00017777777777777779, "loss": 0.0219, "step": 20400 }, { "epoch": 51.547694251421355, "eval_accuracy": 0.7872266973532797, "eval_f1_macro": 0.6309747652348119, "eval_loss": 1.7537351846694946, "eval_runtime": 107.7743, "eval_samples_per_second": 80.632, "eval_steps_per_second": 2.524, "step": 20400 }, { "epoch": 52.558433354390395, "grad_norm": 0.32756420969963074, "learning_rate": 0.00017037037037037037, "loss": 0.0211, "step": 20800 }, { "epoch": 52.558433354390395, "eval_accuracy": 0.7934407364787112, "eval_f1_macro": 0.6206166338546538, "eval_loss": 1.6842619180679321, "eval_runtime": 107.7209, "eval_samples_per_second": 80.671, "eval_steps_per_second": 2.525, "step": 20800 }, { "epoch": 53.56917245735944, "grad_norm": 0.584701418876648, "learning_rate": 0.00016296296296296298, "loss": 0.0203, "step": 21200 }, { "epoch": 53.56917245735944, "eval_accuracy": 0.7950517836593786, "eval_f1_macro": 0.6206542591204762, "eval_loss": 1.699610710144043, "eval_runtime": 107.6954, "eval_samples_per_second": 80.691, "eval_steps_per_second": 2.526, "step": 21200 }, { "epoch": 54.57991156032849, "grad_norm": 0.0553191676735878, "learning_rate": 0.00015555555555555556, "loss": 0.0174, "step": 21600 }, { "epoch": 54.57991156032849, "eval_accuracy": 0.7894131185270425, "eval_f1_macro": 0.6214961351780512, "eval_loss": 1.8445045948028564, "eval_runtime": 107.7853, "eval_samples_per_second": 80.623, "eval_steps_per_second": 2.524, "step": 21600 }, { "epoch": 55.59065066329754, "grad_norm": 0.4328874945640564, "learning_rate": 0.00014814814814814815, "loss": 0.0197, "step": 22000 }, { "epoch": 55.59065066329754, "eval_accuracy": 0.792059838895282, "eval_f1_macro": 0.6308138834712996, "eval_loss": 1.8310879468917847, "eval_runtime": 107.7421, "eval_samples_per_second": 80.656, "eval_steps_per_second": 2.525, "step": 22000 }, { "epoch": 56.601389766266585, "grad_norm": 0.02704198658466339, "learning_rate": 0.00014074074074074076, "loss": 0.0169, "step": 22400 }, { "epoch": 56.601389766266585, "eval_accuracy": 0.7879171461449942, "eval_f1_macro": 0.5896127682611725, "eval_loss": 1.8162003755569458, "eval_runtime": 107.8141, "eval_samples_per_second": 80.602, "eval_steps_per_second": 2.523, "step": 22400 }, { "epoch": 57.612128869235626, "grad_norm": 0.2748865485191345, "learning_rate": 0.00013333333333333334, "loss": 0.0121, "step": 22800 }, { "epoch": 57.612128869235626, "eval_accuracy": 0.7852704257767549, "eval_f1_macro": 0.5951106108532582, "eval_loss": 1.924727201461792, "eval_runtime": 107.712, "eval_samples_per_second": 80.678, "eval_steps_per_second": 2.525, "step": 22800 }, { "epoch": 58.62286797220467, "grad_norm": 0.0328911654651165, "learning_rate": 0.00012592592592592592, "loss": 0.0152, "step": 23200 }, { "epoch": 58.62286797220467, "eval_accuracy": 0.7881472957422324, "eval_f1_macro": 0.6063430405057288, "eval_loss": 1.8502182960510254, "eval_runtime": 107.788, "eval_samples_per_second": 80.621, "eval_steps_per_second": 2.523, "step": 23200 }, { "epoch": 59.63360707517372, "grad_norm": 0.00955616869032383, "learning_rate": 0.00011851851851851852, "loss": 0.0142, "step": 23600 }, { "epoch": 59.63360707517372, "eval_accuracy": 0.789873417721519, "eval_f1_macro": 0.617993825444742, "eval_loss": 1.7803289890289307, "eval_runtime": 107.8043, "eval_samples_per_second": 80.609, "eval_steps_per_second": 2.523, "step": 23600 }, { "epoch": 60.64434617814277, "grad_norm": 0.06125176325440407, "learning_rate": 0.0001111111111111111, "loss": 0.0105, "step": 24000 }, { "epoch": 60.64434617814277, "eval_accuracy": 0.7861910241657077, "eval_f1_macro": 0.6254018987758924, "eval_loss": 1.916595458984375, "eval_runtime": 107.7673, "eval_samples_per_second": 80.637, "eval_steps_per_second": 2.524, "step": 24000 }, { "epoch": 61.655085281111816, "grad_norm": 0.10605888813734055, "learning_rate": 0.0001037037037037037, "loss": 0.0116, "step": 24400 }, { "epoch": 61.655085281111816, "eval_accuracy": 0.7858457997698504, "eval_f1_macro": 0.5961002471321352, "eval_loss": 1.9204109907150269, "eval_runtime": 107.7648, "eval_samples_per_second": 80.639, "eval_steps_per_second": 2.524, "step": 24400 }, { "epoch": 62.66582438408086, "grad_norm": 0.044181693345308304, "learning_rate": 9.62962962962963e-05, "loss": 0.0112, "step": 24800 }, { "epoch": 62.66582438408086, "eval_accuracy": 0.7878020713463751, "eval_f1_macro": 0.6235710102313945, "eval_loss": 1.9822152853012085, "eval_runtime": 107.735, "eval_samples_per_second": 80.661, "eval_steps_per_second": 2.525, "step": 24800 }, { "epoch": 63.676563487049904, "grad_norm": 0.023459970951080322, "learning_rate": 8.888888888888889e-05, "loss": 0.0102, "step": 25200 }, { "epoch": 63.676563487049904, "eval_accuracy": 0.7840046029919447, "eval_f1_macro": 0.6155669395709024, "eval_loss": 1.9653674364089966, "eval_runtime": 107.7821, "eval_samples_per_second": 80.626, "eval_steps_per_second": 2.524, "step": 25200 }, { "epoch": 64.68730259001894, "grad_norm": 1.9076263904571533, "learning_rate": 8.148148148148149e-05, "loss": 0.01, "step": 25600 }, { "epoch": 64.68730259001894, "eval_accuracy": 0.7880322209436134, "eval_f1_macro": 0.6226637633596005, "eval_loss": 1.938231348991394, "eval_runtime": 107.7205, "eval_samples_per_second": 80.672, "eval_steps_per_second": 2.525, "step": 25600 }, { "epoch": 65.698041692988, "grad_norm": 0.4948989748954773, "learning_rate": 7.407407407407407e-05, "loss": 0.0101, "step": 26000 }, { "epoch": 65.698041692988, "eval_accuracy": 0.7960874568469505, "eval_f1_macro": 0.6277935659004009, "eval_loss": 1.8299671411514282, "eval_runtime": 107.7348, "eval_samples_per_second": 80.661, "eval_steps_per_second": 2.525, "step": 26000 }, { "epoch": 66.70878079595704, "grad_norm": 0.00608784519135952, "learning_rate": 6.666666666666667e-05, "loss": 0.0086, "step": 26400 }, { "epoch": 66.70878079595704, "eval_accuracy": 0.7968929804372842, "eval_f1_macro": 0.6234372893298947, "eval_loss": 1.9254202842712402, "eval_runtime": 108.035, "eval_samples_per_second": 80.437, "eval_steps_per_second": 2.518, "step": 26400 }, { "epoch": 67.7195198989261, "grad_norm": 0.08328448981046677, "learning_rate": 5.925925925925926e-05, "loss": 0.0073, "step": 26800 }, { "epoch": 67.7195198989261, "eval_accuracy": 0.7915995397008055, "eval_f1_macro": 0.6320923241131308, "eval_loss": 1.8887046575546265, "eval_runtime": 107.8399, "eval_samples_per_second": 80.582, "eval_steps_per_second": 2.522, "step": 26800 }, { "epoch": 68.73025900189513, "grad_norm": 0.02061997540295124, "learning_rate": 5.185185185185185e-05, "loss": 0.0069, "step": 27200 }, { "epoch": 68.73025900189513, "eval_accuracy": 0.794361334867664, "eval_f1_macro": 0.636665979654867, "eval_loss": 1.9074466228485107, "eval_runtime": 107.6829, "eval_samples_per_second": 80.7, "eval_steps_per_second": 2.526, "step": 27200 }, { "epoch": 69.74099810486418, "grad_norm": 0.012987918220460415, "learning_rate": 4.4444444444444447e-05, "loss": 0.0059, "step": 27600 }, { "epoch": 69.74099810486418, "eval_accuracy": 0.792059838895282, "eval_f1_macro": 0.6315720450251525, "eval_loss": 1.9398057460784912, "eval_runtime": 107.8991, "eval_samples_per_second": 80.538, "eval_steps_per_second": 2.521, "step": 27600 }, { "epoch": 70.75173720783323, "grad_norm": 0.005101632326841354, "learning_rate": 3.7037037037037037e-05, "loss": 0.0066, "step": 28000 }, { "epoch": 70.75173720783323, "eval_accuracy": 0.794361334867664, "eval_f1_macro": 0.6349818220797456, "eval_loss": 1.8699119091033936, "eval_runtime": 109.2809, "eval_samples_per_second": 79.52, "eval_steps_per_second": 2.489, "step": 28000 }, { "epoch": 71.76247631080227, "grad_norm": 0.6047748923301697, "learning_rate": 2.962962962962963e-05, "loss": 0.0062, "step": 28400 }, { "epoch": 71.76247631080227, "eval_accuracy": 0.7951668584579977, "eval_f1_macro": 0.6343250573277666, "eval_loss": 1.8893409967422485, "eval_runtime": 109.2978, "eval_samples_per_second": 79.508, "eval_steps_per_second": 2.489, "step": 28400 }, { "epoch": 72.77321541377133, "grad_norm": 0.012553258799016476, "learning_rate": 2.2222222222222223e-05, "loss": 0.0058, "step": 28800 }, { "epoch": 72.77321541377133, "eval_accuracy": 0.7982738780207135, "eval_f1_macro": 0.6409643965446785, "eval_loss": 1.883091926574707, "eval_runtime": 109.2468, "eval_samples_per_second": 79.545, "eval_steps_per_second": 2.49, "step": 28800 }, { "epoch": 73.78395451674037, "grad_norm": 0.0007793375989422202, "learning_rate": 1.4814814814814815e-05, "loss": 0.0056, "step": 29200 }, { "epoch": 73.78395451674037, "eval_accuracy": 0.7958573072497123, "eval_f1_macro": 0.6356613761441215, "eval_loss": 1.8901586532592773, "eval_runtime": 108.6154, "eval_samples_per_second": 80.007, "eval_steps_per_second": 2.504, "step": 29200 }, { "epoch": 74.7946936197094, "grad_norm": 0.14352725446224213, "learning_rate": 7.4074074074074075e-06, "loss": 0.0053, "step": 29600 }, { "epoch": 74.7946936197094, "eval_accuracy": 0.7991944764096662, "eval_f1_macro": 0.643747242061282, "eval_loss": 1.888542890548706, "eval_runtime": 108.5316, "eval_samples_per_second": 80.069, "eval_steps_per_second": 2.506, "step": 29600 }, { "epoch": 75.80543272267846, "grad_norm": 0.9781034588813782, "learning_rate": 0.0, "loss": 0.0046, "step": 30000 }, { "epoch": 75.80543272267846, "eval_accuracy": 0.8005753739930955, "eval_f1_macro": 0.6435443913467072, "eval_loss": 1.888439655303955, "eval_runtime": 108.5256, "eval_samples_per_second": 80.073, "eval_steps_per_second": 2.506, "step": 30000 } ], "logging_steps": 400, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 76, "save_steps": 1200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.8164789316384843e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }