{ "best_metric": 0.3662048876285553, "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-swagen-combined-25hrs-model/checkpoint-4000", "epoch": 2.28504034761018, "eval_steps": 200, "global_step": 4600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012414649286157667, "grad_norm": 128.67588806152344, "learning_rate": 4.0000000000000003e-07, "loss": 10.5484, "step": 25 }, { "epoch": 0.024829298572315334, "grad_norm": 123.8255844116211, "learning_rate": 9.000000000000001e-07, "loss": 8.1464, "step": 50 }, { "epoch": 0.037243947858473, "grad_norm": 79.1299057006836, "learning_rate": 1.4000000000000001e-06, "loss": 5.9439, "step": 75 }, { "epoch": 0.04965859714463067, "grad_norm": 79.47651672363281, "learning_rate": 1.9000000000000002e-06, "loss": 4.1515, "step": 100 }, { "epoch": 0.06207324643078833, "grad_norm": 68.2268295288086, "learning_rate": 2.4000000000000003e-06, "loss": 3.4012, "step": 125 }, { "epoch": 0.074487895716946, "grad_norm": 81.23241424560547, "learning_rate": 2.9e-06, "loss": 3.3427, "step": 150 }, { "epoch": 0.08690254500310367, "grad_norm": 66.99320983886719, "learning_rate": 3.4000000000000005e-06, "loss": 2.968, "step": 175 }, { "epoch": 0.09931719428926133, "grad_norm": 78.05485534667969, "learning_rate": 3.900000000000001e-06, "loss": 2.8233, "step": 200 }, { "epoch": 0.09931719428926133, "eval_loss": 0.804746150970459, "eval_runtime": 563.0664, "eval_samples_per_second": 1.931, "eval_steps_per_second": 0.966, "eval_wer": 0.489650974025974, "step": 200 }, { "epoch": 0.11173184357541899, "grad_norm": 46.1437873840332, "learning_rate": 4.4e-06, "loss": 2.6012, "step": 225 }, { "epoch": 0.12414649286157665, "grad_norm": 99.51728057861328, "learning_rate": 4.9000000000000005e-06, "loss": 2.2989, "step": 250 }, { "epoch": 0.13656114214773432, "grad_norm": 49.41315460205078, "learning_rate": 5.400000000000001e-06, "loss": 2.2207, "step": 275 }, { "epoch": 0.148975791433892, "grad_norm": 53.38062286376953, "learning_rate": 5.9e-06, "loss": 2.202, "step": 300 }, { "epoch": 0.16139044072004965, "grad_norm": 49.83573913574219, "learning_rate": 6.4000000000000006e-06, "loss": 2.2695, "step": 325 }, { "epoch": 0.17380509000620734, "grad_norm": 75.33547973632812, "learning_rate": 6.9e-06, "loss": 1.9705, "step": 350 }, { "epoch": 0.186219739292365, "grad_norm": 55.35056686401367, "learning_rate": 7.4e-06, "loss": 2.0473, "step": 375 }, { "epoch": 0.19863438857852267, "grad_norm": 46.99931335449219, "learning_rate": 7.9e-06, "loss": 1.9329, "step": 400 }, { "epoch": 0.19863438857852267, "eval_loss": 0.6190668940544128, "eval_runtime": 574.5315, "eval_samples_per_second": 1.892, "eval_steps_per_second": 0.947, "eval_wer": 0.401075487012987, "step": 400 }, { "epoch": 0.21104903786468032, "grad_norm": 57.05539321899414, "learning_rate": 8.400000000000001e-06, "loss": 1.8312, "step": 425 }, { "epoch": 0.22346368715083798, "grad_norm": 53.04418182373047, "learning_rate": 8.900000000000001e-06, "loss": 1.8474, "step": 450 }, { "epoch": 0.23587833643699566, "grad_norm": 53.90583801269531, "learning_rate": 9.4e-06, "loss": 1.9193, "step": 475 }, { "epoch": 0.2482929857231533, "grad_norm": 51.17042922973633, "learning_rate": 9.9e-06, "loss": 1.725, "step": 500 }, { "epoch": 0.260707635009311, "grad_norm": 42.38318634033203, "learning_rate": 9.996660544331274e-06, "loss": 1.8561, "step": 525 }, { "epoch": 0.27312228429546864, "grad_norm": 51.599029541015625, "learning_rate": 
9.992486224745367e-06, "loss": 1.9738, "step": 550 }, { "epoch": 0.2855369335816263, "grad_norm": 59.115108489990234, "learning_rate": 9.98831190515946e-06, "loss": 1.6793, "step": 575 }, { "epoch": 0.297951582867784, "grad_norm": 42.64860534667969, "learning_rate": 9.984137585573552e-06, "loss": 1.6927, "step": 600 }, { "epoch": 0.297951582867784, "eval_loss": 0.5420816540718079, "eval_runtime": 589.5719, "eval_samples_per_second": 1.844, "eval_steps_per_second": 0.923, "eval_wer": 0.37905844155844154, "step": 600 }, { "epoch": 0.31036623215394166, "grad_norm": 44.46907043457031, "learning_rate": 9.979963265987644e-06, "loss": 1.6992, "step": 625 }, { "epoch": 0.3227808814400993, "grad_norm": 38.840396881103516, "learning_rate": 9.975788946401737e-06, "loss": 1.5042, "step": 650 }, { "epoch": 0.33519553072625696, "grad_norm": 46.88064956665039, "learning_rate": 9.97161462681583e-06, "loss": 1.7753, "step": 675 }, { "epoch": 0.34761018001241467, "grad_norm": 44.91327667236328, "learning_rate": 9.967440307229922e-06, "loss": 1.7618, "step": 700 }, { "epoch": 0.3600248292985723, "grad_norm": 42.24628448486328, "learning_rate": 9.963265987644016e-06, "loss": 1.6682, "step": 725 }, { "epoch": 0.37243947858473, "grad_norm": 45.74182891845703, "learning_rate": 9.959091668058107e-06, "loss": 1.5824, "step": 750 }, { "epoch": 0.38485412787088763, "grad_norm": 25.388633728027344, "learning_rate": 9.954917348472199e-06, "loss": 1.6692, "step": 775 }, { "epoch": 0.39726877715704534, "grad_norm": 33.251548767089844, "learning_rate": 9.950743028886292e-06, "loss": 1.6183, "step": 800 }, { "epoch": 0.39726877715704534, "eval_loss": 0.48888257145881653, "eval_runtime": 577.1733, "eval_samples_per_second": 1.883, "eval_steps_per_second": 0.943, "eval_wer": 0.3210227272727273, "step": 800 }, { "epoch": 0.409683426443203, "grad_norm": 30.38732147216797, "learning_rate": 9.946568709300385e-06, "loss": 1.4884, "step": 825 }, { "epoch": 0.42209807572936064, "grad_norm": 48.94175338745117, "learning_rate": 9.942394389714477e-06, "loss": 1.4615, "step": 850 }, { "epoch": 0.4345127250155183, "grad_norm": 29.04236602783203, "learning_rate": 9.93822007012857e-06, "loss": 1.5201, "step": 875 }, { "epoch": 0.44692737430167595, "grad_norm": 41.91320037841797, "learning_rate": 9.934045750542662e-06, "loss": 1.5147, "step": 900 }, { "epoch": 0.45934202358783366, "grad_norm": 40.610572814941406, "learning_rate": 9.929871430956755e-06, "loss": 1.4561, "step": 925 }, { "epoch": 0.4717566728739913, "grad_norm": 33.01325988769531, "learning_rate": 9.925697111370847e-06, "loss": 1.3772, "step": 950 }, { "epoch": 0.48417132216014896, "grad_norm": 40.93734359741211, "learning_rate": 9.92152279178494e-06, "loss": 1.549, "step": 975 }, { "epoch": 0.4965859714463066, "grad_norm": 41.81599044799805, "learning_rate": 9.917348472199032e-06, "loss": 1.4431, "step": 1000 }, { "epoch": 0.4965859714463066, "eval_loss": 0.4683995544910431, "eval_runtime": 563.8925, "eval_samples_per_second": 1.928, "eval_steps_per_second": 0.965, "eval_wer": 0.28662743506493504, "step": 1000 }, { "epoch": 0.5090006207324643, "grad_norm": 23.732839584350586, "learning_rate": 9.913174152613125e-06, "loss": 1.2911, "step": 1025 }, { "epoch": 0.521415270018622, "grad_norm": 35.39672088623047, "learning_rate": 9.908999833027217e-06, "loss": 1.2753, "step": 1050 }, { "epoch": 0.5338299193047796, "grad_norm": 20.741168975830078, "learning_rate": 9.90482551344131e-06, "loss": 1.4464, "step": 1075 }, { "epoch": 0.5462445685909373, "grad_norm": 44.05943298339844, 
"learning_rate": 9.900651193855404e-06, "loss": 1.2189, "step": 1100 }, { "epoch": 0.5586592178770949, "grad_norm": 30.3934268951416, "learning_rate": 9.896476874269495e-06, "loss": 1.357, "step": 1125 }, { "epoch": 0.5710738671632526, "grad_norm": 39.36647415161133, "learning_rate": 9.892302554683587e-06, "loss": 1.3864, "step": 1150 }, { "epoch": 0.5834885164494104, "grad_norm": 39.50497055053711, "learning_rate": 9.88812823509768e-06, "loss": 1.5879, "step": 1175 }, { "epoch": 0.595903165735568, "grad_norm": 52.04657745361328, "learning_rate": 9.883953915511772e-06, "loss": 1.4117, "step": 1200 }, { "epoch": 0.595903165735568, "eval_loss": 0.42576098442077637, "eval_runtime": 574.3755, "eval_samples_per_second": 1.892, "eval_steps_per_second": 0.947, "eval_wer": 0.2650162337662338, "step": 1200 }, { "epoch": 0.6083178150217257, "grad_norm": 41.47892761230469, "learning_rate": 9.879779595925865e-06, "loss": 1.2806, "step": 1225 }, { "epoch": 0.6207324643078833, "grad_norm": 35.136695861816406, "learning_rate": 9.875605276339958e-06, "loss": 1.2739, "step": 1250 }, { "epoch": 0.633147113594041, "grad_norm": 29.484039306640625, "learning_rate": 9.87143095675405e-06, "loss": 1.1364, "step": 1275 }, { "epoch": 0.6455617628801986, "grad_norm": 47.20607376098633, "learning_rate": 9.867256637168142e-06, "loss": 1.1565, "step": 1300 }, { "epoch": 0.6579764121663563, "grad_norm": 44.51639938354492, "learning_rate": 9.863082317582235e-06, "loss": 1.2704, "step": 1325 }, { "epoch": 0.6703910614525139, "grad_norm": 56.79221725463867, "learning_rate": 9.858907997996328e-06, "loss": 1.3655, "step": 1350 }, { "epoch": 0.6828057107386716, "grad_norm": 31.851566314697266, "learning_rate": 9.85473367841042e-06, "loss": 1.2962, "step": 1375 }, { "epoch": 0.6952203600248293, "grad_norm": 48.65141677856445, "learning_rate": 9.850559358824512e-06, "loss": 1.2699, "step": 1400 }, { "epoch": 0.6952203600248293, "eval_loss": 0.4222487807273865, "eval_runtime": 572.8012, "eval_samples_per_second": 1.898, "eval_steps_per_second": 0.95, "eval_wer": 0.26653814935064934, "step": 1400 }, { "epoch": 0.707635009310987, "grad_norm": 32.21327209472656, "learning_rate": 9.846385039238605e-06, "loss": 1.1561, "step": 1425 }, { "epoch": 0.7200496585971446, "grad_norm": 38.31489181518555, "learning_rate": 9.842210719652696e-06, "loss": 1.3146, "step": 1450 }, { "epoch": 0.7324643078833023, "grad_norm": 56.665260314941406, "learning_rate": 9.83803640006679e-06, "loss": 1.3184, "step": 1475 }, { "epoch": 0.74487895716946, "grad_norm": 49.64814758300781, "learning_rate": 9.833862080480883e-06, "loss": 1.0521, "step": 1500 }, { "epoch": 0.7572936064556176, "grad_norm": 32.33070373535156, "learning_rate": 9.829687760894975e-06, "loss": 1.2677, "step": 1525 }, { "epoch": 0.7697082557417753, "grad_norm": 27.896947860717773, "learning_rate": 9.825513441309066e-06, "loss": 1.3059, "step": 1550 }, { "epoch": 0.7821229050279329, "grad_norm": 28.060487747192383, "learning_rate": 9.82133912172316e-06, "loss": 1.3901, "step": 1575 }, { "epoch": 0.7945375543140907, "grad_norm": 32.01655578613281, "learning_rate": 9.817164802137253e-06, "loss": 1.0532, "step": 1600 }, { "epoch": 0.7945375543140907, "eval_loss": 0.41084742546081543, "eval_runtime": 564.5825, "eval_samples_per_second": 1.925, "eval_steps_per_second": 0.964, "eval_wer": 0.2513189935064935, "step": 1600 }, { "epoch": 0.8069522036002483, "grad_norm": 37.56877136230469, "learning_rate": 9.812990482551345e-06, "loss": 1.2314, "step": 1625 }, { "epoch": 0.819366852886406, 
"grad_norm": 23.31650161743164, "learning_rate": 9.808816162965438e-06, "loss": 1.214, "step": 1650 }, { "epoch": 0.8317815021725636, "grad_norm": 52.62869644165039, "learning_rate": 9.80464184337953e-06, "loss": 1.1148, "step": 1675 }, { "epoch": 0.8441961514587213, "grad_norm": 37.902523040771484, "learning_rate": 9.800467523793621e-06, "loss": 1.1947, "step": 1700 }, { "epoch": 0.8566108007448789, "grad_norm": 46.63554382324219, "learning_rate": 9.796293204207715e-06, "loss": 1.1841, "step": 1725 }, { "epoch": 0.8690254500310366, "grad_norm": 24.407249450683594, "learning_rate": 9.792118884621808e-06, "loss": 1.0706, "step": 1750 }, { "epoch": 0.8814400993171942, "grad_norm": 33.92270278930664, "learning_rate": 9.7879445650359e-06, "loss": 1.0771, "step": 1775 }, { "epoch": 0.8938547486033519, "grad_norm": 36.15495681762695, "learning_rate": 9.783770245449993e-06, "loss": 1.0589, "step": 1800 }, { "epoch": 0.8938547486033519, "eval_loss": 0.39820805191993713, "eval_runtime": 559.8426, "eval_samples_per_second": 1.942, "eval_steps_per_second": 0.972, "eval_wer": 0.22909902597402598, "step": 1800 }, { "epoch": 0.9062693978895097, "grad_norm": 39.94309616088867, "learning_rate": 9.779595925864084e-06, "loss": 1.219, "step": 1825 }, { "epoch": 0.9186840471756673, "grad_norm": 29.685474395751953, "learning_rate": 9.775421606278178e-06, "loss": 1.2091, "step": 1850 }, { "epoch": 0.931098696461825, "grad_norm": 39.77056121826172, "learning_rate": 9.771247286692271e-06, "loss": 1.2096, "step": 1875 }, { "epoch": 0.9435133457479826, "grad_norm": 22.495344161987305, "learning_rate": 9.767072967106363e-06, "loss": 1.1108, "step": 1900 }, { "epoch": 0.9559279950341403, "grad_norm": 42.11180114746094, "learning_rate": 9.762898647520454e-06, "loss": 1.0949, "step": 1925 }, { "epoch": 0.9683426443202979, "grad_norm": 41.73212432861328, "learning_rate": 9.758724327934548e-06, "loss": 1.2428, "step": 1950 }, { "epoch": 0.9807572936064556, "grad_norm": 39.16131591796875, "learning_rate": 9.75455000834864e-06, "loss": 0.9964, "step": 1975 }, { "epoch": 0.9931719428926132, "grad_norm": 27.52761459350586, "learning_rate": 9.750375688762733e-06, "loss": 1.1856, "step": 2000 }, { "epoch": 0.9931719428926132, "eval_loss": 0.3853071331977844, "eval_runtime": 565.6151, "eval_samples_per_second": 1.922, "eval_steps_per_second": 0.962, "eval_wer": 0.23549107142857142, "step": 2000 }, { "epoch": 1.0059590316573557, "grad_norm": 19.790531158447266, "learning_rate": 9.746201369176826e-06, "loss": 0.9702, "step": 2025 }, { "epoch": 1.0183736809435133, "grad_norm": 24.30504035949707, "learning_rate": 9.742027049590918e-06, "loss": 0.6177, "step": 2050 }, { "epoch": 1.030788330229671, "grad_norm": 25.81077003479004, "learning_rate": 9.73785273000501e-06, "loss": 0.5878, "step": 2075 }, { "epoch": 1.0432029795158286, "grad_norm": 29.500877380371094, "learning_rate": 9.733678410419102e-06, "loss": 0.6152, "step": 2100 }, { "epoch": 1.0556176288019863, "grad_norm": 18.39103889465332, "learning_rate": 9.729504090833196e-06, "loss": 0.5966, "step": 2125 }, { "epoch": 1.068032278088144, "grad_norm": 42.394142150878906, "learning_rate": 9.725329771247287e-06, "loss": 0.6365, "step": 2150 }, { "epoch": 1.0804469273743016, "grad_norm": 19.30755043029785, "learning_rate": 9.72115545166138e-06, "loss": 0.6584, "step": 2175 }, { "epoch": 1.0928615766604592, "grad_norm": 22.643875122070312, "learning_rate": 9.716981132075472e-06, "loss": 0.6692, "step": 2200 }, { "epoch": 1.0928615766604592, "eval_loss": 0.40007734298706055, 
"eval_runtime": 581.1524, "eval_samples_per_second": 1.87, "eval_steps_per_second": 0.936, "eval_wer": 0.2650162337662338, "step": 2200 }, { "epoch": 1.105276225946617, "grad_norm": 28.355436325073242, "learning_rate": 9.712806812489564e-06, "loss": 0.6398, "step": 2225 }, { "epoch": 1.1176908752327748, "grad_norm": 29.392656326293945, "learning_rate": 9.708632492903657e-06, "loss": 0.64, "step": 2250 }, { "epoch": 1.1301055245189324, "grad_norm": 25.52250099182129, "learning_rate": 9.70445817331775e-06, "loss": 0.6339, "step": 2275 }, { "epoch": 1.14252017380509, "grad_norm": 26.52411460876465, "learning_rate": 9.700283853731842e-06, "loss": 0.5372, "step": 2300 }, { "epoch": 1.1549348230912477, "grad_norm": 26.201452255249023, "learning_rate": 9.696109534145936e-06, "loss": 0.5878, "step": 2325 }, { "epoch": 1.1673494723774054, "grad_norm": 23.98987579345703, "learning_rate": 9.691935214560027e-06, "loss": 0.5349, "step": 2350 }, { "epoch": 1.179764121663563, "grad_norm": 32.815521240234375, "learning_rate": 9.68776089497412e-06, "loss": 0.7508, "step": 2375 }, { "epoch": 1.1921787709497207, "grad_norm": 23.12726593017578, "learning_rate": 9.683586575388212e-06, "loss": 0.6505, "step": 2400 }, { "epoch": 1.1921787709497207, "eval_loss": 0.39191773533821106, "eval_runtime": 571.3726, "eval_samples_per_second": 1.902, "eval_steps_per_second": 0.952, "eval_wer": 0.23894074675324675, "step": 2400 }, { "epoch": 1.2045934202358783, "grad_norm": 19.867704391479492, "learning_rate": 9.679412255802305e-06, "loss": 0.5807, "step": 2425 }, { "epoch": 1.217008069522036, "grad_norm": 19.685293197631836, "learning_rate": 9.675237936216397e-06, "loss": 0.7044, "step": 2450 }, { "epoch": 1.2294227188081936, "grad_norm": 28.70237159729004, "learning_rate": 9.67106361663049e-06, "loss": 0.6598, "step": 2475 }, { "epoch": 1.2418373680943513, "grad_norm": 36.98805618286133, "learning_rate": 9.666889297044582e-06, "loss": 0.6079, "step": 2500 }, { "epoch": 1.254252017380509, "grad_norm": 22.906494140625, "learning_rate": 9.662714977458675e-06, "loss": 0.7132, "step": 2525 }, { "epoch": 1.2666666666666666, "grad_norm": 21.013233184814453, "learning_rate": 9.658540657872769e-06, "loss": 0.6346, "step": 2550 }, { "epoch": 1.2790813159528243, "grad_norm": 22.889606475830078, "learning_rate": 9.65436633828686e-06, "loss": 0.5689, "step": 2575 }, { "epoch": 1.291495965238982, "grad_norm": 21.3165225982666, "learning_rate": 9.650192018700952e-06, "loss": 0.6613, "step": 2600 }, { "epoch": 1.291495965238982, "eval_loss": 0.3809148669242859, "eval_runtime": 575.5999, "eval_samples_per_second": 1.888, "eval_steps_per_second": 0.945, "eval_wer": 0.2385349025974026, "step": 2600 }, { "epoch": 1.3039106145251398, "grad_norm": 31.75080108642578, "learning_rate": 9.646017699115045e-06, "loss": 0.6436, "step": 2625 }, { "epoch": 1.3163252638112972, "grad_norm": 30.9864559173584, "learning_rate": 9.641843379529137e-06, "loss": 0.696, "step": 2650 }, { "epoch": 1.328739913097455, "grad_norm": 30.82682991027832, "learning_rate": 9.63766905994323e-06, "loss": 0.5955, "step": 2675 }, { "epoch": 1.3411545623836125, "grad_norm": 34.10749435424805, "learning_rate": 9.633494740357322e-06, "loss": 0.7117, "step": 2700 }, { "epoch": 1.3535692116697704, "grad_norm": 30.104955673217773, "learning_rate": 9.629320420771415e-06, "loss": 0.5666, "step": 2725 }, { "epoch": 1.365983860955928, "grad_norm": 23.225740432739258, "learning_rate": 9.625146101185507e-06, "loss": 0.5734, "step": 2750 }, { "epoch": 1.3783985102420857, "grad_norm": 
20.32614517211914, "learning_rate": 9.6209717815996e-06, "loss": 0.6535, "step": 2775 }, { "epoch": 1.3908131595282434, "grad_norm": 23.999792098999023, "learning_rate": 9.616797462013693e-06, "loss": 0.6194, "step": 2800 }, { "epoch": 1.3908131595282434, "eval_loss": 0.3873368799686432, "eval_runtime": 568.9878, "eval_samples_per_second": 1.91, "eval_steps_per_second": 0.956, "eval_wer": 0.23427353896103897, "step": 2800 }, { "epoch": 1.403227808814401, "grad_norm": 18.715627670288086, "learning_rate": 9.612623142427785e-06, "loss": 0.5924, "step": 2825 }, { "epoch": 1.4156424581005587, "grad_norm": 24.6026611328125, "learning_rate": 9.608448822841877e-06, "loss": 0.5588, "step": 2850 }, { "epoch": 1.4280571073867163, "grad_norm": 32.74100875854492, "learning_rate": 9.60427450325597e-06, "loss": 0.6261, "step": 2875 }, { "epoch": 1.440471756672874, "grad_norm": 31.3200740814209, "learning_rate": 9.600100183670062e-06, "loss": 0.756, "step": 2900 }, { "epoch": 1.4528864059590316, "grad_norm": 19.404541015625, "learning_rate": 9.595925864084155e-06, "loss": 0.6082, "step": 2925 }, { "epoch": 1.4653010552451893, "grad_norm": 16.61175537109375, "learning_rate": 9.591751544498248e-06, "loss": 0.6567, "step": 2950 }, { "epoch": 1.477715704531347, "grad_norm": 22.71599006652832, "learning_rate": 9.587744197695776e-06, "loss": 0.6098, "step": 2975 }, { "epoch": 1.4901303538175046, "grad_norm": 32.15653610229492, "learning_rate": 9.583569878109869e-06, "loss": 0.6358, "step": 3000 }, { "epoch": 1.4901303538175046, "eval_loss": 0.38495373725891113, "eval_runtime": 561.3182, "eval_samples_per_second": 1.937, "eval_steps_per_second": 0.969, "eval_wer": 0.21418425324675325, "step": 3000 }, { "epoch": 1.5025450031036622, "grad_norm": 22.268293380737305, "learning_rate": 9.579395558523962e-06, "loss": 0.5949, "step": 3025 }, { "epoch": 1.51495965238982, "grad_norm": 28.58846092224121, "learning_rate": 9.575221238938054e-06, "loss": 0.6006, "step": 3050 }, { "epoch": 1.5273743016759775, "grad_norm": 25.382551193237305, "learning_rate": 9.571046919352145e-06, "loss": 0.5811, "step": 3075 }, { "epoch": 1.5397889509621354, "grad_norm": 34.780006408691406, "learning_rate": 9.566872599766239e-06, "loss": 0.5968, "step": 3100 }, { "epoch": 1.5522036002482928, "grad_norm": 21.326889038085938, "learning_rate": 9.562698280180332e-06, "loss": 0.4749, "step": 3125 }, { "epoch": 1.5646182495344507, "grad_norm": 27.90545654296875, "learning_rate": 9.558523960594424e-06, "loss": 0.6064, "step": 3150 }, { "epoch": 1.5770328988206082, "grad_norm": 22.328035354614258, "learning_rate": 9.554349641008517e-06, "loss": 0.5755, "step": 3175 }, { "epoch": 1.589447548106766, "grad_norm": 23.400901794433594, "learning_rate": 9.550175321422609e-06, "loss": 0.6208, "step": 3200 }, { "epoch": 1.589447548106766, "eval_loss": 0.37794527411460876, "eval_runtime": 565.9599, "eval_samples_per_second": 1.921, "eval_steps_per_second": 0.961, "eval_wer": 0.23883928571428573, "step": 3200 }, { "epoch": 1.6018621973929237, "grad_norm": 21.570287704467773, "learning_rate": 9.5460010018367e-06, "loss": 0.5788, "step": 3225 }, { "epoch": 1.6142768466790813, "grad_norm": 27.813451766967773, "learning_rate": 9.541826682250794e-06, "loss": 0.6945, "step": 3250 }, { "epoch": 1.626691495965239, "grad_norm": 30.955820083618164, "learning_rate": 9.537652362664887e-06, "loss": 0.5379, "step": 3275 }, { "epoch": 1.6391061452513966, "grad_norm": 20.53118133544922, "learning_rate": 9.533478043078979e-06, "loss": 0.6171, "step": 3300 }, { "epoch": 
1.6515207945375543, "grad_norm": 23.763132095336914, "learning_rate": 9.529303723493072e-06, "loss": 0.6021, "step": 3325 }, { "epoch": 1.663935443823712, "grad_norm": 26.67987632751465, "learning_rate": 9.525129403907164e-06, "loss": 0.6727, "step": 3350 }, { "epoch": 1.6763500931098696, "grad_norm": 25.991594314575195, "learning_rate": 9.520955084321257e-06, "loss": 0.7155, "step": 3375 }, { "epoch": 1.6887647423960273, "grad_norm": 19.079315185546875, "learning_rate": 9.51678076473535e-06, "loss": 0.5932, "step": 3400 }, { "epoch": 1.6887647423960273, "eval_loss": 0.3724534511566162, "eval_runtime": 550.7846, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.988, "eval_wer": 0.20403814935064934, "step": 3400 }, { "epoch": 1.7011793916821851, "grad_norm": 18.52420997619629, "learning_rate": 9.512606445149442e-06, "loss": 0.6704, "step": 3425 }, { "epoch": 1.7135940409683426, "grad_norm": 19.514951705932617, "learning_rate": 9.508432125563533e-06, "loss": 0.5896, "step": 3450 }, { "epoch": 1.7260086902545004, "grad_norm": 28.89137840270996, "learning_rate": 9.504257805977627e-06, "loss": 0.5097, "step": 3475 }, { "epoch": 1.7384233395406579, "grad_norm": 32.02205276489258, "learning_rate": 9.500083486391718e-06, "loss": 0.6217, "step": 3500 }, { "epoch": 1.7508379888268157, "grad_norm": 36.85642623901367, "learning_rate": 9.495909166805812e-06, "loss": 0.666, "step": 3525 }, { "epoch": 1.7632526381129732, "grad_norm": 37.10481262207031, "learning_rate": 9.491734847219905e-06, "loss": 0.5903, "step": 3550 }, { "epoch": 1.775667287399131, "grad_norm": 19.526355743408203, "learning_rate": 9.487560527633997e-06, "loss": 0.5304, "step": 3575 }, { "epoch": 1.7880819366852885, "grad_norm": 30.528167724609375, "learning_rate": 9.483386208048088e-06, "loss": 0.5797, "step": 3600 }, { "epoch": 1.7880819366852885, "eval_loss": 0.3712182641029358, "eval_runtime": 558.4122, "eval_samples_per_second": 1.947, "eval_steps_per_second": 0.974, "eval_wer": 0.20921266233766234, "step": 3600 }, { "epoch": 1.8004965859714464, "grad_norm": 29.263221740722656, "learning_rate": 9.479211888462182e-06, "loss": 0.6156, "step": 3625 }, { "epoch": 1.812911235257604, "grad_norm": 23.728296279907227, "learning_rate": 9.475037568876275e-06, "loss": 0.6568, "step": 3650 }, { "epoch": 1.8253258845437617, "grad_norm": 15.723759651184082, "learning_rate": 9.470863249290367e-06, "loss": 0.566, "step": 3675 }, { "epoch": 1.8377405338299193, "grad_norm": 39.088584899902344, "learning_rate": 9.466688929704458e-06, "loss": 0.7007, "step": 3700 }, { "epoch": 1.850155183116077, "grad_norm": 20.931364059448242, "learning_rate": 9.462514610118551e-06, "loss": 0.6843, "step": 3725 }, { "epoch": 1.8625698324022346, "grad_norm": 23.179536819458008, "learning_rate": 9.458340290532643e-06, "loss": 0.6391, "step": 3750 }, { "epoch": 1.8749844816883923, "grad_norm": 31.087736129760742, "learning_rate": 9.454165970946736e-06, "loss": 0.6611, "step": 3775 }, { "epoch": 1.88739913097455, "grad_norm": 22.13474464416504, "learning_rate": 9.44999165136083e-06, "loss": 0.5707, "step": 3800 }, { "epoch": 1.88739913097455, "eval_loss": 0.37375178933143616, "eval_runtime": 565.1592, "eval_samples_per_second": 1.923, "eval_steps_per_second": 0.963, "eval_wer": 0.23417207792207792, "step": 3800 }, { "epoch": 1.8998137802607076, "grad_norm": 22.615114212036133, "learning_rate": 9.445817331774921e-06, "loss": 0.5573, "step": 3825 }, { "epoch": 1.9122284295468654, "grad_norm": 32.943199157714844, "learning_rate": 9.441643012189013e-06, "loss": 
0.6528, "step": 3850 }, { "epoch": 1.9246430788330229, "grad_norm": 29.096609115600586, "learning_rate": 9.437468692603106e-06, "loss": 0.7014, "step": 3875 }, { "epoch": 1.9370577281191808, "grad_norm": 18.50649642944336, "learning_rate": 9.4332943730172e-06, "loss": 0.5836, "step": 3900 }, { "epoch": 1.9494723774053382, "grad_norm": 27.316129684448242, "learning_rate": 9.429120053431291e-06, "loss": 0.5993, "step": 3925 }, { "epoch": 1.961887026691496, "grad_norm": 26.35407257080078, "learning_rate": 9.424945733845385e-06, "loss": 0.5874, "step": 3950 }, { "epoch": 1.9743016759776535, "grad_norm": 23.183897018432617, "learning_rate": 9.420771414259476e-06, "loss": 0.6319, "step": 3975 }, { "epoch": 1.9867163252638114, "grad_norm": 25.644729614257812, "learning_rate": 9.416597094673568e-06, "loss": 0.5928, "step": 4000 }, { "epoch": 1.9867163252638114, "eval_loss": 0.3662048876285553, "eval_runtime": 574.0467, "eval_samples_per_second": 1.894, "eval_steps_per_second": 0.948, "eval_wer": 0.25892857142857145, "step": 4000 }, { "epoch": 1.9991309745499688, "grad_norm": 15.862359046936035, "learning_rate": 9.412422775087661e-06, "loss": 0.5867, "step": 4025 }, { "epoch": 2.0119180633147113, "grad_norm": 15.233346939086914, "learning_rate": 9.408248455501754e-06, "loss": 0.2928, "step": 4050 }, { "epoch": 2.024332712600869, "grad_norm": 41.226078033447266, "learning_rate": 9.404074135915846e-06, "loss": 0.2906, "step": 4075 }, { "epoch": 2.0367473618870267, "grad_norm": 16.719274520874023, "learning_rate": 9.39989981632994e-06, "loss": 0.3043, "step": 4100 }, { "epoch": 2.0491620111731845, "grad_norm": 17.11972999572754, "learning_rate": 9.395725496744031e-06, "loss": 0.3007, "step": 4125 }, { "epoch": 2.061576660459342, "grad_norm": 25.817195892333984, "learning_rate": 9.391551177158124e-06, "loss": 0.3189, "step": 4150 }, { "epoch": 2.0739913097455, "grad_norm": 22.05105972290039, "learning_rate": 9.387376857572218e-06, "loss": 0.2891, "step": 4175 }, { "epoch": 2.0864059590316573, "grad_norm": 21.231904983520508, "learning_rate": 9.38320253798631e-06, "loss": 0.2626, "step": 4200 }, { "epoch": 2.0864059590316573, "eval_loss": 0.3803122341632843, "eval_runtime": 575.7956, "eval_samples_per_second": 1.888, "eval_steps_per_second": 0.945, "eval_wer": 0.26968344155844154, "step": 4200 }, { "epoch": 2.098820608317815, "grad_norm": 21.424543380737305, "learning_rate": 9.379028218400401e-06, "loss": 0.2837, "step": 4225 }, { "epoch": 2.1112352576039726, "grad_norm": 20.14120864868164, "learning_rate": 9.374853898814494e-06, "loss": 0.2861, "step": 4250 }, { "epoch": 2.1236499068901304, "grad_norm": 29.401103973388672, "learning_rate": 9.370679579228586e-06, "loss": 0.28, "step": 4275 }, { "epoch": 2.136064556176288, "grad_norm": 15.73469352722168, "learning_rate": 9.36650525964268e-06, "loss": 0.2564, "step": 4300 }, { "epoch": 2.1484792054624458, "grad_norm": 16.33969497680664, "learning_rate": 9.362330940056773e-06, "loss": 0.2648, "step": 4325 }, { "epoch": 2.160893854748603, "grad_norm": 13.485337257385254, "learning_rate": 9.358156620470864e-06, "loss": 0.2615, "step": 4350 }, { "epoch": 2.173308504034761, "grad_norm": 17.95641326904297, "learning_rate": 9.353982300884956e-06, "loss": 0.2943, "step": 4375 }, { "epoch": 2.1857231533209185, "grad_norm": 20.702796936035156, "learning_rate": 9.349807981299049e-06, "loss": 0.2557, "step": 4400 }, { "epoch": 2.1857231533209185, "eval_loss": 0.3853345811367035, "eval_runtime": 558.2923, "eval_samples_per_second": 1.947, "eval_steps_per_second": 
0.974, "eval_wer": 0.21022727272727273, "step": 4400 }, { "epoch": 2.1981378026070764, "grad_norm": 19.750181198120117, "learning_rate": 9.345633661713142e-06, "loss": 0.2404, "step": 4425 }, { "epoch": 2.210552451893234, "grad_norm": 21.653345108032227, "learning_rate": 9.341459342127234e-06, "loss": 0.3591, "step": 4450 }, { "epoch": 2.2229671011793917, "grad_norm": 17.557680130004883, "learning_rate": 9.337285022541327e-06, "loss": 0.3089, "step": 4475 }, { "epoch": 2.2353817504655495, "grad_norm": 6.009192943572998, "learning_rate": 9.333110702955419e-06, "loss": 0.3418, "step": 4500 }, { "epoch": 2.247796399751707, "grad_norm": 17.482463836669922, "learning_rate": 9.32893638336951e-06, "loss": 0.2301, "step": 4525 }, { "epoch": 2.260211049037865, "grad_norm": 13.825834274291992, "learning_rate": 9.324762063783604e-06, "loss": 0.3232, "step": 4550 }, { "epoch": 2.2726256983240223, "grad_norm": 9.021127700805664, "learning_rate": 9.320587744197697e-06, "loss": 0.3317, "step": 4575 }, { "epoch": 2.28504034761018, "grad_norm": 22.399105072021484, "learning_rate": 9.316413424611789e-06, "loss": 0.3342, "step": 4600 }, { "epoch": 2.28504034761018, "eval_loss": 0.38909900188446045, "eval_runtime": 555.7727, "eval_samples_per_second": 1.956, "eval_steps_per_second": 0.979, "eval_wer": 0.20616883116883117, "step": 4600 }, { "epoch": 2.28504034761018, "step": 4600, "total_flos": 3.756642543599616e+19, "train_loss": 1.1144439057681872, "train_runtime": 22344.0923, "train_samples_per_second": 21.63, "train_steps_per_second": 2.703 } ], "logging_steps": 25, "max_steps": 60390, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.756642543599616e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }