diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7466 @@ +{ + "best_metric": 0.7750564813613892, + "best_model_checkpoint": "temp/checkpoint-30660", + "epoch": 10.0, + "eval_steps": 500, + "global_step": 30660, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 0.00042, + "loss": 4.34, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 0.00092, + "loss": 2.3611, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 0.000999313949689644, + "loss": 1.8959, + "step": 75 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009984972231296962, + "loss": 1.7016, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009976804965697486, + "loss": 1.4917, + "step": 125 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009968637700098007, + "loss": 1.3633, + "step": 150 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996047043449853, + "loss": 1.3293, + "step": 175 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952303168899053, + "loss": 1.3843, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009944135903299576, + "loss": 1.4711, + "step": 225 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935968637700097, + "loss": 1.4568, + "step": 250 + }, + { + "epoch": 0.09, + "learning_rate": 0.000992780137210062, + "loss": 1.2875, + "step": 275 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009919634106501144, + "loss": 1.3972, + "step": 300 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009911466840901667, + "loss": 1.2847, + "step": 325 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009903299575302188, + "loss": 1.43, + "step": 350 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009895132309702711, + "loss": 1.306, + "step": 375 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009886965044103235, + "loss": 1.3228, + "step": 400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009878797778503758, + "loss": 1.3421, + "step": 425 + }, + { + "epoch": 0.15, + "learning_rate": 0.000987063051290428, + "loss": 1.3667, + "step": 450 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009862463247304802, + "loss": 1.2756, + "step": 475 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009854295981705325, + "loss": 1.2254, + "step": 500 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009846128716105849, + "loss": 1.2671, + "step": 525 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009837961450506372, + "loss": 1.3432, + "step": 550 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009829794184906893, + "loss": 1.2765, + "step": 575 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009821626919307416, + "loss": 1.1244, + "step": 600 + }, + { + "epoch": 0.2, + "learning_rate": 0.000981345965370794, + "loss": 1.3241, + "step": 625 + }, + { + "epoch": 0.21, + "learning_rate": 0.000980529238810846, + "loss": 1.3232, + "step": 650 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009797125122508984, + "loss": 1.3353, + "step": 675 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009788957856909507, + "loss": 1.2559, + "step": 700 + }, + { + "epoch": 0.24, + "learning_rate": 0.000978079059131003, + "loss": 1.2461, + "step": 725 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009772623325710553, + "loss": 1.2864, + "step": 750 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009764456060111076, + "loss": 1.2687, + "step": 775 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009756288794511598, + "loss": 1.2791, + "step": 800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009748121528912121, + "loss": 1.2676, + "step": 825 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009739954263312644, + "loss": 1.4354, + "step": 850 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009731786997713165, + "loss": 1.2231, + "step": 875 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009723619732113688, + "loss": 1.2008, + "step": 900 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009715452466514211, + "loss": 1.2522, + "step": 925 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009707285200914734, + "loss": 1.2547, + "step": 950 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009699117935315257, + "loss": 1.2101, + "step": 975 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009690950669715779, + "loss": 1.2404, + "step": 1000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009682783404116302, + "loss": 1.3073, + "step": 1025 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009674616138516826, + "loss": 1.1727, + "step": 1050 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009666448872917347, + "loss": 1.5219, + "step": 1075 + }, + { + "epoch": 0.36, + "learning_rate": 0.000965828160731787, + "loss": 1.3067, + "step": 1100 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009650114341718393, + "loss": 1.2617, + "step": 1125 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009641947076118915, + "loss": 1.1939, + "step": 1150 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009633779810519439, + "loss": 1.2605, + "step": 1175 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009625612544919961, + "loss": 1.2543, + "step": 1200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009617445279320484, + "loss": 1.255, + "step": 1225 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009609278013721007, + "loss": 1.2582, + "step": 1250 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009601110748121528, + "loss": 1.2397, + "step": 1275 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009592943482522051, + "loss": 1.2944, + "step": 1300 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009584776216922575, + "loss": 1.1534, + "step": 1325 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009576608951323097, + "loss": 1.2319, + "step": 1350 + }, + { + "epoch": 0.45, + "learning_rate": 0.000956844168572362, + "loss": 1.2521, + "step": 1375 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009560274420124143, + "loss": 1.224, + "step": 1400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009552107154524665, + "loss": 1.2421, + "step": 1425 + }, + { + "epoch": 0.47, + "learning_rate": 0.0009543939888925189, + "loss": 1.1581, + "step": 1450 + }, + { + "epoch": 0.48, + "learning_rate": 0.0009535772623325711, + "loss": 1.2528, + "step": 1475 + }, + { + "epoch": 0.49, + "learning_rate": 0.0009527605357726233, + "loss": 1.141, + "step": 1500 + }, + { + "epoch": 0.5, + "learning_rate": 0.0009519438092126756, + "loss": 1.1966, + "step": 1525 + }, + { + "epoch": 0.51, + "learning_rate": 0.0009511270826527278, + "loss": 1.1853, + "step": 1550 + }, + { + "epoch": 0.51, + "learning_rate": 0.0009503103560927802, + "loss": 1.1506, + "step": 1575 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009494936295328325, + "loss": 1.2139, + "step": 1600 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009486769029728847, + "loss": 1.2569, + "step": 1625 + }, + { + "epoch": 0.54, + "learning_rate": 0.000947860176412937, + "loss": 1.2148, + "step": 1650 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009470434498529893, + "loss": 1.1866, + "step": 1675 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009462267232930414, + "loss": 1.228, + "step": 1700 + }, + { + "epoch": 0.56, + "learning_rate": 0.0009454099967330938, + "loss": 1.2117, + "step": 1725 + }, + { + "epoch": 0.57, + "learning_rate": 0.000944593270173146, + "loss": 1.2629, + "step": 1750 + }, + { + "epoch": 0.58, + "learning_rate": 0.0009437765436131983, + "loss": 1.2341, + "step": 1775 + }, + { + "epoch": 0.59, + "learning_rate": 0.0009429598170532506, + "loss": 1.1796, + "step": 1800 + }, + { + "epoch": 0.6, + "learning_rate": 0.0009421430904933028, + "loss": 1.2528, + "step": 1825 + }, + { + "epoch": 0.6, + "learning_rate": 0.0009413263639333552, + "loss": 1.1945, + "step": 1850 + }, + { + "epoch": 0.61, + "learning_rate": 0.0009405096373734075, + "loss": 1.2459, + "step": 1875 + }, + { + "epoch": 0.62, + "learning_rate": 0.0009396929108134596, + "loss": 1.104, + "step": 1900 + }, + { + "epoch": 0.63, + "learning_rate": 0.0009388761842535119, + "loss": 1.1864, + "step": 1925 + }, + { + "epoch": 0.64, + "learning_rate": 0.0009380594576935642, + "loss": 1.129, + "step": 1950 + }, + { + "epoch": 0.64, + "learning_rate": 0.0009372427311336165, + "loss": 1.2213, + "step": 1975 + }, + { + "epoch": 0.65, + "learning_rate": 0.0009364260045736688, + "loss": 1.1773, + "step": 2000 + }, + { + "epoch": 0.66, + "learning_rate": 0.0009356419470761189, + "loss": 1.2489, + "step": 2025 + }, + { + "epoch": 0.67, + "learning_rate": 0.0009348252205161712, + "loss": 1.1637, + "step": 2050 + }, + { + "epoch": 0.68, + "learning_rate": 0.0009340084939562234, + "loss": 1.2635, + "step": 2075 + }, + { + "epoch": 0.68, + "learning_rate": 0.0009331917673962757, + "loss": 1.2673, + "step": 2100 + }, + { + "epoch": 0.69, + "learning_rate": 0.0009323750408363281, + "loss": 1.2722, + "step": 2125 + }, + { + "epoch": 0.7, + "learning_rate": 0.0009315583142763803, + "loss": 1.1882, + "step": 2150 + }, + { + "epoch": 0.71, + "learning_rate": 0.0009307415877164326, + "loss": 1.1911, + "step": 2175 + }, + { + "epoch": 0.72, + "learning_rate": 0.0009299248611564848, + "loss": 1.2546, + "step": 2200 + }, + { + "epoch": 0.73, + "learning_rate": 0.0009291081345965371, + "loss": 1.1473, + "step": 2225 + }, + { + "epoch": 0.73, + "learning_rate": 0.0009282914080365894, + "loss": 1.2054, + "step": 2250 + }, + { + "epoch": 0.74, + "learning_rate": 0.0009274746814766416, + "loss": 1.1305, + "step": 2275 + }, + { + "epoch": 0.75, + "learning_rate": 0.0009266579549166939, + "loss": 1.2261, + "step": 2300 + }, + { + "epoch": 0.76, + "learning_rate": 0.0009258412283567462, + "loss": 1.1463, + "step": 2325 + }, + { + "epoch": 0.77, + "learning_rate": 0.0009250245017967984, + "loss": 1.2333, + "step": 2350 + }, + { + "epoch": 0.77, + "learning_rate": 0.0009242077752368508, + "loss": 1.1699, + "step": 2375 + }, + { + "epoch": 0.78, + "learning_rate": 0.0009233910486769031, + "loss": 1.2033, + "step": 2400 + }, + { + "epoch": 0.79, + "learning_rate": 0.0009225743221169553, + "loss": 1.1624, + "step": 2425 + }, + { + "epoch": 0.8, + "learning_rate": 0.0009217575955570075, + "loss": 1.1374, + "step": 2450 + }, + { + "epoch": 0.81, + "learning_rate": 0.0009209408689970597, + "loss": 1.214, + "step": 2475 + }, + { + "epoch": 0.82, + "learning_rate": 0.000920124142437112, + "loss": 1.2615, + "step": 2500 + }, + { + "epoch": 0.82, + "learning_rate": 0.0009193074158771644, + "loss": 1.2242, + "step": 2525 + }, + { + "epoch": 0.83, + "learning_rate": 0.0009184906893172166, + "loss": 1.2355, + "step": 2550 + }, + { + "epoch": 0.84, + "learning_rate": 0.0009176739627572689, + "loss": 1.2533, + "step": 2575 + }, + { + "epoch": 0.85, + "learning_rate": 0.0009168572361973212, + "loss": 1.1497, + "step": 2600 + }, + { + "epoch": 0.86, + "learning_rate": 0.0009160405096373734, + "loss": 1.1764, + "step": 2625 + }, + { + "epoch": 0.86, + "learning_rate": 0.0009152237830774257, + "loss": 1.2131, + "step": 2650 + }, + { + "epoch": 0.87, + "learning_rate": 0.000914407056517478, + "loss": 1.2165, + "step": 2675 + }, + { + "epoch": 0.88, + "learning_rate": 0.0009135903299575302, + "loss": 1.2087, + "step": 2700 + }, + { + "epoch": 0.89, + "learning_rate": 0.0009127736033975825, + "loss": 1.2069, + "step": 2725 + }, + { + "epoch": 0.9, + "learning_rate": 0.0009119568768376347, + "loss": 1.1186, + "step": 2750 + }, + { + "epoch": 0.91, + "learning_rate": 0.0009111401502776871, + "loss": 1.1936, + "step": 2775 + }, + { + "epoch": 0.91, + "learning_rate": 0.0009103234237177394, + "loss": 1.1926, + "step": 2800 + }, + { + "epoch": 0.92, + "learning_rate": 0.0009095066971577916, + "loss": 1.175, + "step": 2825 + }, + { + "epoch": 0.93, + "learning_rate": 0.0009086899705978438, + "loss": 1.2437, + "step": 2850 + }, + { + "epoch": 0.94, + "learning_rate": 0.0009078732440378961, + "loss": 1.156, + "step": 2875 + }, + { + "epoch": 0.95, + "learning_rate": 0.0009070565174779483, + "loss": 1.2573, + "step": 2900 + }, + { + "epoch": 0.95, + "learning_rate": 0.0009062397909180007, + "loss": 1.1603, + "step": 2925 + }, + { + "epoch": 0.96, + "learning_rate": 0.000905423064358053, + "loss": 1.2515, + "step": 2950 + }, + { + "epoch": 0.97, + "learning_rate": 0.0009046063377981052, + "loss": 1.173, + "step": 2975 + }, + { + "epoch": 0.98, + "learning_rate": 0.0009037896112381575, + "loss": 1.1754, + "step": 3000 + }, + { + "epoch": 0.99, + "learning_rate": 0.0009029728846782097, + "loss": 1.1455, + "step": 3025 + }, + { + "epoch": 0.99, + "learning_rate": 0.0009021561581182621, + "loss": 1.1903, + "step": 3050 + }, + { + "epoch": 1.0, + "eval_loss": 1.0751750469207764, + "eval_runtime": 957.3865, + "eval_samples_per_second": 1.045, + "eval_steps_per_second": 0.131, + "eval_wer": 14.025925580527836, + "step": 3066 + }, + { + "epoch": 1.0, + "learning_rate": 0.0009013394315583143, + "loss": 1.0824, + "step": 3075 + }, + { + "epoch": 1.01, + "learning_rate": 0.0009005227049983665, + "loss": 1.0884, + "step": 3100 + }, + { + "epoch": 1.02, + "learning_rate": 0.0008997059784384188, + "loss": 1.0469, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 0.0008988892518784711, + "loss": 1.1158, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 0.0008980725253185234, + "loss": 1.1601, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 0.0008972557987585757, + "loss": 1.1254, + "step": 3200 + }, + { + "epoch": 1.05, + "learning_rate": 0.000896439072198628, + "loss": 1.083, + "step": 3225 + }, + { + "epoch": 1.06, + "learning_rate": 0.0008956223456386802, + "loss": 1.437, + "step": 3250 + }, + { + "epoch": 1.07, + "learning_rate": 0.0008948056190787324, + "loss": 1.1374, + "step": 3275 + }, + { + "epoch": 1.08, + "learning_rate": 0.0008939888925187846, + "loss": 1.1438, + "step": 3300 + }, + { + "epoch": 1.08, + "learning_rate": 0.000893172165958837, + "loss": 1.1136, + "step": 3325 + }, + { + "epoch": 1.09, + "learning_rate": 0.0008923554393988893, + "loss": 1.0219, + "step": 3350 + }, + { + "epoch": 1.1, + "learning_rate": 0.0008915387128389415, + "loss": 1.1524, + "step": 3375 + }, + { + "epoch": 1.11, + "learning_rate": 0.0008907219862789938, + "loss": 1.1272, + "step": 3400 + }, + { + "epoch": 1.12, + "learning_rate": 0.0008899052597190462, + "loss": 1.1296, + "step": 3425 + }, + { + "epoch": 1.13, + "learning_rate": 0.0008890885331590984, + "loss": 1.168, + "step": 3450 + }, + { + "epoch": 1.13, + "learning_rate": 0.0008882718065991506, + "loss": 1.167, + "step": 3475 + }, + { + "epoch": 1.14, + "learning_rate": 0.0008874550800392029, + "loss": 1.127, + "step": 3500 + }, + { + "epoch": 1.15, + "learning_rate": 0.0008866383534792551, + "loss": 1.1062, + "step": 3525 + }, + { + "epoch": 1.16, + "learning_rate": 0.0008858216269193074, + "loss": 1.1726, + "step": 3550 + }, + { + "epoch": 1.17, + "learning_rate": 0.0008850049003593597, + "loss": 1.1571, + "step": 3575 + }, + { + "epoch": 1.17, + "learning_rate": 0.000884188173799412, + "loss": 1.0593, + "step": 3600 + }, + { + "epoch": 1.18, + "learning_rate": 0.0008833714472394643, + "loss": 1.1024, + "step": 3625 + }, + { + "epoch": 1.19, + "learning_rate": 0.0008825547206795165, + "loss": 1.1431, + "step": 3650 + }, + { + "epoch": 1.2, + "learning_rate": 0.0008817379941195687, + "loss": 1.1414, + "step": 3675 + }, + { + "epoch": 1.21, + "learning_rate": 0.0008809212675596211, + "loss": 1.1188, + "step": 3700 + }, + { + "epoch": 1.21, + "learning_rate": 0.0008801045409996733, + "loss": 1.1289, + "step": 3725 + }, + { + "epoch": 1.22, + "learning_rate": 0.0008792878144397256, + "loss": 1.12, + "step": 3750 + }, + { + "epoch": 1.23, + "learning_rate": 0.0008784710878797779, + "loss": 1.0852, + "step": 3775 + }, + { + "epoch": 1.24, + "learning_rate": 0.0008776543613198301, + "loss": 1.123, + "step": 3800 + }, + { + "epoch": 1.25, + "learning_rate": 0.0008768376347598825, + "loss": 1.2024, + "step": 3825 + }, + { + "epoch": 1.26, + "learning_rate": 0.0008760209081999347, + "loss": 1.1671, + "step": 3850 + }, + { + "epoch": 1.26, + "learning_rate": 0.000875204181639987, + "loss": 1.1649, + "step": 3875 + }, + { + "epoch": 1.27, + "learning_rate": 0.0008743874550800392, + "loss": 1.1101, + "step": 3900 + }, + { + "epoch": 1.28, + "learning_rate": 0.0008735707285200914, + "loss": 1.1336, + "step": 3925 + }, + { + "epoch": 1.29, + "learning_rate": 0.0008727540019601437, + "loss": 1.1928, + "step": 3950 + }, + { + "epoch": 1.3, + "learning_rate": 0.0008719372754001961, + "loss": 1.1132, + "step": 3975 + }, + { + "epoch": 1.3, + "learning_rate": 0.0008711205488402483, + "loss": 1.1441, + "step": 4000 + }, + { + "epoch": 1.31, + "learning_rate": 0.0008703038222803006, + "loss": 1.0876, + "step": 4025 + }, + { + "epoch": 1.32, + "learning_rate": 0.0008694870957203529, + "loss": 1.1469, + "step": 4050 + }, + { + "epoch": 1.33, + "learning_rate": 0.0008686703691604051, + "loss": 1.0801, + "step": 4075 + }, + { + "epoch": 1.34, + "learning_rate": 0.0008678536426004574, + "loss": 1.2046, + "step": 4100 + }, + { + "epoch": 1.35, + "learning_rate": 0.0008670369160405096, + "loss": 1.16, + "step": 4125 + }, + { + "epoch": 1.35, + "learning_rate": 0.0008662201894805619, + "loss": 1.0832, + "step": 4150 + }, + { + "epoch": 1.36, + "learning_rate": 0.0008654034629206142, + "loss": 1.0785, + "step": 4175 + }, + { + "epoch": 1.37, + "learning_rate": 0.0008645867363606664, + "loss": 1.1291, + "step": 4200 + }, + { + "epoch": 1.38, + "learning_rate": 0.0008637700098007188, + "loss": 1.1209, + "step": 4225 + }, + { + "epoch": 1.39, + "learning_rate": 0.0008629532832407711, + "loss": 1.1116, + "step": 4250 + }, + { + "epoch": 1.39, + "learning_rate": 0.0008621365566808233, + "loss": 1.1478, + "step": 4275 + }, + { + "epoch": 1.4, + "learning_rate": 0.0008613198301208755, + "loss": 1.1547, + "step": 4300 + }, + { + "epoch": 1.41, + "learning_rate": 0.0008605031035609278, + "loss": 1.1724, + "step": 4325 + }, + { + "epoch": 1.42, + "learning_rate": 0.00085968637700098, + "loss": 1.2658, + "step": 4350 + }, + { + "epoch": 1.43, + "learning_rate": 0.0008588696504410324, + "loss": 1.0794, + "step": 4375 + }, + { + "epoch": 1.44, + "learning_rate": 0.0008580529238810846, + "loss": 1.1891, + "step": 4400 + }, + { + "epoch": 1.44, + "learning_rate": 0.0008572361973211369, + "loss": 1.207, + "step": 4425 + }, + { + "epoch": 1.45, + "learning_rate": 0.0008564194707611892, + "loss": 1.0941, + "step": 4450 + }, + { + "epoch": 1.46, + "learning_rate": 0.0008556027442012414, + "loss": 1.155, + "step": 4475 + }, + { + "epoch": 1.47, + "learning_rate": 0.0008547860176412938, + "loss": 1.1785, + "step": 4500 + }, + { + "epoch": 1.48, + "learning_rate": 0.000853969291081346, + "loss": 1.0882, + "step": 4525 + }, + { + "epoch": 1.48, + "learning_rate": 0.0008531525645213982, + "loss": 1.1544, + "step": 4550 + }, + { + "epoch": 1.49, + "learning_rate": 0.0008523358379614505, + "loss": 1.0764, + "step": 4575 + }, + { + "epoch": 1.5, + "learning_rate": 0.0008515191114015028, + "loss": 1.099, + "step": 4600 + }, + { + "epoch": 1.51, + "learning_rate": 0.0008507023848415551, + "loss": 1.0939, + "step": 4625 + }, + { + "epoch": 1.52, + "learning_rate": 0.0008498856582816074, + "loss": 1.057, + "step": 4650 + }, + { + "epoch": 1.52, + "learning_rate": 0.0008490689317216596, + "loss": 1.2006, + "step": 4675 + }, + { + "epoch": 1.53, + "learning_rate": 0.0008482522051617119, + "loss": 1.1362, + "step": 4700 + }, + { + "epoch": 1.54, + "learning_rate": 0.0008474354786017641, + "loss": 1.1804, + "step": 4725 + }, + { + "epoch": 1.55, + "learning_rate": 0.0008466187520418163, + "loss": 1.2434, + "step": 4750 + }, + { + "epoch": 1.56, + "learning_rate": 0.0008458020254818687, + "loss": 1.2353, + "step": 4775 + }, + { + "epoch": 1.57, + "learning_rate": 0.000844985298921921, + "loss": 1.1643, + "step": 4800 + }, + { + "epoch": 1.57, + "learning_rate": 0.0008441685723619732, + "loss": 1.114, + "step": 4825 + }, + { + "epoch": 1.58, + "learning_rate": 0.0008433518458020255, + "loss": 1.1803, + "step": 4850 + }, + { + "epoch": 1.59, + "learning_rate": 0.0008425351192420779, + "loss": 1.0897, + "step": 4875 + }, + { + "epoch": 1.6, + "learning_rate": 0.0008417183926821301, + "loss": 1.0807, + "step": 4900 + }, + { + "epoch": 1.61, + "learning_rate": 0.0008409016661221823, + "loss": 1.1861, + "step": 4925 + }, + { + "epoch": 1.61, + "learning_rate": 0.0008400849395622345, + "loss": 1.1627, + "step": 4950 + }, + { + "epoch": 1.62, + "learning_rate": 0.0008392682130022868, + "loss": 1.1307, + "step": 4975 + }, + { + "epoch": 1.63, + "learning_rate": 0.0008384514864423391, + "loss": 1.1836, + "step": 5000 + }, + { + "epoch": 1.64, + "learning_rate": 0.0008376347598823914, + "loss": 1.1408, + "step": 5025 + }, + { + "epoch": 1.65, + "learning_rate": 0.0008368180333224437, + "loss": 1.1182, + "step": 5050 + }, + { + "epoch": 1.66, + "learning_rate": 0.000836001306762496, + "loss": 1.0922, + "step": 5075 + }, + { + "epoch": 1.66, + "learning_rate": 0.0008351845802025482, + "loss": 1.1128, + "step": 5100 + }, + { + "epoch": 1.67, + "learning_rate": 0.0008343678536426004, + "loss": 1.0759, + "step": 5125 + }, + { + "epoch": 1.68, + "learning_rate": 0.0008335511270826528, + "loss": 1.1109, + "step": 5150 + }, + { + "epoch": 1.69, + "learning_rate": 0.000832734400522705, + "loss": 1.0652, + "step": 5175 + }, + { + "epoch": 1.7, + "learning_rate": 0.0008319176739627573, + "loss": 1.1616, + "step": 5200 + }, + { + "epoch": 1.7, + "learning_rate": 0.0008311009474028095, + "loss": 1.125, + "step": 5225 + }, + { + "epoch": 1.71, + "learning_rate": 0.0008302842208428618, + "loss": 1.1628, + "step": 5250 + }, + { + "epoch": 1.72, + "learning_rate": 0.0008294674942829142, + "loss": 1.1775, + "step": 5275 + }, + { + "epoch": 1.73, + "learning_rate": 0.0008286507677229664, + "loss": 1.1475, + "step": 5300 + }, + { + "epoch": 1.74, + "learning_rate": 0.0008278340411630187, + "loss": 1.1174, + "step": 5325 + }, + { + "epoch": 1.74, + "learning_rate": 0.0008270173146030709, + "loss": 1.1714, + "step": 5350 + }, + { + "epoch": 1.75, + "learning_rate": 0.0008262005880431231, + "loss": 1.0809, + "step": 5375 + }, + { + "epoch": 1.76, + "learning_rate": 0.0008253838614831754, + "loss": 1.1005, + "step": 5400 + }, + { + "epoch": 1.77, + "learning_rate": 0.0008245671349232278, + "loss": 1.1567, + "step": 5425 + }, + { + "epoch": 1.78, + "learning_rate": 0.00082375040836328, + "loss": 1.1455, + "step": 5450 + }, + { + "epoch": 1.79, + "learning_rate": 0.0008229336818033323, + "loss": 1.0424, + "step": 5475 + }, + { + "epoch": 1.79, + "learning_rate": 0.0008221169552433845, + "loss": 1.1255, + "step": 5500 + }, + { + "epoch": 1.8, + "learning_rate": 0.0008213002286834368, + "loss": 1.0574, + "step": 5525 + }, + { + "epoch": 1.81, + "learning_rate": 0.000820483502123489, + "loss": 1.0636, + "step": 5550 + }, + { + "epoch": 1.82, + "learning_rate": 0.0008196667755635413, + "loss": 1.1092, + "step": 5575 + }, + { + "epoch": 1.83, + "learning_rate": 0.0008188500490035936, + "loss": 1.104, + "step": 5600 + }, + { + "epoch": 1.83, + "learning_rate": 0.0008180333224436459, + "loss": 1.1652, + "step": 5625 + }, + { + "epoch": 1.84, + "learning_rate": 0.0008172165958836981, + "loss": 1.084, + "step": 5650 + }, + { + "epoch": 1.85, + "learning_rate": 0.0008163998693237505, + "loss": 1.0867, + "step": 5675 + }, + { + "epoch": 1.86, + "learning_rate": 0.0008155831427638028, + "loss": 1.0894, + "step": 5700 + }, + { + "epoch": 1.87, + "learning_rate": 0.000814766416203855, + "loss": 1.1023, + "step": 5725 + }, + { + "epoch": 1.88, + "learning_rate": 0.0008139496896439072, + "loss": 1.1705, + "step": 5750 + }, + { + "epoch": 1.88, + "learning_rate": 0.0008131329630839594, + "loss": 1.1505, + "step": 5775 + }, + { + "epoch": 1.89, + "learning_rate": 0.0008123162365240117, + "loss": 1.0699, + "step": 5800 + }, + { + "epoch": 1.9, + "learning_rate": 0.0008114995099640641, + "loss": 1.1623, + "step": 5825 + }, + { + "epoch": 1.91, + "learning_rate": 0.0008106827834041163, + "loss": 1.104, + "step": 5850 + }, + { + "epoch": 1.92, + "learning_rate": 0.0008098660568441686, + "loss": 1.087, + "step": 5875 + }, + { + "epoch": 1.92, + "learning_rate": 0.0008090493302842209, + "loss": 1.1056, + "step": 5900 + }, + { + "epoch": 1.93, + "learning_rate": 0.0008082326037242731, + "loss": 1.094, + "step": 5925 + }, + { + "epoch": 1.94, + "learning_rate": 0.0008074158771643254, + "loss": 1.0792, + "step": 5950 + }, + { + "epoch": 1.95, + "learning_rate": 0.0008065991506043777, + "loss": 1.1011, + "step": 5975 + }, + { + "epoch": 1.96, + "learning_rate": 0.0008057824240444299, + "loss": 1.077, + "step": 6000 + }, + { + "epoch": 1.97, + "learning_rate": 0.0008049656974844822, + "loss": 1.1078, + "step": 6025 + }, + { + "epoch": 1.97, + "learning_rate": 0.0008041489709245344, + "loss": 1.0974, + "step": 6050 + }, + { + "epoch": 1.98, + "learning_rate": 0.0008033322443645868, + "loss": 1.1687, + "step": 6075 + }, + { + "epoch": 1.99, + "learning_rate": 0.0008025155178046391, + "loss": 1.1413, + "step": 6100 + }, + { + "epoch": 2.0, + "learning_rate": 0.0008016987912446913, + "loss": 1.1263, + "step": 6125 + }, + { + "epoch": 2.0, + "eval_loss": 0.996329128742218, + "eval_runtime": 962.9173, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.13, + "eval_wer": 11.694488482700736, + "step": 6132 + }, + { + "epoch": 2.01, + "learning_rate": 0.0008008820646847436, + "loss": 1.0098, + "step": 6150 + }, + { + "epoch": 2.01, + "learning_rate": 0.0008000653381247958, + "loss": 1.065, + "step": 6175 + }, + { + "epoch": 2.02, + "learning_rate": 0.000799248611564848, + "loss": 0.9865, + "step": 6200 + }, + { + "epoch": 2.03, + "learning_rate": 0.0007984318850049004, + "loss": 1.1061, + "step": 6225 + }, + { + "epoch": 2.04, + "learning_rate": 0.0007976151584449527, + "loss": 1.148, + "step": 6250 + }, + { + "epoch": 2.05, + "learning_rate": 0.0007967984318850049, + "loss": 1.003, + "step": 6275 + }, + { + "epoch": 2.05, + "learning_rate": 0.0007959817053250572, + "loss": 1.0571, + "step": 6300 + }, + { + "epoch": 2.06, + "learning_rate": 0.0007951649787651094, + "loss": 1.036, + "step": 6325 + }, + { + "epoch": 2.07, + "learning_rate": 0.0007943482522051618, + "loss": 1.0794, + "step": 6350 + }, + { + "epoch": 2.08, + "learning_rate": 0.000793531525645214, + "loss": 1.0093, + "step": 6375 + }, + { + "epoch": 2.09, + "learning_rate": 0.0007927147990852662, + "loss": 1.0416, + "step": 6400 + }, + { + "epoch": 2.1, + "learning_rate": 0.0007918980725253185, + "loss": 1.0552, + "step": 6425 + }, + { + "epoch": 2.1, + "learning_rate": 0.0007910813459653708, + "loss": 1.061, + "step": 6450 + }, + { + "epoch": 2.11, + "learning_rate": 0.000790264619405423, + "loss": 1.0884, + "step": 6475 + }, + { + "epoch": 2.12, + "learning_rate": 0.0007894478928454754, + "loss": 1.042, + "step": 6500 + }, + { + "epoch": 2.13, + "learning_rate": 0.0007886311662855277, + "loss": 1.0047, + "step": 6525 + }, + { + "epoch": 2.14, + "learning_rate": 0.0007878144397255799, + "loss": 1.0589, + "step": 6550 + }, + { + "epoch": 2.14, + "learning_rate": 0.0007869977131656321, + "loss": 1.108, + "step": 6575 + }, + { + "epoch": 2.15, + "learning_rate": 0.0007861809866056843, + "loss": 1.0201, + "step": 6600 + }, + { + "epoch": 2.16, + "learning_rate": 0.0007853642600457367, + "loss": 1.0152, + "step": 6625 + }, + { + "epoch": 2.17, + "learning_rate": 0.000784547533485789, + "loss": 1.049, + "step": 6650 + }, + { + "epoch": 2.18, + "learning_rate": 0.0007837308069258412, + "loss": 1.1016, + "step": 6675 + }, + { + "epoch": 2.19, + "learning_rate": 0.0007829140803658935, + "loss": 1.057, + "step": 6700 + }, + { + "epoch": 2.19, + "learning_rate": 0.0007820973538059458, + "loss": 0.9956, + "step": 6725 + }, + { + "epoch": 2.2, + "learning_rate": 0.0007812806272459981, + "loss": 1.0933, + "step": 6750 + }, + { + "epoch": 2.21, + "learning_rate": 0.0007804639006860504, + "loss": 1.1044, + "step": 6775 + }, + { + "epoch": 2.22, + "learning_rate": 0.0007796471741261026, + "loss": 1.064, + "step": 6800 + }, + { + "epoch": 2.23, + "learning_rate": 0.0007788304475661548, + "loss": 1.0255, + "step": 6825 + }, + { + "epoch": 2.23, + "learning_rate": 0.0007780137210062071, + "loss": 1.0522, + "step": 6850 + }, + { + "epoch": 2.24, + "learning_rate": 0.0007771969944462594, + "loss": 1.0389, + "step": 6875 + }, + { + "epoch": 2.25, + "learning_rate": 0.0007763802678863117, + "loss": 1.0294, + "step": 6900 + }, + { + "epoch": 2.26, + "learning_rate": 0.000775563541326364, + "loss": 1.0551, + "step": 6925 + }, + { + "epoch": 2.27, + "learning_rate": 0.0007747468147664162, + "loss": 1.0042, + "step": 6950 + }, + { + "epoch": 2.27, + "learning_rate": 0.0007739300882064685, + "loss": 1.0697, + "step": 6975 + }, + { + "epoch": 2.28, + "learning_rate": 0.0007731133616465207, + "loss": 1.1156, + "step": 7000 + }, + { + "epoch": 2.29, + "learning_rate": 0.000772296635086573, + "loss": 1.0384, + "step": 7025 + }, + { + "epoch": 2.3, + "learning_rate": 0.0007714799085266253, + "loss": 1.0968, + "step": 7050 + }, + { + "epoch": 2.31, + "learning_rate": 0.0007706631819666776, + "loss": 1.0474, + "step": 7075 + }, + { + "epoch": 2.32, + "learning_rate": 0.0007698464554067298, + "loss": 1.0105, + "step": 7100 + }, + { + "epoch": 2.32, + "learning_rate": 0.0007690297288467821, + "loss": 1.1244, + "step": 7125 + }, + { + "epoch": 2.33, + "learning_rate": 0.0007682130022868345, + "loss": 1.0803, + "step": 7150 + }, + { + "epoch": 2.34, + "learning_rate": 0.0007673962757268867, + "loss": 1.0919, + "step": 7175 + }, + { + "epoch": 2.35, + "learning_rate": 0.0007665795491669389, + "loss": 1.1078, + "step": 7200 + }, + { + "epoch": 2.36, + "learning_rate": 0.0007657628226069911, + "loss": 1.0405, + "step": 7225 + }, + { + "epoch": 2.36, + "learning_rate": 0.0007649460960470434, + "loss": 1.1196, + "step": 7250 + }, + { + "epoch": 2.37, + "learning_rate": 0.0007641293694870958, + "loss": 1.1042, + "step": 7275 + }, + { + "epoch": 2.38, + "learning_rate": 0.000763312642927148, + "loss": 1.0317, + "step": 7300 + }, + { + "epoch": 2.39, + "learning_rate": 0.0007624959163672003, + "loss": 1.0347, + "step": 7325 + }, + { + "epoch": 2.4, + "learning_rate": 0.0007616791898072526, + "loss": 1.0893, + "step": 7350 + }, + { + "epoch": 2.41, + "learning_rate": 0.0007608624632473048, + "loss": 1.1129, + "step": 7375 + }, + { + "epoch": 2.41, + "learning_rate": 0.000760045736687357, + "loss": 1.0961, + "step": 7400 + }, + { + "epoch": 2.42, + "learning_rate": 0.0007592290101274094, + "loss": 1.0584, + "step": 7425 + }, + { + "epoch": 2.43, + "learning_rate": 0.0007584122835674616, + "loss": 1.0908, + "step": 7450 + }, + { + "epoch": 2.44, + "learning_rate": 0.0007575955570075139, + "loss": 1.1904, + "step": 7475 + }, + { + "epoch": 2.45, + "learning_rate": 0.0007567788304475661, + "loss": 1.0264, + "step": 7500 + }, + { + "epoch": 2.45, + "learning_rate": 0.0007559621038876184, + "loss": 1.1083, + "step": 7525 + }, + { + "epoch": 2.46, + "learning_rate": 0.0007551453773276708, + "loss": 1.0739, + "step": 7550 + }, + { + "epoch": 2.47, + "learning_rate": 0.000754328650767723, + "loss": 1.0946, + "step": 7575 + }, + { + "epoch": 2.48, + "learning_rate": 0.0007535119242077753, + "loss": 1.0595, + "step": 7600 + }, + { + "epoch": 2.49, + "learning_rate": 0.0007526951976478275, + "loss": 0.985, + "step": 7625 + }, + { + "epoch": 2.5, + "learning_rate": 0.0007518784710878797, + "loss": 1.0613, + "step": 7650 + }, + { + "epoch": 2.5, + "learning_rate": 0.0007510617445279321, + "loss": 1.0642, + "step": 7675 + }, + { + "epoch": 2.51, + "learning_rate": 0.0007502450179679844, + "loss": 1.119, + "step": 7700 + }, + { + "epoch": 2.52, + "learning_rate": 0.0007494282914080366, + "loss": 1.0649, + "step": 7725 + }, + { + "epoch": 2.53, + "learning_rate": 0.0007486115648480889, + "loss": 1.0372, + "step": 7750 + }, + { + "epoch": 2.54, + "learning_rate": 0.0007477948382881411, + "loss": 1.1075, + "step": 7775 + }, + { + "epoch": 2.54, + "learning_rate": 0.0007469781117281935, + "loss": 1.1406, + "step": 7800 + }, + { + "epoch": 2.55, + "learning_rate": 0.0007461613851682457, + "loss": 1.0612, + "step": 7825 + }, + { + "epoch": 2.56, + "learning_rate": 0.0007453446586082979, + "loss": 1.1374, + "step": 7850 + }, + { + "epoch": 2.57, + "learning_rate": 0.0007445279320483502, + "loss": 1.1272, + "step": 7875 + }, + { + "epoch": 2.58, + "learning_rate": 0.0007437112054884025, + "loss": 1.0771, + "step": 7900 + }, + { + "epoch": 2.58, + "learning_rate": 0.0007428944789284547, + "loss": 1.0746, + "step": 7925 + }, + { + "epoch": 2.59, + "learning_rate": 0.0007420777523685071, + "loss": 1.0436, + "step": 7950 + }, + { + "epoch": 2.6, + "learning_rate": 0.0007412610258085594, + "loss": 1.0137, + "step": 7975 + }, + { + "epoch": 2.61, + "learning_rate": 0.0007404442992486116, + "loss": 1.039, + "step": 8000 + }, + { + "epoch": 2.62, + "learning_rate": 0.0007396275726886638, + "loss": 1.0621, + "step": 8025 + }, + { + "epoch": 2.63, + "learning_rate": 0.000738810846128716, + "loss": 1.0593, + "step": 8050 + }, + { + "epoch": 2.63, + "learning_rate": 0.0007379941195687684, + "loss": 1.0682, + "step": 8075 + }, + { + "epoch": 2.64, + "learning_rate": 0.0007371773930088207, + "loss": 1.1118, + "step": 8100 + }, + { + "epoch": 2.65, + "learning_rate": 0.0007363606664488729, + "loss": 1.0247, + "step": 8125 + }, + { + "epoch": 2.66, + "learning_rate": 0.0007355439398889252, + "loss": 1.0238, + "step": 8150 + }, + { + "epoch": 2.67, + "learning_rate": 0.0007347272133289775, + "loss": 0.9952, + "step": 8175 + }, + { + "epoch": 2.67, + "learning_rate": 0.0007339104867690298, + "loss": 1.1454, + "step": 8200 + }, + { + "epoch": 2.68, + "learning_rate": 0.0007330937602090821, + "loss": 1.1397, + "step": 8225 + }, + { + "epoch": 2.69, + "learning_rate": 0.0007322770336491343, + "loss": 1.047, + "step": 8250 + }, + { + "epoch": 2.7, + "learning_rate": 0.0007314603070891865, + "loss": 1.0736, + "step": 8275 + }, + { + "epoch": 2.71, + "learning_rate": 0.0007306435805292388, + "loss": 1.0854, + "step": 8300 + }, + { + "epoch": 2.72, + "learning_rate": 0.000729826853969291, + "loss": 1.0001, + "step": 8325 + }, + { + "epoch": 2.72, + "learning_rate": 0.0007290101274093434, + "loss": 1.1361, + "step": 8350 + }, + { + "epoch": 2.73, + "learning_rate": 0.0007281934008493957, + "loss": 1.1009, + "step": 8375 + }, + { + "epoch": 2.74, + "learning_rate": 0.0007273766742894479, + "loss": 1.0611, + "step": 8400 + }, + { + "epoch": 2.75, + "learning_rate": 0.0007265599477295002, + "loss": 1.0593, + "step": 8425 + }, + { + "epoch": 2.76, + "learning_rate": 0.0007257432211695524, + "loss": 1.0397, + "step": 8450 + }, + { + "epoch": 2.76, + "learning_rate": 0.0007249264946096047, + "loss": 1.0726, + "step": 8475 + }, + { + "epoch": 2.77, + "learning_rate": 0.000724109768049657, + "loss": 1.0879, + "step": 8500 + }, + { + "epoch": 2.78, + "learning_rate": 0.0007232930414897093, + "loss": 1.0216, + "step": 8525 + }, + { + "epoch": 2.79, + "learning_rate": 0.0007224763149297615, + "loss": 0.965, + "step": 8550 + }, + { + "epoch": 2.8, + "learning_rate": 0.0007216595883698138, + "loss": 1.0499, + "step": 8575 + }, + { + "epoch": 2.8, + "learning_rate": 0.0007208428618098661, + "loss": 1.0893, + "step": 8600 + }, + { + "epoch": 2.81, + "learning_rate": 0.0007200261352499184, + "loss": 1.0375, + "step": 8625 + }, + { + "epoch": 2.82, + "learning_rate": 0.0007192094086899706, + "loss": 0.9769, + "step": 8650 + }, + { + "epoch": 2.83, + "learning_rate": 0.0007183926821300228, + "loss": 1.0305, + "step": 8675 + }, + { + "epoch": 2.84, + "learning_rate": 0.0007175759555700751, + "loss": 1.0713, + "step": 8700 + }, + { + "epoch": 2.85, + "learning_rate": 0.0007167592290101275, + "loss": 1.0575, + "step": 8725 + }, + { + "epoch": 2.85, + "learning_rate": 0.0007159425024501797, + "loss": 1.0231, + "step": 8750 + }, + { + "epoch": 2.86, + "learning_rate": 0.000715125775890232, + "loss": 1.0553, + "step": 8775 + }, + { + "epoch": 2.87, + "learning_rate": 0.0007143090493302843, + "loss": 1.0258, + "step": 8800 + }, + { + "epoch": 2.88, + "learning_rate": 0.0007134923227703365, + "loss": 1.0678, + "step": 8825 + }, + { + "epoch": 2.89, + "learning_rate": 0.0007127082652727866, + "loss": 1.1467, + "step": 8850 + }, + { + "epoch": 2.89, + "learning_rate": 0.000711891538712839, + "loss": 1.074, + "step": 8875 + }, + { + "epoch": 2.9, + "learning_rate": 0.0007110748121528913, + "loss": 1.1102, + "step": 8900 + }, + { + "epoch": 2.91, + "learning_rate": 0.0007102580855929435, + "loss": 1.0164, + "step": 8925 + }, + { + "epoch": 2.92, + "learning_rate": 0.0007094413590329958, + "loss": 1.0392, + "step": 8950 + }, + { + "epoch": 2.93, + "learning_rate": 0.000708624632473048, + "loss": 1.1001, + "step": 8975 + }, + { + "epoch": 2.94, + "learning_rate": 0.0007078079059131003, + "loss": 1.1367, + "step": 9000 + }, + { + "epoch": 2.94, + "learning_rate": 0.0007069911793531526, + "loss": 1.0218, + "step": 9025 + }, + { + "epoch": 2.95, + "learning_rate": 0.0007061744527932048, + "loss": 1.0521, + "step": 9050 + }, + { + "epoch": 2.96, + "learning_rate": 0.0007053577262332571, + "loss": 1.0717, + "step": 9075 + }, + { + "epoch": 2.97, + "learning_rate": 0.0007045409996733094, + "loss": 1.0535, + "step": 9100 + }, + { + "epoch": 2.98, + "learning_rate": 0.0007037242731133617, + "loss": 1.0772, + "step": 9125 + }, + { + "epoch": 2.98, + "learning_rate": 0.000702907546553414, + "loss": 1.0168, + "step": 9150 + }, + { + "epoch": 2.99, + "learning_rate": 0.0007020908199934663, + "loss": 1.0555, + "step": 9175 + }, + { + "epoch": 3.0, + "eval_loss": 0.9617839455604553, + "eval_runtime": 961.3646, + "eval_samples_per_second": 1.04, + "eval_steps_per_second": 0.13, + "eval_wer": 8.094749603655693, + "step": 9198 + }, + { + "epoch": 3.0, + "learning_rate": 0.0007012740934335184, + "loss": 1.1186, + "step": 9200 + }, + { + "epoch": 3.01, + "learning_rate": 0.0007004573668735707, + "loss": 1.0314, + "step": 9225 + }, + { + "epoch": 3.02, + "learning_rate": 0.000699640640313623, + "loss": 0.9924, + "step": 9250 + }, + { + "epoch": 3.03, + "learning_rate": 0.0006988239137536753, + "loss": 0.9701, + "step": 9275 + }, + { + "epoch": 3.03, + "learning_rate": 0.0006980071871937276, + "loss": 0.9698, + "step": 9300 + }, + { + "epoch": 3.04, + "learning_rate": 0.0006971904606337798, + "loss": 0.962, + "step": 9325 + }, + { + "epoch": 3.05, + "learning_rate": 0.0006963737340738321, + "loss": 0.9654, + "step": 9350 + }, + { + "epoch": 3.06, + "learning_rate": 0.0006955570075138844, + "loss": 0.964, + "step": 9375 + }, + { + "epoch": 3.07, + "learning_rate": 0.0006947402809539366, + "loss": 0.986, + "step": 9400 + }, + { + "epoch": 3.07, + "learning_rate": 0.0006939235543939889, + "loss": 0.9084, + "step": 9425 + }, + { + "epoch": 3.08, + "learning_rate": 0.0006931068278340412, + "loss": 0.9674, + "step": 9450 + }, + { + "epoch": 3.09, + "learning_rate": 0.0006922901012740934, + "loss": 0.9632, + "step": 9475 + }, + { + "epoch": 3.1, + "learning_rate": 0.0006914733747141457, + "loss": 1.002, + "step": 9500 + }, + { + "epoch": 3.11, + "learning_rate": 0.0006906566481541981, + "loss": 0.9838, + "step": 9525 + }, + { + "epoch": 3.11, + "learning_rate": 0.0006898399215942503, + "loss": 0.9844, + "step": 9550 + }, + { + "epoch": 3.12, + "learning_rate": 0.0006890231950343026, + "loss": 0.9847, + "step": 9575 + }, + { + "epoch": 3.13, + "learning_rate": 0.0006882064684743547, + "loss": 1.0419, + "step": 9600 + }, + { + "epoch": 3.14, + "learning_rate": 0.000687389741914407, + "loss": 0.989, + "step": 9625 + }, + { + "epoch": 3.15, + "learning_rate": 0.0006865730153544593, + "loss": 0.9602, + "step": 9650 + }, + { + "epoch": 3.16, + "learning_rate": 0.0006857562887945116, + "loss": 0.9614, + "step": 9675 + }, + { + "epoch": 3.16, + "learning_rate": 0.0006849395622345639, + "loss": 0.9817, + "step": 9700 + }, + { + "epoch": 3.17, + "learning_rate": 0.0006841228356746162, + "loss": 1.0453, + "step": 9725 + }, + { + "epoch": 3.18, + "learning_rate": 0.0006833061091146684, + "loss": 0.9884, + "step": 9750 + }, + { + "epoch": 3.19, + "learning_rate": 0.0006824893825547207, + "loss": 0.9821, + "step": 9775 + }, + { + "epoch": 3.2, + "learning_rate": 0.000681672655994773, + "loss": 0.9874, + "step": 9800 + }, + { + "epoch": 3.2, + "learning_rate": 0.0006808559294348252, + "loss": 0.99, + "step": 9825 + }, + { + "epoch": 3.21, + "learning_rate": 0.0006800392028748775, + "loss": 1.0586, + "step": 9850 + }, + { + "epoch": 3.22, + "learning_rate": 0.0006792224763149297, + "loss": 1.0218, + "step": 9875 + }, + { + "epoch": 3.23, + "learning_rate": 0.000678405749754982, + "loss": 0.9834, + "step": 9900 + }, + { + "epoch": 3.24, + "learning_rate": 0.0006775890231950344, + "loss": 1.0296, + "step": 9925 + }, + { + "epoch": 3.25, + "learning_rate": 0.0006767722966350866, + "loss": 1.0014, + "step": 9950 + }, + { + "epoch": 3.25, + "learning_rate": 0.0006759555700751389, + "loss": 0.9946, + "step": 9975 + }, + { + "epoch": 3.26, + "learning_rate": 0.0006751388435151912, + "loss": 1.0409, + "step": 10000 + }, + { + "epoch": 3.27, + "learning_rate": 0.0006743221169552433, + "loss": 1.0256, + "step": 10025 + }, + { + "epoch": 3.28, + "learning_rate": 0.0006735053903952957, + "loss": 1.0599, + "step": 10050 + }, + { + "epoch": 3.29, + "learning_rate": 0.000672688663835348, + "loss": 1.092, + "step": 10075 + }, + { + "epoch": 3.29, + "learning_rate": 0.0006718719372754002, + "loss": 1.0112, + "step": 10100 + }, + { + "epoch": 3.3, + "learning_rate": 0.0006710552107154525, + "loss": 0.99, + "step": 10125 + }, + { + "epoch": 3.31, + "learning_rate": 0.0006702384841555047, + "loss": 0.9849, + "step": 10150 + }, + { + "epoch": 3.32, + "learning_rate": 0.000669421757595557, + "loss": 0.9633, + "step": 10175 + }, + { + "epoch": 3.33, + "learning_rate": 0.0006686050310356094, + "loss": 1.0148, + "step": 10200 + }, + { + "epoch": 3.33, + "learning_rate": 0.0006677883044756615, + "loss": 1.0537, + "step": 10225 + }, + { + "epoch": 3.34, + "learning_rate": 0.0006669715779157138, + "loss": 1.0297, + "step": 10250 + }, + { + "epoch": 3.35, + "learning_rate": 0.0006661548513557661, + "loss": 0.9652, + "step": 10275 + }, + { + "epoch": 3.36, + "learning_rate": 0.0006653381247958183, + "loss": 1.0758, + "step": 10300 + }, + { + "epoch": 3.37, + "learning_rate": 0.0006645213982358707, + "loss": 0.9836, + "step": 10325 + }, + { + "epoch": 3.38, + "learning_rate": 0.000663704671675923, + "loss": 1.0076, + "step": 10350 + }, + { + "epoch": 3.38, + "learning_rate": 0.0006628879451159752, + "loss": 1.0426, + "step": 10375 + }, + { + "epoch": 3.39, + "learning_rate": 0.0006620712185560275, + "loss": 1.0186, + "step": 10400 + }, + { + "epoch": 3.4, + "learning_rate": 0.0006612544919960796, + "loss": 0.9763, + "step": 10425 + }, + { + "epoch": 3.41, + "learning_rate": 0.000660437765436132, + "loss": 0.9896, + "step": 10450 + }, + { + "epoch": 3.42, + "learning_rate": 0.0006596210388761843, + "loss": 1.0314, + "step": 10475 + }, + { + "epoch": 3.42, + "learning_rate": 0.0006588043123162365, + "loss": 1.0192, + "step": 10500 + }, + { + "epoch": 3.43, + "learning_rate": 0.0006579875857562888, + "loss": 1.0586, + "step": 10525 + }, + { + "epoch": 3.44, + "learning_rate": 0.0006571708591963411, + "loss": 1.062, + "step": 10550 + }, + { + "epoch": 3.45, + "learning_rate": 0.0006563541326363933, + "loss": 1.0372, + "step": 10575 + }, + { + "epoch": 3.46, + "learning_rate": 0.0006555374060764457, + "loss": 0.9983, + "step": 10600 + }, + { + "epoch": 3.47, + "learning_rate": 0.000654720679516498, + "loss": 0.9564, + "step": 10625 + }, + { + "epoch": 3.47, + "learning_rate": 0.0006539039529565501, + "loss": 1.0326, + "step": 10650 + }, + { + "epoch": 3.48, + "learning_rate": 0.0006530872263966024, + "loss": 1.0218, + "step": 10675 + }, + { + "epoch": 3.49, + "learning_rate": 0.0006522704998366546, + "loss": 0.9739, + "step": 10700 + }, + { + "epoch": 3.5, + "learning_rate": 0.000651453773276707, + "loss": 0.9456, + "step": 10725 + }, + { + "epoch": 3.51, + "learning_rate": 0.0006506370467167593, + "loss": 0.9507, + "step": 10750 + }, + { + "epoch": 3.51, + "learning_rate": 0.0006498203201568115, + "loss": 1.0446, + "step": 10775 + }, + { + "epoch": 3.52, + "learning_rate": 0.0006490035935968638, + "loss": 1.0757, + "step": 10800 + }, + { + "epoch": 3.53, + "learning_rate": 0.0006481868670369161, + "loss": 0.9148, + "step": 10825 + }, + { + "epoch": 3.54, + "learning_rate": 0.0006473701404769683, + "loss": 1.0086, + "step": 10850 + }, + { + "epoch": 3.55, + "learning_rate": 0.0006465534139170206, + "loss": 1.0549, + "step": 10875 + }, + { + "epoch": 3.56, + "learning_rate": 0.0006457366873570729, + "loss": 0.9891, + "step": 10900 + }, + { + "epoch": 3.56, + "learning_rate": 0.0006449199607971251, + "loss": 1.0153, + "step": 10925 + }, + { + "epoch": 3.57, + "learning_rate": 0.0006441032342371774, + "loss": 1.0154, + "step": 10950 + }, + { + "epoch": 3.58, + "learning_rate": 0.0006432865076772296, + "loss": 1.0767, + "step": 10975 + }, + { + "epoch": 3.59, + "learning_rate": 0.000642469781117282, + "loss": 1.0556, + "step": 11000 + }, + { + "epoch": 3.6, + "learning_rate": 0.0006416530545573343, + "loss": 1.0148, + "step": 11025 + }, + { + "epoch": 3.6, + "learning_rate": 0.0006408363279973864, + "loss": 1.043, + "step": 11050 + }, + { + "epoch": 3.61, + "learning_rate": 0.0006400196014374387, + "loss": 1.0198, + "step": 11075 + }, + { + "epoch": 3.62, + "learning_rate": 0.000639202874877491, + "loss": 0.9839, + "step": 11100 + }, + { + "epoch": 3.63, + "learning_rate": 0.0006383861483175433, + "loss": 1.0209, + "step": 11125 + }, + { + "epoch": 3.64, + "learning_rate": 0.0006375694217575956, + "loss": 1.0115, + "step": 11150 + }, + { + "epoch": 3.64, + "learning_rate": 0.0006367526951976479, + "loss": 1.0739, + "step": 11175 + }, + { + "epoch": 3.65, + "learning_rate": 0.0006359359686377001, + "loss": 1.0323, + "step": 11200 + }, + { + "epoch": 3.66, + "learning_rate": 0.0006351192420777524, + "loss": 0.9769, + "step": 11225 + }, + { + "epoch": 3.67, + "learning_rate": 0.0006343025155178046, + "loss": 1.0332, + "step": 11250 + }, + { + "epoch": 3.68, + "learning_rate": 0.0006334857889578569, + "loss": 0.9927, + "step": 11275 + }, + { + "epoch": 3.69, + "learning_rate": 0.0006326690623979092, + "loss": 1.0269, + "step": 11300 + }, + { + "epoch": 3.69, + "learning_rate": 0.0006318523358379614, + "loss": 1.0341, + "step": 11325 + }, + { + "epoch": 3.7, + "learning_rate": 0.0006310356092780137, + "loss": 0.9852, + "step": 11350 + }, + { + "epoch": 3.71, + "learning_rate": 0.0006302188827180661, + "loss": 1.08, + "step": 11375 + }, + { + "epoch": 3.72, + "learning_rate": 0.0006294021561581183, + "loss": 1.0056, + "step": 11400 + }, + { + "epoch": 3.73, + "learning_rate": 0.0006285854295981706, + "loss": 1.0044, + "step": 11425 + }, + { + "epoch": 3.73, + "learning_rate": 0.0006277687030382229, + "loss": 0.9931, + "step": 11450 + }, + { + "epoch": 3.74, + "learning_rate": 0.000626951976478275, + "loss": 1.0077, + "step": 11475 + }, + { + "epoch": 3.75, + "learning_rate": 0.0006261352499183273, + "loss": 1.0072, + "step": 11500 + }, + { + "epoch": 3.76, + "learning_rate": 0.0006253185233583796, + "loss": 0.975, + "step": 11525 + }, + { + "epoch": 3.77, + "learning_rate": 0.0006245017967984319, + "loss": 0.9875, + "step": 11550 + }, + { + "epoch": 3.78, + "learning_rate": 0.0006236850702384842, + "loss": 0.9519, + "step": 11575 + }, + { + "epoch": 3.78, + "learning_rate": 0.0006228683436785364, + "loss": 1.0437, + "step": 11600 + }, + { + "epoch": 3.79, + "learning_rate": 0.0006220516171185887, + "loss": 0.9725, + "step": 11625 + }, + { + "epoch": 3.8, + "learning_rate": 0.0006212348905586411, + "loss": 1.0102, + "step": 11650 + }, + { + "epoch": 3.81, + "learning_rate": 0.0006204181639986932, + "loss": 1.072, + "step": 11675 + }, + { + "epoch": 3.82, + "learning_rate": 0.0006196014374387455, + "loss": 1.0051, + "step": 11700 + }, + { + "epoch": 3.82, + "learning_rate": 0.0006187847108787978, + "loss": 0.9997, + "step": 11725 + }, + { + "epoch": 3.83, + "learning_rate": 0.00061796798431885, + "loss": 1.0409, + "step": 11750 + }, + { + "epoch": 3.84, + "learning_rate": 0.0006171512577589024, + "loss": 1.0285, + "step": 11775 + }, + { + "epoch": 3.85, + "learning_rate": 0.0006163345311989546, + "loss": 1.0697, + "step": 11800 + }, + { + "epoch": 3.86, + "learning_rate": 0.0006155178046390069, + "loss": 1.0923, + "step": 11825 + }, + { + "epoch": 3.86, + "learning_rate": 0.0006147010780790592, + "loss": 1.0785, + "step": 11850 + }, + { + "epoch": 3.87, + "learning_rate": 0.0006138843515191113, + "loss": 0.9977, + "step": 11875 + }, + { + "epoch": 3.88, + "learning_rate": 0.0006130676249591636, + "loss": 1.0268, + "step": 11900 + }, + { + "epoch": 3.89, + "learning_rate": 0.000612250898399216, + "loss": 1.022, + "step": 11925 + }, + { + "epoch": 3.9, + "learning_rate": 0.0006114341718392682, + "loss": 0.9922, + "step": 11950 + }, + { + "epoch": 3.91, + "learning_rate": 0.0006106174452793205, + "loss": 1.0643, + "step": 11975 + }, + { + "epoch": 3.91, + "learning_rate": 0.0006098007187193728, + "loss": 1.0241, + "step": 12000 + }, + { + "epoch": 3.92, + "learning_rate": 0.000608983992159425, + "loss": 0.9835, + "step": 12025 + }, + { + "epoch": 3.93, + "learning_rate": 0.0006081672655994774, + "loss": 0.9964, + "step": 12050 + }, + { + "epoch": 3.94, + "learning_rate": 0.0006073505390395295, + "loss": 0.9939, + "step": 12075 + }, + { + "epoch": 3.95, + "learning_rate": 0.0006065338124795818, + "loss": 1.0776, + "step": 12100 + }, + { + "epoch": 3.95, + "learning_rate": 0.0006057170859196341, + "loss": 1.0002, + "step": 12125 + }, + { + "epoch": 3.96, + "learning_rate": 0.0006049003593596863, + "loss": 1.053, + "step": 12150 + }, + { + "epoch": 3.97, + "learning_rate": 0.0006040836327997387, + "loss": 0.9921, + "step": 12175 + }, + { + "epoch": 3.98, + "learning_rate": 0.000603266906239791, + "loss": 1.0601, + "step": 12200 + }, + { + "epoch": 3.99, + "learning_rate": 0.0006024501796798432, + "loss": 1.0023, + "step": 12225 + }, + { + "epoch": 4.0, + "learning_rate": 0.0006016334531198955, + "loss": 1.0065, + "step": 12250 + }, + { + "epoch": 4.0, + "eval_loss": 0.9367652535438538, + "eval_runtime": 929.0448, + "eval_samples_per_second": 1.076, + "eval_steps_per_second": 0.135, + "eval_wer": 10.500792688613261, + "step": 12264 + }, + { + "epoch": 4.0, + "learning_rate": 0.0006008167265599478, + "loss": 0.9477, + "step": 12275 + }, + { + "epoch": 4.01, + "learning_rate": 0.0006, + "loss": 0.9413, + "step": 12300 + }, + { + "epoch": 4.02, + "learning_rate": 0.0005991832734400523, + "loss": 0.999, + "step": 12325 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005983665468801045, + "loss": 0.9664, + "step": 12350 + }, + { + "epoch": 4.04, + "learning_rate": 0.0005975498203201568, + "loss": 0.9532, + "step": 12375 + }, + { + "epoch": 4.04, + "learning_rate": 0.0005967330937602091, + "loss": 0.9227, + "step": 12400 + }, + { + "epoch": 4.05, + "learning_rate": 0.0005959163672002613, + "loss": 0.916, + "step": 12425 + }, + { + "epoch": 4.06, + "learning_rate": 0.0005950996406403137, + "loss": 0.9664, + "step": 12450 + }, + { + "epoch": 4.07, + "learning_rate": 0.000594282914080366, + "loss": 0.9344, + "step": 12475 + }, + { + "epoch": 4.08, + "learning_rate": 0.0005934661875204181, + "loss": 0.9729, + "step": 12500 + }, + { + "epoch": 4.09, + "learning_rate": 0.0005926494609604704, + "loss": 0.9404, + "step": 12525 + }, + { + "epoch": 4.09, + "learning_rate": 0.0005918327344005227, + "loss": 0.9351, + "step": 12550 + }, + { + "epoch": 4.1, + "learning_rate": 0.000591016007840575, + "loss": 0.9692, + "step": 12575 + }, + { + "epoch": 4.11, + "learning_rate": 0.0005901992812806273, + "loss": 0.9366, + "step": 12600 + }, + { + "epoch": 4.12, + "learning_rate": 0.0005893825547206795, + "loss": 0.9594, + "step": 12625 + }, + { + "epoch": 4.13, + "learning_rate": 0.0005885658281607318, + "loss": 0.9101, + "step": 12650 + }, + { + "epoch": 4.13, + "learning_rate": 0.0005877491016007841, + "loss": 0.9675, + "step": 12675 + }, + { + "epoch": 4.14, + "learning_rate": 0.0005869323750408362, + "loss": 0.9396, + "step": 12700 + }, + { + "epoch": 4.15, + "learning_rate": 0.0005861156484808886, + "loss": 0.9851, + "step": 12725 + }, + { + "epoch": 4.16, + "learning_rate": 0.0005852989219209409, + "loss": 0.9424, + "step": 12750 + }, + { + "epoch": 4.17, + "learning_rate": 0.0005844821953609931, + "loss": 0.9211, + "step": 12775 + }, + { + "epoch": 4.17, + "learning_rate": 0.0005836654688010454, + "loss": 0.9562, + "step": 12800 + }, + { + "epoch": 4.18, + "learning_rate": 0.0005828487422410978, + "loss": 0.9599, + "step": 12825 + }, + { + "epoch": 4.19, + "learning_rate": 0.00058203201568115, + "loss": 0.9343, + "step": 12850 + }, + { + "epoch": 4.2, + "learning_rate": 0.0005812152891212023, + "loss": 0.9537, + "step": 12875 + }, + { + "epoch": 4.21, + "learning_rate": 0.0005803985625612544, + "loss": 0.9199, + "step": 12900 + }, + { + "epoch": 4.22, + "learning_rate": 0.0005795818360013067, + "loss": 0.9185, + "step": 12925 + }, + { + "epoch": 4.22, + "learning_rate": 0.000578765109441359, + "loss": 0.9382, + "step": 12950 + }, + { + "epoch": 4.23, + "learning_rate": 0.0005779483828814113, + "loss": 0.9689, + "step": 12975 + }, + { + "epoch": 4.24, + "learning_rate": 0.0005771316563214636, + "loss": 0.9916, + "step": 13000 + }, + { + "epoch": 4.25, + "learning_rate": 0.0005763149297615159, + "loss": 0.9593, + "step": 13025 + }, + { + "epoch": 4.26, + "learning_rate": 0.0005754982032015681, + "loss": 0.9291, + "step": 13050 + }, + { + "epoch": 4.26, + "learning_rate": 0.0005747141457040182, + "loss": 1.0283, + "step": 13075 + }, + { + "epoch": 4.27, + "learning_rate": 0.0005738974191440706, + "loss": 0.9577, + "step": 13100 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005730806925841229, + "loss": 0.9594, + "step": 13125 + }, + { + "epoch": 4.29, + "learning_rate": 0.0005722639660241751, + "loss": 0.9411, + "step": 13150 + }, + { + "epoch": 4.3, + "learning_rate": 0.0005714472394642274, + "loss": 0.9607, + "step": 13175 + }, + { + "epoch": 4.31, + "learning_rate": 0.0005706305129042797, + "loss": 0.9902, + "step": 13200 + }, + { + "epoch": 4.31, + "learning_rate": 0.000569813786344332, + "loss": 0.979, + "step": 13225 + }, + { + "epoch": 4.32, + "learning_rate": 0.0005689970597843842, + "loss": 0.9485, + "step": 13250 + }, + { + "epoch": 4.33, + "learning_rate": 0.0005681803332244365, + "loss": 0.9492, + "step": 13275 + }, + { + "epoch": 4.34, + "learning_rate": 0.0005673636066644887, + "loss": 0.9136, + "step": 13300 + }, + { + "epoch": 4.35, + "learning_rate": 0.000566546880104541, + "loss": 0.9569, + "step": 13325 + }, + { + "epoch": 4.35, + "learning_rate": 0.0005657301535445932, + "loss": 0.9184, + "step": 13350 + }, + { + "epoch": 4.36, + "learning_rate": 0.0005649134269846456, + "loss": 0.9555, + "step": 13375 + }, + { + "epoch": 4.37, + "learning_rate": 0.0005640967004246979, + "loss": 0.9633, + "step": 13400 + }, + { + "epoch": 4.38, + "learning_rate": 0.0005632799738647501, + "loss": 0.9738, + "step": 13425 + }, + { + "epoch": 4.39, + "learning_rate": 0.0005624632473048023, + "loss": 0.9897, + "step": 13450 + }, + { + "epoch": 4.39, + "learning_rate": 0.0005616465207448546, + "loss": 1.0043, + "step": 13475 + }, + { + "epoch": 4.4, + "learning_rate": 0.0005608297941849069, + "loss": 0.9531, + "step": 13500 + }, + { + "epoch": 4.41, + "learning_rate": 0.0005600130676249592, + "loss": 0.9441, + "step": 13525 + }, + { + "epoch": 4.42, + "learning_rate": 0.0005591963410650115, + "loss": 0.9505, + "step": 13550 + }, + { + "epoch": 4.43, + "learning_rate": 0.0005583796145050637, + "loss": 0.9985, + "step": 13575 + }, + { + "epoch": 4.44, + "learning_rate": 0.000557562887945116, + "loss": 0.939, + "step": 13600 + }, + { + "epoch": 4.44, + "learning_rate": 0.0005567461613851682, + "loss": 1.0095, + "step": 13625 + }, + { + "epoch": 4.45, + "learning_rate": 0.0005559294348252205, + "loss": 0.9849, + "step": 13650 + }, + { + "epoch": 4.46, + "learning_rate": 0.0005551127082652728, + "loss": 0.9495, + "step": 13675 + }, + { + "epoch": 4.47, + "learning_rate": 0.000554295981705325, + "loss": 0.9426, + "step": 13700 + }, + { + "epoch": 4.48, + "learning_rate": 0.0005534792551453773, + "loss": 0.9301, + "step": 13725 + }, + { + "epoch": 4.48, + "learning_rate": 0.0005526625285854296, + "loss": 0.9652, + "step": 13750 + }, + { + "epoch": 4.49, + "learning_rate": 0.0005518458020254819, + "loss": 1.0232, + "step": 13775 + }, + { + "epoch": 4.5, + "learning_rate": 0.0005510290754655342, + "loss": 0.9095, + "step": 13800 + }, + { + "epoch": 4.51, + "learning_rate": 0.0005502123489055865, + "loss": 1.0281, + "step": 13825 + }, + { + "epoch": 4.52, + "learning_rate": 0.0005493956223456387, + "loss": 0.9334, + "step": 13850 + }, + { + "epoch": 4.53, + "learning_rate": 0.0005485788957856909, + "loss": 0.9301, + "step": 13875 + }, + { + "epoch": 4.53, + "learning_rate": 0.0005477621692257432, + "loss": 0.9297, + "step": 13900 + }, + { + "epoch": 4.54, + "learning_rate": 0.0005469454426657955, + "loss": 0.9506, + "step": 13925 + }, + { + "epoch": 4.55, + "learning_rate": 0.0005461287161058478, + "loss": 0.949, + "step": 13950 + }, + { + "epoch": 4.56, + "learning_rate": 0.0005453119895459, + "loss": 0.9812, + "step": 13975 + }, + { + "epoch": 4.57, + "learning_rate": 0.0005444952629859523, + "loss": 0.9207, + "step": 14000 + }, + { + "epoch": 4.57, + "learning_rate": 0.0005436785364260047, + "loss": 0.9633, + "step": 14025 + }, + { + "epoch": 4.58, + "learning_rate": 0.0005428618098660569, + "loss": 0.9787, + "step": 14050 + }, + { + "epoch": 4.59, + "learning_rate": 0.0005420450833061091, + "loss": 0.9924, + "step": 14075 + }, + { + "epoch": 4.6, + "learning_rate": 0.0005412283567461614, + "loss": 0.9027, + "step": 14100 + }, + { + "epoch": 4.61, + "learning_rate": 0.0005404116301862136, + "loss": 0.9882, + "step": 14125 + }, + { + "epoch": 4.62, + "learning_rate": 0.000539594903626266, + "loss": 0.9808, + "step": 14150 + }, + { + "epoch": 4.62, + "learning_rate": 0.0005387781770663182, + "loss": 0.9603, + "step": 14175 + }, + { + "epoch": 4.63, + "learning_rate": 0.0005379614505063705, + "loss": 1.01, + "step": 14200 + }, + { + "epoch": 4.64, + "learning_rate": 0.0005371447239464228, + "loss": 1.0208, + "step": 14225 + }, + { + "epoch": 4.65, + "learning_rate": 0.000536327997386475, + "loss": 0.988, + "step": 14250 + }, + { + "epoch": 4.66, + "learning_rate": 0.0005355112708265272, + "loss": 0.9962, + "step": 14275 + }, + { + "epoch": 4.66, + "learning_rate": 0.0005346945442665796, + "loss": 1.0374, + "step": 14300 + }, + { + "epoch": 4.67, + "learning_rate": 0.0005338778177066318, + "loss": 0.9575, + "step": 14325 + }, + { + "epoch": 4.68, + "learning_rate": 0.0005330610911466841, + "loss": 0.933, + "step": 14350 + }, + { + "epoch": 4.69, + "learning_rate": 0.0005322443645867364, + "loss": 0.9409, + "step": 14375 + }, + { + "epoch": 4.7, + "learning_rate": 0.0005314276380267886, + "loss": 0.9495, + "step": 14400 + }, + { + "epoch": 4.7, + "learning_rate": 0.000530610911466841, + "loss": 1.0001, + "step": 14425 + }, + { + "epoch": 4.71, + "learning_rate": 0.0005297941849068932, + "loss": 1.0216, + "step": 14450 + }, + { + "epoch": 4.72, + "learning_rate": 0.0005290101274093434, + "loss": 0.9431, + "step": 14475 + }, + { + "epoch": 4.73, + "learning_rate": 0.0005281934008493956, + "loss": 0.9686, + "step": 14500 + }, + { + "epoch": 4.74, + "learning_rate": 0.0005273766742894479, + "loss": 0.9477, + "step": 14525 + }, + { + "epoch": 4.75, + "learning_rate": 0.0005265599477295002, + "loss": 0.9446, + "step": 14550 + }, + { + "epoch": 4.75, + "learning_rate": 0.0005257432211695525, + "loss": 0.9808, + "step": 14575 + }, + { + "epoch": 4.76, + "learning_rate": 0.0005249264946096047, + "loss": 0.9629, + "step": 14600 + }, + { + "epoch": 4.77, + "learning_rate": 0.0005241097680496569, + "loss": 0.9066, + "step": 14625 + }, + { + "epoch": 4.78, + "learning_rate": 0.0005232930414897092, + "loss": 0.9627, + "step": 14650 + }, + { + "epoch": 4.79, + "learning_rate": 0.0005224763149297615, + "loss": 0.9822, + "step": 14675 + }, + { + "epoch": 4.79, + "learning_rate": 0.0005216595883698138, + "loss": 0.9371, + "step": 14700 + }, + { + "epoch": 4.8, + "learning_rate": 0.0005208428618098661, + "loss": 1.0388, + "step": 14725 + }, + { + "epoch": 4.81, + "learning_rate": 0.0005200261352499184, + "loss": 0.9602, + "step": 14750 + }, + { + "epoch": 4.82, + "learning_rate": 0.0005192094086899706, + "loss": 0.968, + "step": 14775 + }, + { + "epoch": 4.83, + "learning_rate": 0.0005183926821300229, + "loss": 0.9375, + "step": 14800 + }, + { + "epoch": 4.84, + "learning_rate": 0.0005175759555700752, + "loss": 0.9769, + "step": 14825 + }, + { + "epoch": 4.84, + "learning_rate": 0.0005167592290101274, + "loss": 0.9493, + "step": 14850 + }, + { + "epoch": 4.85, + "learning_rate": 0.0005159425024501797, + "loss": 0.9451, + "step": 14875 + }, + { + "epoch": 4.86, + "learning_rate": 0.0005151257758902319, + "loss": 0.9599, + "step": 14900 + }, + { + "epoch": 4.87, + "learning_rate": 0.0005143090493302842, + "loss": 0.9742, + "step": 14925 + }, + { + "epoch": 4.88, + "learning_rate": 0.0005134923227703365, + "loss": 0.9618, + "step": 14950 + }, + { + "epoch": 4.88, + "learning_rate": 0.0005126755962103888, + "loss": 1.0095, + "step": 14975 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005118588696504411, + "loss": 0.9383, + "step": 15000 + }, + { + "epoch": 4.9, + "learning_rate": 0.0005110421430904933, + "loss": 0.9605, + "step": 15025 + }, + { + "epoch": 4.91, + "learning_rate": 0.0005102254165305455, + "loss": 0.9599, + "step": 15050 + }, + { + "epoch": 4.92, + "learning_rate": 0.0005094086899705978, + "loss": 0.9536, + "step": 15075 + }, + { + "epoch": 4.92, + "learning_rate": 0.0005085919634106502, + "loss": 0.9486, + "step": 15100 + }, + { + "epoch": 4.93, + "learning_rate": 0.0005077752368507024, + "loss": 0.9549, + "step": 15125 + }, + { + "epoch": 4.94, + "learning_rate": 0.0005069585102907547, + "loss": 0.9811, + "step": 15150 + }, + { + "epoch": 4.95, + "learning_rate": 0.0005061417837308069, + "loss": 0.9321, + "step": 15175 + }, + { + "epoch": 4.96, + "learning_rate": 0.0005053250571708592, + "loss": 0.9358, + "step": 15200 + }, + { + "epoch": 4.97, + "learning_rate": 0.0005045083306109115, + "loss": 0.952, + "step": 15225 + }, + { + "epoch": 4.97, + "learning_rate": 0.0005036916040509637, + "loss": 0.9713, + "step": 15250 + }, + { + "epoch": 4.98, + "learning_rate": 0.000502874877491016, + "loss": 0.9529, + "step": 15275 + }, + { + "epoch": 4.99, + "learning_rate": 0.0005020581509310683, + "loss": 1.0018, + "step": 15300 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005012414243711205, + "loss": 0.9907, + "step": 15325 + }, + { + "epoch": 5.0, + "eval_loss": 0.8856672644615173, + "eval_runtime": 957.3834, + "eval_samples_per_second": 1.045, + "eval_steps_per_second": 0.131, + "eval_wer": 6.835773570829059, + "step": 15330 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005004246978111729, + "loss": 0.8702, + "step": 15350 + }, + { + "epoch": 5.01, + "learning_rate": 0.0004996079712512251, + "loss": 0.8384, + "step": 15375 + }, + { + "epoch": 5.02, + "learning_rate": 0.0004987912446912774, + "loss": 0.8731, + "step": 15400 + }, + { + "epoch": 5.03, + "learning_rate": 0.0004979745181313297, + "loss": 0.9217, + "step": 15425 + }, + { + "epoch": 5.04, + "learning_rate": 0.0004971577915713819, + "loss": 0.885, + "step": 15450 + }, + { + "epoch": 5.05, + "learning_rate": 0.0004963410650114341, + "loss": 0.9239, + "step": 15475 + }, + { + "epoch": 5.06, + "learning_rate": 0.0004955243384514865, + "loss": 0.9232, + "step": 15500 + }, + { + "epoch": 5.06, + "learning_rate": 0.0004947076118915388, + "loss": 0.899, + "step": 15525 + }, + { + "epoch": 5.07, + "learning_rate": 0.000493890885331591, + "loss": 0.9134, + "step": 15550 + }, + { + "epoch": 5.08, + "learning_rate": 0.0004930741587716432, + "loss": 0.8702, + "step": 15575 + }, + { + "epoch": 5.09, + "learning_rate": 0.0004922574322116955, + "loss": 0.9255, + "step": 15600 + }, + { + "epoch": 5.1, + "learning_rate": 0.0004914407056517479, + "loss": 0.8858, + "step": 15625 + }, + { + "epoch": 5.1, + "learning_rate": 0.0004906239790918001, + "loss": 0.874, + "step": 15650 + }, + { + "epoch": 5.11, + "learning_rate": 0.0004898072525318523, + "loss": 0.9004, + "step": 15675 + }, + { + "epoch": 5.12, + "learning_rate": 0.0004889905259719046, + "loss": 0.8701, + "step": 15700 + }, + { + "epoch": 5.13, + "learning_rate": 0.0004881737994119569, + "loss": 0.9114, + "step": 15725 + }, + { + "epoch": 5.14, + "learning_rate": 0.00048735707285200915, + "loss": 0.8608, + "step": 15750 + }, + { + "epoch": 5.15, + "learning_rate": 0.00048657301535445937, + "loss": 0.9103, + "step": 15775 + }, + { + "epoch": 5.15, + "learning_rate": 0.00048575628879451164, + "loss": 0.9099, + "step": 15800 + }, + { + "epoch": 5.16, + "learning_rate": 0.0004849722312969618, + "loss": 0.9542, + "step": 15825 + }, + { + "epoch": 5.17, + "learning_rate": 0.00048415550473701407, + "loss": 0.9748, + "step": 15850 + }, + { + "epoch": 5.18, + "learning_rate": 0.00048333877817706634, + "loss": 0.8518, + "step": 15875 + }, + { + "epoch": 5.19, + "learning_rate": 0.0004825220516171186, + "loss": 0.9075, + "step": 15900 + }, + { + "epoch": 5.19, + "learning_rate": 0.0004817053250571709, + "loss": 0.9004, + "step": 15925 + }, + { + "epoch": 5.2, + "learning_rate": 0.00048088859849722314, + "loss": 0.9015, + "step": 15950 + }, + { + "epoch": 5.21, + "learning_rate": 0.0004800718719372754, + "loss": 0.9192, + "step": 15975 + }, + { + "epoch": 5.22, + "learning_rate": 0.0004792551453773277, + "loss": 0.8597, + "step": 16000 + }, + { + "epoch": 5.23, + "learning_rate": 0.00047843841881737995, + "loss": 0.8859, + "step": 16025 + }, + { + "epoch": 5.23, + "learning_rate": 0.0004776216922574322, + "loss": 0.924, + "step": 16050 + }, + { + "epoch": 5.24, + "learning_rate": 0.0004768049656974845, + "loss": 0.8799, + "step": 16075 + }, + { + "epoch": 5.25, + "learning_rate": 0.00047598823913753676, + "loss": 0.9405, + "step": 16100 + }, + { + "epoch": 5.26, + "learning_rate": 0.0004751715125775891, + "loss": 0.8748, + "step": 16125 + }, + { + "epoch": 5.27, + "learning_rate": 0.0004743547860176413, + "loss": 0.9115, + "step": 16150 + }, + { + "epoch": 5.28, + "learning_rate": 0.00047353805945769356, + "loss": 0.9161, + "step": 16175 + }, + { + "epoch": 5.28, + "learning_rate": 0.00047272133289774583, + "loss": 0.9297, + "step": 16200 + }, + { + "epoch": 5.29, + "learning_rate": 0.00047190460633779815, + "loss": 0.9021, + "step": 16225 + }, + { + "epoch": 5.3, + "learning_rate": 0.00047108787977785037, + "loss": 0.8839, + "step": 16250 + }, + { + "epoch": 5.31, + "learning_rate": 0.00047027115321790264, + "loss": 0.9481, + "step": 16275 + }, + { + "epoch": 5.32, + "learning_rate": 0.0004694544266579549, + "loss": 0.8923, + "step": 16300 + }, + { + "epoch": 5.32, + "learning_rate": 0.00046863770009800723, + "loss": 0.8888, + "step": 16325 + }, + { + "epoch": 5.33, + "learning_rate": 0.00046782097353805944, + "loss": 0.9642, + "step": 16350 + }, + { + "epoch": 5.34, + "learning_rate": 0.0004670042469781117, + "loss": 0.8957, + "step": 16375 + }, + { + "epoch": 5.35, + "learning_rate": 0.00046618752041816403, + "loss": 0.8488, + "step": 16400 + }, + { + "epoch": 5.36, + "learning_rate": 0.0004653707938582163, + "loss": 0.9738, + "step": 16425 + }, + { + "epoch": 5.37, + "learning_rate": 0.00046455406729826857, + "loss": 0.9539, + "step": 16450 + }, + { + "epoch": 5.37, + "learning_rate": 0.0004637373407383208, + "loss": 0.8878, + "step": 16475 + }, + { + "epoch": 5.38, + "learning_rate": 0.0004629206141783731, + "loss": 0.9682, + "step": 16500 + }, + { + "epoch": 5.39, + "learning_rate": 0.0004621038876184254, + "loss": 0.9023, + "step": 16525 + }, + { + "epoch": 5.4, + "learning_rate": 0.00046128716105847765, + "loss": 0.8979, + "step": 16550 + }, + { + "epoch": 5.41, + "learning_rate": 0.00046047043449852986, + "loss": 0.9489, + "step": 16575 + }, + { + "epoch": 5.41, + "learning_rate": 0.0004596537079385822, + "loss": 0.9734, + "step": 16600 + }, + { + "epoch": 5.42, + "learning_rate": 0.00045883698137863445, + "loss": 0.9354, + "step": 16625 + }, + { + "epoch": 5.43, + "learning_rate": 0.0004580202548186867, + "loss": 0.8973, + "step": 16650 + }, + { + "epoch": 5.44, + "learning_rate": 0.000457203528258739, + "loss": 0.8845, + "step": 16675 + }, + { + "epoch": 5.45, + "learning_rate": 0.00045638680169879126, + "loss": 0.9092, + "step": 16700 + }, + { + "epoch": 5.45, + "learning_rate": 0.00045557007513884353, + "loss": 0.9234, + "step": 16725 + }, + { + "epoch": 5.46, + "learning_rate": 0.0004547533485788958, + "loss": 0.904, + "step": 16750 + }, + { + "epoch": 5.47, + "learning_rate": 0.00045393662201894807, + "loss": 0.9256, + "step": 16775 + }, + { + "epoch": 5.48, + "learning_rate": 0.00045311989545900034, + "loss": 1.0002, + "step": 16800 + }, + { + "epoch": 5.49, + "learning_rate": 0.0004523031688990526, + "loss": 0.9155, + "step": 16825 + }, + { + "epoch": 5.5, + "learning_rate": 0.00045148644233910487, + "loss": 0.8999, + "step": 16850 + }, + { + "epoch": 5.5, + "learning_rate": 0.00045066971577915714, + "loss": 0.8973, + "step": 16875 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004498529892192094, + "loss": 0.9038, + "step": 16900 + }, + { + "epoch": 5.52, + "learning_rate": 0.0004490362626592617, + "loss": 0.963, + "step": 16925 + }, + { + "epoch": 5.53, + "learning_rate": 0.000448219536099314, + "loss": 0.9102, + "step": 16950 + }, + { + "epoch": 5.54, + "learning_rate": 0.0004474028095393662, + "loss": 0.9021, + "step": 16975 + }, + { + "epoch": 5.54, + "learning_rate": 0.0004465860829794185, + "loss": 0.9318, + "step": 17000 + }, + { + "epoch": 5.55, + "learning_rate": 0.00044576935641947075, + "loss": 0.9126, + "step": 17025 + }, + { + "epoch": 5.56, + "learning_rate": 0.0004449526298595231, + "loss": 0.9213, + "step": 17050 + }, + { + "epoch": 5.57, + "learning_rate": 0.0004441359032995753, + "loss": 0.8985, + "step": 17075 + }, + { + "epoch": 5.58, + "learning_rate": 0.00044331917673962756, + "loss": 0.9006, + "step": 17100 + }, + { + "epoch": 5.59, + "learning_rate": 0.00044250245017967983, + "loss": 0.8745, + "step": 17125 + }, + { + "epoch": 5.59, + "learning_rate": 0.00044168572361973215, + "loss": 0.9848, + "step": 17150 + }, + { + "epoch": 5.6, + "learning_rate": 0.00044086899705978437, + "loss": 0.8967, + "step": 17175 + }, + { + "epoch": 5.61, + "learning_rate": 0.00044005227049983664, + "loss": 0.972, + "step": 17200 + }, + { + "epoch": 5.62, + "learning_rate": 0.00043923554393988896, + "loss": 0.9553, + "step": 17225 + }, + { + "epoch": 5.63, + "learning_rate": 0.0004384188173799412, + "loss": 0.9128, + "step": 17250 + }, + { + "epoch": 5.63, + "learning_rate": 0.0004376020908199935, + "loss": 0.9333, + "step": 17275 + }, + { + "epoch": 5.64, + "learning_rate": 0.0004367853642600457, + "loss": 0.9197, + "step": 17300 + }, + { + "epoch": 5.65, + "learning_rate": 0.00043596863770009803, + "loss": 0.9185, + "step": 17325 + }, + { + "epoch": 5.66, + "learning_rate": 0.0004351519111401503, + "loss": 0.8902, + "step": 17350 + }, + { + "epoch": 5.67, + "learning_rate": 0.00043433518458020257, + "loss": 0.9017, + "step": 17375 + }, + { + "epoch": 5.68, + "learning_rate": 0.0004335184580202548, + "loss": 0.8876, + "step": 17400 + }, + { + "epoch": 5.68, + "learning_rate": 0.0004327017314603071, + "loss": 0.8792, + "step": 17425 + }, + { + "epoch": 5.69, + "learning_rate": 0.0004318850049003594, + "loss": 0.8989, + "step": 17450 + }, + { + "epoch": 5.7, + "learning_rate": 0.00043106827834041165, + "loss": 0.8694, + "step": 17475 + }, + { + "epoch": 5.71, + "learning_rate": 0.0004302515517804639, + "loss": 0.8945, + "step": 17500 + }, + { + "epoch": 5.72, + "learning_rate": 0.0004294348252205162, + "loss": 0.8897, + "step": 17525 + }, + { + "epoch": 5.72, + "learning_rate": 0.00042861809866056845, + "loss": 0.925, + "step": 17550 + }, + { + "epoch": 5.73, + "learning_rate": 0.0004278013721006207, + "loss": 0.9442, + "step": 17575 + }, + { + "epoch": 5.74, + "learning_rate": 0.000426984645540673, + "loss": 0.9025, + "step": 17600 + }, + { + "epoch": 5.75, + "learning_rate": 0.00042616791898072526, + "loss": 0.8805, + "step": 17625 + }, + { + "epoch": 5.76, + "learning_rate": 0.00042535119242077753, + "loss": 0.9071, + "step": 17650 + }, + { + "epoch": 5.76, + "learning_rate": 0.0004245344658608298, + "loss": 0.9194, + "step": 17675 + }, + { + "epoch": 5.77, + "learning_rate": 0.00042371773930088206, + "loss": 0.8833, + "step": 17700 + }, + { + "epoch": 5.78, + "learning_rate": 0.00042290101274093433, + "loss": 0.9591, + "step": 17725 + }, + { + "epoch": 5.79, + "learning_rate": 0.0004220842861809866, + "loss": 0.9122, + "step": 17750 + }, + { + "epoch": 5.8, + "learning_rate": 0.0004212675596210389, + "loss": 0.9193, + "step": 17775 + }, + { + "epoch": 5.81, + "learning_rate": 0.00042045083306109114, + "loss": 0.9287, + "step": 17800 + }, + { + "epoch": 5.81, + "learning_rate": 0.0004196341065011434, + "loss": 0.9433, + "step": 17825 + }, + { + "epoch": 5.82, + "learning_rate": 0.0004188173799411957, + "loss": 0.8985, + "step": 17850 + }, + { + "epoch": 5.83, + "learning_rate": 0.000418000653381248, + "loss": 0.9895, + "step": 17875 + }, + { + "epoch": 5.84, + "learning_rate": 0.0004171839268213002, + "loss": 0.9005, + "step": 17900 + }, + { + "epoch": 5.85, + "learning_rate": 0.0004163672002613525, + "loss": 0.9078, + "step": 17925 + }, + { + "epoch": 5.85, + "learning_rate": 0.00041555047370140475, + "loss": 0.8996, + "step": 17950 + }, + { + "epoch": 5.86, + "learning_rate": 0.0004147337471414571, + "loss": 0.9454, + "step": 17975 + }, + { + "epoch": 5.87, + "learning_rate": 0.00041391702058150934, + "loss": 0.9536, + "step": 18000 + }, + { + "epoch": 5.88, + "learning_rate": 0.00041310029402156156, + "loss": 0.908, + "step": 18025 + }, + { + "epoch": 5.89, + "learning_rate": 0.0004122835674616139, + "loss": 0.9281, + "step": 18050 + }, + { + "epoch": 5.9, + "learning_rate": 0.00041146684090166615, + "loss": 0.9143, + "step": 18075 + }, + { + "epoch": 5.9, + "learning_rate": 0.0004106501143417184, + "loss": 0.9711, + "step": 18100 + }, + { + "epoch": 5.91, + "learning_rate": 0.00040983338778177063, + "loss": 0.9095, + "step": 18125 + }, + { + "epoch": 5.92, + "learning_rate": 0.00040901666122182296, + "loss": 0.9171, + "step": 18150 + }, + { + "epoch": 5.93, + "learning_rate": 0.0004081999346618752, + "loss": 0.9006, + "step": 18175 + }, + { + "epoch": 5.94, + "learning_rate": 0.0004073832081019275, + "loss": 0.8891, + "step": 18200 + }, + { + "epoch": 5.94, + "learning_rate": 0.0004065664815419797, + "loss": 0.9096, + "step": 18225 + }, + { + "epoch": 5.95, + "learning_rate": 0.00040574975498203203, + "loss": 0.9266, + "step": 18250 + }, + { + "epoch": 5.96, + "learning_rate": 0.0004049330284220843, + "loss": 0.8653, + "step": 18275 + }, + { + "epoch": 5.97, + "learning_rate": 0.00040411630186213657, + "loss": 0.9335, + "step": 18300 + }, + { + "epoch": 5.98, + "learning_rate": 0.00040329957530218884, + "loss": 0.9124, + "step": 18325 + }, + { + "epoch": 5.98, + "learning_rate": 0.0004024828487422411, + "loss": 0.8719, + "step": 18350 + }, + { + "epoch": 5.99, + "learning_rate": 0.0004016661221822934, + "loss": 0.8971, + "step": 18375 + }, + { + "epoch": 6.0, + "eval_loss": 0.8379027247428894, + "eval_runtime": 924.2552, + "eval_samples_per_second": 1.082, + "eval_steps_per_second": 0.135, + "eval_wer": 3.4785041499580336, + "step": 18396 + }, + { + "epoch": 6.0, + "learning_rate": 0.00040084939562234564, + "loss": 0.859, + "step": 18400 + }, + { + "epoch": 6.01, + "learning_rate": 0.0004000326690623979, + "loss": 0.8687, + "step": 18425 + }, + { + "epoch": 6.02, + "learning_rate": 0.0003992159425024502, + "loss": 0.9106, + "step": 18450 + }, + { + "epoch": 6.03, + "learning_rate": 0.00039839921594250245, + "loss": 0.8549, + "step": 18475 + }, + { + "epoch": 6.03, + "learning_rate": 0.0003975824893825547, + "loss": 0.8658, + "step": 18500 + }, + { + "epoch": 6.04, + "learning_rate": 0.000396765762822607, + "loss": 0.884, + "step": 18525 + }, + { + "epoch": 6.05, + "learning_rate": 0.00039594903626265926, + "loss": 0.8732, + "step": 18550 + }, + { + "epoch": 6.06, + "learning_rate": 0.0003951323097027115, + "loss": 0.865, + "step": 18575 + }, + { + "epoch": 6.07, + "learning_rate": 0.00039431558314276385, + "loss": 0.8562, + "step": 18600 + }, + { + "epoch": 6.07, + "learning_rate": 0.00039349885658281606, + "loss": 0.8829, + "step": 18625 + }, + { + "epoch": 6.08, + "learning_rate": 0.00039268213002286833, + "loss": 0.841, + "step": 18650 + }, + { + "epoch": 6.09, + "learning_rate": 0.0003918654034629206, + "loss": 0.8788, + "step": 18675 + }, + { + "epoch": 6.1, + "learning_rate": 0.0003910486769029729, + "loss": 0.8661, + "step": 18700 + }, + { + "epoch": 6.11, + "learning_rate": 0.0003902319503430252, + "loss": 0.8646, + "step": 18725 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003894152237830774, + "loss": 0.8711, + "step": 18750 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003885984972231297, + "loss": 0.867, + "step": 18775 + }, + { + "epoch": 6.13, + "learning_rate": 0.000387781770663182, + "loss": 0.8376, + "step": 18800 + }, + { + "epoch": 6.14, + "learning_rate": 0.00038696504410323427, + "loss": 0.8376, + "step": 18825 + }, + { + "epoch": 6.15, + "learning_rate": 0.0003861483175432865, + "loss": 0.868, + "step": 18850 + }, + { + "epoch": 6.16, + "learning_rate": 0.0003853315909833388, + "loss": 0.87, + "step": 18875 + }, + { + "epoch": 6.16, + "learning_rate": 0.0003845148644233911, + "loss": 0.8565, + "step": 18900 + }, + { + "epoch": 6.17, + "learning_rate": 0.00038369813786344334, + "loss": 0.8978, + "step": 18925 + }, + { + "epoch": 6.18, + "learning_rate": 0.00038288141130349556, + "loss": 0.8337, + "step": 18950 + }, + { + "epoch": 6.19, + "learning_rate": 0.0003820646847435479, + "loss": 0.8872, + "step": 18975 + }, + { + "epoch": 6.2, + "learning_rate": 0.00038124795818360015, + "loss": 0.8982, + "step": 19000 + }, + { + "epoch": 6.21, + "learning_rate": 0.0003804312316236524, + "loss": 0.8826, + "step": 19025 + }, + { + "epoch": 6.21, + "learning_rate": 0.0003796145050637047, + "loss": 0.8575, + "step": 19050 + }, + { + "epoch": 6.22, + "learning_rate": 0.00037879777850375696, + "loss": 0.8727, + "step": 19075 + }, + { + "epoch": 6.23, + "learning_rate": 0.0003779810519438092, + "loss": 0.8679, + "step": 19100 + }, + { + "epoch": 6.24, + "learning_rate": 0.0003771643253838615, + "loss": 0.8943, + "step": 19125 + }, + { + "epoch": 6.25, + "learning_rate": 0.00037634759882391376, + "loss": 0.8655, + "step": 19150 + }, + { + "epoch": 6.25, + "learning_rate": 0.00037553087226396603, + "loss": 0.8608, + "step": 19175 + }, + { + "epoch": 6.26, + "learning_rate": 0.0003747141457040183, + "loss": 0.8632, + "step": 19200 + }, + { + "epoch": 6.27, + "learning_rate": 0.00037389741914407057, + "loss": 0.9066, + "step": 19225 + }, + { + "epoch": 6.28, + "learning_rate": 0.00037308069258412284, + "loss": 0.8252, + "step": 19250 + }, + { + "epoch": 6.29, + "learning_rate": 0.0003722639660241751, + "loss": 0.845, + "step": 19275 + }, + { + "epoch": 6.29, + "learning_rate": 0.0003714472394642274, + "loss": 0.8753, + "step": 19300 + }, + { + "epoch": 6.3, + "learning_rate": 0.0003706305129042797, + "loss": 0.8675, + "step": 19325 + }, + { + "epoch": 6.31, + "learning_rate": 0.0003698137863443319, + "loss": 0.8494, + "step": 19350 + }, + { + "epoch": 6.32, + "learning_rate": 0.0003689970597843842, + "loss": 0.8422, + "step": 19375 + }, + { + "epoch": 6.33, + "learning_rate": 0.00036818033322443645, + "loss": 0.8895, + "step": 19400 + }, + { + "epoch": 6.34, + "learning_rate": 0.00036736360666448877, + "loss": 0.8138, + "step": 19425 + }, + { + "epoch": 6.34, + "learning_rate": 0.00036654688010454104, + "loss": 0.8782, + "step": 19450 + }, + { + "epoch": 6.35, + "learning_rate": 0.00036573015354459326, + "loss": 0.898, + "step": 19475 + }, + { + "epoch": 6.36, + "learning_rate": 0.0003649134269846455, + "loss": 0.8641, + "step": 19500 + }, + { + "epoch": 6.37, + "learning_rate": 0.00036409670042469785, + "loss": 0.9043, + "step": 19525 + }, + { + "epoch": 6.38, + "learning_rate": 0.0003632799738647501, + "loss": 0.8375, + "step": 19550 + }, + { + "epoch": 6.38, + "learning_rate": 0.00036246324730480233, + "loss": 0.8787, + "step": 19575 + }, + { + "epoch": 6.39, + "learning_rate": 0.00036164652074485465, + "loss": 0.8633, + "step": 19600 + }, + { + "epoch": 6.4, + "learning_rate": 0.0003608297941849069, + "loss": 0.8185, + "step": 19625 + }, + { + "epoch": 6.41, + "learning_rate": 0.0003600130676249592, + "loss": 0.8812, + "step": 19650 + }, + { + "epoch": 6.42, + "learning_rate": 0.0003591963410650114, + "loss": 0.8775, + "step": 19675 + }, + { + "epoch": 6.43, + "learning_rate": 0.00035837961450506373, + "loss": 0.8696, + "step": 19700 + }, + { + "epoch": 6.43, + "learning_rate": 0.000357562887945116, + "loss": 0.8679, + "step": 19725 + }, + { + "epoch": 6.44, + "learning_rate": 0.00035674616138516827, + "loss": 0.8761, + "step": 19750 + }, + { + "epoch": 6.45, + "learning_rate": 0.0003559294348252205, + "loss": 0.8419, + "step": 19775 + }, + { + "epoch": 6.46, + "learning_rate": 0.0003551127082652728, + "loss": 0.819, + "step": 19800 + }, + { + "epoch": 6.47, + "learning_rate": 0.00035429598170532507, + "loss": 0.8853, + "step": 19825 + }, + { + "epoch": 6.47, + "learning_rate": 0.00035347925514537734, + "loss": 0.8783, + "step": 19850 + }, + { + "epoch": 6.48, + "learning_rate": 0.0003526625285854296, + "loss": 0.8378, + "step": 19875 + }, + { + "epoch": 6.49, + "learning_rate": 0.0003518458020254819, + "loss": 0.8761, + "step": 19900 + }, + { + "epoch": 6.5, + "learning_rate": 0.00035102907546553415, + "loss": 0.8666, + "step": 19925 + }, + { + "epoch": 6.51, + "learning_rate": 0.0003502123489055864, + "loss": 0.8914, + "step": 19950 + }, + { + "epoch": 6.52, + "learning_rate": 0.0003493956223456387, + "loss": 0.8462, + "step": 19975 + }, + { + "epoch": 6.52, + "learning_rate": 0.00034857889578569095, + "loss": 0.8955, + "step": 20000 + }, + { + "epoch": 6.53, + "learning_rate": 0.0003477621692257432, + "loss": 0.877, + "step": 20025 + }, + { + "epoch": 6.54, + "learning_rate": 0.0003469454426657955, + "loss": 0.9005, + "step": 20050 + }, + { + "epoch": 6.55, + "learning_rate": 0.00034612871610584776, + "loss": 0.8345, + "step": 20075 + }, + { + "epoch": 6.56, + "learning_rate": 0.00034531198954590003, + "loss": 0.8579, + "step": 20100 + }, + { + "epoch": 6.56, + "learning_rate": 0.0003444952629859523, + "loss": 0.9066, + "step": 20125 + }, + { + "epoch": 6.57, + "learning_rate": 0.0003436785364260046, + "loss": 0.9039, + "step": 20150 + }, + { + "epoch": 6.58, + "learning_rate": 0.00034286180986605684, + "loss": 0.8852, + "step": 20175 + }, + { + "epoch": 6.59, + "learning_rate": 0.0003420450833061091, + "loss": 0.8837, + "step": 20200 + }, + { + "epoch": 6.6, + "learning_rate": 0.00034122835674616137, + "loss": 0.8865, + "step": 20225 + }, + { + "epoch": 6.6, + "learning_rate": 0.0003404116301862137, + "loss": 0.8624, + "step": 20250 + }, + { + "epoch": 6.61, + "learning_rate": 0.00033959490362626596, + "loss": 0.8882, + "step": 20275 + }, + { + "epoch": 6.62, + "learning_rate": 0.0003387781770663182, + "loss": 0.8593, + "step": 20300 + }, + { + "epoch": 6.63, + "learning_rate": 0.00033796145050637045, + "loss": 0.8604, + "step": 20325 + }, + { + "epoch": 6.64, + "learning_rate": 0.00033714472394642277, + "loss": 0.8864, + "step": 20350 + }, + { + "epoch": 6.65, + "learning_rate": 0.00033632799738647504, + "loss": 0.8714, + "step": 20375 + }, + { + "epoch": 6.65, + "learning_rate": 0.00033551127082652725, + "loss": 0.865, + "step": 20400 + }, + { + "epoch": 6.66, + "learning_rate": 0.0003346945442665796, + "loss": 0.8669, + "step": 20425 + }, + { + "epoch": 6.67, + "learning_rate": 0.00033387781770663185, + "loss": 0.8539, + "step": 20450 + }, + { + "epoch": 6.68, + "learning_rate": 0.0003330610911466841, + "loss": 0.8656, + "step": 20475 + }, + { + "epoch": 6.69, + "learning_rate": 0.00033224436458673633, + "loss": 0.8657, + "step": 20500 + }, + { + "epoch": 6.69, + "learning_rate": 0.00033142763802678865, + "loss": 0.8555, + "step": 20525 + }, + { + "epoch": 6.7, + "learning_rate": 0.0003306109114668409, + "loss": 0.8165, + "step": 20550 + }, + { + "epoch": 6.71, + "learning_rate": 0.0003297941849068932, + "loss": 0.8965, + "step": 20575 + }, + { + "epoch": 6.72, + "learning_rate": 0.0003289774583469454, + "loss": 0.875, + "step": 20600 + }, + { + "epoch": 6.73, + "learning_rate": 0.00032816073178699773, + "loss": 0.8652, + "step": 20625 + }, + { + "epoch": 6.74, + "learning_rate": 0.00032734400522705, + "loss": 0.8254, + "step": 20650 + }, + { + "epoch": 6.74, + "learning_rate": 0.00032652727866710226, + "loss": 0.8958, + "step": 20675 + }, + { + "epoch": 6.75, + "learning_rate": 0.00032571055210715453, + "loss": 0.8497, + "step": 20700 + }, + { + "epoch": 6.76, + "learning_rate": 0.0003248938255472068, + "loss": 0.9016, + "step": 20725 + }, + { + "epoch": 6.77, + "learning_rate": 0.00032407709898725907, + "loss": 0.8201, + "step": 20750 + }, + { + "epoch": 6.78, + "learning_rate": 0.00032326037242731134, + "loss": 0.8374, + "step": 20775 + }, + { + "epoch": 6.78, + "learning_rate": 0.0003224436458673636, + "loss": 0.8702, + "step": 20800 + }, + { + "epoch": 6.79, + "learning_rate": 0.0003216269193074159, + "loss": 0.8392, + "step": 20825 + }, + { + "epoch": 6.8, + "learning_rate": 0.00032081019274746815, + "loss": 0.8368, + "step": 20850 + }, + { + "epoch": 6.81, + "learning_rate": 0.0003199934661875204, + "loss": 0.8286, + "step": 20875 + }, + { + "epoch": 6.82, + "learning_rate": 0.0003191767396275727, + "loss": 0.8603, + "step": 20900 + }, + { + "epoch": 6.82, + "learning_rate": 0.00031836001306762495, + "loss": 0.8719, + "step": 20925 + }, + { + "epoch": 6.83, + "learning_rate": 0.0003175432865076772, + "loss": 0.9351, + "step": 20950 + }, + { + "epoch": 6.84, + "learning_rate": 0.00031672655994772954, + "loss": 0.8594, + "step": 20975 + }, + { + "epoch": 6.85, + "learning_rate": 0.0003159098333877818, + "loss": 0.8628, + "step": 21000 + }, + { + "epoch": 6.86, + "learning_rate": 0.00031509310682783403, + "loss": 0.8722, + "step": 21025 + }, + { + "epoch": 6.87, + "learning_rate": 0.0003142763802678863, + "loss": 0.9016, + "step": 21050 + }, + { + "epoch": 6.87, + "learning_rate": 0.0003134596537079386, + "loss": 0.8682, + "step": 21075 + }, + { + "epoch": 6.88, + "learning_rate": 0.0003126429271479909, + "loss": 0.8727, + "step": 21100 + }, + { + "epoch": 6.89, + "learning_rate": 0.0003118262005880431, + "loss": 0.9259, + "step": 21125 + }, + { + "epoch": 6.9, + "learning_rate": 0.00031100947402809537, + "loss": 0.8866, + "step": 21150 + }, + { + "epoch": 6.91, + "learning_rate": 0.0003101927474681477, + "loss": 0.8884, + "step": 21175 + }, + { + "epoch": 6.91, + "learning_rate": 0.00030937602090819996, + "loss": 0.8934, + "step": 21200 + }, + { + "epoch": 6.92, + "learning_rate": 0.0003085592943482522, + "loss": 0.8695, + "step": 21225 + }, + { + "epoch": 6.93, + "learning_rate": 0.0003077425677883045, + "loss": 0.9416, + "step": 21250 + }, + { + "epoch": 6.94, + "learning_rate": 0.00030692584122835677, + "loss": 0.8355, + "step": 21275 + }, + { + "epoch": 6.95, + "learning_rate": 0.00030610911466840904, + "loss": 0.9009, + "step": 21300 + }, + { + "epoch": 6.96, + "learning_rate": 0.00030529238810846125, + "loss": 0.8709, + "step": 21325 + }, + { + "epoch": 6.96, + "learning_rate": 0.0003044756615485136, + "loss": 0.8767, + "step": 21350 + }, + { + "epoch": 6.97, + "learning_rate": 0.00030365893498856584, + "loss": 0.8528, + "step": 21375 + }, + { + "epoch": 6.98, + "learning_rate": 0.0003028422084286181, + "loss": 0.8456, + "step": 21400 + }, + { + "epoch": 6.99, + "learning_rate": 0.00030202548186867033, + "loss": 0.8351, + "step": 21425 + }, + { + "epoch": 7.0, + "learning_rate": 0.00030120875530872265, + "loss": 0.858, + "step": 21450 + }, + { + "epoch": 7.0, + "eval_loss": 0.812373161315918, + "eval_runtime": 934.4652, + "eval_samples_per_second": 1.07, + "eval_steps_per_second": 0.134, + "eval_wer": 2.2754826074792502, + "step": 21462 + }, + { + "epoch": 7.0, + "learning_rate": 0.0003003920287487749, + "loss": 0.8563, + "step": 21475 + }, + { + "epoch": 7.01, + "learning_rate": 0.0002995753021888272, + "loss": 0.825, + "step": 21500 + }, + { + "epoch": 7.02, + "learning_rate": 0.00029875857562887946, + "loss": 0.8714, + "step": 21525 + }, + { + "epoch": 7.03, + "learning_rate": 0.0002979418490689317, + "loss": 0.7912, + "step": 21550 + }, + { + "epoch": 7.04, + "learning_rate": 0.000297125122508984, + "loss": 0.8249, + "step": 21575 + }, + { + "epoch": 7.05, + "learning_rate": 0.00029630839594903626, + "loss": 0.8296, + "step": 21600 + }, + { + "epoch": 7.05, + "learning_rate": 0.00029549166938908853, + "loss": 0.8464, + "step": 21625 + }, + { + "epoch": 7.06, + "learning_rate": 0.0002946749428291408, + "loss": 0.8263, + "step": 21650 + }, + { + "epoch": 7.07, + "learning_rate": 0.00029385821626919307, + "loss": 0.834, + "step": 21675 + }, + { + "epoch": 7.08, + "learning_rate": 0.00029304148970924534, + "loss": 0.8206, + "step": 21700 + }, + { + "epoch": 7.09, + "learning_rate": 0.00029222476314929766, + "loss": 0.8296, + "step": 21725 + }, + { + "epoch": 7.09, + "learning_rate": 0.0002914080365893499, + "loss": 0.8253, + "step": 21750 + }, + { + "epoch": 7.1, + "learning_rate": 0.00029059131002940214, + "loss": 0.8362, + "step": 21775 + }, + { + "epoch": 7.11, + "learning_rate": 0.00028977458346945447, + "loss": 0.8686, + "step": 21800 + }, + { + "epoch": 7.12, + "learning_rate": 0.00028895785690950674, + "loss": 0.8181, + "step": 21825 + }, + { + "epoch": 7.13, + "learning_rate": 0.00028814113034955895, + "loss": 0.7907, + "step": 21850 + }, + { + "epoch": 7.13, + "learning_rate": 0.0002873244037896112, + "loss": 0.8777, + "step": 21875 + }, + { + "epoch": 7.14, + "learning_rate": 0.00028650767722966354, + "loss": 0.88, + "step": 21900 + }, + { + "epoch": 7.15, + "learning_rate": 0.0002856909506697158, + "loss": 0.8507, + "step": 21925 + }, + { + "epoch": 7.16, + "learning_rate": 0.000284874224109768, + "loss": 0.8222, + "step": 21950 + }, + { + "epoch": 7.17, + "learning_rate": 0.0002840574975498203, + "loss": 0.802, + "step": 21975 + }, + { + "epoch": 7.18, + "learning_rate": 0.0002832407709898726, + "loss": 0.8305, + "step": 22000 + }, + { + "epoch": 7.18, + "learning_rate": 0.0002824240444299249, + "loss": 0.8268, + "step": 22025 + }, + { + "epoch": 7.19, + "learning_rate": 0.0002816073178699771, + "loss": 0.8203, + "step": 22050 + }, + { + "epoch": 7.2, + "learning_rate": 0.0002807905913100294, + "loss": 0.828, + "step": 22075 + }, + { + "epoch": 7.21, + "learning_rate": 0.0002799738647500817, + "loss": 0.8244, + "step": 22100 + }, + { + "epoch": 7.22, + "learning_rate": 0.00027915713819013396, + "loss": 0.8224, + "step": 22125 + }, + { + "epoch": 7.22, + "learning_rate": 0.0002783404116301862, + "loss": 0.8037, + "step": 22150 + }, + { + "epoch": 7.23, + "learning_rate": 0.0002775236850702385, + "loss": 0.8378, + "step": 22175 + }, + { + "epoch": 7.24, + "learning_rate": 0.00027670695851029077, + "loss": 0.8376, + "step": 22200 + }, + { + "epoch": 7.25, + "learning_rate": 0.00027589023195034304, + "loss": 0.8312, + "step": 22225 + }, + { + "epoch": 7.26, + "learning_rate": 0.0002750735053903953, + "loss": 0.8517, + "step": 22250 + }, + { + "epoch": 7.27, + "learning_rate": 0.0002742567788304476, + "loss": 0.8209, + "step": 22275 + }, + { + "epoch": 7.27, + "learning_rate": 0.00027344005227049984, + "loss": 0.7937, + "step": 22300 + }, + { + "epoch": 7.28, + "learning_rate": 0.0002726233257105521, + "loss": 0.8252, + "step": 22325 + }, + { + "epoch": 7.29, + "learning_rate": 0.0002718065991506044, + "loss": 0.8369, + "step": 22350 + }, + { + "epoch": 7.3, + "learning_rate": 0.00027098987259065665, + "loss": 0.8664, + "step": 22375 + }, + { + "epoch": 7.31, + "learning_rate": 0.0002701731460307089, + "loss": 0.8301, + "step": 22400 + }, + { + "epoch": 7.31, + "learning_rate": 0.0002693564194707612, + "loss": 0.8137, + "step": 22425 + }, + { + "epoch": 7.32, + "learning_rate": 0.00026853969291081346, + "loss": 0.799, + "step": 22450 + }, + { + "epoch": 7.33, + "learning_rate": 0.0002677229663508657, + "loss": 0.8315, + "step": 22475 + }, + { + "epoch": 7.34, + "learning_rate": 0.000266906239790918, + "loss": 0.8476, + "step": 22500 + }, + { + "epoch": 7.35, + "learning_rate": 0.0002660895132309703, + "loss": 0.8102, + "step": 22525 + }, + { + "epoch": 7.35, + "learning_rate": 0.0002652727866710226, + "loss": 0.8072, + "step": 22550 + }, + { + "epoch": 7.36, + "learning_rate": 0.0002644560601110748, + "loss": 0.8209, + "step": 22575 + }, + { + "epoch": 7.37, + "learning_rate": 0.00026363933355112707, + "loss": 0.8533, + "step": 22600 + }, + { + "epoch": 7.38, + "learning_rate": 0.0002628226069911794, + "loss": 0.8236, + "step": 22625 + }, + { + "epoch": 7.39, + "learning_rate": 0.00026200588043123166, + "loss": 0.8425, + "step": 22650 + }, + { + "epoch": 7.4, + "learning_rate": 0.0002611891538712839, + "loss": 0.8203, + "step": 22675 + }, + { + "epoch": 7.4, + "learning_rate": 0.00026037242731133614, + "loss": 0.8542, + "step": 22700 + }, + { + "epoch": 7.41, + "learning_rate": 0.00025955570075138847, + "loss": 0.8355, + "step": 22725 + }, + { + "epoch": 7.42, + "learning_rate": 0.00025873897419144073, + "loss": 0.8262, + "step": 22750 + }, + { + "epoch": 7.43, + "learning_rate": 0.00025792224763149295, + "loss": 0.8016, + "step": 22775 + }, + { + "epoch": 7.44, + "learning_rate": 0.00025710552107154527, + "loss": 0.8357, + "step": 22800 + }, + { + "epoch": 7.44, + "learning_rate": 0.00025628879451159754, + "loss": 0.8348, + "step": 22825 + }, + { + "epoch": 7.45, + "learning_rate": 0.0002554720679516498, + "loss": 0.8382, + "step": 22850 + }, + { + "epoch": 7.46, + "learning_rate": 0.000254655341391702, + "loss": 0.8915, + "step": 22875 + }, + { + "epoch": 7.47, + "learning_rate": 0.00025383861483175435, + "loss": 0.8582, + "step": 22900 + }, + { + "epoch": 7.48, + "learning_rate": 0.0002530218882718066, + "loss": 0.8403, + "step": 22925 + }, + { + "epoch": 7.49, + "learning_rate": 0.0002522051617118589, + "loss": 0.8239, + "step": 22950 + }, + { + "epoch": 7.49, + "learning_rate": 0.0002513884351519111, + "loss": 0.8343, + "step": 22975 + }, + { + "epoch": 7.5, + "learning_rate": 0.0002505717085919634, + "loss": 0.8199, + "step": 23000 + }, + { + "epoch": 7.51, + "learning_rate": 0.0002497549820320157, + "loss": 0.8383, + "step": 23025 + }, + { + "epoch": 7.52, + "learning_rate": 0.00024893825547206796, + "loss": 0.8285, + "step": 23050 + }, + { + "epoch": 7.53, + "learning_rate": 0.00024812152891212023, + "loss": 0.8192, + "step": 23075 + }, + { + "epoch": 7.53, + "learning_rate": 0.0002473048023521725, + "loss": 0.8531, + "step": 23100 + }, + { + "epoch": 7.54, + "learning_rate": 0.00024648807579222477, + "loss": 0.8463, + "step": 23125 + }, + { + "epoch": 7.55, + "learning_rate": 0.00024567134923227704, + "loss": 0.8328, + "step": 23150 + }, + { + "epoch": 7.56, + "learning_rate": 0.0002448546226723293, + "loss": 0.8301, + "step": 23175 + }, + { + "epoch": 7.57, + "learning_rate": 0.00024403789611238157, + "loss": 0.7949, + "step": 23200 + }, + { + "epoch": 7.58, + "learning_rate": 0.00024322116955243384, + "loss": 0.8713, + "step": 23225 + }, + { + "epoch": 7.58, + "learning_rate": 0.00024240444299248614, + "loss": 0.8304, + "step": 23250 + }, + { + "epoch": 7.59, + "learning_rate": 0.00024158771643253838, + "loss": 0.8424, + "step": 23275 + }, + { + "epoch": 7.6, + "learning_rate": 0.00024077098987259067, + "loss": 0.8135, + "step": 23300 + }, + { + "epoch": 7.61, + "learning_rate": 0.00023995426331264294, + "loss": 0.8487, + "step": 23325 + }, + { + "epoch": 7.62, + "learning_rate": 0.0002391375367526952, + "loss": 0.79, + "step": 23350 + }, + { + "epoch": 7.62, + "learning_rate": 0.00023832081019274748, + "loss": 0.8142, + "step": 23375 + }, + { + "epoch": 7.63, + "learning_rate": 0.00023750408363279975, + "loss": 0.8285, + "step": 23400 + }, + { + "epoch": 7.64, + "learning_rate": 0.00023668735707285202, + "loss": 0.8223, + "step": 23425 + }, + { + "epoch": 7.65, + "learning_rate": 0.0002358706305129043, + "loss": 0.8366, + "step": 23450 + }, + { + "epoch": 7.66, + "learning_rate": 0.00023505390395295656, + "loss": 0.8234, + "step": 23475 + }, + { + "epoch": 7.66, + "learning_rate": 0.00023423717739300882, + "loss": 0.8345, + "step": 23500 + }, + { + "epoch": 7.67, + "learning_rate": 0.0002334204508330611, + "loss": 0.7979, + "step": 23525 + }, + { + "epoch": 7.68, + "learning_rate": 0.00023260372427311336, + "loss": 0.8581, + "step": 23550 + }, + { + "epoch": 7.69, + "learning_rate": 0.00023178699771316563, + "loss": 0.8349, + "step": 23575 + }, + { + "epoch": 7.7, + "learning_rate": 0.00023097027115321793, + "loss": 0.799, + "step": 23600 + }, + { + "epoch": 7.71, + "learning_rate": 0.00023015354459327017, + "loss": 0.8198, + "step": 23625 + }, + { + "epoch": 7.71, + "learning_rate": 0.00022933681803332246, + "loss": 0.7925, + "step": 23650 + }, + { + "epoch": 7.72, + "learning_rate": 0.0002285200914733747, + "loss": 0.8696, + "step": 23675 + }, + { + "epoch": 7.73, + "learning_rate": 0.000227703364913427, + "loss": 0.8114, + "step": 23700 + }, + { + "epoch": 7.74, + "learning_rate": 0.00022688663835347924, + "loss": 0.8593, + "step": 23725 + }, + { + "epoch": 7.75, + "learning_rate": 0.00022606991179353154, + "loss": 0.8285, + "step": 23750 + }, + { + "epoch": 7.75, + "learning_rate": 0.00022525318523358378, + "loss": 0.8469, + "step": 23775 + }, + { + "epoch": 7.76, + "learning_rate": 0.00022443645867363608, + "loss": 0.8801, + "step": 23800 + }, + { + "epoch": 7.77, + "learning_rate": 0.00022361973211368832, + "loss": 0.8604, + "step": 23825 + }, + { + "epoch": 7.78, + "learning_rate": 0.00022280300555374061, + "loss": 0.8238, + "step": 23850 + }, + { + "epoch": 7.79, + "learning_rate": 0.00022198627899379288, + "loss": 0.8033, + "step": 23875 + }, + { + "epoch": 7.8, + "learning_rate": 0.00022116955243384515, + "loss": 0.8529, + "step": 23900 + }, + { + "epoch": 7.8, + "learning_rate": 0.00022035282587389742, + "loss": 0.8373, + "step": 23925 + }, + { + "epoch": 7.81, + "learning_rate": 0.0002195360993139497, + "loss": 0.8218, + "step": 23950 + }, + { + "epoch": 7.82, + "learning_rate": 0.00021871937275400196, + "loss": 0.8206, + "step": 23975 + }, + { + "epoch": 7.83, + "learning_rate": 0.00021790264619405423, + "loss": 0.8293, + "step": 24000 + }, + { + "epoch": 7.84, + "learning_rate": 0.00021708591963410652, + "loss": 0.8172, + "step": 24025 + }, + { + "epoch": 7.84, + "learning_rate": 0.00021626919307415876, + "loss": 0.8329, + "step": 24050 + }, + { + "epoch": 7.85, + "learning_rate": 0.00021545246651421106, + "loss": 0.8494, + "step": 24075 + }, + { + "epoch": 7.86, + "learning_rate": 0.0002146357399542633, + "loss": 0.8686, + "step": 24100 + }, + { + "epoch": 7.87, + "learning_rate": 0.0002138190133943156, + "loss": 0.8416, + "step": 24125 + }, + { + "epoch": 7.88, + "learning_rate": 0.00021300228683436787, + "loss": 0.8036, + "step": 24150 + }, + { + "epoch": 7.88, + "learning_rate": 0.00021218556027442014, + "loss": 0.8794, + "step": 24175 + }, + { + "epoch": 7.89, + "learning_rate": 0.0002113688337144724, + "loss": 0.7951, + "step": 24200 + }, + { + "epoch": 7.9, + "learning_rate": 0.00021055210715452467, + "loss": 0.8146, + "step": 24225 + }, + { + "epoch": 7.91, + "learning_rate": 0.00020973538059457694, + "loss": 0.8773, + "step": 24250 + }, + { + "epoch": 7.92, + "learning_rate": 0.0002089186540346292, + "loss": 0.8265, + "step": 24275 + }, + { + "epoch": 7.93, + "learning_rate": 0.00020810192747468148, + "loss": 0.8296, + "step": 24300 + }, + { + "epoch": 7.93, + "learning_rate": 0.00020728520091473375, + "loss": 0.8041, + "step": 24325 + }, + { + "epoch": 7.94, + "learning_rate": 0.00020646847435478602, + "loss": 0.8803, + "step": 24350 + }, + { + "epoch": 7.95, + "learning_rate": 0.00020565174779483829, + "loss": 0.8693, + "step": 24375 + }, + { + "epoch": 7.96, + "learning_rate": 0.00020483502123489055, + "loss": 0.856, + "step": 24400 + }, + { + "epoch": 7.97, + "learning_rate": 0.00020401829467494285, + "loss": 0.7898, + "step": 24425 + }, + { + "epoch": 7.97, + "learning_rate": 0.0002032015681149951, + "loss": 0.8292, + "step": 24450 + }, + { + "epoch": 7.98, + "learning_rate": 0.0002023848415550474, + "loss": 0.8262, + "step": 24475 + }, + { + "epoch": 7.99, + "learning_rate": 0.00020156811499509963, + "loss": 0.824, + "step": 24500 + }, + { + "epoch": 8.0, + "learning_rate": 0.00020075138843515193, + "loss": 0.8035, + "step": 24525 + }, + { + "epoch": 8.0, + "eval_loss": 0.7898829579353333, + "eval_runtime": 928.0218, + "eval_samples_per_second": 1.078, + "eval_steps_per_second": 0.135, + "eval_wer": 0.9045975939569151, + "step": 24528 + }, + { + "epoch": 8.01, + "learning_rate": 0.00019993466187520417, + "loss": 0.7917, + "step": 24550 + }, + { + "epoch": 8.02, + "learning_rate": 0.00019911793531525646, + "loss": 0.8023, + "step": 24575 + }, + { + "epoch": 8.02, + "learning_rate": 0.0001983012087553087, + "loss": 0.8186, + "step": 24600 + }, + { + "epoch": 8.03, + "learning_rate": 0.000197484482195361, + "loss": 0.8236, + "step": 24625 + }, + { + "epoch": 8.04, + "learning_rate": 0.00019666775563541327, + "loss": 0.787, + "step": 24650 + }, + { + "epoch": 8.05, + "learning_rate": 0.00019585102907546554, + "loss": 0.8501, + "step": 24675 + }, + { + "epoch": 8.06, + "learning_rate": 0.0001950343025155178, + "loss": 0.8028, + "step": 24700 + }, + { + "epoch": 8.06, + "learning_rate": 0.00019421757595557008, + "loss": 0.7872, + "step": 24725 + }, + { + "epoch": 8.07, + "learning_rate": 0.00019340084939562237, + "loss": 0.7791, + "step": 24750 + }, + { + "epoch": 8.08, + "learning_rate": 0.0001925841228356746, + "loss": 0.7923, + "step": 24775 + }, + { + "epoch": 8.09, + "learning_rate": 0.0001917673962757269, + "loss": 0.7652, + "step": 24800 + }, + { + "epoch": 8.1, + "learning_rate": 0.00019095066971577915, + "loss": 0.7888, + "step": 24825 + }, + { + "epoch": 8.11, + "learning_rate": 0.00019013394315583145, + "loss": 0.8608, + "step": 24850 + }, + { + "epoch": 8.11, + "learning_rate": 0.0001893172165958837, + "loss": 0.8283, + "step": 24875 + }, + { + "epoch": 8.12, + "learning_rate": 0.00018850049003593598, + "loss": 0.7833, + "step": 24900 + }, + { + "epoch": 8.13, + "learning_rate": 0.00018768376347598825, + "loss": 0.8041, + "step": 24925 + }, + { + "epoch": 8.14, + "learning_rate": 0.00018686703691604052, + "loss": 0.7879, + "step": 24950 + }, + { + "epoch": 8.15, + "learning_rate": 0.0001860503103560928, + "loss": 0.7992, + "step": 24975 + }, + { + "epoch": 8.15, + "learning_rate": 0.00018523358379614506, + "loss": 0.8202, + "step": 25000 + }, + { + "epoch": 8.16, + "learning_rate": 0.00018441685723619733, + "loss": 0.7904, + "step": 25025 + }, + { + "epoch": 8.17, + "learning_rate": 0.0001836001306762496, + "loss": 0.7857, + "step": 25050 + }, + { + "epoch": 8.18, + "learning_rate": 0.00018278340411630187, + "loss": 0.832, + "step": 25075 + }, + { + "epoch": 8.19, + "learning_rate": 0.00018196667755635413, + "loss": 0.802, + "step": 25100 + }, + { + "epoch": 8.19, + "learning_rate": 0.0001811499509964064, + "loss": 0.8337, + "step": 25125 + }, + { + "epoch": 8.2, + "learning_rate": 0.00018033322443645867, + "loss": 0.8053, + "step": 25150 + }, + { + "epoch": 8.21, + "learning_rate": 0.00017951649787651094, + "loss": 0.8313, + "step": 25175 + }, + { + "epoch": 8.22, + "learning_rate": 0.00017869977131656324, + "loss": 0.8284, + "step": 25200 + }, + { + "epoch": 8.23, + "learning_rate": 0.00017788304475661548, + "loss": 0.82, + "step": 25225 + }, + { + "epoch": 8.24, + "learning_rate": 0.00017706631819666777, + "loss": 0.8016, + "step": 25250 + }, + { + "epoch": 8.24, + "learning_rate": 0.00017624959163672002, + "loss": 0.8122, + "step": 25275 + }, + { + "epoch": 8.25, + "learning_rate": 0.0001754328650767723, + "loss": 0.8121, + "step": 25300 + }, + { + "epoch": 8.26, + "learning_rate": 0.00017461613851682455, + "loss": 0.8173, + "step": 25325 + }, + { + "epoch": 8.27, + "learning_rate": 0.00017379941195687685, + "loss": 0.838, + "step": 25350 + }, + { + "epoch": 8.28, + "learning_rate": 0.0001729826853969291, + "loss": 0.7872, + "step": 25375 + }, + { + "epoch": 8.28, + "learning_rate": 0.0001721659588369814, + "loss": 0.8176, + "step": 25400 + }, + { + "epoch": 8.29, + "learning_rate": 0.00017134923227703363, + "loss": 0.8363, + "step": 25425 + }, + { + "epoch": 8.3, + "learning_rate": 0.00017053250571708592, + "loss": 0.8252, + "step": 25450 + }, + { + "epoch": 8.31, + "learning_rate": 0.0001697157791571382, + "loss": 0.8278, + "step": 25475 + }, + { + "epoch": 8.32, + "learning_rate": 0.00016889905259719046, + "loss": 0.8216, + "step": 25500 + }, + { + "epoch": 8.33, + "learning_rate": 0.00016808232603724276, + "loss": 0.7899, + "step": 25525 + }, + { + "epoch": 8.33, + "learning_rate": 0.000167265599477295, + "loss": 0.7876, + "step": 25550 + }, + { + "epoch": 8.34, + "learning_rate": 0.0001664488729173473, + "loss": 0.8127, + "step": 25575 + }, + { + "epoch": 8.35, + "learning_rate": 0.00016563214635739954, + "loss": 0.7943, + "step": 25600 + }, + { + "epoch": 8.36, + "learning_rate": 0.00016481541979745183, + "loss": 0.8343, + "step": 25625 + }, + { + "epoch": 8.37, + "learning_rate": 0.00016399869323750407, + "loss": 0.7859, + "step": 25650 + }, + { + "epoch": 8.37, + "learning_rate": 0.00016318196667755637, + "loss": 0.7914, + "step": 25675 + }, + { + "epoch": 8.38, + "learning_rate": 0.0001623652401176086, + "loss": 0.7867, + "step": 25700 + }, + { + "epoch": 8.39, + "learning_rate": 0.0001615485135576609, + "loss": 0.8232, + "step": 25725 + }, + { + "epoch": 8.4, + "learning_rate": 0.00016073178699771318, + "loss": 0.8661, + "step": 25750 + }, + { + "epoch": 8.41, + "learning_rate": 0.00015991506043776545, + "loss": 0.7897, + "step": 25775 + }, + { + "epoch": 8.41, + "learning_rate": 0.00015909833387781771, + "loss": 0.7948, + "step": 25800 + }, + { + "epoch": 8.42, + "learning_rate": 0.00015828160731786998, + "loss": 0.8237, + "step": 25825 + }, + { + "epoch": 8.43, + "learning_rate": 0.00015746488075792225, + "loss": 0.7961, + "step": 25850 + }, + { + "epoch": 8.44, + "learning_rate": 0.00015664815419797452, + "loss": 0.8382, + "step": 25875 + }, + { + "epoch": 8.45, + "learning_rate": 0.0001558314276380268, + "loss": 0.8154, + "step": 25900 + }, + { + "epoch": 8.46, + "learning_rate": 0.00015501470107807906, + "loss": 0.8529, + "step": 25925 + }, + { + "epoch": 8.46, + "learning_rate": 0.00015419797451813133, + "loss": 0.8029, + "step": 25950 + }, + { + "epoch": 8.47, + "learning_rate": 0.0001533812479581836, + "loss": 0.7945, + "step": 25975 + }, + { + "epoch": 8.48, + "learning_rate": 0.00015256452139823586, + "loss": 0.8124, + "step": 26000 + }, + { + "epoch": 8.49, + "learning_rate": 0.00015174779483828816, + "loss": 0.8042, + "step": 26025 + }, + { + "epoch": 8.5, + "learning_rate": 0.0001509310682783404, + "loss": 0.7905, + "step": 26050 + }, + { + "epoch": 8.5, + "learning_rate": 0.0001501143417183927, + "loss": 0.813, + "step": 26075 + }, + { + "epoch": 8.51, + "learning_rate": 0.00014929761515844494, + "loss": 0.7823, + "step": 26100 + }, + { + "epoch": 8.52, + "learning_rate": 0.00014848088859849723, + "loss": 0.7981, + "step": 26125 + }, + { + "epoch": 8.53, + "learning_rate": 0.0001476968311009474, + "loss": 0.7958, + "step": 26150 + }, + { + "epoch": 8.54, + "learning_rate": 0.00014688010454099967, + "loss": 0.82, + "step": 26175 + }, + { + "epoch": 8.55, + "learning_rate": 0.00014606337798105196, + "loss": 0.8012, + "step": 26200 + }, + { + "epoch": 8.55, + "learning_rate": 0.0001452466514211042, + "loss": 0.7772, + "step": 26225 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001444299248611565, + "loss": 0.8139, + "step": 26250 + }, + { + "epoch": 8.57, + "learning_rate": 0.00014361319830120874, + "loss": 0.8207, + "step": 26275 + }, + { + "epoch": 8.58, + "learning_rate": 0.00014279647174126104, + "loss": 0.8665, + "step": 26300 + }, + { + "epoch": 8.59, + "learning_rate": 0.00014197974518131328, + "loss": 0.8271, + "step": 26325 + }, + { + "epoch": 8.59, + "learning_rate": 0.00014116301862136558, + "loss": 0.8198, + "step": 26350 + }, + { + "epoch": 8.6, + "learning_rate": 0.00014034629206141782, + "loss": 0.7893, + "step": 26375 + }, + { + "epoch": 8.61, + "learning_rate": 0.0001395295655014701, + "loss": 0.8025, + "step": 26400 + }, + { + "epoch": 8.62, + "learning_rate": 0.00013871283894152238, + "loss": 0.8238, + "step": 26425 + }, + { + "epoch": 8.63, + "learning_rate": 0.00013789611238157465, + "loss": 0.7924, + "step": 26450 + }, + { + "epoch": 8.64, + "learning_rate": 0.00013707938582162695, + "loss": 0.7637, + "step": 26475 + }, + { + "epoch": 8.64, + "learning_rate": 0.0001362626592616792, + "loss": 0.82, + "step": 26500 + }, + { + "epoch": 8.65, + "learning_rate": 0.00013544593270173148, + "loss": 0.7799, + "step": 26525 + }, + { + "epoch": 8.66, + "learning_rate": 0.00013462920614178373, + "loss": 0.8069, + "step": 26550 + }, + { + "epoch": 8.67, + "learning_rate": 0.00013381247958183602, + "loss": 0.8108, + "step": 26575 + }, + { + "epoch": 8.68, + "learning_rate": 0.00013299575302188826, + "loss": 0.7857, + "step": 26600 + }, + { + "epoch": 8.68, + "learning_rate": 0.00013217902646194056, + "loss": 0.8306, + "step": 26625 + }, + { + "epoch": 8.69, + "learning_rate": 0.0001313622999019928, + "loss": 0.7965, + "step": 26650 + }, + { + "epoch": 8.7, + "learning_rate": 0.0001305455733420451, + "loss": 0.8243, + "step": 26675 + }, + { + "epoch": 8.71, + "learning_rate": 0.00012972884678209734, + "loss": 0.7843, + "step": 26700 + }, + { + "epoch": 8.72, + "learning_rate": 0.00012891212022214963, + "loss": 0.7699, + "step": 26725 + }, + { + "epoch": 8.72, + "learning_rate": 0.0001280953936622019, + "loss": 0.8122, + "step": 26750 + }, + { + "epoch": 8.73, + "learning_rate": 0.00012727866710225417, + "loss": 0.815, + "step": 26775 + }, + { + "epoch": 8.74, + "learning_rate": 0.00012646194054230644, + "loss": 0.7969, + "step": 26800 + }, + { + "epoch": 8.75, + "learning_rate": 0.0001256452139823587, + "loss": 0.8181, + "step": 26825 + }, + { + "epoch": 8.76, + "learning_rate": 0.00012482848742241098, + "loss": 0.8133, + "step": 26850 + }, + { + "epoch": 8.77, + "learning_rate": 0.00012401176086246325, + "loss": 0.7942, + "step": 26875 + }, + { + "epoch": 8.77, + "learning_rate": 0.00012319503430251551, + "loss": 0.8325, + "step": 26900 + }, + { + "epoch": 8.78, + "learning_rate": 0.00012237830774256778, + "loss": 0.818, + "step": 26925 + }, + { + "epoch": 8.79, + "learning_rate": 0.00012156158118262007, + "loss": 0.8149, + "step": 26950 + }, + { + "epoch": 8.8, + "learning_rate": 0.00012074485462267233, + "loss": 0.8104, + "step": 26975 + }, + { + "epoch": 8.81, + "learning_rate": 0.0001199281280627246, + "loss": 0.7673, + "step": 27000 + }, + { + "epoch": 8.81, + "learning_rate": 0.00011911140150277687, + "loss": 0.8226, + "step": 27025 + }, + { + "epoch": 8.82, + "learning_rate": 0.00011829467494282914, + "loss": 0.7903, + "step": 27050 + }, + { + "epoch": 8.83, + "learning_rate": 0.00011747794838288141, + "loss": 0.7812, + "step": 27075 + }, + { + "epoch": 8.84, + "learning_rate": 0.00011666122182293368, + "loss": 0.7588, + "step": 27100 + }, + { + "epoch": 8.85, + "learning_rate": 0.00011584449526298596, + "loss": 0.7999, + "step": 27125 + }, + { + "epoch": 8.86, + "learning_rate": 0.00011502776870303823, + "loss": 0.8294, + "step": 27150 + }, + { + "epoch": 8.86, + "learning_rate": 0.0001142110421430905, + "loss": 0.7698, + "step": 27175 + }, + { + "epoch": 8.87, + "learning_rate": 0.00011339431558314277, + "loss": 0.8146, + "step": 27200 + }, + { + "epoch": 8.88, + "learning_rate": 0.00011257758902319504, + "loss": 0.8294, + "step": 27225 + }, + { + "epoch": 8.89, + "learning_rate": 0.0001117608624632473, + "loss": 0.7958, + "step": 27250 + }, + { + "epoch": 8.9, + "learning_rate": 0.00011094413590329957, + "loss": 0.8342, + "step": 27275 + }, + { + "epoch": 8.9, + "learning_rate": 0.00011012740934335184, + "loss": 0.7979, + "step": 27300 + }, + { + "epoch": 8.91, + "learning_rate": 0.00010931068278340411, + "loss": 0.8213, + "step": 27325 + }, + { + "epoch": 8.92, + "learning_rate": 0.00010849395622345638, + "loss": 0.7923, + "step": 27350 + }, + { + "epoch": 8.93, + "learning_rate": 0.00010767722966350866, + "loss": 0.789, + "step": 27375 + }, + { + "epoch": 8.94, + "learning_rate": 0.00010686050310356093, + "loss": 0.8077, + "step": 27400 + }, + { + "epoch": 8.94, + "learning_rate": 0.0001060437765436132, + "loss": 0.786, + "step": 27425 + }, + { + "epoch": 8.95, + "learning_rate": 0.00010522704998366547, + "loss": 0.7545, + "step": 27450 + }, + { + "epoch": 8.96, + "learning_rate": 0.00010441032342371774, + "loss": 0.8159, + "step": 27475 + }, + { + "epoch": 8.97, + "learning_rate": 0.00010359359686377, + "loss": 0.7748, + "step": 27500 + }, + { + "epoch": 8.98, + "learning_rate": 0.00010277687030382227, + "loss": 0.8255, + "step": 27525 + }, + { + "epoch": 8.99, + "learning_rate": 0.00010196014374387456, + "loss": 0.8138, + "step": 27550 + }, + { + "epoch": 8.99, + "learning_rate": 0.00010114341718392683, + "loss": 0.8034, + "step": 27575 + }, + { + "epoch": 9.0, + "eval_loss": 0.7793058753013611, + "eval_runtime": 987.3495, + "eval_samples_per_second": 1.013, + "eval_steps_per_second": 0.127, + "eval_wer": 0.2611209549566353, + "step": 27594 + }, + { + "epoch": 9.0, + "learning_rate": 0.0001003266906239791, + "loss": 0.8083, + "step": 27600 + }, + { + "epoch": 9.01, + "learning_rate": 9.950996406403136e-05, + "loss": 0.8269, + "step": 27625 + }, + { + "epoch": 9.02, + "learning_rate": 9.869323750408365e-05, + "loss": 0.7802, + "step": 27650 + }, + { + "epoch": 9.03, + "learning_rate": 9.787651094413591e-05, + "loss": 0.8287, + "step": 27675 + }, + { + "epoch": 9.03, + "learning_rate": 9.705978438418818e-05, + "loss": 0.799, + "step": 27700 + }, + { + "epoch": 9.04, + "learning_rate": 9.624305782424045e-05, + "loss": 0.8113, + "step": 27725 + }, + { + "epoch": 9.05, + "learning_rate": 9.542633126429272e-05, + "loss": 0.7731, + "step": 27750 + }, + { + "epoch": 9.06, + "learning_rate": 9.460960470434499e-05, + "loss": 0.8259, + "step": 27775 + }, + { + "epoch": 9.07, + "learning_rate": 9.379287814439726e-05, + "loss": 0.7874, + "step": 27800 + }, + { + "epoch": 9.08, + "learning_rate": 9.297615158444953e-05, + "loss": 0.7963, + "step": 27825 + }, + { + "epoch": 9.08, + "learning_rate": 9.21594250245018e-05, + "loss": 0.8033, + "step": 27850 + }, + { + "epoch": 9.09, + "learning_rate": 9.134269846455406e-05, + "loss": 0.7849, + "step": 27875 + }, + { + "epoch": 9.1, + "learning_rate": 9.052597190460633e-05, + "loss": 0.7614, + "step": 27900 + }, + { + "epoch": 9.11, + "learning_rate": 8.970924534465862e-05, + "loss": 0.8092, + "step": 27925 + }, + { + "epoch": 9.12, + "learning_rate": 8.889251878471088e-05, + "loss": 0.8077, + "step": 27950 + }, + { + "epoch": 9.12, + "learning_rate": 8.807579222476315e-05, + "loss": 0.745, + "step": 27975 + }, + { + "epoch": 9.13, + "learning_rate": 8.725906566481542e-05, + "loss": 0.7826, + "step": 28000 + }, + { + "epoch": 9.14, + "learning_rate": 8.644233910486769e-05, + "loss": 0.7996, + "step": 28025 + }, + { + "epoch": 9.15, + "learning_rate": 8.562561254491996e-05, + "loss": 0.7794, + "step": 28050 + }, + { + "epoch": 9.16, + "learning_rate": 8.480888598497223e-05, + "loss": 0.7683, + "step": 28075 + }, + { + "epoch": 9.17, + "learning_rate": 8.39921594250245e-05, + "loss": 0.7636, + "step": 28100 + }, + { + "epoch": 9.17, + "learning_rate": 8.317543286507677e-05, + "loss": 0.8064, + "step": 28125 + }, + { + "epoch": 9.18, + "learning_rate": 8.235870630512903e-05, + "loss": 0.7704, + "step": 28150 + }, + { + "epoch": 9.19, + "learning_rate": 8.154197974518132e-05, + "loss": 0.7917, + "step": 28175 + }, + { + "epoch": 9.2, + "learning_rate": 8.072525318523359e-05, + "loss": 0.7519, + "step": 28200 + }, + { + "epoch": 9.21, + "learning_rate": 7.990852662528585e-05, + "loss": 0.7986, + "step": 28225 + }, + { + "epoch": 9.21, + "learning_rate": 7.909180006533812e-05, + "loss": 0.8103, + "step": 28250 + }, + { + "epoch": 9.22, + "learning_rate": 7.827507350539039e-05, + "loss": 0.8082, + "step": 28275 + }, + { + "epoch": 9.23, + "learning_rate": 7.745834694544267e-05, + "loss": 0.8105, + "step": 28300 + }, + { + "epoch": 9.24, + "learning_rate": 7.664162038549494e-05, + "loss": 0.7817, + "step": 28325 + }, + { + "epoch": 9.25, + "learning_rate": 7.582489382554721e-05, + "loss": 0.7758, + "step": 28350 + }, + { + "epoch": 9.25, + "learning_rate": 7.500816726559948e-05, + "loss": 0.8288, + "step": 28375 + }, + { + "epoch": 9.26, + "learning_rate": 7.419144070565175e-05, + "loss": 0.8038, + "step": 28400 + }, + { + "epoch": 9.27, + "learning_rate": 7.337471414570402e-05, + "loss": 0.7801, + "step": 28425 + }, + { + "epoch": 9.28, + "learning_rate": 7.25579875857563e-05, + "loss": 0.8335, + "step": 28450 + }, + { + "epoch": 9.29, + "learning_rate": 7.174126102580857e-05, + "loss": 0.7795, + "step": 28475 + }, + { + "epoch": 9.3, + "learning_rate": 7.092453446586084e-05, + "loss": 0.773, + "step": 28500 + }, + { + "epoch": 9.3, + "learning_rate": 7.01078079059131e-05, + "loss": 0.8057, + "step": 28525 + }, + { + "epoch": 9.31, + "learning_rate": 6.929108134596538e-05, + "loss": 0.8015, + "step": 28550 + }, + { + "epoch": 9.32, + "learning_rate": 6.847435478601764e-05, + "loss": 0.7955, + "step": 28575 + }, + { + "epoch": 9.33, + "learning_rate": 6.765762822606991e-05, + "loss": 0.7673, + "step": 28600 + }, + { + "epoch": 9.34, + "learning_rate": 6.684090166612218e-05, + "loss": 0.8372, + "step": 28625 + }, + { + "epoch": 9.34, + "learning_rate": 6.602417510617445e-05, + "loss": 0.791, + "step": 28650 + }, + { + "epoch": 9.35, + "learning_rate": 6.520744854622672e-05, + "loss": 0.7455, + "step": 28675 + }, + { + "epoch": 9.36, + "learning_rate": 6.439072198627899e-05, + "loss": 0.8192, + "step": 28700 + }, + { + "epoch": 9.37, + "learning_rate": 6.357399542633127e-05, + "loss": 0.7503, + "step": 28725 + }, + { + "epoch": 9.38, + "learning_rate": 6.275726886638354e-05, + "loss": 0.7876, + "step": 28750 + }, + { + "epoch": 9.39, + "learning_rate": 6.194054230643581e-05, + "loss": 0.7727, + "step": 28775 + }, + { + "epoch": 9.39, + "learning_rate": 6.112381574648808e-05, + "loss": 0.7724, + "step": 28800 + }, + { + "epoch": 9.4, + "learning_rate": 6.0307089186540345e-05, + "loss": 0.7848, + "step": 28825 + }, + { + "epoch": 9.41, + "learning_rate": 5.9490362626592614e-05, + "loss": 0.7885, + "step": 28850 + }, + { + "epoch": 9.42, + "learning_rate": 5.867363606664489e-05, + "loss": 0.7819, + "step": 28875 + }, + { + "epoch": 9.43, + "learning_rate": 5.785690950669716e-05, + "loss": 0.7699, + "step": 28900 + }, + { + "epoch": 9.43, + "learning_rate": 5.7040182946749434e-05, + "loss": 0.8049, + "step": 28925 + }, + { + "epoch": 9.44, + "learning_rate": 5.62234563868017e-05, + "loss": 0.8224, + "step": 28950 + }, + { + "epoch": 9.45, + "learning_rate": 5.540672982685397e-05, + "loss": 0.7652, + "step": 28975 + }, + { + "epoch": 9.46, + "learning_rate": 5.459000326690624e-05, + "loss": 0.824, + "step": 29000 + }, + { + "epoch": 9.47, + "learning_rate": 5.377327670695851e-05, + "loss": 0.7782, + "step": 29025 + }, + { + "epoch": 9.47, + "learning_rate": 5.2956550147010785e-05, + "loss": 0.7705, + "step": 29050 + }, + { + "epoch": 9.48, + "learning_rate": 5.213982358706305e-05, + "loss": 0.771, + "step": 29075 + }, + { + "epoch": 9.49, + "learning_rate": 5.132309702711532e-05, + "loss": 0.781, + "step": 29100 + }, + { + "epoch": 9.5, + "learning_rate": 5.050637046716759e-05, + "loss": 0.8164, + "step": 29125 + }, + { + "epoch": 9.51, + "learning_rate": 4.968964390721986e-05, + "loss": 0.8296, + "step": 29150 + }, + { + "epoch": 9.52, + "learning_rate": 4.8872917347272135e-05, + "loss": 0.7553, + "step": 29175 + }, + { + "epoch": 9.52, + "learning_rate": 4.8056190787324404e-05, + "loss": 0.8039, + "step": 29200 + }, + { + "epoch": 9.53, + "learning_rate": 4.723946422737667e-05, + "loss": 0.7857, + "step": 29225 + }, + { + "epoch": 9.54, + "learning_rate": 4.642273766742895e-05, + "loss": 0.8134, + "step": 29250 + }, + { + "epoch": 9.55, + "learning_rate": 4.560601110748122e-05, + "loss": 0.7865, + "step": 29275 + }, + { + "epoch": 9.56, + "learning_rate": 4.4789284547533486e-05, + "loss": 0.8112, + "step": 29300 + }, + { + "epoch": 9.56, + "learning_rate": 4.397255798758576e-05, + "loss": 0.8086, + "step": 29325 + }, + { + "epoch": 9.57, + "learning_rate": 4.315583142763803e-05, + "loss": 0.794, + "step": 29350 + }, + { + "epoch": 9.58, + "learning_rate": 4.23391048676903e-05, + "loss": 0.8027, + "step": 29375 + }, + { + "epoch": 9.59, + "learning_rate": 4.152237830774257e-05, + "loss": 0.802, + "step": 29400 + }, + { + "epoch": 9.6, + "learning_rate": 4.0705651747794836e-05, + "loss": 0.7727, + "step": 29425 + }, + { + "epoch": 9.61, + "learning_rate": 3.988892518784711e-05, + "loss": 0.7935, + "step": 29450 + }, + { + "epoch": 9.61, + "learning_rate": 3.907219862789938e-05, + "loss": 0.7929, + "step": 29475 + }, + { + "epoch": 9.62, + "learning_rate": 3.825547206795165e-05, + "loss": 0.7946, + "step": 29500 + }, + { + "epoch": 9.63, + "learning_rate": 3.743874550800392e-05, + "loss": 0.7881, + "step": 29525 + }, + { + "epoch": 9.64, + "learning_rate": 3.662201894805619e-05, + "loss": 0.7713, + "step": 29550 + }, + { + "epoch": 9.65, + "learning_rate": 3.580529238810846e-05, + "loss": 0.814, + "step": 29575 + }, + { + "epoch": 9.65, + "learning_rate": 3.498856582816073e-05, + "loss": 0.7921, + "step": 29600 + }, + { + "epoch": 9.66, + "learning_rate": 3.417183926821301e-05, + "loss": 0.747, + "step": 29625 + }, + { + "epoch": 9.67, + "learning_rate": 3.3355112708265276e-05, + "loss": 0.7977, + "step": 29650 + }, + { + "epoch": 9.68, + "learning_rate": 3.2538386148317544e-05, + "loss": 0.7985, + "step": 29675 + }, + { + "epoch": 9.69, + "learning_rate": 3.172165958836981e-05, + "loss": 0.7921, + "step": 29700 + }, + { + "epoch": 9.7, + "learning_rate": 3.090493302842208e-05, + "loss": 0.7724, + "step": 29725 + }, + { + "epoch": 9.7, + "learning_rate": 3.0088206468474357e-05, + "loss": 0.7708, + "step": 29750 + }, + { + "epoch": 9.71, + "learning_rate": 2.9271479908526626e-05, + "loss": 0.7724, + "step": 29775 + }, + { + "epoch": 9.72, + "learning_rate": 2.8454753348578895e-05, + "loss": 0.8304, + "step": 29800 + }, + { + "epoch": 9.73, + "learning_rate": 2.7638026788631167e-05, + "loss": 0.7765, + "step": 29825 + }, + { + "epoch": 9.74, + "learning_rate": 2.6821300228683436e-05, + "loss": 0.8349, + "step": 29850 + }, + { + "epoch": 9.74, + "learning_rate": 2.6004573668735708e-05, + "loss": 0.8128, + "step": 29875 + }, + { + "epoch": 9.75, + "learning_rate": 2.5187847108787977e-05, + "loss": 0.7528, + "step": 29900 + }, + { + "epoch": 9.76, + "learning_rate": 2.437112054884025e-05, + "loss": 0.8442, + "step": 29925 + }, + { + "epoch": 9.77, + "learning_rate": 2.355439398889252e-05, + "loss": 0.7797, + "step": 29950 + }, + { + "epoch": 9.78, + "learning_rate": 2.273766742894479e-05, + "loss": 0.8008, + "step": 29975 + }, + { + "epoch": 9.78, + "learning_rate": 2.192094086899706e-05, + "loss": 0.7834, + "step": 30000 + }, + { + "epoch": 9.79, + "learning_rate": 2.110421430904933e-05, + "loss": 0.774, + "step": 30025 + }, + { + "epoch": 9.8, + "learning_rate": 2.02874877491016e-05, + "loss": 0.8219, + "step": 30050 + }, + { + "epoch": 9.81, + "learning_rate": 1.9470761189153872e-05, + "loss": 0.8094, + "step": 30075 + }, + { + "epoch": 9.82, + "learning_rate": 1.8654034629206144e-05, + "loss": 0.7588, + "step": 30100 + }, + { + "epoch": 9.83, + "learning_rate": 1.7837308069258413e-05, + "loss": 0.7986, + "step": 30125 + }, + { + "epoch": 9.83, + "learning_rate": 1.7020581509310685e-05, + "loss": 0.7798, + "step": 30150 + }, + { + "epoch": 9.84, + "learning_rate": 1.6203854949362954e-05, + "loss": 0.8109, + "step": 30175 + }, + { + "epoch": 9.85, + "learning_rate": 1.5387128389415226e-05, + "loss": 0.809, + "step": 30200 + }, + { + "epoch": 9.86, + "learning_rate": 1.4570401829467494e-05, + "loss": 0.8146, + "step": 30225 + }, + { + "epoch": 9.87, + "learning_rate": 1.3753675269519765e-05, + "loss": 0.7787, + "step": 30250 + }, + { + "epoch": 9.87, + "learning_rate": 1.2936948709572035e-05, + "loss": 0.7642, + "step": 30275 + }, + { + "epoch": 9.88, + "learning_rate": 1.2120222149624306e-05, + "loss": 0.8149, + "step": 30300 + }, + { + "epoch": 9.89, + "learning_rate": 1.1303495589676576e-05, + "loss": 0.7651, + "step": 30325 + }, + { + "epoch": 9.9, + "learning_rate": 1.0486769029728847e-05, + "loss": 0.8262, + "step": 30350 + }, + { + "epoch": 9.91, + "learning_rate": 9.670042469781117e-06, + "loss": 0.8193, + "step": 30375 + }, + { + "epoch": 9.92, + "learning_rate": 8.853315909833388e-06, + "loss": 0.7813, + "step": 30400 + }, + { + "epoch": 9.92, + "learning_rate": 8.03658934988566e-06, + "loss": 0.7949, + "step": 30425 + }, + { + "epoch": 9.93, + "learning_rate": 7.2198627899379294e-06, + "loss": 0.8129, + "step": 30450 + }, + { + "epoch": 9.94, + "learning_rate": 6.403136229990199e-06, + "loss": 0.833, + "step": 30475 + }, + { + "epoch": 9.95, + "learning_rate": 5.58640967004247e-06, + "loss": 0.7824, + "step": 30500 + }, + { + "epoch": 9.96, + "learning_rate": 4.76968311009474e-06, + "loss": 0.7823, + "step": 30525 + }, + { + "epoch": 9.96, + "learning_rate": 3.952956550147011e-06, + "loss": 0.8411, + "step": 30550 + }, + { + "epoch": 9.97, + "learning_rate": 3.1362299901992813e-06, + "loss": 0.8127, + "step": 30575 + }, + { + "epoch": 9.98, + "learning_rate": 2.3195034302515518e-06, + "loss": 0.8256, + "step": 30600 + }, + { + "epoch": 9.99, + "learning_rate": 1.5027768703038222e-06, + "loss": 0.7862, + "step": 30625 + }, + { + "epoch": 10.0, + "learning_rate": 6.860503103560928e-07, + "loss": 0.7774, + "step": 30650 + }, + { + "epoch": 10.0, + "eval_loss": 0.7750564813613892, + "eval_runtime": 942.8578, + "eval_samples_per_second": 1.061, + "eval_steps_per_second": 0.133, + "eval_wer": 0.11190898069570084, + "step": 30660 + } + ], + "logging_steps": 25, + "max_steps": 30660, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2.636131979132928e+20, + "trial_name": null, + "trial_params": null +}