{ "best_metric": 43.67604267701261, "best_model_checkpoint": "./checkpoint-5000", "epoch": 399.0025, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 5e-09, "loss": 0.3934, "step": 25 }, { "epoch": 3.0, "learning_rate": 1e-08, "loss": 0.3953, "step": 50 }, { "epoch": 5.0, "learning_rate": 1.5e-08, "loss": 0.392, "step": 75 }, { "epoch": 7.0, "learning_rate": 2e-08, "loss": 0.3899, "step": 100 }, { "epoch": 9.0, "learning_rate": 2.5e-08, "loss": 0.3868, "step": 125 }, { "epoch": 11.0, "learning_rate": 3e-08, "loss": 0.3835, "step": 150 }, { "epoch": 13.0, "learning_rate": 3.4999999999999996e-08, "loss": 0.3789, "step": 175 }, { "epoch": 15.0, "learning_rate": 4e-08, "loss": 0.377, "step": 200 }, { "epoch": 17.0, "learning_rate": 4.5e-08, "loss": 0.3706, "step": 225 }, { "epoch": 19.0, "learning_rate": 5e-08, "loss": 0.3669, "step": 250 }, { "epoch": 21.0, "learning_rate": 5.5e-08, "loss": 0.3616, "step": 275 }, { "epoch": 23.0, "learning_rate": 6e-08, "loss": 0.3557, "step": 300 }, { "epoch": 25.0, "learning_rate": 6.5e-08, "loss": 0.3527, "step": 325 }, { "epoch": 27.0, "learning_rate": 6.999999999999999e-08, "loss": 0.3482, "step": 350 }, { "epoch": 29.0, "learning_rate": 7.5e-08, "loss": 0.3453, "step": 375 }, { "epoch": 31.0, "learning_rate": 8e-08, "loss": 0.3423, "step": 400 }, { "epoch": 33.0, "learning_rate": 8.5e-08, "loss": 0.3398, "step": 425 }, { "epoch": 35.0, "learning_rate": 9e-08, "loss": 0.3336, "step": 450 }, { "epoch": 37.0, "learning_rate": 9.499999999999999e-08, "loss": 0.3316, "step": 475 }, { "epoch": 39.0, "learning_rate": 1e-07, "loss": 0.3282, "step": 500 }, { "epoch": 41.0, "learning_rate": 9.944444444444444e-08, "loss": 0.3281, "step": 525 }, { "epoch": 43.0, "learning_rate": 9.888888888888889e-08, "loss": 0.3235, "step": 550 }, { "epoch": 45.0, "learning_rate": 9.833333333333333e-08, "loss": 0.3191, "step": 575 }, { "epoch": 47.0, "learning_rate": 9.777777777777778e-08, "loss": 0.3169, "step": 600 }, { "epoch": 49.0, "learning_rate": 9.722222222222221e-08, "loss": 0.3145, "step": 625 }, { "epoch": 51.0, "learning_rate": 9.666666666666666e-08, "loss": 0.3132, "step": 650 }, { "epoch": 53.0, "learning_rate": 9.611111111111111e-08, "loss": 0.3098, "step": 675 }, { "epoch": 55.0, "learning_rate": 9.555555555555556e-08, "loss": 0.3081, "step": 700 }, { "epoch": 57.0, "learning_rate": 9.499999999999999e-08, "loss": 0.3071, "step": 725 }, { "epoch": 59.0, "learning_rate": 9.444444444444444e-08, "loss": 0.3049, "step": 750 }, { "epoch": 61.0, "learning_rate": 9.388888888888889e-08, "loss": 0.3055, "step": 775 }, { "epoch": 63.0, "learning_rate": 9.333333333333334e-08, "loss": 0.3025, "step": 800 }, { "epoch": 65.0, "learning_rate": 9.277777777777778e-08, "loss": 0.299, "step": 825 }, { "epoch": 67.0, "learning_rate": 9.222222222222222e-08, "loss": 0.2982, "step": 850 }, { "epoch": 69.0, "learning_rate": 9.166666666666665e-08, "loss": 0.2941, "step": 875 }, { "epoch": 71.0, "learning_rate": 9.11111111111111e-08, "loss": 0.2956, "step": 900 }, { "epoch": 73.0, "learning_rate": 9.055555555555555e-08, "loss": 0.296, "step": 925 }, { "epoch": 75.0, "learning_rate": 9e-08, "loss": 0.2913, "step": 950 }, { "epoch": 77.0, "learning_rate": 8.944444444444445e-08, "loss": 0.2905, "step": 975 }, { "epoch": 79.0, "learning_rate": 8.888888888888888e-08, "loss": 0.2889, "step": 1000 }, { "epoch": 79.0, "eval_loss": 0.27302461862564087, "eval_runtime": 254.0636, "eval_samples_per_second": 3.491, "eval_steps_per_second": 0.11, "eval_wer": 45.024248302618815, "step": 1000 }, { "epoch": 81.0, "learning_rate": 8.833333333333333e-08, "loss": 0.2881, "step": 1025 }, { "epoch": 83.0, "learning_rate": 8.777777777777778e-08, "loss": 0.2864, "step": 1050 }, { "epoch": 85.0, "learning_rate": 8.722222222222221e-08, "loss": 0.2846, "step": 1075 }, { "epoch": 87.0, "learning_rate": 8.666666666666666e-08, "loss": 0.2832, "step": 1100 }, { "epoch": 89.0, "learning_rate": 8.611111111111111e-08, "loss": 0.2821, "step": 1125 }, { "epoch": 91.0, "learning_rate": 8.555555555555555e-08, "loss": 0.2809, "step": 1150 }, { "epoch": 93.0, "learning_rate": 8.5e-08, "loss": 0.2802, "step": 1175 }, { "epoch": 95.0, "learning_rate": 8.444444444444444e-08, "loss": 0.2809, "step": 1200 }, { "epoch": 97.0, "learning_rate": 8.388888888888889e-08, "loss": 0.2774, "step": 1225 }, { "epoch": 99.0, "learning_rate": 8.333333333333334e-08, "loss": 0.2772, "step": 1250 }, { "epoch": 101.0, "learning_rate": 8.277777777777777e-08, "loss": 0.2761, "step": 1275 }, { "epoch": 103.0, "learning_rate": 8.222222222222221e-08, "loss": 0.2739, "step": 1300 }, { "epoch": 105.0, "learning_rate": 8.166666666666666e-08, "loss": 0.2738, "step": 1325 }, { "epoch": 107.0, "learning_rate": 8.11111111111111e-08, "loss": 0.2719, "step": 1350 }, { "epoch": 109.0, "learning_rate": 8.055555555555555e-08, "loss": 0.2725, "step": 1375 }, { "epoch": 111.0, "learning_rate": 8e-08, "loss": 0.2704, "step": 1400 }, { "epoch": 113.0, "learning_rate": 7.944444444444444e-08, "loss": 0.2699, "step": 1425 }, { "epoch": 115.0, "learning_rate": 7.888888888888889e-08, "loss": 0.2698, "step": 1450 }, { "epoch": 117.0, "learning_rate": 7.833333333333333e-08, "loss": 0.2675, "step": 1475 }, { "epoch": 119.0, "learning_rate": 7.777777777777778e-08, "loss": 0.2676, "step": 1500 }, { "epoch": 121.0, "learning_rate": 7.722222222222222e-08, "loss": 0.2664, "step": 1525 }, { "epoch": 123.0, "learning_rate": 7.666666666666666e-08, "loss": 0.2638, "step": 1550 }, { "epoch": 125.0, "learning_rate": 7.61111111111111e-08, "loss": 0.2645, "step": 1575 }, { "epoch": 127.0, "learning_rate": 7.555555555555555e-08, "loss": 0.2631, "step": 1600 }, { "epoch": 129.0, "learning_rate": 7.5e-08, "loss": 0.263, "step": 1625 }, { "epoch": 131.0, "learning_rate": 7.444444444444444e-08, "loss": 0.2617, "step": 1650 }, { "epoch": 133.0, "learning_rate": 7.388888888888889e-08, "loss": 0.2608, "step": 1675 }, { "epoch": 135.0, "learning_rate": 7.333333333333333e-08, "loss": 0.2592, "step": 1700 }, { "epoch": 137.0, "learning_rate": 7.277777777777778e-08, "loss": 0.2582, "step": 1725 }, { "epoch": 139.0, "learning_rate": 7.222222222222221e-08, "loss": 0.2577, "step": 1750 }, { "epoch": 141.0, "learning_rate": 7.166666666666666e-08, "loss": 0.2568, "step": 1775 }, { "epoch": 143.0, "learning_rate": 7.111111111111111e-08, "loss": 0.2576, "step": 1800 }, { "epoch": 145.0, "learning_rate": 7.055555555555556e-08, "loss": 0.2562, "step": 1825 }, { "epoch": 147.0, "learning_rate": 6.999999999999999e-08, "loss": 0.2552, "step": 1850 }, { "epoch": 149.0, "learning_rate": 6.944444444444444e-08, "loss": 0.2566, "step": 1875 }, { "epoch": 151.0, "learning_rate": 6.888888888888889e-08, "loss": 0.253, "step": 1900 }, { "epoch": 153.0, "learning_rate": 6.833333333333334e-08, "loss": 0.2518, "step": 1925 }, { "epoch": 155.0, "learning_rate": 6.777777777777778e-08, "loss": 0.2521, "step": 1950 }, { "epoch": 157.0, "learning_rate": 6.722222222222222e-08, "loss": 0.2508, "step": 1975 }, { "epoch": 159.0, "learning_rate": 6.666666666666665e-08, "loss": 0.2527, "step": 2000 }, { "epoch": 159.0, "eval_loss": 0.2593269646167755, "eval_runtime": 240.0382, "eval_samples_per_second": 3.695, "eval_steps_per_second": 0.117, "eval_wer": 44.46168768186227, "step": 2000 }, { "epoch": 161.0, "learning_rate": 6.61111111111111e-08, "loss": 0.2506, "step": 2025 }, { "epoch": 163.0, "learning_rate": 6.555555555555555e-08, "loss": 0.2492, "step": 2050 }, { "epoch": 165.0, "learning_rate": 6.5e-08, "loss": 0.2494, "step": 2075 }, { "epoch": 167.0, "learning_rate": 6.444444444444445e-08, "loss": 0.2481, "step": 2100 }, { "epoch": 169.0, "learning_rate": 6.388888888888888e-08, "loss": 0.2478, "step": 2125 }, { "epoch": 171.0, "learning_rate": 6.333333333333333e-08, "loss": 0.2472, "step": 2150 }, { "epoch": 173.0, "learning_rate": 6.277777777777778e-08, "loss": 0.2468, "step": 2175 }, { "epoch": 175.0, "learning_rate": 6.222222222222221e-08, "loss": 0.2475, "step": 2200 }, { "epoch": 177.0, "learning_rate": 6.166666666666666e-08, "loss": 0.2463, "step": 2225 }, { "epoch": 179.0, "learning_rate": 6.111111111111111e-08, "loss": 0.2447, "step": 2250 }, { "epoch": 181.0, "learning_rate": 6.055555555555555e-08, "loss": 0.2441, "step": 2275 }, { "epoch": 183.0, "learning_rate": 6e-08, "loss": 0.243, "step": 2300 }, { "epoch": 185.0, "learning_rate": 5.944444444444444e-08, "loss": 0.2428, "step": 2325 }, { "epoch": 187.0, "learning_rate": 5.888888888888889e-08, "loss": 0.2431, "step": 2350 }, { "epoch": 189.0, "learning_rate": 5.833333333333333e-08, "loss": 0.2417, "step": 2375 }, { "epoch": 191.0, "learning_rate": 5.777777777777777e-08, "loss": 0.2413, "step": 2400 }, { "epoch": 193.0, "learning_rate": 5.7222222222222216e-08, "loss": 0.2425, "step": 2425 }, { "epoch": 195.0, "learning_rate": 5.6666666666666665e-08, "loss": 0.2403, "step": 2450 }, { "epoch": 197.0, "learning_rate": 5.6111111111111106e-08, "loss": 0.2405, "step": 2475 }, { "epoch": 199.0, "learning_rate": 5.5555555555555555e-08, "loss": 0.2406, "step": 2500 }, { "epoch": 201.0, "learning_rate": 5.5e-08, "loss": 0.2387, "step": 2525 }, { "epoch": 203.0, "learning_rate": 5.444444444444444e-08, "loss": 0.2382, "step": 2550 }, { "epoch": 205.0, "learning_rate": 5.3888888888888886e-08, "loss": 0.2373, "step": 2575 }, { "epoch": 207.0, "learning_rate": 5.333333333333333e-08, "loss": 0.2377, "step": 2600 }, { "epoch": 209.0, "learning_rate": 5.2777777777777776e-08, "loss": 0.2379, "step": 2625 }, { "epoch": 211.0, "learning_rate": 5.2222222222222224e-08, "loss": 0.2356, "step": 2650 }, { "epoch": 213.0, "learning_rate": 5.166666666666667e-08, "loss": 0.2367, "step": 2675 }, { "epoch": 215.0, "learning_rate": 5.111111111111111e-08, "loss": 0.2355, "step": 2700 }, { "epoch": 217.0, "learning_rate": 5.055555555555555e-08, "loss": 0.2346, "step": 2725 }, { "epoch": 219.0, "learning_rate": 5e-08, "loss": 0.2341, "step": 2750 }, { "epoch": 221.0, "learning_rate": 4.9444444444444446e-08, "loss": 0.2343, "step": 2775 }, { "epoch": 223.0, "learning_rate": 4.888888888888889e-08, "loss": 0.2343, "step": 2800 }, { "epoch": 225.0, "learning_rate": 4.833333333333333e-08, "loss": 0.234, "step": 2825 }, { "epoch": 227.0, "learning_rate": 4.777777777777778e-08, "loss": 0.2321, "step": 2850 }, { "epoch": 229.0, "learning_rate": 4.722222222222222e-08, "loss": 0.2324, "step": 2875 }, { "epoch": 231.0, "learning_rate": 4.666666666666667e-08, "loss": 0.2322, "step": 2900 }, { "epoch": 233.0, "learning_rate": 4.611111111111111e-08, "loss": 0.2322, "step": 2925 }, { "epoch": 235.0, "learning_rate": 4.555555555555555e-08, "loss": 0.2316, "step": 2950 }, { "epoch": 237.0, "learning_rate": 4.5e-08, "loss": 0.2313, "step": 2975 }, { "epoch": 239.0, "learning_rate": 4.444444444444444e-08, "loss": 0.2306, "step": 3000 }, { "epoch": 239.0, "eval_loss": 0.2538779079914093, "eval_runtime": 204.9012, "eval_samples_per_second": 4.329, "eval_steps_per_second": 0.137, "eval_wer": 44.06159068865179, "step": 3000 }, { "epoch": 241.0, "learning_rate": 4.388888888888889e-08, "loss": 0.2297, "step": 3025 }, { "epoch": 243.0, "learning_rate": 4.333333333333333e-08, "loss": 0.2298, "step": 3050 }, { "epoch": 245.0, "learning_rate": 4.277777777777777e-08, "loss": 0.2299, "step": 3075 }, { "epoch": 247.0, "learning_rate": 4.222222222222222e-08, "loss": 0.2291, "step": 3100 }, { "epoch": 249.0, "learning_rate": 4.166666666666667e-08, "loss": 0.229, "step": 3125 }, { "epoch": 251.0, "learning_rate": 4.1111111111111104e-08, "loss": 0.2289, "step": 3150 }, { "epoch": 253.0, "learning_rate": 4.055555555555555e-08, "loss": 0.2276, "step": 3175 }, { "epoch": 255.0, "learning_rate": 4e-08, "loss": 0.227, "step": 3200 }, { "epoch": 257.0, "learning_rate": 3.944444444444444e-08, "loss": 0.2274, "step": 3225 }, { "epoch": 259.0, "learning_rate": 3.888888888888889e-08, "loss": 0.2258, "step": 3250 }, { "epoch": 261.0, "learning_rate": 3.833333333333333e-08, "loss": 0.2274, "step": 3275 }, { "epoch": 263.0, "learning_rate": 3.7777777777777774e-08, "loss": 0.2259, "step": 3300 }, { "epoch": 265.0, "learning_rate": 3.722222222222222e-08, "loss": 0.2263, "step": 3325 }, { "epoch": 267.0, "learning_rate": 3.6666666666666664e-08, "loss": 0.2269, "step": 3350 }, { "epoch": 269.0, "learning_rate": 3.6111111111111106e-08, "loss": 0.2258, "step": 3375 }, { "epoch": 271.0, "learning_rate": 3.5555555555555554e-08, "loss": 0.2238, "step": 3400 }, { "epoch": 273.0, "learning_rate": 3.4999999999999996e-08, "loss": 0.2268, "step": 3425 }, { "epoch": 275.0, "learning_rate": 3.4444444444444444e-08, "loss": 0.2261, "step": 3450 }, { "epoch": 277.0, "learning_rate": 3.388888888888889e-08, "loss": 0.2256, "step": 3475 }, { "epoch": 279.0, "learning_rate": 3.333333333333333e-08, "loss": 0.2244, "step": 3500 }, { "epoch": 281.0, "learning_rate": 3.2777777777777776e-08, "loss": 0.2238, "step": 3525 }, { "epoch": 283.0, "learning_rate": 3.2222222222222224e-08, "loss": 0.224, "step": 3550 }, { "epoch": 285.0, "learning_rate": 3.1666666666666666e-08, "loss": 0.223, "step": 3575 }, { "epoch": 287.0, "learning_rate": 3.111111111111111e-08, "loss": 0.2227, "step": 3600 }, { "epoch": 289.0, "learning_rate": 3.0555555555555556e-08, "loss": 0.2226, "step": 3625 }, { "epoch": 291.0, "learning_rate": 3e-08, "loss": 0.2227, "step": 3650 }, { "epoch": 293.0, "learning_rate": 2.9444444444444446e-08, "loss": 0.223, "step": 3675 }, { "epoch": 295.0, "learning_rate": 2.8888888888888884e-08, "loss": 0.2226, "step": 3700 }, { "epoch": 297.0, "learning_rate": 2.8333333333333332e-08, "loss": 0.222, "step": 3725 }, { "epoch": 299.0, "learning_rate": 2.7777777777777777e-08, "loss": 0.223, "step": 3750 }, { "epoch": 301.0, "learning_rate": 2.722222222222222e-08, "loss": 0.222, "step": 3775 }, { "epoch": 303.0, "learning_rate": 2.6666666666666664e-08, "loss": 0.2205, "step": 3800 }, { "epoch": 305.0, "learning_rate": 2.6111111111111112e-08, "loss": 0.22, "step": 3825 }, { "epoch": 307.0, "learning_rate": 2.5555555555555554e-08, "loss": 0.2215, "step": 3850 }, { "epoch": 309.0, "learning_rate": 2.5e-08, "loss": 0.2208, "step": 3875 }, { "epoch": 311.0, "learning_rate": 2.4444444444444444e-08, "loss": 0.2205, "step": 3900 }, { "epoch": 313.0, "learning_rate": 2.388888888888889e-08, "loss": 0.2201, "step": 3925 }, { "epoch": 315.0, "learning_rate": 2.3333333333333334e-08, "loss": 0.22, "step": 3950 }, { "epoch": 317.0, "learning_rate": 2.2777777777777775e-08, "loss": 0.2204, "step": 3975 }, { "epoch": 319.0, "learning_rate": 2.222222222222222e-08, "loss": 0.2191, "step": 4000 }, { "epoch": 319.0, "eval_loss": 0.2515379786491394, "eval_runtime": 204.8512, "eval_samples_per_second": 4.33, "eval_steps_per_second": 0.137, "eval_wer": 43.736663433559656, "step": 4000 }, { "epoch": 321.0, "learning_rate": 2.1666666666666665e-08, "loss": 0.2207, "step": 4025 }, { "epoch": 323.0, "learning_rate": 2.111111111111111e-08, "loss": 0.2188, "step": 4050 }, { "epoch": 325.0, "learning_rate": 2.0555555555555552e-08, "loss": 0.2191, "step": 4075 }, { "epoch": 327.0, "learning_rate": 2e-08, "loss": 0.2192, "step": 4100 }, { "epoch": 329.0, "learning_rate": 1.9466666666666666e-08, "loss": 0.2188, "step": 4125 }, { "epoch": 331.0, "learning_rate": 1.891111111111111e-08, "loss": 0.2185, "step": 4150 }, { "epoch": 333.0, "learning_rate": 1.8355555555555556e-08, "loss": 0.2201, "step": 4175 }, { "epoch": 335.0, "learning_rate": 1.7799999999999997e-08, "loss": 0.2177, "step": 4200 }, { "epoch": 337.0, "learning_rate": 1.7244444444444446e-08, "loss": 0.2186, "step": 4225 }, { "epoch": 339.0, "learning_rate": 1.6688888888888887e-08, "loss": 0.2176, "step": 4250 }, { "epoch": 341.0, "learning_rate": 1.6133333333333332e-08, "loss": 0.2184, "step": 4275 }, { "epoch": 343.0, "learning_rate": 1.5577777777777777e-08, "loss": 0.2189, "step": 4300 }, { "epoch": 345.0, "learning_rate": 1.5022222222222222e-08, "loss": 0.2179, "step": 4325 }, { "epoch": 347.0, "learning_rate": 1.4466666666666666e-08, "loss": 0.2173, "step": 4350 }, { "epoch": 349.0, "learning_rate": 1.3911111111111109e-08, "loss": 0.2169, "step": 4375 }, { "epoch": 351.0, "learning_rate": 1.3355555555555555e-08, "loss": 0.2172, "step": 4400 }, { "epoch": 353.0, "learning_rate": 1.28e-08, "loss": 0.2168, "step": 4425 }, { "epoch": 355.0, "learning_rate": 1.2244444444444444e-08, "loss": 0.217, "step": 4450 }, { "epoch": 357.0, "learning_rate": 1.1688888888888889e-08, "loss": 0.2185, "step": 4475 }, { "epoch": 359.0, "learning_rate": 1.1133333333333334e-08, "loss": 0.2172, "step": 4500 }, { "epoch": 361.0, "learning_rate": 1.0577777777777777e-08, "loss": 0.2173, "step": 4525 }, { "epoch": 363.0, "learning_rate": 1.0022222222222222e-08, "loss": 0.2176, "step": 4550 }, { "epoch": 365.0, "learning_rate": 9.466666666666665e-09, "loss": 0.2158, "step": 4575 }, { "epoch": 367.0, "learning_rate": 8.91111111111111e-09, "loss": 0.2158, "step": 4600 }, { "epoch": 369.0, "learning_rate": 8.355555555555555e-09, "loss": 0.2167, "step": 4625 }, { "epoch": 371.0, "learning_rate": 7.8e-09, "loss": 0.2172, "step": 4650 }, { "epoch": 373.0, "learning_rate": 7.2444444444444445e-09, "loss": 0.2161, "step": 4675 }, { "epoch": 375.0, "learning_rate": 6.688888888888889e-09, "loss": 0.2155, "step": 4700 }, { "epoch": 377.0, "learning_rate": 6.133333333333333e-09, "loss": 0.2166, "step": 4725 }, { "epoch": 379.0, "learning_rate": 5.577777777777778e-09, "loss": 0.2168, "step": 4750 }, { "epoch": 381.0, "learning_rate": 5.022222222222222e-09, "loss": 0.2172, "step": 4775 }, { "epoch": 383.0, "learning_rate": 4.466666666666666e-09, "loss": 0.2149, "step": 4800 }, { "epoch": 385.0, "learning_rate": 3.911111111111111e-09, "loss": 0.2161, "step": 4825 }, { "epoch": 387.0, "learning_rate": 3.3555555555555553e-09, "loss": 0.2164, "step": 4850 }, { "epoch": 389.0, "learning_rate": 2.8e-09, "loss": 0.2162, "step": 4875 }, { "epoch": 391.0, "learning_rate": 2.2444444444444444e-09, "loss": 0.2157, "step": 4900 }, { "epoch": 393.0, "learning_rate": 1.6888888888888886e-09, "loss": 0.2178, "step": 4925 }, { "epoch": 395.0, "learning_rate": 1.1333333333333333e-09, "loss": 0.2158, "step": 4950 }, { "epoch": 397.0, "learning_rate": 5.777777777777777e-10, "loss": 0.2157, "step": 4975 }, { "epoch": 399.0, "learning_rate": 2.2222222222222222e-11, "loss": 0.2164, "step": 5000 }, { "epoch": 399.0, "eval_loss": 0.25085458159446716, "eval_runtime": 204.438, "eval_samples_per_second": 4.339, "eval_steps_per_second": 0.137, "eval_wer": 43.67604267701261, "step": 5000 }, { "epoch": 399.0, "step": 5000, "total_flos": 2.0184430804992e+19, "train_loss": 0.2566693991661072, "train_runtime": 24404.6676, "train_samples_per_second": 13.112, "train_steps_per_second": 0.205 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 1000, "total_flos": 2.0184430804992e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }