|
{ |
|
"best_metric": 0.75, |
|
"best_model_checkpoint": "CTMAE-P2-V2-S5/checkpoint-5481", |
|
"epoch": 49.02, |
|
"eval_steps": 500, |
|
"global_step": 13050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007662835249042146, |
|
"grad_norm": 6.346773624420166, |
|
"learning_rate": 7.662835249042146e-08, |
|
"loss": 0.6907, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0015325670498084292, |
|
"grad_norm": 5.90932559967041, |
|
"learning_rate": 1.5325670498084292e-07, |
|
"loss": 0.7055, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0022988505747126436, |
|
"grad_norm": 5.780531883239746, |
|
"learning_rate": 2.2988505747126437e-07, |
|
"loss": 0.7097, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0030651340996168583, |
|
"grad_norm": 5.774871826171875, |
|
"learning_rate": 3.0651340996168583e-07, |
|
"loss": 0.7055, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0038314176245210726, |
|
"grad_norm": 5.868458271026611, |
|
"learning_rate": 3.831417624521073e-07, |
|
"loss": 0.6849, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004597701149425287, |
|
"grad_norm": 6.17386531829834, |
|
"learning_rate": 4.5977011494252875e-07, |
|
"loss": 0.6856, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0053639846743295016, |
|
"grad_norm": 5.900820732116699, |
|
"learning_rate": 5.363984674329502e-07, |
|
"loss": 0.6583, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.006130268199233717, |
|
"grad_norm": 5.429736137390137, |
|
"learning_rate": 6.130268199233717e-07, |
|
"loss": 0.6776, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.006896551724137931, |
|
"grad_norm": 5.372462272644043, |
|
"learning_rate": 6.896551724137931e-07, |
|
"loss": 0.6184, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.007662835249042145, |
|
"grad_norm": 6.988162994384766, |
|
"learning_rate": 7.662835249042146e-07, |
|
"loss": 0.7473, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00842911877394636, |
|
"grad_norm": 6.189302921295166, |
|
"learning_rate": 8.429118773946361e-07, |
|
"loss": 0.6017, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.009195402298850575, |
|
"grad_norm": 10.103041648864746, |
|
"learning_rate": 9.195402298850575e-07, |
|
"loss": 0.5932, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.00996168582375479, |
|
"grad_norm": 6.596471786499023, |
|
"learning_rate": 9.96168582375479e-07, |
|
"loss": 0.4923, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.010727969348659003, |
|
"grad_norm": 11.013703346252441, |
|
"learning_rate": 1.0727969348659004e-06, |
|
"loss": 0.7138, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.011494252873563218, |
|
"grad_norm": 36.27220916748047, |
|
"learning_rate": 1.1494252873563219e-06, |
|
"loss": 0.5364, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.012260536398467433, |
|
"grad_norm": 47.619544982910156, |
|
"learning_rate": 1.2260536398467433e-06, |
|
"loss": 0.5242, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.013026819923371647, |
|
"grad_norm": 9.586103439331055, |
|
"learning_rate": 1.3026819923371648e-06, |
|
"loss": 0.5501, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.013793103448275862, |
|
"grad_norm": 4.5196404457092285, |
|
"learning_rate": 1.3793103448275862e-06, |
|
"loss": 0.5952, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.014559386973180077, |
|
"grad_norm": 2.0037076473236084, |
|
"learning_rate": 1.455938697318008e-06, |
|
"loss": 0.9824, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01532567049808429, |
|
"grad_norm": 2.1096646785736084, |
|
"learning_rate": 1.5325670498084292e-06, |
|
"loss": 1.1898, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.016091954022988506, |
|
"grad_norm": 1.9899245500564575, |
|
"learning_rate": 1.6091954022988506e-06, |
|
"loss": 1.17, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.01685823754789272, |
|
"grad_norm": 1.1962019205093384, |
|
"learning_rate": 1.6858237547892723e-06, |
|
"loss": 1.6618, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.017624521072796936, |
|
"grad_norm": 1.718502402305603, |
|
"learning_rate": 1.7624521072796935e-06, |
|
"loss": 0.0149, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.01839080459770115, |
|
"grad_norm": 0.6003769040107727, |
|
"learning_rate": 1.839080459770115e-06, |
|
"loss": 0.4818, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.019157088122605363, |
|
"grad_norm": 0.2685077488422394, |
|
"learning_rate": 1.9157088122605367e-06, |
|
"loss": 1.4995, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01992337164750958, |
|
"grad_norm": 3.3518788814544678, |
|
"learning_rate": 1.992337164750958e-06, |
|
"loss": 1.5874, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.2577168941497803, |
|
"eval_runtime": 17.2962, |
|
"eval_samples_per_second": 2.544, |
|
"eval_steps_per_second": 2.544, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.0006896551724138, |
|
"grad_norm": 0.36198291182518005, |
|
"learning_rate": 2.0689655172413796e-06, |
|
"loss": 1.1205, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.001455938697318, |
|
"grad_norm": 200.73394775390625, |
|
"learning_rate": 2.145593869731801e-06, |
|
"loss": 1.5736, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.0022222222222221, |
|
"grad_norm": 0.7660533785820007, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 1.0927, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.0029885057471264, |
|
"grad_norm": 0.678782045841217, |
|
"learning_rate": 2.2988505747126437e-06, |
|
"loss": 1.1059, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0037547892720307, |
|
"grad_norm": 0.39177432656288147, |
|
"learning_rate": 2.3754789272030654e-06, |
|
"loss": 2.7744, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.004521072796935, |
|
"grad_norm": 0.5383715033531189, |
|
"learning_rate": 2.4521072796934867e-06, |
|
"loss": 1.4481, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0052873563218392, |
|
"grad_norm": 0.34945148229599, |
|
"learning_rate": 2.5287356321839083e-06, |
|
"loss": 1.7115, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.0060536398467432, |
|
"grad_norm": 0.3675661087036133, |
|
"learning_rate": 2.6053639846743296e-06, |
|
"loss": 1.0641, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.0068199233716475, |
|
"grad_norm": 3.8542375564575195, |
|
"learning_rate": 2.6819923371647512e-06, |
|
"loss": 1.8624, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0075862068965518, |
|
"grad_norm": 58.72666549682617, |
|
"learning_rate": 2.7586206896551725e-06, |
|
"loss": 1.485, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.008352490421456, |
|
"grad_norm": 1.154325246810913, |
|
"learning_rate": 2.835249042145594e-06, |
|
"loss": 1.9096, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.00911877394636, |
|
"grad_norm": 0.2620074450969696, |
|
"learning_rate": 2.911877394636016e-06, |
|
"loss": 0.5438, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0098850574712643, |
|
"grad_norm": 140.94412231445312, |
|
"learning_rate": 2.988505747126437e-06, |
|
"loss": 2.6305, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0106513409961686, |
|
"grad_norm": 0.6025245785713196, |
|
"learning_rate": 3.0651340996168583e-06, |
|
"loss": 0.511, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0114176245210729, |
|
"grad_norm": 53.17525100708008, |
|
"learning_rate": 3.14176245210728e-06, |
|
"loss": 1.0735, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.012183908045977, |
|
"grad_norm": 0.38336479663848877, |
|
"learning_rate": 3.2183908045977012e-06, |
|
"loss": 1.5545, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.0129501915708812, |
|
"grad_norm": 0.14846405386924744, |
|
"learning_rate": 3.295019157088123e-06, |
|
"loss": 1.5034, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0137164750957854, |
|
"grad_norm": 0.36722663044929504, |
|
"learning_rate": 3.3716475095785446e-06, |
|
"loss": 1.6936, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0144827586206897, |
|
"grad_norm": 0.12956643104553223, |
|
"learning_rate": 3.448275862068966e-06, |
|
"loss": 0.0044, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.015249042145594, |
|
"grad_norm": 0.4265776574611664, |
|
"learning_rate": 3.524904214559387e-06, |
|
"loss": 1.1646, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.016015325670498, |
|
"grad_norm": 0.769011914730072, |
|
"learning_rate": 3.6015325670498087e-06, |
|
"loss": 1.7305, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0167816091954023, |
|
"grad_norm": 0.31617698073387146, |
|
"learning_rate": 3.67816091954023e-06, |
|
"loss": 2.1435, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.0175478927203065, |
|
"grad_norm": 0.43429428339004517, |
|
"learning_rate": 3.7547892720306517e-06, |
|
"loss": 2.1552, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.0183141762452108, |
|
"grad_norm": 0.358375608921051, |
|
"learning_rate": 3.831417624521073e-06, |
|
"loss": 0.5247, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0190804597701149, |
|
"grad_norm": 0.1731932908296585, |
|
"learning_rate": 3.908045977011495e-06, |
|
"loss": 0.5345, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0198467432950191, |
|
"grad_norm": 0.14412061870098114, |
|
"learning_rate": 3.984674329501916e-06, |
|
"loss": 0.581, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.4954493045806885, |
|
"eval_runtime": 16.0898, |
|
"eval_samples_per_second": 2.735, |
|
"eval_steps_per_second": 2.735, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 2.0006130268199236, |
|
"grad_norm": 35.207305908203125, |
|
"learning_rate": 4.0613026819923375e-06, |
|
"loss": 1.7376, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.0013793103448276, |
|
"grad_norm": 2.767150640487671, |
|
"learning_rate": 4.137931034482759e-06, |
|
"loss": 2.3744, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.0021455938697317, |
|
"grad_norm": 38.506370544433594, |
|
"learning_rate": 4.214559386973181e-06, |
|
"loss": 0.8521, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.002911877394636, |
|
"grad_norm": 37.02397918701172, |
|
"learning_rate": 4.291187739463602e-06, |
|
"loss": 2.2811, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.00367816091954, |
|
"grad_norm": 0.6310027837753296, |
|
"learning_rate": 4.367816091954023e-06, |
|
"loss": 1.6249, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.0044444444444443, |
|
"grad_norm": 0.5581138730049133, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 1.2296, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.0052107279693487, |
|
"grad_norm": 0.3627966344356537, |
|
"learning_rate": 4.521072796934866e-06, |
|
"loss": 0.8842, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.005977011494253, |
|
"grad_norm": 0.6147855520248413, |
|
"learning_rate": 4.5977011494252875e-06, |
|
"loss": 2.2479, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.0067432950191573, |
|
"grad_norm": 0.2630392909049988, |
|
"learning_rate": 4.674329501915709e-06, |
|
"loss": 0.4058, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.0075095785440613, |
|
"grad_norm": 0.28262192010879517, |
|
"learning_rate": 4.750957854406131e-06, |
|
"loss": 1.0011, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.0082758620689654, |
|
"grad_norm": 0.1196753978729248, |
|
"learning_rate": 4.8275862068965525e-06, |
|
"loss": 0.005, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.00904214559387, |
|
"grad_norm": 53.83554458618164, |
|
"learning_rate": 4.904214559386973e-06, |
|
"loss": 2.2931, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.009808429118774, |
|
"grad_norm": 41.31382751464844, |
|
"learning_rate": 4.980842911877395e-06, |
|
"loss": 1.026, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.0105747126436784, |
|
"grad_norm": 0.20709460973739624, |
|
"learning_rate": 5.057471264367817e-06, |
|
"loss": 0.5098, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.0113409961685824, |
|
"grad_norm": 37.621158599853516, |
|
"learning_rate": 5.134099616858238e-06, |
|
"loss": 1.0657, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.0121072796934865, |
|
"grad_norm": 36.911006927490234, |
|
"learning_rate": 5.210727969348659e-06, |
|
"loss": 1.9582, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.012873563218391, |
|
"grad_norm": 0.32122182846069336, |
|
"learning_rate": 5.287356321839081e-06, |
|
"loss": 0.4836, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.013639846743295, |
|
"grad_norm": 37.699398040771484, |
|
"learning_rate": 5.3639846743295025e-06, |
|
"loss": 1.4902, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.014406130268199, |
|
"grad_norm": 0.277885377407074, |
|
"learning_rate": 5.440613026819924e-06, |
|
"loss": 1.0307, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.0151724137931035, |
|
"grad_norm": 2.4057579040527344, |
|
"learning_rate": 5.517241379310345e-06, |
|
"loss": 1.7573, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.0159386973180076, |
|
"grad_norm": 40.69501495361328, |
|
"learning_rate": 5.593869731800766e-06, |
|
"loss": 1.8534, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.016704980842912, |
|
"grad_norm": 0.2895404100418091, |
|
"learning_rate": 5.670498084291188e-06, |
|
"loss": 0.484, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.017471264367816, |
|
"grad_norm": 28.58993911743164, |
|
"learning_rate": 5.747126436781609e-06, |
|
"loss": 0.5675, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.01823754789272, |
|
"grad_norm": 0.10329358279705048, |
|
"learning_rate": 5.823754789272032e-06, |
|
"loss": 0.5667, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.0190038314176246, |
|
"grad_norm": 0.468605637550354, |
|
"learning_rate": 5.9003831417624525e-06, |
|
"loss": 1.0882, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.0197701149425287, |
|
"grad_norm": 0.1823910027742386, |
|
"learning_rate": 5.977011494252874e-06, |
|
"loss": 1.5552, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.214362382888794, |
|
"eval_runtime": 13.8812, |
|
"eval_samples_per_second": 3.17, |
|
"eval_steps_per_second": 3.17, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 3.000536398467433, |
|
"grad_norm": 29.735700607299805, |
|
"learning_rate": 6.053639846743296e-06, |
|
"loss": 1.4541, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.001302681992337, |
|
"grad_norm": 0.21792061626911163, |
|
"learning_rate": 6.130268199233717e-06, |
|
"loss": 0.5156, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.0020689655172412, |
|
"grad_norm": 0.23201414942741394, |
|
"learning_rate": 6.206896551724138e-06, |
|
"loss": 1.0177, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.0028352490421457, |
|
"grad_norm": 0.14051030576229095, |
|
"learning_rate": 6.28352490421456e-06, |
|
"loss": 1.0353, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.0036015325670498, |
|
"grad_norm": 0.3842865228652954, |
|
"learning_rate": 6.360153256704982e-06, |
|
"loss": 1.4528, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.004367816091954, |
|
"grad_norm": 31.66147232055664, |
|
"learning_rate": 6.4367816091954025e-06, |
|
"loss": 0.8975, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.0051340996168583, |
|
"grad_norm": 0.5294602513313293, |
|
"learning_rate": 6.513409961685824e-06, |
|
"loss": 1.8061, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.0059003831417623, |
|
"grad_norm": 0.23839212954044342, |
|
"learning_rate": 6.590038314176246e-06, |
|
"loss": 0.8339, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.006666666666667, |
|
"grad_norm": 0.3017413914203644, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.457, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.007432950191571, |
|
"grad_norm": 0.20103318989276886, |
|
"learning_rate": 6.743295019157089e-06, |
|
"loss": 0.009, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.008199233716475, |
|
"grad_norm": 32.83889389038086, |
|
"learning_rate": 6.81992337164751e-06, |
|
"loss": 1.067, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.0089655172413794, |
|
"grad_norm": 0.5330418348312378, |
|
"learning_rate": 6.896551724137932e-06, |
|
"loss": 1.5405, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.0097318007662834, |
|
"grad_norm": 0.2803409993648529, |
|
"learning_rate": 6.973180076628353e-06, |
|
"loss": 0.9349, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.010498084291188, |
|
"grad_norm": 28.8873291015625, |
|
"learning_rate": 7.049808429118774e-06, |
|
"loss": 2.5514, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.011264367816092, |
|
"grad_norm": 0.5354589223861694, |
|
"learning_rate": 7.126436781609196e-06, |
|
"loss": 0.0343, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.012030651340996, |
|
"grad_norm": 68.06498718261719, |
|
"learning_rate": 7.2030651340996175e-06, |
|
"loss": 1.9193, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.0127969348659005, |
|
"grad_norm": 0.4350726902484894, |
|
"learning_rate": 7.279693486590039e-06, |
|
"loss": 1.0043, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.0135632183908045, |
|
"grad_norm": 27.31719207763672, |
|
"learning_rate": 7.35632183908046e-06, |
|
"loss": 1.0381, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.014329501915709, |
|
"grad_norm": 34.04944610595703, |
|
"learning_rate": 7.4329501915708825e-06, |
|
"loss": 2.5194, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.015095785440613, |
|
"grad_norm": 0.4477466642856598, |
|
"learning_rate": 7.509578544061303e-06, |
|
"loss": 0.4492, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.015862068965517, |
|
"grad_norm": 0.3116937577724457, |
|
"learning_rate": 7.586206896551724e-06, |
|
"loss": 1.3244, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.0166283524904216, |
|
"grad_norm": 0.24950920045375824, |
|
"learning_rate": 7.662835249042147e-06, |
|
"loss": 0.4459, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0173946360153256, |
|
"grad_norm": 25.60928726196289, |
|
"learning_rate": 7.739463601532567e-06, |
|
"loss": 2.3843, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.0181609195402297, |
|
"grad_norm": 1.749172329902649, |
|
"learning_rate": 7.81609195402299e-06, |
|
"loss": 1.7902, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.018927203065134, |
|
"grad_norm": 0.9840354919433594, |
|
"learning_rate": 7.89272030651341e-06, |
|
"loss": 0.8843, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.0196934865900382, |
|
"grad_norm": 0.3095335364341736, |
|
"learning_rate": 7.969348659003832e-06, |
|
"loss": 0.7597, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.138788938522339, |
|
"eval_runtime": 15.0623, |
|
"eval_samples_per_second": 2.921, |
|
"eval_steps_per_second": 2.921, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 4.000459770114943, |
|
"grad_norm": 0.18423452973365784, |
|
"learning_rate": 8.045977011494253e-06, |
|
"loss": 0.98, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.001226053639847, |
|
"grad_norm": 28.28644371032715, |
|
"learning_rate": 8.122605363984675e-06, |
|
"loss": 0.5345, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.001992337164751, |
|
"grad_norm": 29.955291748046875, |
|
"learning_rate": 8.199233716475097e-06, |
|
"loss": 1.0563, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.002758620689655, |
|
"grad_norm": 0.5588271617889404, |
|
"learning_rate": 8.275862068965518e-06, |
|
"loss": 1.8741, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.00352490421456, |
|
"grad_norm": 0.7834395170211792, |
|
"learning_rate": 8.35249042145594e-06, |
|
"loss": 0.8162, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.004291187739463, |
|
"grad_norm": 25.948200225830078, |
|
"learning_rate": 8.429118773946362e-06, |
|
"loss": 1.3937, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.005057471264368, |
|
"grad_norm": 29.971668243408203, |
|
"learning_rate": 8.505747126436782e-06, |
|
"loss": 2.0311, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.005823754789272, |
|
"grad_norm": 1.7803348302841187, |
|
"learning_rate": 8.582375478927203e-06, |
|
"loss": 1.294, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.006590038314176, |
|
"grad_norm": 0.4401187300682068, |
|
"learning_rate": 8.659003831417625e-06, |
|
"loss": 0.9076, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.00735632183908, |
|
"grad_norm": 42.1232795715332, |
|
"learning_rate": 8.735632183908047e-06, |
|
"loss": 1.4754, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.008122605363985, |
|
"grad_norm": 0.4527631402015686, |
|
"learning_rate": 8.812260536398468e-06, |
|
"loss": 0.9216, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.0088888888888885, |
|
"grad_norm": 0.08525290340185165, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 1.1031, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.009655172413793, |
|
"grad_norm": 0.3350205719470978, |
|
"learning_rate": 8.965517241379312e-06, |
|
"loss": 1.4999, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.0104214559386975, |
|
"grad_norm": 0.8244456052780151, |
|
"learning_rate": 9.042145593869732e-06, |
|
"loss": 1.636, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.011187739463602, |
|
"grad_norm": 0.27975335717201233, |
|
"learning_rate": 9.118773946360155e-06, |
|
"loss": 1.2409, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.011954022988506, |
|
"grad_norm": 0.38494256138801575, |
|
"learning_rate": 9.195402298850575e-06, |
|
"loss": 0.8623, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.01272030651341, |
|
"grad_norm": 0.1227990984916687, |
|
"learning_rate": 9.272030651340997e-06, |
|
"loss": 0.5032, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.0134865900383145, |
|
"grad_norm": 0.16210174560546875, |
|
"learning_rate": 9.348659003831418e-06, |
|
"loss": 0.5292, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.014252873563218, |
|
"grad_norm": 0.25423163175582886, |
|
"learning_rate": 9.42528735632184e-06, |
|
"loss": 2.1277, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.015019157088123, |
|
"grad_norm": 0.5340194702148438, |
|
"learning_rate": 9.501915708812262e-06, |
|
"loss": 1.6607, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 4.015785440613027, |
|
"grad_norm": 24.610986709594727, |
|
"learning_rate": 9.578544061302683e-06, |
|
"loss": 1.1063, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.016551724137931, |
|
"grad_norm": 0.35801875591278076, |
|
"learning_rate": 9.655172413793105e-06, |
|
"loss": 0.4161, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.017318007662835, |
|
"grad_norm": 0.11957996338605881, |
|
"learning_rate": 9.731800766283525e-06, |
|
"loss": 0.5095, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.01808429118774, |
|
"grad_norm": 0.0835915356874466, |
|
"learning_rate": 9.808429118773947e-06, |
|
"loss": 1.0919, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.018850574712643, |
|
"grad_norm": 0.11581507325172424, |
|
"learning_rate": 9.885057471264368e-06, |
|
"loss": 1.0691, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.019616858237548, |
|
"grad_norm": 24.233352661132812, |
|
"learning_rate": 9.96168582375479e-06, |
|
"loss": 1.8176, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 1.5856646299362183, |
|
"eval_runtime": 14.5025, |
|
"eval_samples_per_second": 3.034, |
|
"eval_steps_per_second": 3.034, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 5.000383141762452, |
|
"grad_norm": 0.24897067248821259, |
|
"learning_rate": 9.995742869306088e-06, |
|
"loss": 0.7786, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 5.001149425287356, |
|
"grad_norm": 0.12967245280742645, |
|
"learning_rate": 9.987228607918263e-06, |
|
"loss": 1.2452, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 5.001915708812261, |
|
"grad_norm": 0.284026563167572, |
|
"learning_rate": 9.97871434653044e-06, |
|
"loss": 0.4414, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 5.002681992337164, |
|
"grad_norm": 0.2654504179954529, |
|
"learning_rate": 9.970200085142615e-06, |
|
"loss": 1.4701, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 5.003448275862069, |
|
"grad_norm": 26.214235305786133, |
|
"learning_rate": 9.96168582375479e-06, |
|
"loss": 2.1511, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5.004214559386973, |
|
"grad_norm": 24.513473510742188, |
|
"learning_rate": 9.953171562366965e-06, |
|
"loss": 1.0803, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 5.004980842911878, |
|
"grad_norm": 0.24664299190044403, |
|
"learning_rate": 9.944657300979142e-06, |
|
"loss": 0.0163, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 5.005747126436781, |
|
"grad_norm": 0.23416149616241455, |
|
"learning_rate": 9.936143039591317e-06, |
|
"loss": 1.4651, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 5.006513409961686, |
|
"grad_norm": 29.56205940246582, |
|
"learning_rate": 9.927628778203492e-06, |
|
"loss": 2.2793, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 5.00727969348659, |
|
"grad_norm": 0.3607791066169739, |
|
"learning_rate": 9.919114516815667e-06, |
|
"loss": 0.3963, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.008045977011494, |
|
"grad_norm": 24.871665954589844, |
|
"learning_rate": 9.910600255427842e-06, |
|
"loss": 2.05, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 5.0088122605363985, |
|
"grad_norm": 0.23402588069438934, |
|
"learning_rate": 9.902085994040018e-06, |
|
"loss": 0.4222, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 5.009578544061303, |
|
"grad_norm": 25.41075325012207, |
|
"learning_rate": 9.893571732652193e-06, |
|
"loss": 1.3493, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 5.010344827586207, |
|
"grad_norm": 0.5918464064598083, |
|
"learning_rate": 9.885057471264368e-06, |
|
"loss": 1.7511, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 5.011111111111111, |
|
"grad_norm": 0.42463651299476624, |
|
"learning_rate": 9.876543209876543e-06, |
|
"loss": 0.7238, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 5.011877394636016, |
|
"grad_norm": 0.17891407012939453, |
|
"learning_rate": 9.86802894848872e-06, |
|
"loss": 0.8326, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 5.012643678160919, |
|
"grad_norm": 0.16813120245933533, |
|
"learning_rate": 9.859514687100895e-06, |
|
"loss": 0.9384, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 5.013409961685824, |
|
"grad_norm": 0.5184898376464844, |
|
"learning_rate": 9.85100042571307e-06, |
|
"loss": 1.7822, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 5.014176245210728, |
|
"grad_norm": 0.4033224284648895, |
|
"learning_rate": 9.842486164325245e-06, |
|
"loss": 0.8116, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 5.014942528735633, |
|
"grad_norm": 0.5266627669334412, |
|
"learning_rate": 9.833971902937422e-06, |
|
"loss": 1.0952, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.015708812260536, |
|
"grad_norm": 0.19357925653457642, |
|
"learning_rate": 9.825457641549597e-06, |
|
"loss": 1.0991, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 5.016475095785441, |
|
"grad_norm": 0.08832190185785294, |
|
"learning_rate": 9.816943380161772e-06, |
|
"loss": 0.3087, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 5.017241379310345, |
|
"grad_norm": 30.98222541809082, |
|
"learning_rate": 9.808429118773947e-06, |
|
"loss": 1.8135, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 5.018007662835249, |
|
"grad_norm": 23.943462371826172, |
|
"learning_rate": 9.799914857386122e-06, |
|
"loss": 1.6737, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 5.018773946360153, |
|
"grad_norm": 0.1893884688615799, |
|
"learning_rate": 9.791400595998298e-06, |
|
"loss": 0.3859, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 5.019540229885058, |
|
"grad_norm": 0.2271726280450821, |
|
"learning_rate": 9.782886334610473e-06, |
|
"loss": 0.9596, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 1.9454078674316406, |
|
"eval_runtime": 14.8718, |
|
"eval_samples_per_second": 2.959, |
|
"eval_steps_per_second": 2.959, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 6.000306513409962, |
|
"grad_norm": 26.258907318115234, |
|
"learning_rate": 9.774372073222648e-06, |
|
"loss": 0.9372, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 6.001072796934866, |
|
"grad_norm": 23.58159065246582, |
|
"learning_rate": 9.765857811834825e-06, |
|
"loss": 1.3394, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 6.00183908045977, |
|
"grad_norm": 0.7323487401008606, |
|
"learning_rate": 9.757343550447e-06, |
|
"loss": 0.8471, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 6.002605363984674, |
|
"grad_norm": 0.14237739145755768, |
|
"learning_rate": 9.748829289059175e-06, |
|
"loss": 0.7524, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.003371647509579, |
|
"grad_norm": 0.5373148322105408, |
|
"learning_rate": 9.74031502767135e-06, |
|
"loss": 1.9909, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 6.0041379310344825, |
|
"grad_norm": 0.13252149522304535, |
|
"learning_rate": 9.731800766283525e-06, |
|
"loss": 0.8562, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 6.004904214559387, |
|
"grad_norm": 49.63750457763672, |
|
"learning_rate": 9.723286504895702e-06, |
|
"loss": 2.1695, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 6.005670498084291, |
|
"grad_norm": 0.09695886820554733, |
|
"learning_rate": 9.714772243507877e-06, |
|
"loss": 0.0143, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 6.006436781609195, |
|
"grad_norm": 26.921308517456055, |
|
"learning_rate": 9.706257982120052e-06, |
|
"loss": 1.0552, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.0072030651340995, |
|
"grad_norm": 31.4003849029541, |
|
"learning_rate": 9.697743720732228e-06, |
|
"loss": 1.0826, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 6.007969348659004, |
|
"grad_norm": 27.47873306274414, |
|
"learning_rate": 9.689229459344403e-06, |
|
"loss": 1.9293, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 6.008735632183908, |
|
"grad_norm": 0.9135425686836243, |
|
"learning_rate": 9.680715197956578e-06, |
|
"loss": 0.7049, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 6.009501915708812, |
|
"grad_norm": 0.8477075695991516, |
|
"learning_rate": 9.672200936568753e-06, |
|
"loss": 0.7642, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 6.010268199233717, |
|
"grad_norm": 26.746427536010742, |
|
"learning_rate": 9.663686675180928e-06, |
|
"loss": 1.7131, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.011034482758621, |
|
"grad_norm": 0.21620433032512665, |
|
"learning_rate": 9.655172413793105e-06, |
|
"loss": 1.4327, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 6.011800766283525, |
|
"grad_norm": 2.045217275619507, |
|
"learning_rate": 9.64665815240528e-06, |
|
"loss": 1.2185, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 6.012567049808429, |
|
"grad_norm": 0.4398377537727356, |
|
"learning_rate": 9.638143891017455e-06, |
|
"loss": 0.0241, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 6.013333333333334, |
|
"grad_norm": 0.16005031764507294, |
|
"learning_rate": 9.62962962962963e-06, |
|
"loss": 1.4657, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 6.014099616858237, |
|
"grad_norm": 26.34746742248535, |
|
"learning_rate": 9.621115368241805e-06, |
|
"loss": 1.4696, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 6.014865900383142, |
|
"grad_norm": 0.1783309131860733, |
|
"learning_rate": 9.612601106853982e-06, |
|
"loss": 0.8318, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 6.015632183908046, |
|
"grad_norm": 0.03938477113842964, |
|
"learning_rate": 9.604086845466157e-06, |
|
"loss": 0.3561, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 6.01639846743295, |
|
"grad_norm": 0.06440555304288864, |
|
"learning_rate": 9.595572584078332e-06, |
|
"loss": 1.1959, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 6.017164750957854, |
|
"grad_norm": 23.690397262573242, |
|
"learning_rate": 9.587058322690508e-06, |
|
"loss": 2.3981, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 6.017931034482759, |
|
"grad_norm": 26.19554328918457, |
|
"learning_rate": 9.578544061302683e-06, |
|
"loss": 1.4422, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.018697318007663, |
|
"grad_norm": 0.27188414335250854, |
|
"learning_rate": 9.570029799914858e-06, |
|
"loss": 0.4129, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 6.019463601532567, |
|
"grad_norm": 0.28943178057670593, |
|
"learning_rate": 9.561515538527033e-06, |
|
"loss": 0.8402, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.0550131797790527, |
|
"eval_runtime": 14.8777, |
|
"eval_samples_per_second": 2.957, |
|
"eval_steps_per_second": 2.957, |
|
"step": 1827 |
|
}, |
|
{ |
|
"epoch": 7.000229885057471, |
|
"grad_norm": 24.880643844604492, |
|
"learning_rate": 9.553001277139208e-06, |
|
"loss": 0.9041, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 7.000996168582375, |
|
"grad_norm": 28.165273666381836, |
|
"learning_rate": 9.544487015751385e-06, |
|
"loss": 1.7479, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 7.00176245210728, |
|
"grad_norm": 0.4542657136917114, |
|
"learning_rate": 9.53597275436356e-06, |
|
"loss": 0.7588, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 7.0025287356321835, |
|
"grad_norm": 33.722007751464844, |
|
"learning_rate": 9.527458492975735e-06, |
|
"loss": 1.4655, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 7.003295019157088, |
|
"grad_norm": 0.47786277532577515, |
|
"learning_rate": 9.518944231587912e-06, |
|
"loss": 0.959, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 7.0040613026819925, |
|
"grad_norm": 0.09276581555604935, |
|
"learning_rate": 9.510429970200085e-06, |
|
"loss": 0.4812, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 7.004827586206897, |
|
"grad_norm": 27.1479434967041, |
|
"learning_rate": 9.501915708812262e-06, |
|
"loss": 1.0251, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 7.0055938697318005, |
|
"grad_norm": 25.373821258544922, |
|
"learning_rate": 9.493401447424437e-06, |
|
"loss": 1.7737, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.006360153256705, |
|
"grad_norm": 0.850132942199707, |
|
"learning_rate": 9.484887186036612e-06, |
|
"loss": 1.2012, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 7.0071264367816095, |
|
"grad_norm": 0.4961250424385071, |
|
"learning_rate": 9.476372924648788e-06, |
|
"loss": 1.0435, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 7.007892720306513, |
|
"grad_norm": 0.12135482579469681, |
|
"learning_rate": 9.467858663260963e-06, |
|
"loss": 0.7643, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 7.008659003831418, |
|
"grad_norm": 0.16561543941497803, |
|
"learning_rate": 9.459344401873138e-06, |
|
"loss": 1.7143, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 7.009425287356322, |
|
"grad_norm": 23.327978134155273, |
|
"learning_rate": 9.450830140485315e-06, |
|
"loss": 0.7526, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 7.010191570881226, |
|
"grad_norm": 28.888317108154297, |
|
"learning_rate": 9.442315879097488e-06, |
|
"loss": 1.0215, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 7.01095785440613, |
|
"grad_norm": 27.170988082885742, |
|
"learning_rate": 9.433801617709665e-06, |
|
"loss": 1.6417, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 7.011724137931035, |
|
"grad_norm": 0.11138776689767838, |
|
"learning_rate": 9.42528735632184e-06, |
|
"loss": 0.0106, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 7.012490421455938, |
|
"grad_norm": 42.729454040527344, |
|
"learning_rate": 9.416773094934015e-06, |
|
"loss": 0.9887, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 7.013256704980843, |
|
"grad_norm": 1.001929759979248, |
|
"learning_rate": 9.408258833546192e-06, |
|
"loss": 0.9068, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.014022988505747, |
|
"grad_norm": 0.5514718294143677, |
|
"learning_rate": 9.399744572158365e-06, |
|
"loss": 0.8516, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 7.014789272030652, |
|
"grad_norm": 38.44541931152344, |
|
"learning_rate": 9.391230310770542e-06, |
|
"loss": 1.6097, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 7.015555555555555, |
|
"grad_norm": 0.18232332170009613, |
|
"learning_rate": 9.382716049382717e-06, |
|
"loss": 1.0932, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 7.01632183908046, |
|
"grad_norm": 0.11249390244483948, |
|
"learning_rate": 9.374201787994892e-06, |
|
"loss": 0.4727, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 7.017088122605364, |
|
"grad_norm": 0.15842938423156738, |
|
"learning_rate": 9.365687526607068e-06, |
|
"loss": 0.8513, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 7.017854406130268, |
|
"grad_norm": 0.4671227037906647, |
|
"learning_rate": 9.357173265219243e-06, |
|
"loss": 2.2532, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 7.018620689655172, |
|
"grad_norm": 1.2637794017791748, |
|
"learning_rate": 9.348659003831418e-06, |
|
"loss": 1.2856, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 7.019386973180077, |
|
"grad_norm": 35.64610290527344, |
|
"learning_rate": 9.340144742443595e-06, |
|
"loss": 1.0823, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 1.7863643169403076, |
|
"eval_runtime": 16.0795, |
|
"eval_samples_per_second": 2.736, |
|
"eval_steps_per_second": 2.736, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 8.00015325670498, |
|
"grad_norm": 0.1616896390914917, |
|
"learning_rate": 9.331630481055768e-06, |
|
"loss": 1.2282, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 8.000919540229885, |
|
"grad_norm": 68.63554382324219, |
|
"learning_rate": 9.323116219667945e-06, |
|
"loss": 0.9345, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.001685823754789, |
|
"grad_norm": 0.11135207116603851, |
|
"learning_rate": 9.31460195828012e-06, |
|
"loss": 1.4961, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 8.002452107279694, |
|
"grad_norm": 0.13253170251846313, |
|
"learning_rate": 9.306087696892295e-06, |
|
"loss": 0.8539, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 8.003218390804598, |
|
"grad_norm": 5.191343307495117, |
|
"learning_rate": 9.297573435504472e-06, |
|
"loss": 2.1507, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 8.003984674329502, |
|
"grad_norm": 0.2696518898010254, |
|
"learning_rate": 9.289059174116647e-06, |
|
"loss": 0.0308, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 8.004750957854407, |
|
"grad_norm": 0.2299196422100067, |
|
"learning_rate": 9.280544912728822e-06, |
|
"loss": 0.5293, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 8.00551724137931, |
|
"grad_norm": 28.45562171936035, |
|
"learning_rate": 9.272030651340997e-06, |
|
"loss": 0.6167, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 8.006283524904214, |
|
"grad_norm": 32.24641418457031, |
|
"learning_rate": 9.263516389953172e-06, |
|
"loss": 3.1816, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 8.00704980842912, |
|
"grad_norm": 2.399733066558838, |
|
"learning_rate": 9.255002128565348e-06, |
|
"loss": 2.2774, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 8.007816091954023, |
|
"grad_norm": 25.827760696411133, |
|
"learning_rate": 9.246487867177523e-06, |
|
"loss": 0.9662, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 8.008582375478927, |
|
"grad_norm": 32.011016845703125, |
|
"learning_rate": 9.237973605789698e-06, |
|
"loss": 0.9282, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.009348659003832, |
|
"grad_norm": 1.2816479206085205, |
|
"learning_rate": 9.229459344401875e-06, |
|
"loss": 1.7448, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 8.010114942528736, |
|
"grad_norm": 0.3145643472671509, |
|
"learning_rate": 9.220945083014048e-06, |
|
"loss": 0.5686, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 8.01088122605364, |
|
"grad_norm": 0.2079908400774002, |
|
"learning_rate": 9.212430821626225e-06, |
|
"loss": 0.4686, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 8.011647509578545, |
|
"grad_norm": 0.23505721986293793, |
|
"learning_rate": 9.2039165602384e-06, |
|
"loss": 1.0458, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 8.012413793103448, |
|
"grad_norm": 0.26654016971588135, |
|
"learning_rate": 9.195402298850575e-06, |
|
"loss": 0.9613, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 8.013180076628352, |
|
"grad_norm": 0.05446112900972366, |
|
"learning_rate": 9.186888037462752e-06, |
|
"loss": 1.0667, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 8.013946360153257, |
|
"grad_norm": 30.373464584350586, |
|
"learning_rate": 9.178373776074927e-06, |
|
"loss": 1.1519, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 8.01471264367816, |
|
"grad_norm": 0.09900560975074768, |
|
"learning_rate": 9.169859514687102e-06, |
|
"loss": 0.7377, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 8.015478927203064, |
|
"grad_norm": 41.964866638183594, |
|
"learning_rate": 9.161345253299277e-06, |
|
"loss": 0.8308, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 8.01624521072797, |
|
"grad_norm": 0.2613646388053894, |
|
"learning_rate": 9.152830991911452e-06, |
|
"loss": 1.5092, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.017011494252873, |
|
"grad_norm": 0.11744105070829391, |
|
"learning_rate": 9.144316730523628e-06, |
|
"loss": 0.8981, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 8.017777777777777, |
|
"grad_norm": 40.48182678222656, |
|
"learning_rate": 9.135802469135803e-06, |
|
"loss": 1.6858, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 8.018544061302682, |
|
"grad_norm": 0.37574025988578796, |
|
"learning_rate": 9.127288207747978e-06, |
|
"loss": 0.3569, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 8.019310344827586, |
|
"grad_norm": 0.22550857067108154, |
|
"learning_rate": 9.118773946360155e-06, |
|
"loss": 1.0229, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 1.859169840812683, |
|
"eval_runtime": 14.8718, |
|
"eval_samples_per_second": 2.959, |
|
"eval_steps_per_second": 2.959, |
|
"step": 2349 |
|
}, |
|
{ |
|
"epoch": 9.00007662835249, |
|
"grad_norm": 0.1769675612449646, |
|
"learning_rate": 9.110259684972328e-06, |
|
"loss": 1.0183, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 9.000842911877395, |
|
"grad_norm": 38.53457260131836, |
|
"learning_rate": 9.101745423584505e-06, |
|
"loss": 0.4817, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 9.001609195402299, |
|
"grad_norm": 23.685789108276367, |
|
"learning_rate": 9.09323116219668e-06, |
|
"loss": 1.1308, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 9.002375478927203, |
|
"grad_norm": 42.43982696533203, |
|
"learning_rate": 9.084716900808855e-06, |
|
"loss": 0.4238, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 9.003141762452108, |
|
"grad_norm": 0.3625785708427429, |
|
"learning_rate": 9.076202639421032e-06, |
|
"loss": 1.1276, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 9.003908045977012, |
|
"grad_norm": 22.1848201751709, |
|
"learning_rate": 9.067688378033207e-06, |
|
"loss": 1.6327, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.004674329501915, |
|
"grad_norm": 18.3494873046875, |
|
"learning_rate": 9.059174116645382e-06, |
|
"loss": 1.3836, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 9.00544061302682, |
|
"grad_norm": 0.14898690581321716, |
|
"learning_rate": 9.050659855257558e-06, |
|
"loss": 0.82, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 9.006206896551724, |
|
"grad_norm": 1.0918123722076416, |
|
"learning_rate": 9.042145593869732e-06, |
|
"loss": 2.1076, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 9.006973180076628, |
|
"grad_norm": 64.87985229492188, |
|
"learning_rate": 9.033631332481908e-06, |
|
"loss": 1.3321, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 9.007739463601533, |
|
"grad_norm": 1.12923002243042, |
|
"learning_rate": 9.025117071094083e-06, |
|
"loss": 0.9004, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 9.008505747126437, |
|
"grad_norm": 0.2782602608203888, |
|
"learning_rate": 9.016602809706258e-06, |
|
"loss": 1.3348, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 9.00927203065134, |
|
"grad_norm": 0.02168104238808155, |
|
"learning_rate": 9.008088548318435e-06, |
|
"loss": 0.45, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 9.010038314176246, |
|
"grad_norm": 0.033409785479307175, |
|
"learning_rate": 8.999574286930608e-06, |
|
"loss": 0.0018, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 9.01080459770115, |
|
"grad_norm": 0.055789679288864136, |
|
"learning_rate": 8.991060025542785e-06, |
|
"loss": 0.0013, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 9.011570881226053, |
|
"grad_norm": 0.16263394057750702, |
|
"learning_rate": 8.98254576415496e-06, |
|
"loss": 1.363, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.012337164750958, |
|
"grad_norm": 0.09605167806148529, |
|
"learning_rate": 8.974031502767135e-06, |
|
"loss": 0.9454, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 9.013103448275862, |
|
"grad_norm": 2.4232611656188965, |
|
"learning_rate": 8.965517241379312e-06, |
|
"loss": 2.008, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 9.013869731800765, |
|
"grad_norm": 1.330285906791687, |
|
"learning_rate": 8.957002979991487e-06, |
|
"loss": 0.2915, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 9.01463601532567, |
|
"grad_norm": 1.4357658624649048, |
|
"learning_rate": 8.948488718603662e-06, |
|
"loss": 2.0742, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 9.015402298850574, |
|
"grad_norm": 2.14530611038208, |
|
"learning_rate": 8.939974457215838e-06, |
|
"loss": 1.1691, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 9.01616858237548, |
|
"grad_norm": 33.60592269897461, |
|
"learning_rate": 8.931460195828012e-06, |
|
"loss": 0.5806, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 9.016934865900383, |
|
"grad_norm": 68.0654525756836, |
|
"learning_rate": 8.922945934440188e-06, |
|
"loss": 1.2719, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 9.017701149425287, |
|
"grad_norm": 53.02241134643555, |
|
"learning_rate": 8.914431673052363e-06, |
|
"loss": 0.4034, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 9.018467432950192, |
|
"grad_norm": 0.1054820865392685, |
|
"learning_rate": 8.905917411664538e-06, |
|
"loss": 0.7365, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 9.019233716475096, |
|
"grad_norm": 4.63501501083374, |
|
"learning_rate": 8.897403150276715e-06, |
|
"loss": 2.1887, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 2.4743075370788574, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.7113, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 1.4044773578643799, |
|
"eval_runtime": 14.9606, |
|
"eval_samples_per_second": 2.941, |
|
"eval_steps_per_second": 2.941, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 10.000766283524904, |
|
"grad_norm": 0.18622168898582458, |
|
"learning_rate": 8.880374627501065e-06, |
|
"loss": 0.7462, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 10.001532567049809, |
|
"grad_norm": 0.38399988412857056, |
|
"learning_rate": 8.87186036611324e-06, |
|
"loss": 1.5916, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 10.002298850574713, |
|
"grad_norm": 0.06477665156126022, |
|
"learning_rate": 8.863346104725415e-06, |
|
"loss": 0.5319, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 10.003065134099616, |
|
"grad_norm": 49.976531982421875, |
|
"learning_rate": 8.854831843337592e-06, |
|
"loss": 2.3173, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 10.003831417624522, |
|
"grad_norm": 0.24009446799755096, |
|
"learning_rate": 8.846317581949767e-06, |
|
"loss": 1.4548, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 10.004597701149425, |
|
"grad_norm": 0.04304501414299011, |
|
"learning_rate": 8.837803320561942e-06, |
|
"loss": 0.0137, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 10.005363984674329, |
|
"grad_norm": 0.7517868280410767, |
|
"learning_rate": 8.829289059174118e-06, |
|
"loss": 2.1527, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 10.006130268199234, |
|
"grad_norm": 0.11649739742279053, |
|
"learning_rate": 8.820774797786292e-06, |
|
"loss": 1.0451, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 10.006896551724138, |
|
"grad_norm": 0.03151066228747368, |
|
"learning_rate": 8.812260536398468e-06, |
|
"loss": 0.0062, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.007662835249041, |
|
"grad_norm": 0.040688611567020416, |
|
"learning_rate": 8.803746275010643e-06, |
|
"loss": 2.3048, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 10.008429118773947, |
|
"grad_norm": 2.731215476989746, |
|
"learning_rate": 8.795232013622818e-06, |
|
"loss": 0.7031, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 10.00919540229885, |
|
"grad_norm": 0.04532806947827339, |
|
"learning_rate": 8.786717752234995e-06, |
|
"loss": 0.0176, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.009961685823756, |
|
"grad_norm": 0.033764056861400604, |
|
"learning_rate": 8.77820349084717e-06, |
|
"loss": 1.0768, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 10.01072796934866, |
|
"grad_norm": 30.16205596923828, |
|
"learning_rate": 8.769689229459345e-06, |
|
"loss": 1.6044, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 10.011494252873563, |
|
"grad_norm": 0.25742805004119873, |
|
"learning_rate": 8.76117496807152e-06, |
|
"loss": 1.2001, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 10.012260536398468, |
|
"grad_norm": 0.061720699071884155, |
|
"learning_rate": 8.752660706683695e-06, |
|
"loss": 0.2926, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 10.013026819923372, |
|
"grad_norm": 0.0824761614203453, |
|
"learning_rate": 8.744146445295872e-06, |
|
"loss": 1.1917, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 10.013793103448275, |
|
"grad_norm": 0.027958814054727554, |
|
"learning_rate": 8.735632183908047e-06, |
|
"loss": 0.3411, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 10.01455938697318, |
|
"grad_norm": 54.794559478759766, |
|
"learning_rate": 8.727117922520222e-06, |
|
"loss": 2.0185, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.015325670498084, |
|
"grad_norm": 0.8871710300445557, |
|
"learning_rate": 8.718603661132398e-06, |
|
"loss": 1.1621, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 10.016091954022988, |
|
"grad_norm": 26.996959686279297, |
|
"learning_rate": 8.710089399744572e-06, |
|
"loss": 1.9966, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 10.016858237547893, |
|
"grad_norm": 22.15346908569336, |
|
"learning_rate": 8.701575138356748e-06, |
|
"loss": 1.2582, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 10.017624521072797, |
|
"grad_norm": 0.3889228403568268, |
|
"learning_rate": 8.693060876968923e-06, |
|
"loss": 0.0322, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 10.0183908045977, |
|
"grad_norm": 0.32267317175865173, |
|
"learning_rate": 8.684546615581098e-06, |
|
"loss": 0.574, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 10.019157088122606, |
|
"grad_norm": 0.024278851225972176, |
|
"learning_rate": 8.676032354193275e-06, |
|
"loss": 0.9715, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 10.01992337164751, |
|
"grad_norm": 0.0393819659948349, |
|
"learning_rate": 8.66751809280545e-06, |
|
"loss": 1.3068, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 1.453585147857666, |
|
"eval_runtime": 15.8452, |
|
"eval_samples_per_second": 2.777, |
|
"eval_steps_per_second": 2.777, |
|
"step": 2871 |
|
}, |
|
{ |
|
"epoch": 11.000689655172414, |
|
"grad_norm": 0.37053388357162476, |
|
"learning_rate": 8.659003831417625e-06, |
|
"loss": 0.9443, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 11.001455938697317, |
|
"grad_norm": 0.22762715816497803, |
|
"learning_rate": 8.650489570029802e-06, |
|
"loss": 2.1313, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 11.002222222222223, |
|
"grad_norm": 2.5229854583740234, |
|
"learning_rate": 8.641975308641975e-06, |
|
"loss": 0.7189, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 11.002988505747126, |
|
"grad_norm": 35.79051971435547, |
|
"learning_rate": 8.633461047254152e-06, |
|
"loss": 1.4409, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 11.00375478927203, |
|
"grad_norm": 0.2708571255207062, |
|
"learning_rate": 8.624946785866327e-06, |
|
"loss": 0.524, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 11.004521072796935, |
|
"grad_norm": 61.25001907348633, |
|
"learning_rate": 8.616432524478502e-06, |
|
"loss": 1.42, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 11.005287356321839, |
|
"grad_norm": 35.30380630493164, |
|
"learning_rate": 8.607918263090678e-06, |
|
"loss": 1.1967, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 11.006053639846744, |
|
"grad_norm": 38.052555084228516, |
|
"learning_rate": 8.599404001702853e-06, |
|
"loss": 2.0261, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 11.006819923371648, |
|
"grad_norm": 0.355120986700058, |
|
"learning_rate": 8.590889740315028e-06, |
|
"loss": 0.7783, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 11.007586206896551, |
|
"grad_norm": 3.1586191654205322, |
|
"learning_rate": 8.582375478927203e-06, |
|
"loss": 1.0561, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 11.008352490421457, |
|
"grad_norm": 22.636682510375977, |
|
"learning_rate": 8.573861217539378e-06, |
|
"loss": 0.636, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 11.00911877394636, |
|
"grad_norm": 48.90217208862305, |
|
"learning_rate": 8.565346956151555e-06, |
|
"loss": 0.9843, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 11.009885057471264, |
|
"grad_norm": 0.023744115605950356, |
|
"learning_rate": 8.55683269476373e-06, |
|
"loss": 1.1279, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.01065134099617, |
|
"grad_norm": 58.53615188598633, |
|
"learning_rate": 8.548318433375905e-06, |
|
"loss": 0.4682, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 11.011417624521073, |
|
"grad_norm": 0.06051109731197357, |
|
"learning_rate": 8.539804171988082e-06, |
|
"loss": 0.0048, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 11.012183908045976, |
|
"grad_norm": 46.59239196777344, |
|
"learning_rate": 8.531289910600255e-06, |
|
"loss": 1.129, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 11.012950191570882, |
|
"grad_norm": 0.076231449842453, |
|
"learning_rate": 8.522775649212432e-06, |
|
"loss": 1.7491, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 11.013716475095785, |
|
"grad_norm": 30.431854248046875, |
|
"learning_rate": 8.514261387824607e-06, |
|
"loss": 1.7486, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 11.014482758620689, |
|
"grad_norm": 0.35167890787124634, |
|
"learning_rate": 8.505747126436782e-06, |
|
"loss": 0.291, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 11.015249042145594, |
|
"grad_norm": 0.02206863835453987, |
|
"learning_rate": 8.497232865048958e-06, |
|
"loss": 1.2294, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 11.016015325670498, |
|
"grad_norm": 0.06610409170389175, |
|
"learning_rate": 8.488718603661133e-06, |
|
"loss": 0.6221, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 11.016781609195402, |
|
"grad_norm": 0.012659168802201748, |
|
"learning_rate": 8.480204342273308e-06, |
|
"loss": 0.4978, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 11.017547892720307, |
|
"grad_norm": 0.3502841889858246, |
|
"learning_rate": 8.471690080885483e-06, |
|
"loss": 1.3976, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 11.01831417624521, |
|
"grad_norm": 46.04039764404297, |
|
"learning_rate": 8.463175819497658e-06, |
|
"loss": 1.5974, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 11.019080459770114, |
|
"grad_norm": 0.3014867305755615, |
|
"learning_rate": 8.454661558109835e-06, |
|
"loss": 0.0097, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.01984674329502, |
|
"grad_norm": 57.508182525634766, |
|
"learning_rate": 8.44614729672201e-06, |
|
"loss": 1.7964, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 1.8694943189620972, |
|
"eval_runtime": 16.0476, |
|
"eval_samples_per_second": 2.742, |
|
"eval_steps_per_second": 2.742, |
|
"step": 3132 |
|
}, |
|
{ |
|
"epoch": 12.000613026819924, |
|
"grad_norm": 0.08212984353303909, |
|
"learning_rate": 8.437633035334185e-06, |
|
"loss": 0.3527, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 12.001379310344827, |
|
"grad_norm": 1.7721196413040161, |
|
"learning_rate": 8.429118773946362e-06, |
|
"loss": 0.7301, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 12.002145593869733, |
|
"grad_norm": 69.38721466064453, |
|
"learning_rate": 8.420604512558537e-06, |
|
"loss": 1.1776, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 12.002911877394636, |
|
"grad_norm": 53.12070083618164, |
|
"learning_rate": 8.412090251170712e-06, |
|
"loss": 0.228, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 12.00367816091954, |
|
"grad_norm": 5.521632194519043, |
|
"learning_rate": 8.403575989782887e-06, |
|
"loss": 0.2917, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 12.004444444444445, |
|
"grad_norm": 0.30451831221580505, |
|
"learning_rate": 8.395061728395062e-06, |
|
"loss": 0.9035, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 12.005210727969349, |
|
"grad_norm": 0.03218378126621246, |
|
"learning_rate": 8.386547467007238e-06, |
|
"loss": 0.5426, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 12.005977011494252, |
|
"grad_norm": 1.2288103103637695, |
|
"learning_rate": 8.378033205619413e-06, |
|
"loss": 1.7878, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 12.006743295019158, |
|
"grad_norm": 0.010741832666099072, |
|
"learning_rate": 8.369518944231588e-06, |
|
"loss": 0.9043, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 12.007509578544061, |
|
"grad_norm": 0.06214595213532448, |
|
"learning_rate": 8.361004682843763e-06, |
|
"loss": 0.53, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 12.008275862068965, |
|
"grad_norm": 60.94453430175781, |
|
"learning_rate": 8.35249042145594e-06, |
|
"loss": 1.4428, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 12.00904214559387, |
|
"grad_norm": 100.55858612060547, |
|
"learning_rate": 8.343976160068115e-06, |
|
"loss": 0.8024, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 12.009808429118774, |
|
"grad_norm": 0.280082643032074, |
|
"learning_rate": 8.33546189868029e-06, |
|
"loss": 1.0197, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 12.010574712643677, |
|
"grad_norm": 3.481741189956665, |
|
"learning_rate": 8.326947637292465e-06, |
|
"loss": 0.6278, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 12.011340996168583, |
|
"grad_norm": 0.04201626405119896, |
|
"learning_rate": 8.318433375904642e-06, |
|
"loss": 1.5908, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 12.012107279693486, |
|
"grad_norm": 0.551430881023407, |
|
"learning_rate": 8.309919114516817e-06, |
|
"loss": 0.3607, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 12.01287356321839, |
|
"grad_norm": 0.44825848937034607, |
|
"learning_rate": 8.301404853128992e-06, |
|
"loss": 1.1027, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 12.013639846743295, |
|
"grad_norm": 0.03944149240851402, |
|
"learning_rate": 8.292890591741167e-06, |
|
"loss": 0.972, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 12.014406130268199, |
|
"grad_norm": 36.95643615722656, |
|
"learning_rate": 8.284376330353342e-06, |
|
"loss": 1.8604, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 12.015172413793103, |
|
"grad_norm": 0.028339561074972153, |
|
"learning_rate": 8.275862068965518e-06, |
|
"loss": 0.5603, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 12.015938697318008, |
|
"grad_norm": 0.012526708655059338, |
|
"learning_rate": 8.267347807577693e-06, |
|
"loss": 0.2803, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 12.016704980842912, |
|
"grad_norm": 0.4388207495212555, |
|
"learning_rate": 8.258833546189868e-06, |
|
"loss": 2.0793, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 12.017471264367815, |
|
"grad_norm": 0.07423753291368484, |
|
"learning_rate": 8.250319284802043e-06, |
|
"loss": 0.5785, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 12.01823754789272, |
|
"grad_norm": 4.145880222320557, |
|
"learning_rate": 8.24180502341422e-06, |
|
"loss": 1.6285, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 12.019003831417624, |
|
"grad_norm": 2.8178610801696777, |
|
"learning_rate": 8.233290762026395e-06, |
|
"loss": 0.5352, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 12.01977011494253, |
|
"grad_norm": 6.510222911834717, |
|
"learning_rate": 8.22477650063857e-06, |
|
"loss": 1.6925, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 0.7860025763511658, |
|
"eval_runtime": 16.0765, |
|
"eval_samples_per_second": 2.737, |
|
"eval_steps_per_second": 2.737, |
|
"step": 3393 |
|
}, |
|
{ |
|
"epoch": 13.000536398467434, |
|
"grad_norm": 0.28236937522888184, |
|
"learning_rate": 8.216262239250745e-06, |
|
"loss": 0.5122, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 13.001302681992337, |
|
"grad_norm": 0.006412493530660868, |
|
"learning_rate": 8.207747977862922e-06, |
|
"loss": 0.1813, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 13.00206896551724, |
|
"grad_norm": 0.09450627118349075, |
|
"learning_rate": 8.199233716475097e-06, |
|
"loss": 0.803, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 13.002835249042146, |
|
"grad_norm": 93.26319122314453, |
|
"learning_rate": 8.190719455087272e-06, |
|
"loss": 0.413, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 13.00360153256705, |
|
"grad_norm": 0.10218259692192078, |
|
"learning_rate": 8.182205193699447e-06, |
|
"loss": 1.2228, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 13.004367816091953, |
|
"grad_norm": 0.11460888385772705, |
|
"learning_rate": 8.173690932311623e-06, |
|
"loss": 0.5362, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 13.005134099616859, |
|
"grad_norm": 0.2618325650691986, |
|
"learning_rate": 8.165176670923798e-06, |
|
"loss": 2.5198, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 13.005900383141762, |
|
"grad_norm": 0.2946365475654602, |
|
"learning_rate": 8.156662409535973e-06, |
|
"loss": 0.7394, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 13.006666666666666, |
|
"grad_norm": 2.869508981704712, |
|
"learning_rate": 8.148148148148148e-06, |
|
"loss": 1.1118, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 13.007432950191571, |
|
"grad_norm": 0.03459819778800011, |
|
"learning_rate": 8.139633886760325e-06, |
|
"loss": 0.0175, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 13.008199233716475, |
|
"grad_norm": 0.05931926891207695, |
|
"learning_rate": 8.1311196253725e-06, |
|
"loss": 3.6146, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.008965517241379, |
|
"grad_norm": 0.2934442460536957, |
|
"learning_rate": 8.122605363984675e-06, |
|
"loss": 0.6959, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 13.009731800766284, |
|
"grad_norm": 7.501830101013184, |
|
"learning_rate": 8.11409110259685e-06, |
|
"loss": 0.8963, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 13.010498084291187, |
|
"grad_norm": 65.24735260009766, |
|
"learning_rate": 8.105576841209027e-06, |
|
"loss": 0.793, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 13.011264367816091, |
|
"grad_norm": 0.0081586679443717, |
|
"learning_rate": 8.097062579821202e-06, |
|
"loss": 0.3874, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 13.012030651340996, |
|
"grad_norm": 0.17738187313079834, |
|
"learning_rate": 8.088548318433377e-06, |
|
"loss": 1.0484, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 13.0127969348659, |
|
"grad_norm": 9.277081489562988, |
|
"learning_rate": 8.080034057045552e-06, |
|
"loss": 0.1586, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 13.013563218390805, |
|
"grad_norm": 0.15834064781665802, |
|
"learning_rate": 8.071519795657727e-06, |
|
"loss": 0.9143, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 13.014329501915709, |
|
"grad_norm": 2.7785654067993164, |
|
"learning_rate": 8.063005534269903e-06, |
|
"loss": 0.4188, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 13.015095785440613, |
|
"grad_norm": 0.020670482888817787, |
|
"learning_rate": 8.054491272882078e-06, |
|
"loss": 0.1171, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 13.015862068965518, |
|
"grad_norm": 0.03489285707473755, |
|
"learning_rate": 8.045977011494253e-06, |
|
"loss": 0.2484, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 13.016628352490422, |
|
"grad_norm": 0.03298380970954895, |
|
"learning_rate": 8.037462750106428e-06, |
|
"loss": 0.7981, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 13.017394636015325, |
|
"grad_norm": 5.918551921844482, |
|
"learning_rate": 8.028948488718605e-06, |
|
"loss": 0.9502, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 13.01816091954023, |
|
"grad_norm": 1.3374207019805908, |
|
"learning_rate": 8.02043422733078e-06, |
|
"loss": 0.7857, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 13.018927203065134, |
|
"grad_norm": 0.7085449695587158, |
|
"learning_rate": 8.011919965942955e-06, |
|
"loss": 2.5765, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 13.019693486590038, |
|
"grad_norm": 0.10716116428375244, |
|
"learning_rate": 8.00340570455513e-06, |
|
"loss": 0.3966, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.16099214553833, |
|
"eval_runtime": 14.8011, |
|
"eval_samples_per_second": 2.973, |
|
"eval_steps_per_second": 2.973, |
|
"step": 3654 |
|
}, |
|
{ |
|
"epoch": 14.000459770114942, |
|
"grad_norm": 83.57977294921875, |
|
"learning_rate": 7.994891443167307e-06, |
|
"loss": 2.387, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 14.001226053639847, |
|
"grad_norm": 1.1556613445281982, |
|
"learning_rate": 7.986377181779482e-06, |
|
"loss": 1.3209, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 14.00199233716475, |
|
"grad_norm": 22.63657569885254, |
|
"learning_rate": 7.977862920391657e-06, |
|
"loss": 0.5223, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 14.002758620689654, |
|
"grad_norm": 0.046173349022865295, |
|
"learning_rate": 7.969348659003832e-06, |
|
"loss": 0.2162, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 14.00352490421456, |
|
"grad_norm": 79.81478118896484, |
|
"learning_rate": 7.960834397616007e-06, |
|
"loss": 1.6065, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 14.004291187739463, |
|
"grad_norm": 0.8003261685371399, |
|
"learning_rate": 7.952320136228183e-06, |
|
"loss": 1.4407, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 14.005057471264367, |
|
"grad_norm": 0.6227564215660095, |
|
"learning_rate": 7.943805874840358e-06, |
|
"loss": 0.3292, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 14.005823754789272, |
|
"grad_norm": 0.013531082309782505, |
|
"learning_rate": 7.935291613452533e-06, |
|
"loss": 1.317, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 14.006590038314176, |
|
"grad_norm": 43.24138641357422, |
|
"learning_rate": 7.92677735206471e-06, |
|
"loss": 0.3234, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 14.007356321839081, |
|
"grad_norm": 204.6854248046875, |
|
"learning_rate": 7.918263090676885e-06, |
|
"loss": 0.5012, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 14.008122605363985, |
|
"grad_norm": 0.8757322430610657, |
|
"learning_rate": 7.90974882928906e-06, |
|
"loss": 0.6135, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 14.008888888888889, |
|
"grad_norm": 123.17404174804688, |
|
"learning_rate": 7.901234567901235e-06, |
|
"loss": 0.2081, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 14.009655172413794, |
|
"grad_norm": 11.556707382202148, |
|
"learning_rate": 7.89272030651341e-06, |
|
"loss": 1.0459, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 14.010421455938697, |
|
"grad_norm": 0.0303201861679554, |
|
"learning_rate": 7.884206045125587e-06, |
|
"loss": 0.271, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 14.011187739463601, |
|
"grad_norm": 0.009424479678273201, |
|
"learning_rate": 7.875691783737762e-06, |
|
"loss": 1.8567, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 14.011954022988506, |
|
"grad_norm": 0.048258207738399506, |
|
"learning_rate": 7.867177522349937e-06, |
|
"loss": 0.177, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 14.01272030651341, |
|
"grad_norm": 0.03346821293234825, |
|
"learning_rate": 7.858663260962112e-06, |
|
"loss": 1.2784, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 14.013486590038314, |
|
"grad_norm": 60.763832092285156, |
|
"learning_rate": 7.850148999574287e-06, |
|
"loss": 1.5424, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 14.014252873563219, |
|
"grad_norm": 68.4090805053711, |
|
"learning_rate": 7.841634738186463e-06, |
|
"loss": 0.6336, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 14.015019157088123, |
|
"grad_norm": 7.362729072570801, |
|
"learning_rate": 7.833120476798638e-06, |
|
"loss": 0.4734, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 14.015785440613026, |
|
"grad_norm": 0.14248596131801605, |
|
"learning_rate": 7.824606215410813e-06, |
|
"loss": 0.021, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 14.016551724137932, |
|
"grad_norm": 0.003983147908002138, |
|
"learning_rate": 7.81609195402299e-06, |
|
"loss": 0.0004, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 14.017318007662835, |
|
"grad_norm": 0.016375727951526642, |
|
"learning_rate": 7.807577692635165e-06, |
|
"loss": 1.6831, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 14.018084291187739, |
|
"grad_norm": 1.461202621459961, |
|
"learning_rate": 7.79906343124734e-06, |
|
"loss": 2.3294, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 14.018850574712644, |
|
"grad_norm": 44.32033157348633, |
|
"learning_rate": 7.790549169859515e-06, |
|
"loss": 1.2059, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 14.019616858237548, |
|
"grad_norm": 0.5498666763305664, |
|
"learning_rate": 7.78203490847169e-06, |
|
"loss": 0.0112, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.7137937545776367, |
|
"eval_runtime": 14.9715, |
|
"eval_samples_per_second": 2.939, |
|
"eval_steps_per_second": 2.939, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 15.000383141762452, |
|
"grad_norm": 0.08150316029787064, |
|
"learning_rate": 7.773520647083867e-06, |
|
"loss": 2.0771, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 15.001149425287357, |
|
"grad_norm": 0.04414815455675125, |
|
"learning_rate": 7.765006385696042e-06, |
|
"loss": 0.005, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 15.00191570881226, |
|
"grad_norm": 0.04258653149008751, |
|
"learning_rate": 7.756492124308217e-06, |
|
"loss": 0.9048, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 15.002681992337164, |
|
"grad_norm": 39.778770446777344, |
|
"learning_rate": 7.747977862920393e-06, |
|
"loss": 0.62, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 15.00344827586207, |
|
"grad_norm": 0.37194588780403137, |
|
"learning_rate": 7.739463601532567e-06, |
|
"loss": 0.7976, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 15.004214559386973, |
|
"grad_norm": 8.690224647521973, |
|
"learning_rate": 7.730949340144743e-06, |
|
"loss": 1.941, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 15.004980842911877, |
|
"grad_norm": 0.12905330955982208, |
|
"learning_rate": 7.722435078756918e-06, |
|
"loss": 0.7817, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 15.005747126436782, |
|
"grad_norm": 0.1041402518749237, |
|
"learning_rate": 7.713920817369093e-06, |
|
"loss": 0.2625, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 15.006513409961686, |
|
"grad_norm": 3.4772400856018066, |
|
"learning_rate": 7.70540655598127e-06, |
|
"loss": 1.204, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.00727969348659, |
|
"grad_norm": 89.86620330810547, |
|
"learning_rate": 7.696892294593445e-06, |
|
"loss": 1.0373, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 15.008045977011495, |
|
"grad_norm": 0.06035508215427399, |
|
"learning_rate": 7.68837803320562e-06, |
|
"loss": 1.1244, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 15.008812260536398, |
|
"grad_norm": 109.76663208007812, |
|
"learning_rate": 7.679863771817797e-06, |
|
"loss": 1.0546, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 15.009578544061302, |
|
"grad_norm": 148.40438842773438, |
|
"learning_rate": 7.67134951042997e-06, |
|
"loss": 0.8543, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 15.010344827586207, |
|
"grad_norm": 0.009009936824440956, |
|
"learning_rate": 7.662835249042147e-06, |
|
"loss": 0.9242, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 15.011111111111111, |
|
"grad_norm": 0.037366222590208054, |
|
"learning_rate": 7.654320987654322e-06, |
|
"loss": 0.2698, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 15.011877394636015, |
|
"grad_norm": 0.08401846140623093, |
|
"learning_rate": 7.645806726266497e-06, |
|
"loss": 0.2274, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 15.01264367816092, |
|
"grad_norm": 0.01287546381354332, |
|
"learning_rate": 7.637292464878673e-06, |
|
"loss": 1.7874, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 15.013409961685824, |
|
"grad_norm": 0.7353202700614929, |
|
"learning_rate": 7.6287782034908475e-06, |
|
"loss": 0.6991, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 15.014176245210727, |
|
"grad_norm": 0.017869267612695694, |
|
"learning_rate": 7.620263942103023e-06, |
|
"loss": 1.0055, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 15.014942528735633, |
|
"grad_norm": 0.019609203562140465, |
|
"learning_rate": 7.611749680715198e-06, |
|
"loss": 0.288, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 15.015708812260536, |
|
"grad_norm": 0.049044087529182434, |
|
"learning_rate": 7.603235419327374e-06, |
|
"loss": 0.3063, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 15.01647509578544, |
|
"grad_norm": 0.04734064266085625, |
|
"learning_rate": 7.59472115793955e-06, |
|
"loss": 2.2264, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 15.017241379310345, |
|
"grad_norm": 0.08333654701709747, |
|
"learning_rate": 7.586206896551724e-06, |
|
"loss": 1.5173, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 15.018007662835249, |
|
"grad_norm": 0.2572680413722992, |
|
"learning_rate": 7.5776926351639e-06, |
|
"loss": 0.6422, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 15.018773946360152, |
|
"grad_norm": 0.34934768080711365, |
|
"learning_rate": 7.569178373776076e-06, |
|
"loss": 0.4229, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 15.019540229885058, |
|
"grad_norm": 106.82306671142578, |
|
"learning_rate": 7.560664112388251e-06, |
|
"loss": 0.5847, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"eval_accuracy": 0.7045454545454546, |
|
"eval_loss": 0.8433036208152771, |
|
"eval_runtime": 14.7857, |
|
"eval_samples_per_second": 2.976, |
|
"eval_steps_per_second": 2.976, |
|
"step": 4176 |
|
}, |
|
{ |
|
"epoch": 16.00030651340996, |
|
"grad_norm": 5.15858793258667, |
|
"learning_rate": 7.552149851000427e-06, |
|
"loss": 0.5931, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 16.001072796934867, |
|
"grad_norm": 0.1785539984703064, |
|
"learning_rate": 7.543635589612601e-06, |
|
"loss": 0.2837, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 16.00183908045977, |
|
"grad_norm": 0.02489466406404972, |
|
"learning_rate": 7.535121328224777e-06, |
|
"loss": 0.7209, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 16.002605363984674, |
|
"grad_norm": 88.38400268554688, |
|
"learning_rate": 7.5266070668369525e-06, |
|
"loss": 0.4813, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 16.003371647509578, |
|
"grad_norm": 0.03208989277482033, |
|
"learning_rate": 7.5180928054491275e-06, |
|
"loss": 0.793, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 16.00413793103448, |
|
"grad_norm": 0.04229838773608208, |
|
"learning_rate": 7.509578544061303e-06, |
|
"loss": 0.0119, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 16.00490421455939, |
|
"grad_norm": 0.0872679054737091, |
|
"learning_rate": 7.501064282673479e-06, |
|
"loss": 0.4911, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 16.005670498084292, |
|
"grad_norm": 0.010030065663158894, |
|
"learning_rate": 7.492550021285654e-06, |
|
"loss": 0.9722, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 16.006436781609196, |
|
"grad_norm": 0.6941576600074768, |
|
"learning_rate": 7.48403575989783e-06, |
|
"loss": 0.1926, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 16.0072030651341, |
|
"grad_norm": 457.5856018066406, |
|
"learning_rate": 7.475521498510004e-06, |
|
"loss": 2.1542, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 16.007969348659003, |
|
"grad_norm": 0.4293195605278015, |
|
"learning_rate": 7.46700723712218e-06, |
|
"loss": 0.9839, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 16.008735632183907, |
|
"grad_norm": 15.19454574584961, |
|
"learning_rate": 7.458492975734356e-06, |
|
"loss": 0.6035, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 16.009501915708814, |
|
"grad_norm": 60.78336715698242, |
|
"learning_rate": 7.449978714346531e-06, |
|
"loss": 2.0367, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 16.010268199233717, |
|
"grad_norm": 59.73289489746094, |
|
"learning_rate": 7.441464452958707e-06, |
|
"loss": 1.8694, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 16.01103448275862, |
|
"grad_norm": 23.923913955688477, |
|
"learning_rate": 7.4329501915708825e-06, |
|
"loss": 1.3875, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 16.011800766283525, |
|
"grad_norm": 37.67430877685547, |
|
"learning_rate": 7.4244359301830575e-06, |
|
"loss": 1.3104, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 16.01256704980843, |
|
"grad_norm": 0.14508657157421112, |
|
"learning_rate": 7.4159216687952325e-06, |
|
"loss": 0.5459, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 16.013333333333332, |
|
"grad_norm": 0.03012464940547943, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.3597, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 16.01409961685824, |
|
"grad_norm": 0.014629879966378212, |
|
"learning_rate": 7.398893146019583e-06, |
|
"loss": 0.4167, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 16.014865900383143, |
|
"grad_norm": 0.0070334975607693195, |
|
"learning_rate": 7.390378884631759e-06, |
|
"loss": 0.2939, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 16.015632183908046, |
|
"grad_norm": 0.2191082388162613, |
|
"learning_rate": 7.381864623243934e-06, |
|
"loss": 0.984, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 16.01639846743295, |
|
"grad_norm": 109.90150451660156, |
|
"learning_rate": 7.37335036185611e-06, |
|
"loss": 1.2009, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 16.017164750957853, |
|
"grad_norm": 0.925826907157898, |
|
"learning_rate": 7.364836100468284e-06, |
|
"loss": 1.2227, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 16.017931034482757, |
|
"grad_norm": 118.05204010009766, |
|
"learning_rate": 7.35632183908046e-06, |
|
"loss": 1.3761, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 16.018697318007664, |
|
"grad_norm": 0.0967426672577858, |
|
"learning_rate": 7.347807577692636e-06, |
|
"loss": 0.3146, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 16.019463601532568, |
|
"grad_norm": 25.102903366088867, |
|
"learning_rate": 7.339293316304811e-06, |
|
"loss": 0.6547, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"eval_accuracy": 0.6136363636363636, |
|
"eval_loss": 1.7384045124053955, |
|
"eval_runtime": 14.8537, |
|
"eval_samples_per_second": 2.962, |
|
"eval_steps_per_second": 2.962, |
|
"step": 4437 |
|
}, |
|
{ |
|
"epoch": 17.000229885057472, |
|
"grad_norm": 0.022724539041519165, |
|
"learning_rate": 7.330779054916987e-06, |
|
"loss": 0.5371, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 17.000996168582375, |
|
"grad_norm": 46.65639114379883, |
|
"learning_rate": 7.3222647935291625e-06, |
|
"loss": 1.771, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 17.00176245210728, |
|
"grad_norm": 0.024233011528849602, |
|
"learning_rate": 7.3137505321413375e-06, |
|
"loss": 0.5883, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 17.002528735632183, |
|
"grad_norm": 0.10233330726623535, |
|
"learning_rate": 7.305236270753513e-06, |
|
"loss": 1.0152, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 17.00329501915709, |
|
"grad_norm": 0.050764210522174835, |
|
"learning_rate": 7.2967220093656875e-06, |
|
"loss": 0.3224, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 17.004061302681993, |
|
"grad_norm": 41.09421157836914, |
|
"learning_rate": 7.288207747977863e-06, |
|
"loss": 1.8103, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 17.004827586206897, |
|
"grad_norm": 0.494922012090683, |
|
"learning_rate": 7.279693486590039e-06, |
|
"loss": 0.477, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0055938697318, |
|
"grad_norm": 22.638551712036133, |
|
"learning_rate": 7.271179225202214e-06, |
|
"loss": 0.411, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 17.006360153256704, |
|
"grad_norm": 0.008860452100634575, |
|
"learning_rate": 7.26266496381439e-06, |
|
"loss": 0.6254, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 17.007126436781608, |
|
"grad_norm": 0.025726931169629097, |
|
"learning_rate": 7.254150702426566e-06, |
|
"loss": 1.264, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 17.007892720306515, |
|
"grad_norm": 251.97262573242188, |
|
"learning_rate": 7.24563644103874e-06, |
|
"loss": 1.3841, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 17.00865900383142, |
|
"grad_norm": 0.009524849243462086, |
|
"learning_rate": 7.237122179650916e-06, |
|
"loss": 0.0907, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 17.009425287356322, |
|
"grad_norm": 16.7768611907959, |
|
"learning_rate": 7.228607918263091e-06, |
|
"loss": 0.6758, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 17.010191570881226, |
|
"grad_norm": 0.3332536220550537, |
|
"learning_rate": 7.220093656875267e-06, |
|
"loss": 0.377, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 17.01095785440613, |
|
"grad_norm": 0.04879189282655716, |
|
"learning_rate": 7.2115793954874425e-06, |
|
"loss": 0.0047, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 17.011724137931033, |
|
"grad_norm": 0.012600916437804699, |
|
"learning_rate": 7.2030651340996175e-06, |
|
"loss": 0.0008, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 17.01249042145594, |
|
"grad_norm": 5.205287933349609, |
|
"learning_rate": 7.194550872711793e-06, |
|
"loss": 1.4922, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 17.013256704980844, |
|
"grad_norm": 20.54860496520996, |
|
"learning_rate": 7.1860366113239675e-06, |
|
"loss": 1.1475, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 17.014022988505747, |
|
"grad_norm": 5.545294284820557, |
|
"learning_rate": 7.177522349936143e-06, |
|
"loss": 1.0824, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 17.01478927203065, |
|
"grad_norm": 0.022746141999959946, |
|
"learning_rate": 7.169008088548319e-06, |
|
"loss": 0.7447, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 17.015555555555554, |
|
"grad_norm": 0.011499214917421341, |
|
"learning_rate": 7.160493827160494e-06, |
|
"loss": 0.6717, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 17.016321839080458, |
|
"grad_norm": 0.008388209156692028, |
|
"learning_rate": 7.15197956577267e-06, |
|
"loss": 1.2204, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 17.017088122605365, |
|
"grad_norm": 0.019599752500653267, |
|
"learning_rate": 7.143465304384846e-06, |
|
"loss": 0.2615, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 17.01785440613027, |
|
"grad_norm": 6.7550506591796875, |
|
"learning_rate": 7.13495104299702e-06, |
|
"loss": 0.3677, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 17.018620689655172, |
|
"grad_norm": 35.38494873046875, |
|
"learning_rate": 7.126436781609196e-06, |
|
"loss": 1.1461, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 17.019386973180076, |
|
"grad_norm": 0.6330614686012268, |
|
"learning_rate": 7.117922520221371e-06, |
|
"loss": 0.7854, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"eval_accuracy": 0.6818181818181818, |
|
"eval_loss": 1.3476608991622925, |
|
"eval_runtime": 14.9835, |
|
"eval_samples_per_second": 2.937, |
|
"eval_steps_per_second": 2.937, |
|
"step": 4698 |
|
}, |
|
{ |
|
"epoch": 18.00015325670498, |
|
"grad_norm": 0.634505033493042, |
|
"learning_rate": 7.109408258833547e-06, |
|
"loss": 1.6979, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 18.000919540229884, |
|
"grad_norm": 0.16293062269687653, |
|
"learning_rate": 7.1008939974457225e-06, |
|
"loss": 1.0607, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 18.00168582375479, |
|
"grad_norm": 0.12276284396648407, |
|
"learning_rate": 7.0923797360578975e-06, |
|
"loss": 0.0826, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 18.002452107279694, |
|
"grad_norm": 0.02927841618657112, |
|
"learning_rate": 7.083865474670073e-06, |
|
"loss": 0.1384, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 18.003218390804598, |
|
"grad_norm": 42.946388244628906, |
|
"learning_rate": 7.075351213282249e-06, |
|
"loss": 0.9614, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 18.0039846743295, |
|
"grad_norm": 0.007065847050398588, |
|
"learning_rate": 7.066836951894423e-06, |
|
"loss": 1.0878, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 18.004750957854405, |
|
"grad_norm": 0.06603094935417175, |
|
"learning_rate": 7.058322690506599e-06, |
|
"loss": 1.2561, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 18.00551724137931, |
|
"grad_norm": 0.12717241048812866, |
|
"learning_rate": 7.049808429118774e-06, |
|
"loss": 0.5532, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 18.006283524904216, |
|
"grad_norm": 0.023113179951906204, |
|
"learning_rate": 7.04129416773095e-06, |
|
"loss": 0.2697, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 18.00704980842912, |
|
"grad_norm": 9.795312881469727, |
|
"learning_rate": 7.032779906343126e-06, |
|
"loss": 0.1965, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 18.007816091954023, |
|
"grad_norm": 0.005494195502251387, |
|
"learning_rate": 7.0242656449553e-06, |
|
"loss": 1.0595, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 18.008582375478927, |
|
"grad_norm": 98.12525177001953, |
|
"learning_rate": 7.015751383567476e-06, |
|
"loss": 0.607, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 18.00934865900383, |
|
"grad_norm": 0.02085270546376705, |
|
"learning_rate": 7.007237122179652e-06, |
|
"loss": 1.867, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 18.010114942528734, |
|
"grad_norm": 58.701210021972656, |
|
"learning_rate": 6.998722860791827e-06, |
|
"loss": 0.7827, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 18.01088122605364, |
|
"grad_norm": 0.015044331550598145, |
|
"learning_rate": 6.9902085994040025e-06, |
|
"loss": 0.3662, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 18.011647509578545, |
|
"grad_norm": 6.259363174438477, |
|
"learning_rate": 6.9816943380161775e-06, |
|
"loss": 0.4321, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 18.01241379310345, |
|
"grad_norm": 64.01567840576172, |
|
"learning_rate": 6.973180076628353e-06, |
|
"loss": 0.3617, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 18.013180076628352, |
|
"grad_norm": 0.06636208295822144, |
|
"learning_rate": 6.964665815240529e-06, |
|
"loss": 0.5991, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 18.013946360153255, |
|
"grad_norm": 0.18038584291934967, |
|
"learning_rate": 6.956151553852703e-06, |
|
"loss": 1.3252, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 18.014712643678163, |
|
"grad_norm": 7.999940395355225, |
|
"learning_rate": 6.947637292464879e-06, |
|
"loss": 0.4412, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 18.015478927203066, |
|
"grad_norm": 0.03466758877038956, |
|
"learning_rate": 6.939123031077054e-06, |
|
"loss": 0.1706, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 18.01624521072797, |
|
"grad_norm": 0.055949654430150986, |
|
"learning_rate": 6.93060876968923e-06, |
|
"loss": 0.959, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 18.017011494252873, |
|
"grad_norm": 2.0870323181152344, |
|
"learning_rate": 6.922094508301406e-06, |
|
"loss": 0.3926, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 18.017777777777777, |
|
"grad_norm": 0.007222402840852737, |
|
"learning_rate": 6.913580246913581e-06, |
|
"loss": 1.2137, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 18.01854406130268, |
|
"grad_norm": 0.004876899532973766, |
|
"learning_rate": 6.905065985525757e-06, |
|
"loss": 0.9595, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 18.019310344827588, |
|
"grad_norm": 201.1775360107422, |
|
"learning_rate": 6.896551724137932e-06, |
|
"loss": 1.0052, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"eval_accuracy": 0.7045454545454546, |
|
"eval_loss": 1.4197241067886353, |
|
"eval_runtime": 14.7949, |
|
"eval_samples_per_second": 2.974, |
|
"eval_steps_per_second": 2.974, |
|
"step": 4959 |
|
}, |
|
{ |
|
"epoch": 19.000076628352492, |
|
"grad_norm": 93.49481201171875, |
|
"learning_rate": 6.888037462750107e-06, |
|
"loss": 0.7382, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 19.000842911877395, |
|
"grad_norm": 41.43840408325195, |
|
"learning_rate": 6.8795232013622825e-06, |
|
"loss": 0.619, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 19.0016091954023, |
|
"grad_norm": 0.00778006250038743, |
|
"learning_rate": 6.8710089399744575e-06, |
|
"loss": 0.362, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 19.002375478927203, |
|
"grad_norm": 0.006389183923602104, |
|
"learning_rate": 6.862494678586633e-06, |
|
"loss": 0.1976, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 19.003141762452106, |
|
"grad_norm": 0.003348706290125847, |
|
"learning_rate": 6.853980417198809e-06, |
|
"loss": 0.5945, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.00390804597701, |
|
"grad_norm": 0.2479257434606552, |
|
"learning_rate": 6.845466155810983e-06, |
|
"loss": 1.7944, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 19.004674329501917, |
|
"grad_norm": 0.010431020520627499, |
|
"learning_rate": 6.836951894423159e-06, |
|
"loss": 0.0126, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 19.00544061302682, |
|
"grad_norm": 0.059323448687791824, |
|
"learning_rate": 6.828437633035335e-06, |
|
"loss": 1.1333, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 19.006206896551724, |
|
"grad_norm": 0.1679394543170929, |
|
"learning_rate": 6.81992337164751e-06, |
|
"loss": 0.2732, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 19.006973180076628, |
|
"grad_norm": 0.015040397644042969, |
|
"learning_rate": 6.811409110259686e-06, |
|
"loss": 0.4259, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 19.00773946360153, |
|
"grad_norm": 248.40744018554688, |
|
"learning_rate": 6.802894848871861e-06, |
|
"loss": 1.8924, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 19.00850574712644, |
|
"grad_norm": 0.009207407012581825, |
|
"learning_rate": 6.794380587484037e-06, |
|
"loss": 0.4579, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 19.009272030651342, |
|
"grad_norm": 0.00731949508190155, |
|
"learning_rate": 6.7858663260962125e-06, |
|
"loss": 0.9262, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 19.010038314176246, |
|
"grad_norm": 0.9819995164871216, |
|
"learning_rate": 6.777352064708387e-06, |
|
"loss": 0.7865, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 19.01080459770115, |
|
"grad_norm": 64.07319641113281, |
|
"learning_rate": 6.7688378033205625e-06, |
|
"loss": 0.8652, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 19.011570881226053, |
|
"grad_norm": 0.005349809303879738, |
|
"learning_rate": 6.760323541932738e-06, |
|
"loss": 0.0021, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 19.012337164750956, |
|
"grad_norm": 43.98818588256836, |
|
"learning_rate": 6.751809280544913e-06, |
|
"loss": 0.6594, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 19.013103448275864, |
|
"grad_norm": 75.74127197265625, |
|
"learning_rate": 6.743295019157089e-06, |
|
"loss": 0.4529, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 19.013869731800767, |
|
"grad_norm": 171.2770538330078, |
|
"learning_rate": 6.734780757769263e-06, |
|
"loss": 0.354, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 19.01463601532567, |
|
"grad_norm": 0.019526701420545578, |
|
"learning_rate": 6.726266496381439e-06, |
|
"loss": 0.1203, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 19.015402298850574, |
|
"grad_norm": 0.060471560806035995, |
|
"learning_rate": 6.717752234993615e-06, |
|
"loss": 0.1018, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 19.016168582375478, |
|
"grad_norm": 0.05770859867334366, |
|
"learning_rate": 6.70923797360579e-06, |
|
"loss": 0.0989, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 19.01693486590038, |
|
"grad_norm": 0.0061812433414161205, |
|
"learning_rate": 6.700723712217966e-06, |
|
"loss": 1.3281, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 19.01770114942529, |
|
"grad_norm": 0.008389589376747608, |
|
"learning_rate": 6.692209450830141e-06, |
|
"loss": 1.1687, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 19.018467432950192, |
|
"grad_norm": 0.04719560965895653, |
|
"learning_rate": 6.683695189442317e-06, |
|
"loss": 0.1496, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 19.019233716475096, |
|
"grad_norm": 27.764373779296875, |
|
"learning_rate": 6.6751809280544925e-06, |
|
"loss": 1.0203, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"grad_norm": 34.23369216918945, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.4927, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"eval_accuracy": 0.6136363636363636, |
|
"eval_loss": 2.204637289047241, |
|
"eval_runtime": 14.8159, |
|
"eval_samples_per_second": 2.97, |
|
"eval_steps_per_second": 2.97, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 20.000766283524904, |
|
"grad_norm": 0.01189060602337122, |
|
"learning_rate": 6.6581524052788425e-06, |
|
"loss": 0.0017, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 20.001532567049807, |
|
"grad_norm": 0.14064911007881165, |
|
"learning_rate": 6.649638143891018e-06, |
|
"loss": 2.6074, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 20.002298850574714, |
|
"grad_norm": 0.3934263586997986, |
|
"learning_rate": 6.641123882503193e-06, |
|
"loss": 0.352, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 20.003065134099618, |
|
"grad_norm": 0.0731765404343605, |
|
"learning_rate": 6.632609621115369e-06, |
|
"loss": 0.1418, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 20.00383141762452, |
|
"grad_norm": 207.2374725341797, |
|
"learning_rate": 6.624095359727543e-06, |
|
"loss": 1.6409, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 20.004597701149425, |
|
"grad_norm": 0.12684249877929688, |
|
"learning_rate": 6.615581098339719e-06, |
|
"loss": 0.6044, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 20.00536398467433, |
|
"grad_norm": 0.4711454212665558, |
|
"learning_rate": 6.607066836951895e-06, |
|
"loss": 1.157, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 20.006130268199232, |
|
"grad_norm": 0.14143647253513336, |
|
"learning_rate": 6.59855257556407e-06, |
|
"loss": 0.2839, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 20.00689655172414, |
|
"grad_norm": 0.033943917602300644, |
|
"learning_rate": 6.590038314176246e-06, |
|
"loss": 0.7102, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 20.007662835249043, |
|
"grad_norm": 0.049255017191171646, |
|
"learning_rate": 6.581524052788422e-06, |
|
"loss": 0.2677, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 20.008429118773947, |
|
"grad_norm": 0.016812535002827644, |
|
"learning_rate": 6.573009791400597e-06, |
|
"loss": 0.7073, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 20.00919540229885, |
|
"grad_norm": 47.37237548828125, |
|
"learning_rate": 6.5644955300127725e-06, |
|
"loss": 1.0679, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 20.009961685823754, |
|
"grad_norm": 458.0691833496094, |
|
"learning_rate": 6.555981268624947e-06, |
|
"loss": 0.8184, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 20.010727969348657, |
|
"grad_norm": 0.020279478281736374, |
|
"learning_rate": 6.5474670072371225e-06, |
|
"loss": 0.004, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 20.011494252873565, |
|
"grad_norm": 0.03246476501226425, |
|
"learning_rate": 6.538952745849298e-06, |
|
"loss": 0.1875, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 20.01226053639847, |
|
"grad_norm": 84.93108367919922, |
|
"learning_rate": 6.530438484461473e-06, |
|
"loss": 1.1212, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 20.013026819923372, |
|
"grad_norm": 0.05080035328865051, |
|
"learning_rate": 6.521924223073649e-06, |
|
"loss": 1.3401, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 20.013793103448275, |
|
"grad_norm": 0.022338256239891052, |
|
"learning_rate": 6.513409961685824e-06, |
|
"loss": 0.6263, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 20.01455938697318, |
|
"grad_norm": 0.006467349827289581, |
|
"learning_rate": 6.504895700297999e-06, |
|
"loss": 0.6373, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 20.015325670498083, |
|
"grad_norm": 63.687164306640625, |
|
"learning_rate": 6.496381438910175e-06, |
|
"loss": 1.0641, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 20.01609195402299, |
|
"grad_norm": 326.7529602050781, |
|
"learning_rate": 6.48786717752235e-06, |
|
"loss": 0.2013, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 20.016858237547893, |
|
"grad_norm": 0.15167830884456635, |
|
"learning_rate": 6.479352916134526e-06, |
|
"loss": 0.5322, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 20.017624521072797, |
|
"grad_norm": 0.5869858860969543, |
|
"learning_rate": 6.470838654746702e-06, |
|
"loss": 0.925, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 20.0183908045977, |
|
"grad_norm": 0.0439978688955307, |
|
"learning_rate": 6.462324393358877e-06, |
|
"loss": 1.1406, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 20.019157088122604, |
|
"grad_norm": 0.03456604853272438, |
|
"learning_rate": 6.4538101319710525e-06, |
|
"loss": 0.6476, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 20.01992337164751, |
|
"grad_norm": 0.008062485605478287, |
|
"learning_rate": 6.445295870583227e-06, |
|
"loss": 0.5386, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 1.2006016969680786, |
|
"eval_runtime": 14.8909, |
|
"eval_samples_per_second": 2.955, |
|
"eval_steps_per_second": 2.955, |
|
"step": 5481 |
|
}, |
|
{ |
|
"epoch": 21.000689655172415, |
|
"grad_norm": 0.009699113667011261, |
|
"learning_rate": 6.4367816091954025e-06, |
|
"loss": 0.4343, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 21.00145593869732, |
|
"grad_norm": 0.7203290462493896, |
|
"learning_rate": 6.428267347807578e-06, |
|
"loss": 0.4548, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.002222222222223, |
|
"grad_norm": 0.0787847563624382, |
|
"learning_rate": 6.419753086419753e-06, |
|
"loss": 0.0141, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 21.002988505747126, |
|
"grad_norm": 0.004414450377225876, |
|
"learning_rate": 6.411238825031929e-06, |
|
"loss": 0.6573, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 21.00375478927203, |
|
"grad_norm": 0.009877575561404228, |
|
"learning_rate": 6.402724563644105e-06, |
|
"loss": 0.438, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 21.004521072796933, |
|
"grad_norm": 154.08343505859375, |
|
"learning_rate": 6.39421030225628e-06, |
|
"loss": 1.1306, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 21.00528735632184, |
|
"grad_norm": 112.66160583496094, |
|
"learning_rate": 6.385696040868455e-06, |
|
"loss": 2.0577, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 21.006053639846744, |
|
"grad_norm": 0.07090185582637787, |
|
"learning_rate": 6.37718177948063e-06, |
|
"loss": 0.9777, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 21.006819923371648, |
|
"grad_norm": 59.00656509399414, |
|
"learning_rate": 6.368667518092806e-06, |
|
"loss": 0.558, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 21.00758620689655, |
|
"grad_norm": 0.2411382496356964, |
|
"learning_rate": 6.360153256704982e-06, |
|
"loss": 0.1421, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 21.008352490421455, |
|
"grad_norm": 0.12683288753032684, |
|
"learning_rate": 6.351638995317157e-06, |
|
"loss": 0.0251, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 21.00911877394636, |
|
"grad_norm": 0.14304675161838531, |
|
"learning_rate": 6.3431247339293325e-06, |
|
"loss": 0.5667, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 21.009885057471266, |
|
"grad_norm": 0.05938946083188057, |
|
"learning_rate": 6.334610472541508e-06, |
|
"loss": 0.0017, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 21.01065134099617, |
|
"grad_norm": 0.028048302978277206, |
|
"learning_rate": 6.3260962111536825e-06, |
|
"loss": 0.2176, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 21.011417624521073, |
|
"grad_norm": 466.30902099609375, |
|
"learning_rate": 6.317581949765858e-06, |
|
"loss": 1.4602, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 21.012183908045976, |
|
"grad_norm": 0.004101600032299757, |
|
"learning_rate": 6.309067688378033e-06, |
|
"loss": 0.0038, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 21.01295019157088, |
|
"grad_norm": 0.0273132286965847, |
|
"learning_rate": 6.300553426990209e-06, |
|
"loss": 0.0006, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 21.013716475095784, |
|
"grad_norm": 0.3355754315853119, |
|
"learning_rate": 6.292039165602385e-06, |
|
"loss": 1.697, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 21.01448275862069, |
|
"grad_norm": 0.0029017862398177385, |
|
"learning_rate": 6.28352490421456e-06, |
|
"loss": 1.3869, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 21.015249042145594, |
|
"grad_norm": 307.9607849121094, |
|
"learning_rate": 6.275010642826736e-06, |
|
"loss": 0.2193, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 21.016015325670498, |
|
"grad_norm": 0.009345950558781624, |
|
"learning_rate": 6.26649638143891e-06, |
|
"loss": 0.9992, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 21.0167816091954, |
|
"grad_norm": 0.014894979074597359, |
|
"learning_rate": 6.257982120051086e-06, |
|
"loss": 1.394, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 21.017547892720305, |
|
"grad_norm": 0.11634480208158493, |
|
"learning_rate": 6.249467858663262e-06, |
|
"loss": 0.6948, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 21.018314176245212, |
|
"grad_norm": 1.7374573945999146, |
|
"learning_rate": 6.240953597275437e-06, |
|
"loss": 0.5279, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 21.019080459770116, |
|
"grad_norm": 110.46876525878906, |
|
"learning_rate": 6.2324393358876125e-06, |
|
"loss": 0.0659, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 21.01984674329502, |
|
"grad_norm": 0.0037237678188830614, |
|
"learning_rate": 6.223925074499788e-06, |
|
"loss": 0.7256, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5015186071395874, |
|
"eval_runtime": 16.0919, |
|
"eval_samples_per_second": 2.734, |
|
"eval_steps_per_second": 2.734, |
|
"step": 5742 |
|
}, |
|
{ |
|
"epoch": 22.000613026819924, |
|
"grad_norm": 0.14362813532352448, |
|
"learning_rate": 6.2154108131119625e-06, |
|
"loss": 0.4832, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 22.001379310344827, |
|
"grad_norm": 0.004388533066958189, |
|
"learning_rate": 6.206896551724138e-06, |
|
"loss": 0.5042, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 22.00214559386973, |
|
"grad_norm": 0.009793414734303951, |
|
"learning_rate": 6.198382290336313e-06, |
|
"loss": 0.0007, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 22.002911877394634, |
|
"grad_norm": 0.10711691528558731, |
|
"learning_rate": 6.189868028948489e-06, |
|
"loss": 0.4023, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 22.00367816091954, |
|
"grad_norm": 0.028111960738897324, |
|
"learning_rate": 6.181353767560665e-06, |
|
"loss": 1.3179, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 22.004444444444445, |
|
"grad_norm": 0.04511615261435509, |
|
"learning_rate": 6.17283950617284e-06, |
|
"loss": 0.7028, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 22.00521072796935, |
|
"grad_norm": 1.3484392166137695, |
|
"learning_rate": 6.164325244785016e-06, |
|
"loss": 0.8454, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 22.005977011494252, |
|
"grad_norm": 0.00628571305423975, |
|
"learning_rate": 6.155810983397192e-06, |
|
"loss": 0.1536, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 22.006743295019156, |
|
"grad_norm": 0.019994376227259636, |
|
"learning_rate": 6.147296722009366e-06, |
|
"loss": 1.1557, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 22.00750957854406, |
|
"grad_norm": 0.18527528643608093, |
|
"learning_rate": 6.138782460621542e-06, |
|
"loss": 0.6571, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 22.008275862068967, |
|
"grad_norm": 0.010576602071523666, |
|
"learning_rate": 6.130268199233717e-06, |
|
"loss": 0.9536, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 22.00904214559387, |
|
"grad_norm": 0.24366912245750427, |
|
"learning_rate": 6.1217539378458925e-06, |
|
"loss": 0.0125, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 22.009808429118774, |
|
"grad_norm": 431.5052795410156, |
|
"learning_rate": 6.113239676458068e-06, |
|
"loss": 1.6293, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 22.010574712643677, |
|
"grad_norm": 0.040831536054611206, |
|
"learning_rate": 6.1047254150702425e-06, |
|
"loss": 0.5619, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 22.01134099616858, |
|
"grad_norm": 53.907928466796875, |
|
"learning_rate": 6.096211153682418e-06, |
|
"loss": 0.5729, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 22.01210727969349, |
|
"grad_norm": 253.20339965820312, |
|
"learning_rate": 6.087696892294594e-06, |
|
"loss": 1.336, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 22.012873563218392, |
|
"grad_norm": 0.008975782431662083, |
|
"learning_rate": 6.079182630906769e-06, |
|
"loss": 0.7322, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 22.013639846743295, |
|
"grad_norm": 0.7453123331069946, |
|
"learning_rate": 6.070668369518945e-06, |
|
"loss": 0.6297, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 22.0144061302682, |
|
"grad_norm": 50.5977668762207, |
|
"learning_rate": 6.06215410813112e-06, |
|
"loss": 0.0448, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 22.015172413793103, |
|
"grad_norm": 0.03801470249891281, |
|
"learning_rate": 6.053639846743296e-06, |
|
"loss": 0.4144, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 22.015938697318006, |
|
"grad_norm": 0.15638412535190582, |
|
"learning_rate": 6.045125585355472e-06, |
|
"loss": 1.0739, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 22.016704980842913, |
|
"grad_norm": 0.00895222369581461, |
|
"learning_rate": 6.036611323967646e-06, |
|
"loss": 0.1679, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 22.017471264367817, |
|
"grad_norm": 0.019864307716488838, |
|
"learning_rate": 6.028097062579822e-06, |
|
"loss": 1.0272, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 22.01823754789272, |
|
"grad_norm": 58.649620056152344, |
|
"learning_rate": 6.019582801191997e-06, |
|
"loss": 1.2786, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 22.019003831417624, |
|
"grad_norm": 273.28289794921875, |
|
"learning_rate": 6.0110685398041725e-06, |
|
"loss": 0.3921, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 22.019770114942528, |
|
"grad_norm": 0.0023106043227016926, |
|
"learning_rate": 6.002554278416348e-06, |
|
"loss": 0.8462, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"eval_accuracy": 0.6590909090909091, |
|
"eval_loss": 1.6404529809951782, |
|
"eval_runtime": 16.0448, |
|
"eval_samples_per_second": 2.742, |
|
"eval_steps_per_second": 2.742, |
|
"step": 6003 |
|
}, |
|
{ |
|
"epoch": 23.000536398467432, |
|
"grad_norm": 144.0416259765625, |
|
"learning_rate": 5.9940400170285225e-06, |
|
"loss": 0.9767, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 23.001302681992335, |
|
"grad_norm": 9.536761283874512, |
|
"learning_rate": 5.985525755640698e-06, |
|
"loss": 0.543, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 23.002068965517243, |
|
"grad_norm": 0.1460639387369156, |
|
"learning_rate": 5.977011494252874e-06, |
|
"loss": 0.0045, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 23.002835249042146, |
|
"grad_norm": 0.007892929948866367, |
|
"learning_rate": 5.968497232865049e-06, |
|
"loss": 0.0018, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 23.00360153256705, |
|
"grad_norm": 0.005539083853363991, |
|
"learning_rate": 5.959982971477225e-06, |
|
"loss": 0.0012, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 23.004367816091953, |
|
"grad_norm": 0.02287321165204048, |
|
"learning_rate": 5.9514687100894e-06, |
|
"loss": 0.0005, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 23.005134099616857, |
|
"grad_norm": 0.02531210333108902, |
|
"learning_rate": 5.942954448701576e-06, |
|
"loss": 0.7612, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 23.005900383141764, |
|
"grad_norm": 0.03882148116827011, |
|
"learning_rate": 5.934440187313752e-06, |
|
"loss": 0.0088, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 23.006666666666668, |
|
"grad_norm": 0.14414261281490326, |
|
"learning_rate": 5.925925925925926e-06, |
|
"loss": 0.0017, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 23.00743295019157, |
|
"grad_norm": 14.874513626098633, |
|
"learning_rate": 5.917411664538102e-06, |
|
"loss": 0.4744, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 23.008199233716475, |
|
"grad_norm": 0.0021286820992827415, |
|
"learning_rate": 5.9088974031502775e-06, |
|
"loss": 0.0979, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 23.00896551724138, |
|
"grad_norm": 16.475772857666016, |
|
"learning_rate": 5.9003831417624525e-06, |
|
"loss": 1.0579, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 23.009731800766282, |
|
"grad_norm": 0.03063524328172207, |
|
"learning_rate": 5.891868880374628e-06, |
|
"loss": 1.2737, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 23.01049808429119, |
|
"grad_norm": 0.36535847187042236, |
|
"learning_rate": 5.883354618986803e-06, |
|
"loss": 0.9265, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 23.011264367816093, |
|
"grad_norm": 0.23040282726287842, |
|
"learning_rate": 5.874840357598979e-06, |
|
"loss": 0.2574, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 23.012030651340996, |
|
"grad_norm": 0.0024795138742774725, |
|
"learning_rate": 5.866326096211154e-06, |
|
"loss": 0.5133, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 23.0127969348659, |
|
"grad_norm": 0.0013183488044887781, |
|
"learning_rate": 5.857811834823329e-06, |
|
"loss": 0.0288, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 23.013563218390804, |
|
"grad_norm": 0.005073636770248413, |
|
"learning_rate": 5.849297573435505e-06, |
|
"loss": 0.4814, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 23.014329501915707, |
|
"grad_norm": 0.0024840885307639837, |
|
"learning_rate": 5.84078331204768e-06, |
|
"loss": 1.0832, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 23.015095785440614, |
|
"grad_norm": 0.2680709660053253, |
|
"learning_rate": 5.832269050659856e-06, |
|
"loss": 0.4422, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 23.015862068965518, |
|
"grad_norm": 0.04920409247279167, |
|
"learning_rate": 5.823754789272032e-06, |
|
"loss": 0.5758, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 23.01662835249042, |
|
"grad_norm": 58.36740493774414, |
|
"learning_rate": 5.815240527884206e-06, |
|
"loss": 1.2365, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 23.017394636015325, |
|
"grad_norm": 415.8174133300781, |
|
"learning_rate": 5.806726266496382e-06, |
|
"loss": 1.4749, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 23.01816091954023, |
|
"grad_norm": 223.3861846923828, |
|
"learning_rate": 5.7982120051085575e-06, |
|
"loss": 1.037, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 23.018927203065132, |
|
"grad_norm": 0.21632128953933716, |
|
"learning_rate": 5.7896977437207325e-06, |
|
"loss": 0.4796, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 23.01969348659004, |
|
"grad_norm": 0.0012822558637708426, |
|
"learning_rate": 5.781183482332908e-06, |
|
"loss": 0.64, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.215956926345825, |
|
"eval_runtime": 14.8542, |
|
"eval_samples_per_second": 2.962, |
|
"eval_steps_per_second": 2.962, |
|
"step": 6264 |
|
}, |
|
{ |
|
"epoch": 24.000459770114944, |
|
"grad_norm": 3.5624399185180664, |
|
"learning_rate": 5.772669220945083e-06, |
|
"loss": 0.6806, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 24.001226053639847, |
|
"grad_norm": 0.0020271483808755875, |
|
"learning_rate": 5.764154959557259e-06, |
|
"loss": 0.5674, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 24.00199233716475, |
|
"grad_norm": 0.007503538392484188, |
|
"learning_rate": 5.755640698169435e-06, |
|
"loss": 0.0001, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 24.002758620689654, |
|
"grad_norm": 0.002249213634058833, |
|
"learning_rate": 5.747126436781609e-06, |
|
"loss": 0.0004, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 24.003524904214558, |
|
"grad_norm": 0.0016854830319061875, |
|
"learning_rate": 5.738612175393785e-06, |
|
"loss": 0.0003, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 24.004291187739465, |
|
"grad_norm": 0.002008609240874648, |
|
"learning_rate": 5.730097914005961e-06, |
|
"loss": 0.6071, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 24.00505747126437, |
|
"grad_norm": 0.11076414585113525, |
|
"learning_rate": 5.721583652618136e-06, |
|
"loss": 0.6872, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 24.005823754789272, |
|
"grad_norm": 0.013032696209847927, |
|
"learning_rate": 5.713069391230312e-06, |
|
"loss": 0.5565, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 24.006590038314176, |
|
"grad_norm": 0.01334427110850811, |
|
"learning_rate": 5.704555129842486e-06, |
|
"loss": 0.0005, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 24.00735632183908, |
|
"grad_norm": 0.0072980476543307304, |
|
"learning_rate": 5.696040868454662e-06, |
|
"loss": 1.0164, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 24.008122605363983, |
|
"grad_norm": 0.03648148849606514, |
|
"learning_rate": 5.6875266070668375e-06, |
|
"loss": 0.5201, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 24.00888888888889, |
|
"grad_norm": 0.010938500054180622, |
|
"learning_rate": 5.6790123456790125e-06, |
|
"loss": 1.2859, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 24.009655172413794, |
|
"grad_norm": 11.166935920715332, |
|
"learning_rate": 5.670498084291188e-06, |
|
"loss": 0.5835, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 24.010421455938697, |
|
"grad_norm": 0.15971843898296356, |
|
"learning_rate": 5.661983822903364e-06, |
|
"loss": 0.3884, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 24.0111877394636, |
|
"grad_norm": 0.010505812242627144, |
|
"learning_rate": 5.653469561515539e-06, |
|
"loss": 0.2408, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 24.011954022988505, |
|
"grad_norm": 0.049537863582372665, |
|
"learning_rate": 5.644955300127715e-06, |
|
"loss": 0.623, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 24.01272030651341, |
|
"grad_norm": 0.002080702455714345, |
|
"learning_rate": 5.636441038739889e-06, |
|
"loss": 0.5115, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 24.013486590038315, |
|
"grad_norm": 0.026667874306440353, |
|
"learning_rate": 5.627926777352065e-06, |
|
"loss": 0.4791, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 24.01425287356322, |
|
"grad_norm": 0.3080986738204956, |
|
"learning_rate": 5.619412515964241e-06, |
|
"loss": 0.9133, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 24.015019157088123, |
|
"grad_norm": 0.10097556561231613, |
|
"learning_rate": 5.610898254576416e-06, |
|
"loss": 0.0031, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 24.015785440613026, |
|
"grad_norm": 0.001120518776588142, |
|
"learning_rate": 5.602383993188592e-06, |
|
"loss": 0.5052, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 24.01655172413793, |
|
"grad_norm": 0.0024539188016206026, |
|
"learning_rate": 5.593869731800766e-06, |
|
"loss": 0.5064, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 24.017318007662837, |
|
"grad_norm": 0.002675322350114584, |
|
"learning_rate": 5.585355470412942e-06, |
|
"loss": 0.7407, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 24.01808429118774, |
|
"grad_norm": 103.82475280761719, |
|
"learning_rate": 5.5768412090251175e-06, |
|
"loss": 1.0015, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.018850574712644, |
|
"grad_norm": 0.016353830695152283, |
|
"learning_rate": 5.5683269476372925e-06, |
|
"loss": 0.4096, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 24.019616858237548, |
|
"grad_norm": 0.001887011807411909, |
|
"learning_rate": 5.559812686249468e-06, |
|
"loss": 1.0358, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.667351484298706, |
|
"eval_runtime": 14.8264, |
|
"eval_samples_per_second": 2.968, |
|
"eval_steps_per_second": 2.968, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 25.000383141762452, |
|
"grad_norm": 0.06761617958545685, |
|
"learning_rate": 5.551298424861644e-06, |
|
"loss": 0.6978, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 25.001149425287355, |
|
"grad_norm": 0.015423398464918137, |
|
"learning_rate": 5.542784163473819e-06, |
|
"loss": 0.6113, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 25.00191570881226, |
|
"grad_norm": 0.12241526693105698, |
|
"learning_rate": 5.534269902085995e-06, |
|
"loss": 0.5787, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 25.002681992337166, |
|
"grad_norm": 0.0016671658959239721, |
|
"learning_rate": 5.525755640698169e-06, |
|
"loss": 0.0018, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 25.00344827586207, |
|
"grad_norm": 3.1989338397979736, |
|
"learning_rate": 5.517241379310345e-06, |
|
"loss": 0.5898, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 25.004214559386973, |
|
"grad_norm": 121.3276596069336, |
|
"learning_rate": 5.508727117922521e-06, |
|
"loss": 0.8383, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 25.004980842911877, |
|
"grad_norm": 0.0029487779829651117, |
|
"learning_rate": 5.500212856534696e-06, |
|
"loss": 1.2531, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 25.00574712643678, |
|
"grad_norm": 136.24179077148438, |
|
"learning_rate": 5.491698595146872e-06, |
|
"loss": 1.171, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 25.006513409961684, |
|
"grad_norm": 263.5198974609375, |
|
"learning_rate": 5.4831843337590475e-06, |
|
"loss": 0.2691, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 25.00727969348659, |
|
"grad_norm": 0.0011305210646241903, |
|
"learning_rate": 5.474670072371222e-06, |
|
"loss": 0.0043, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 25.008045977011495, |
|
"grad_norm": 0.010297092609107494, |
|
"learning_rate": 5.4661558109833975e-06, |
|
"loss": 0.7063, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 25.0088122605364, |
|
"grad_norm": 0.0019072829745709896, |
|
"learning_rate": 5.4576415495955725e-06, |
|
"loss": 1.2498, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 25.009578544061302, |
|
"grad_norm": 0.0029067446012049913, |
|
"learning_rate": 5.449127288207748e-06, |
|
"loss": 0.2556, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 25.010344827586206, |
|
"grad_norm": 0.14154160022735596, |
|
"learning_rate": 5.440613026819924e-06, |
|
"loss": 0.6456, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 25.011111111111113, |
|
"grad_norm": 0.008590281940996647, |
|
"learning_rate": 5.432098765432099e-06, |
|
"loss": 0.2726, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 25.011877394636016, |
|
"grad_norm": 0.00853242538869381, |
|
"learning_rate": 5.423584504044275e-06, |
|
"loss": 0.6377, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 25.01264367816092, |
|
"grad_norm": 177.44859313964844, |
|
"learning_rate": 5.415070242656451e-06, |
|
"loss": 0.2749, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 25.013409961685824, |
|
"grad_norm": 1.3147214651107788, |
|
"learning_rate": 5.406555981268625e-06, |
|
"loss": 1.1138, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 25.014176245210727, |
|
"grad_norm": 0.0013268928742036223, |
|
"learning_rate": 5.398041719880801e-06, |
|
"loss": 0.6919, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 25.01494252873563, |
|
"grad_norm": 0.02117413468658924, |
|
"learning_rate": 5.389527458492976e-06, |
|
"loss": 0.0059, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 25.015708812260538, |
|
"grad_norm": 261.3617858886719, |
|
"learning_rate": 5.381013197105152e-06, |
|
"loss": 0.933, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 25.01647509578544, |
|
"grad_norm": 0.003189639886841178, |
|
"learning_rate": 5.3724989357173275e-06, |
|
"loss": 0.9261, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 25.017241379310345, |
|
"grad_norm": 0.017773039638996124, |
|
"learning_rate": 5.3639846743295025e-06, |
|
"loss": 0.6448, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 25.01800766283525, |
|
"grad_norm": 0.0032892501913011074, |
|
"learning_rate": 5.3554704129416775e-06, |
|
"loss": 0.7993, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 25.018773946360152, |
|
"grad_norm": 0.7441956996917725, |
|
"learning_rate": 5.3469561515538525e-06, |
|
"loss": 0.0042, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 25.019540229885056, |
|
"grad_norm": 0.008563314564526081, |
|
"learning_rate": 5.338441890166028e-06, |
|
"loss": 0.0003, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 3.2236807346343994, |
|
"eval_runtime": 13.8715, |
|
"eval_samples_per_second": 3.172, |
|
"eval_steps_per_second": 3.172, |
|
"step": 6786 |
|
}, |
|
{ |
|
"epoch": 26.00030651340996, |
|
"grad_norm": 0.001214326941408217, |
|
"learning_rate": 5.329927628778204e-06, |
|
"loss": 0.0007, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 26.001072796934867, |
|
"grad_norm": 0.362863689661026, |
|
"learning_rate": 5.321413367390379e-06, |
|
"loss": 0.5333, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 26.00183908045977, |
|
"grad_norm": 0.23882326483726501, |
|
"learning_rate": 5.312899106002555e-06, |
|
"loss": 0.0006, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 26.002605363984674, |
|
"grad_norm": 0.012792966328561306, |
|
"learning_rate": 5.304384844614731e-06, |
|
"loss": 0.1579, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 26.003371647509578, |
|
"grad_norm": 0.13500720262527466, |
|
"learning_rate": 5.295870583226905e-06, |
|
"loss": 0.2455, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 26.00413793103448, |
|
"grad_norm": 0.0011103450087830424, |
|
"learning_rate": 5.287356321839081e-06, |
|
"loss": 1.3782, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 26.00490421455939, |
|
"grad_norm": 0.037791136652231216, |
|
"learning_rate": 5.278842060451256e-06, |
|
"loss": 0.0006, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 26.005670498084292, |
|
"grad_norm": 303.8115234375, |
|
"learning_rate": 5.270327799063432e-06, |
|
"loss": 0.4692, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 26.006436781609196, |
|
"grad_norm": 0.0019213318591937423, |
|
"learning_rate": 5.2618135376756075e-06, |
|
"loss": 0.0023, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 26.0072030651341, |
|
"grad_norm": 0.0007200756808742881, |
|
"learning_rate": 5.2532992762877825e-06, |
|
"loss": 0.1082, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 26.007969348659003, |
|
"grad_norm": 0.01857464760541916, |
|
"learning_rate": 5.244785014899958e-06, |
|
"loss": 0.2011, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 26.008735632183907, |
|
"grad_norm": 0.00919260922819376, |
|
"learning_rate": 5.236270753512134e-06, |
|
"loss": 0.0357, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 26.009501915708814, |
|
"grad_norm": 0.030439550057053566, |
|
"learning_rate": 5.227756492124308e-06, |
|
"loss": 0.6429, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 26.010268199233717, |
|
"grad_norm": 0.00577349029481411, |
|
"learning_rate": 5.219242230736484e-06, |
|
"loss": 0.058, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 26.01103448275862, |
|
"grad_norm": 0.03052118420600891, |
|
"learning_rate": 5.210727969348659e-06, |
|
"loss": 0.0002, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 26.011800766283525, |
|
"grad_norm": 0.005563646554946899, |
|
"learning_rate": 5.202213707960835e-06, |
|
"loss": 0.1445, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 26.01256704980843, |
|
"grad_norm": 2.8447425365448, |
|
"learning_rate": 5.193699446573011e-06, |
|
"loss": 0.0008, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 26.013333333333332, |
|
"grad_norm": 0.04641766846179962, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.1407, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 26.01409961685824, |
|
"grad_norm": 0.001094731967896223, |
|
"learning_rate": 5.176670923797361e-06, |
|
"loss": 0.4741, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 26.014865900383143, |
|
"grad_norm": 0.002571564167737961, |
|
"learning_rate": 5.168156662409536e-06, |
|
"loss": 1.4676, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 26.015632183908046, |
|
"grad_norm": 0.11337020993232727, |
|
"learning_rate": 5.159642401021712e-06, |
|
"loss": 1.0659, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 26.01639846743295, |
|
"grad_norm": 244.4503173828125, |
|
"learning_rate": 5.1511281396338875e-06, |
|
"loss": 1.526, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.017164750957853, |
|
"grad_norm": 4.522707462310791, |
|
"learning_rate": 5.1426138782460625e-06, |
|
"loss": 0.9379, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 26.017931034482757, |
|
"grad_norm": 0.011589415371418, |
|
"learning_rate": 5.134099616858238e-06, |
|
"loss": 1.3082, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 26.018697318007664, |
|
"grad_norm": 94.52461242675781, |
|
"learning_rate": 5.125585355470414e-06, |
|
"loss": 0.7085, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 26.019463601532568, |
|
"grad_norm": 6.482922554016113, |
|
"learning_rate": 5.117071094082588e-06, |
|
"loss": 1.449, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 2.9910295009613037, |
|
"eval_runtime": 14.8574, |
|
"eval_samples_per_second": 2.961, |
|
"eval_steps_per_second": 2.961, |
|
"step": 7047 |
|
}, |
|
{ |
|
"epoch": 27.000229885057472, |
|
"grad_norm": 0.005694302264600992, |
|
"learning_rate": 5.108556832694764e-06, |
|
"loss": 0.0011, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 27.000996168582375, |
|
"grad_norm": 0.0007168428273871541, |
|
"learning_rate": 5.100042571306939e-06, |
|
"loss": 0.1468, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 27.00176245210728, |
|
"grad_norm": 0.003922580275684595, |
|
"learning_rate": 5.091528309919115e-06, |
|
"loss": 0.3568, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 27.002528735632183, |
|
"grad_norm": 0.003760001854971051, |
|
"learning_rate": 5.083014048531291e-06, |
|
"loss": 0.0013, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 27.00329501915709, |
|
"grad_norm": 0.002225434873253107, |
|
"learning_rate": 5.074499787143465e-06, |
|
"loss": 0.3058, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 27.004061302681993, |
|
"grad_norm": 0.016028400510549545, |
|
"learning_rate": 5.065985525755641e-06, |
|
"loss": 1.427, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 27.004827586206897, |
|
"grad_norm": 0.00416993536055088, |
|
"learning_rate": 5.057471264367817e-06, |
|
"loss": 0.6042, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 27.0055938697318, |
|
"grad_norm": 0.007803148590028286, |
|
"learning_rate": 5.048957002979992e-06, |
|
"loss": 0.4856, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 27.006360153256704, |
|
"grad_norm": 0.020952487364411354, |
|
"learning_rate": 5.0404427415921675e-06, |
|
"loss": 0.9861, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 27.007126436781608, |
|
"grad_norm": 0.001544815837405622, |
|
"learning_rate": 5.0319284802043425e-06, |
|
"loss": 1.2238, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 27.007892720306515, |
|
"grad_norm": 314.83135986328125, |
|
"learning_rate": 5.023414218816518e-06, |
|
"loss": 0.7323, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 27.00865900383142, |
|
"grad_norm": 0.30988994240760803, |
|
"learning_rate": 5.014899957428694e-06, |
|
"loss": 0.0193, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 27.009425287356322, |
|
"grad_norm": 0.0029081483371555805, |
|
"learning_rate": 5.006385696040868e-06, |
|
"loss": 0.0006, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 27.010191570881226, |
|
"grad_norm": 0.06407798826694489, |
|
"learning_rate": 4.997871434653044e-06, |
|
"loss": 1.4836, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 27.01095785440613, |
|
"grad_norm": 0.00890645757317543, |
|
"learning_rate": 4.98935717326522e-06, |
|
"loss": 0.0016, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 27.011724137931033, |
|
"grad_norm": 634.438720703125, |
|
"learning_rate": 4.980842911877395e-06, |
|
"loss": 1.3999, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 27.01249042145594, |
|
"grad_norm": 0.17482855916023254, |
|
"learning_rate": 4.972328650489571e-06, |
|
"loss": 0.3151, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 27.013256704980844, |
|
"grad_norm": 0.9534220099449158, |
|
"learning_rate": 4.963814389101746e-06, |
|
"loss": 0.0114, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 27.014022988505747, |
|
"grad_norm": 0.011324645951390266, |
|
"learning_rate": 4.955300127713921e-06, |
|
"loss": 0.6767, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 27.01478927203065, |
|
"grad_norm": 1291.836181640625, |
|
"learning_rate": 4.946785866326097e-06, |
|
"loss": 1.525, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 27.015555555555554, |
|
"grad_norm": 223.3017578125, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 0.0539, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 27.016321839080458, |
|
"grad_norm": 0.011704668402671814, |
|
"learning_rate": 4.9297573435504475e-06, |
|
"loss": 1.3802, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 27.017088122605365, |
|
"grad_norm": 0.08513358235359192, |
|
"learning_rate": 4.9212430821626225e-06, |
|
"loss": 0.0338, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 27.01785440613027, |
|
"grad_norm": 0.004341603256762028, |
|
"learning_rate": 4.912728820774798e-06, |
|
"loss": 0.1484, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 27.018620689655172, |
|
"grad_norm": 0.0013116906629875302, |
|
"learning_rate": 4.904214559386973e-06, |
|
"loss": 1.0582, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 27.019386973180076, |
|
"grad_norm": 130.7567901611328, |
|
"learning_rate": 4.895700297999149e-06, |
|
"loss": 0.6425, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 2.966822862625122, |
|
"eval_runtime": 14.4038, |
|
"eval_samples_per_second": 3.055, |
|
"eval_steps_per_second": 3.055, |
|
"step": 7308 |
|
}, |
|
{ |
|
"epoch": 28.00015325670498, |
|
"grad_norm": 0.032952889800071716, |
|
"learning_rate": 4.887186036611324e-06, |
|
"loss": 0.3801, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 28.000919540229884, |
|
"grad_norm": 0.005185917019844055, |
|
"learning_rate": 4.8786717752235e-06, |
|
"loss": 0.0608, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 28.00168582375479, |
|
"grad_norm": 0.0012036847183480859, |
|
"learning_rate": 4.870157513835675e-06, |
|
"loss": 0.0047, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 28.002452107279694, |
|
"grad_norm": 0.01903628185391426, |
|
"learning_rate": 4.861643252447851e-06, |
|
"loss": 0.0062, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 28.003218390804598, |
|
"grad_norm": 0.004351526964455843, |
|
"learning_rate": 4.853128991060026e-06, |
|
"loss": 0.4134, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 28.0039846743295, |
|
"grad_norm": 0.027524391189217567, |
|
"learning_rate": 4.844614729672202e-06, |
|
"loss": 0.58, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 28.004750957854405, |
|
"grad_norm": 659.3217163085938, |
|
"learning_rate": 4.836100468284377e-06, |
|
"loss": 0.4621, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 28.00551724137931, |
|
"grad_norm": 28.157276153564453, |
|
"learning_rate": 4.8275862068965525e-06, |
|
"loss": 0.4983, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 28.006283524904216, |
|
"grad_norm": 0.05856695398688316, |
|
"learning_rate": 4.8190719455087275e-06, |
|
"loss": 0.7367, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 28.00704980842912, |
|
"grad_norm": 8.497085571289062, |
|
"learning_rate": 4.8105576841209025e-06, |
|
"loss": 0.3623, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 28.007816091954023, |
|
"grad_norm": 0.009733939543366432, |
|
"learning_rate": 4.802043422733078e-06, |
|
"loss": 0.0104, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 28.008582375478927, |
|
"grad_norm": 0.2841988503932953, |
|
"learning_rate": 4.793529161345254e-06, |
|
"loss": 0.0029, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 28.00934865900383, |
|
"grad_norm": 12.586609840393066, |
|
"learning_rate": 4.785014899957429e-06, |
|
"loss": 0.1155, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 28.010114942528734, |
|
"grad_norm": 0.0015590345719829202, |
|
"learning_rate": 4.776500638569604e-06, |
|
"loss": 0.9701, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 28.01088122605364, |
|
"grad_norm": 2.8632147312164307, |
|
"learning_rate": 4.76798637718178e-06, |
|
"loss": 0.3896, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 28.011647509578545, |
|
"grad_norm": 0.004916210658848286, |
|
"learning_rate": 4.759472115793956e-06, |
|
"loss": 0.6174, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 28.01241379310345, |
|
"grad_norm": 0.001710303477011621, |
|
"learning_rate": 4.750957854406131e-06, |
|
"loss": 0.6807, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 28.013180076628352, |
|
"grad_norm": 0.0009364011930301785, |
|
"learning_rate": 4.742443593018306e-06, |
|
"loss": 0.6268, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 28.013946360153255, |
|
"grad_norm": 0.10325934737920761, |
|
"learning_rate": 4.733929331630482e-06, |
|
"loss": 0.9304, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 28.014712643678163, |
|
"grad_norm": 0.03776532784104347, |
|
"learning_rate": 4.7254150702426575e-06, |
|
"loss": 1.3781, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.015478927203066, |
|
"grad_norm": 0.10980332642793655, |
|
"learning_rate": 4.7169008088548325e-06, |
|
"loss": 0.0035, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 28.01624521072797, |
|
"grad_norm": 0.017906807363033295, |
|
"learning_rate": 4.7083865474670075e-06, |
|
"loss": 1.0038, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 28.017011494252873, |
|
"grad_norm": 0.014551023952662945, |
|
"learning_rate": 4.6998722860791825e-06, |
|
"loss": 0.4496, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 28.017777777777777, |
|
"grad_norm": 0.044592831283807755, |
|
"learning_rate": 4.691358024691358e-06, |
|
"loss": 0.0392, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 28.01854406130268, |
|
"grad_norm": 0.004972092807292938, |
|
"learning_rate": 4.682843763303534e-06, |
|
"loss": 0.0175, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 28.019310344827588, |
|
"grad_norm": 14.057037353515625, |
|
"learning_rate": 4.674329501915709e-06, |
|
"loss": 0.0038, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 3.2074098587036133, |
|
"eval_runtime": 13.8424, |
|
"eval_samples_per_second": 3.179, |
|
"eval_steps_per_second": 3.179, |
|
"step": 7569 |
|
}, |
|
{ |
|
"epoch": 29.000076628352492, |
|
"grad_norm": 6.356884956359863, |
|
"learning_rate": 4.665815240527884e-06, |
|
"loss": 1.0329, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 29.000842911877395, |
|
"grad_norm": 0.059181034564971924, |
|
"learning_rate": 4.65730097914006e-06, |
|
"loss": 0.8988, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 29.0016091954023, |
|
"grad_norm": 0.08678928017616272, |
|
"learning_rate": 4.648786717752236e-06, |
|
"loss": 0.571, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 29.002375478927203, |
|
"grad_norm": 0.001696910709142685, |
|
"learning_rate": 4.640272456364411e-06, |
|
"loss": 0.0003, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 29.003141762452106, |
|
"grad_norm": 0.08558080345392227, |
|
"learning_rate": 4.631758194976586e-06, |
|
"loss": 0.8065, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 29.00390804597701, |
|
"grad_norm": 0.004383187275379896, |
|
"learning_rate": 4.623243933588762e-06, |
|
"loss": 0.0043, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 29.004674329501917, |
|
"grad_norm": 0.008191928267478943, |
|
"learning_rate": 4.6147296722009375e-06, |
|
"loss": 0.5831, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 29.00544061302682, |
|
"grad_norm": 0.004930571187287569, |
|
"learning_rate": 4.6062154108131125e-06, |
|
"loss": 0.0007, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 29.006206896551724, |
|
"grad_norm": 0.008940590545535088, |
|
"learning_rate": 4.5977011494252875e-06, |
|
"loss": 0.0214, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 29.006973180076628, |
|
"grad_norm": 0.0016375266714021564, |
|
"learning_rate": 4.589186888037463e-06, |
|
"loss": 0.0013, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 29.00773946360153, |
|
"grad_norm": 0.0013604441192001104, |
|
"learning_rate": 4.580672626649638e-06, |
|
"loss": 0.3626, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 29.00850574712644, |
|
"grad_norm": 0.0702805370092392, |
|
"learning_rate": 4.572158365261814e-06, |
|
"loss": 0.672, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 29.009272030651342, |
|
"grad_norm": 0.0019100743811577559, |
|
"learning_rate": 4.563644103873989e-06, |
|
"loss": 0.0006, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 29.010038314176246, |
|
"grad_norm": 0.6471951007843018, |
|
"learning_rate": 4.555129842486164e-06, |
|
"loss": 0.765, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 29.01080459770115, |
|
"grad_norm": 7.065598487854004, |
|
"learning_rate": 4.54661558109834e-06, |
|
"loss": 0.0129, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 29.011570881226053, |
|
"grad_norm": 0.007074694149196148, |
|
"learning_rate": 4.538101319710516e-06, |
|
"loss": 0.0816, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 29.012337164750956, |
|
"grad_norm": 776.0087890625, |
|
"learning_rate": 4.529587058322691e-06, |
|
"loss": 0.8074, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 29.013103448275864, |
|
"grad_norm": 0.0009385868324898183, |
|
"learning_rate": 4.521072796934866e-06, |
|
"loss": 0.0932, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 29.013869731800767, |
|
"grad_norm": 0.01541681308299303, |
|
"learning_rate": 4.512558535547042e-06, |
|
"loss": 0.0542, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 29.01463601532567, |
|
"grad_norm": 0.030001681298017502, |
|
"learning_rate": 4.5040442741592175e-06, |
|
"loss": 0.1591, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 29.015402298850574, |
|
"grad_norm": 0.009904453530907631, |
|
"learning_rate": 4.4955300127713925e-06, |
|
"loss": 1.188, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 29.016168582375478, |
|
"grad_norm": 0.02423286624252796, |
|
"learning_rate": 4.4870157513835675e-06, |
|
"loss": 0.6747, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 29.01693486590038, |
|
"grad_norm": 0.0015969909727573395, |
|
"learning_rate": 4.478501489995743e-06, |
|
"loss": 0.0634, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 29.01770114942529, |
|
"grad_norm": 0.029565755277872086, |
|
"learning_rate": 4.469987228607919e-06, |
|
"loss": 0.0109, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 29.018467432950192, |
|
"grad_norm": 184.49755859375, |
|
"learning_rate": 4.461472967220094e-06, |
|
"loss": 0.0263, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 29.019233716475096, |
|
"grad_norm": 0.044473979622125626, |
|
"learning_rate": 4.452958705832269e-06, |
|
"loss": 0.8261, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"grad_norm": 0.00299667427316308, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.4198, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 3.455441474914551, |
|
"eval_runtime": 14.8201, |
|
"eval_samples_per_second": 2.969, |
|
"eval_steps_per_second": 2.969, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 30.000766283524904, |
|
"grad_norm": 0.017114119604229927, |
|
"learning_rate": 4.43593018305662e-06, |
|
"loss": 1.3292, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 30.001532567049807, |
|
"grad_norm": 0.0015261216321960092, |
|
"learning_rate": 4.427415921668796e-06, |
|
"loss": 0.0003, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 30.002298850574714, |
|
"grad_norm": 0.6907082796096802, |
|
"learning_rate": 4.418901660280971e-06, |
|
"loss": 0.8247, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 30.003065134099618, |
|
"grad_norm": 18.267147064208984, |
|
"learning_rate": 4.410387398893146e-06, |
|
"loss": 0.5214, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 30.00383141762452, |
|
"grad_norm": 8.251144409179688, |
|
"learning_rate": 4.401873137505322e-06, |
|
"loss": 0.3459, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 30.004597701149425, |
|
"grad_norm": 319.2434387207031, |
|
"learning_rate": 4.3933588761174975e-06, |
|
"loss": 0.1268, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 30.00536398467433, |
|
"grad_norm": 0.0032091925386339426, |
|
"learning_rate": 4.3848446147296725e-06, |
|
"loss": 0.0005, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 30.006130268199232, |
|
"grad_norm": 0.13797052204608917, |
|
"learning_rate": 4.3763303533418475e-06, |
|
"loss": 1.256, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 30.00689655172414, |
|
"grad_norm": 0.4690380394458771, |
|
"learning_rate": 4.367816091954023e-06, |
|
"loss": 0.0003, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 30.007662835249043, |
|
"grad_norm": 374.2715759277344, |
|
"learning_rate": 4.359301830566199e-06, |
|
"loss": 0.3673, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 30.008429118773947, |
|
"grad_norm": 0.02178417146205902, |
|
"learning_rate": 4.350787569178374e-06, |
|
"loss": 0.7324, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 30.00919540229885, |
|
"grad_norm": 0.010709022171795368, |
|
"learning_rate": 4.342273307790549e-06, |
|
"loss": 0.0003, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 30.009961685823754, |
|
"grad_norm": 3.94123911857605, |
|
"learning_rate": 4.333759046402725e-06, |
|
"loss": 0.425, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 30.010727969348657, |
|
"grad_norm": 0.012052084319293499, |
|
"learning_rate": 4.325244785014901e-06, |
|
"loss": 0.4947, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 30.011494252873565, |
|
"grad_norm": 0.008172095753252506, |
|
"learning_rate": 4.316730523627076e-06, |
|
"loss": 0.1542, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 30.01226053639847, |
|
"grad_norm": 0.03389030322432518, |
|
"learning_rate": 4.308216262239251e-06, |
|
"loss": 0.4665, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 30.013026819923372, |
|
"grad_norm": 0.0029715001583099365, |
|
"learning_rate": 4.299702000851427e-06, |
|
"loss": 0.0001, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.013793103448275, |
|
"grad_norm": 0.0016406503273174167, |
|
"learning_rate": 4.291187739463602e-06, |
|
"loss": 0.0099, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 30.01455938697318, |
|
"grad_norm": 0.002739381045103073, |
|
"learning_rate": 4.2826734780757775e-06, |
|
"loss": 0.0136, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 30.015325670498083, |
|
"grad_norm": 0.0010001113405451179, |
|
"learning_rate": 4.2741592166879525e-06, |
|
"loss": 0.0002, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 30.01609195402299, |
|
"grad_norm": 65.55481719970703, |
|
"learning_rate": 4.2656449553001275e-06, |
|
"loss": 0.0088, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 30.016858237547893, |
|
"grad_norm": 0.006348451133817434, |
|
"learning_rate": 4.257130693912303e-06, |
|
"loss": 0.7034, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 30.017624521072797, |
|
"grad_norm": 0.020931551232933998, |
|
"learning_rate": 4.248616432524479e-06, |
|
"loss": 0.0025, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 30.0183908045977, |
|
"grad_norm": 0.012286363169550896, |
|
"learning_rate": 4.240102171136654e-06, |
|
"loss": 0.6274, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 30.019157088122604, |
|
"grad_norm": 0.012333545833826065, |
|
"learning_rate": 4.231587909748829e-06, |
|
"loss": 0.0005, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 30.01992337164751, |
|
"grad_norm": 0.09246806055307388, |
|
"learning_rate": 4.223073648361005e-06, |
|
"loss": 0.0002, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 30.02, |
|
"eval_accuracy": 0.6590909090909091, |
|
"eval_loss": 2.2222249507904053, |
|
"eval_runtime": 13.8433, |
|
"eval_samples_per_second": 3.178, |
|
"eval_steps_per_second": 3.178, |
|
"step": 8091 |
|
}, |
|
{ |
|
"epoch": 31.000689655172415, |
|
"grad_norm": 0.011383074335753918, |
|
"learning_rate": 4.214559386973181e-06, |
|
"loss": 0.7547, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 31.00145593869732, |
|
"grad_norm": 0.044549647718667984, |
|
"learning_rate": 4.206045125585356e-06, |
|
"loss": 0.0258, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 31.002222222222223, |
|
"grad_norm": 0.000657400581985712, |
|
"learning_rate": 4.197530864197531e-06, |
|
"loss": 0.5743, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 31.002988505747126, |
|
"grad_norm": 0.06604219973087311, |
|
"learning_rate": 4.189016602809707e-06, |
|
"loss": 0.0009, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 31.00375478927203, |
|
"grad_norm": 0.006331180222332478, |
|
"learning_rate": 4.180502341421882e-06, |
|
"loss": 0.0003, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 31.004521072796933, |
|
"grad_norm": 0.10193214565515518, |
|
"learning_rate": 4.1719880800340575e-06, |
|
"loss": 0.9214, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 31.00528735632184, |
|
"grad_norm": 0.0010729215573519468, |
|
"learning_rate": 4.1634738186462325e-06, |
|
"loss": 0.0004, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 31.006053639846744, |
|
"grad_norm": 0.0012805596925318241, |
|
"learning_rate": 4.154959557258408e-06, |
|
"loss": 0.0005, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 31.006819923371648, |
|
"grad_norm": 0.0266268327832222, |
|
"learning_rate": 4.146445295870583e-06, |
|
"loss": 0.0002, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 31.00758620689655, |
|
"grad_norm": 0.002659642370417714, |
|
"learning_rate": 4.137931034482759e-06, |
|
"loss": 0.6745, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 31.008352490421455, |
|
"grad_norm": 0.0009053266257978976, |
|
"learning_rate": 4.129416773094934e-06, |
|
"loss": 0.0029, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 31.00911877394636, |
|
"grad_norm": 0.0031698057428002357, |
|
"learning_rate": 4.12090251170711e-06, |
|
"loss": 0.6336, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 31.009885057471266, |
|
"grad_norm": 0.005453723482787609, |
|
"learning_rate": 4.112388250319285e-06, |
|
"loss": 1.281, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 31.01065134099617, |
|
"grad_norm": 0.000987889594398439, |
|
"learning_rate": 4.103873988931461e-06, |
|
"loss": 0.6552, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 31.011417624521073, |
|
"grad_norm": 0.014338169246912003, |
|
"learning_rate": 4.095359727543636e-06, |
|
"loss": 1.6499, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 31.012183908045976, |
|
"grad_norm": 0.002959508216008544, |
|
"learning_rate": 4.086845466155812e-06, |
|
"loss": 0.2794, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 31.01295019157088, |
|
"grad_norm": 0.0011834179749712348, |
|
"learning_rate": 4.078331204767987e-06, |
|
"loss": 0.6988, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 31.013716475095784, |
|
"grad_norm": 0.0499357208609581, |
|
"learning_rate": 4.0698169433801625e-06, |
|
"loss": 0.0137, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 31.01448275862069, |
|
"grad_norm": 0.0008901017135940492, |
|
"learning_rate": 4.0613026819923375e-06, |
|
"loss": 0.001, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 31.015249042145594, |
|
"grad_norm": 0.012158668600022793, |
|
"learning_rate": 4.052788420604513e-06, |
|
"loss": 0.1343, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 31.016015325670498, |
|
"grad_norm": 0.01974300481379032, |
|
"learning_rate": 4.044274159216688e-06, |
|
"loss": 0.613, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 31.0167816091954, |
|
"grad_norm": 55.92547607421875, |
|
"learning_rate": 4.035759897828863e-06, |
|
"loss": 0.7095, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 31.017547892720305, |
|
"grad_norm": 0.009739676490426064, |
|
"learning_rate": 4.027245636441039e-06, |
|
"loss": 0.2903, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 31.018314176245212, |
|
"grad_norm": 407.288818359375, |
|
"learning_rate": 4.018731375053214e-06, |
|
"loss": 0.3688, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 31.019080459770116, |
|
"grad_norm": 0.0012542600743472576, |
|
"learning_rate": 4.01021711366539e-06, |
|
"loss": 0.0013, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 31.01984674329502, |
|
"grad_norm": 0.12644848227500916, |
|
"learning_rate": 4.001702852277565e-06, |
|
"loss": 0.0087, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 2.709275722503662, |
|
"eval_runtime": 14.8609, |
|
"eval_samples_per_second": 2.961, |
|
"eval_steps_per_second": 2.961, |
|
"step": 8352 |
|
}, |
|
{ |
|
"epoch": 32.00061302681992, |
|
"grad_norm": 907.0887451171875, |
|
"learning_rate": 3.993188590889741e-06, |
|
"loss": 0.7293, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 32.00137931034483, |
|
"grad_norm": 0.11170382797718048, |
|
"learning_rate": 3.984674329501916e-06, |
|
"loss": 0.0035, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 32.002145593869734, |
|
"grad_norm": 0.029629867523908615, |
|
"learning_rate": 3.976160068114092e-06, |
|
"loss": 0.7695, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 32.00291187739464, |
|
"grad_norm": 0.031115718185901642, |
|
"learning_rate": 3.967645806726267e-06, |
|
"loss": 1.3666, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 32.00367816091954, |
|
"grad_norm": 0.0034033299889415503, |
|
"learning_rate": 3.9591315453384425e-06, |
|
"loss": 0.0002, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 32.004444444444445, |
|
"grad_norm": 0.033746037632226944, |
|
"learning_rate": 3.9506172839506175e-06, |
|
"loss": 1.7655, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 32.00521072796935, |
|
"grad_norm": 0.0077743083238601685, |
|
"learning_rate": 3.942103022562793e-06, |
|
"loss": 0.6231, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 32.00597701149425, |
|
"grad_norm": 0.08134739100933075, |
|
"learning_rate": 3.933588761174968e-06, |
|
"loss": 0.0007, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 32.006743295019156, |
|
"grad_norm": 0.12249594926834106, |
|
"learning_rate": 3.925074499787143e-06, |
|
"loss": 0.0142, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 32.00750957854406, |
|
"grad_norm": 26.010934829711914, |
|
"learning_rate": 3.916560238399319e-06, |
|
"loss": 1.1471, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 32.00827586206896, |
|
"grad_norm": 0.0005790782743133605, |
|
"learning_rate": 3.908045977011495e-06, |
|
"loss": 0.0005, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 32.00904214559387, |
|
"grad_norm": 0.0010411114199087024, |
|
"learning_rate": 3.89953171562367e-06, |
|
"loss": 0.3289, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 32.00980842911878, |
|
"grad_norm": 0.005206049885600805, |
|
"learning_rate": 3.891017454235845e-06, |
|
"loss": 0.0005, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 32.01057471264368, |
|
"grad_norm": 0.08218900859355927, |
|
"learning_rate": 3.882503192848021e-06, |
|
"loss": 0.0003, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 32.011340996168585, |
|
"grad_norm": 0.28884491324424744, |
|
"learning_rate": 3.873988931460197e-06, |
|
"loss": 0.0014, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.01210727969349, |
|
"grad_norm": 0.22715908288955688, |
|
"learning_rate": 3.865474670072372e-06, |
|
"loss": 0.3284, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 32.01287356321839, |
|
"grad_norm": 0.0010180029785260558, |
|
"learning_rate": 3.856960408684547e-06, |
|
"loss": 0.0581, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 32.013639846743295, |
|
"grad_norm": 0.004401144105941057, |
|
"learning_rate": 3.8484461472967225e-06, |
|
"loss": 1.105, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 32.0144061302682, |
|
"grad_norm": 0.001169188879430294, |
|
"learning_rate": 3.839931885908898e-06, |
|
"loss": 0.424, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 32.0151724137931, |
|
"grad_norm": 0.1417514830827713, |
|
"learning_rate": 3.831417624521073e-06, |
|
"loss": 0.1667, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 32.015938697318006, |
|
"grad_norm": 0.0014354916056618094, |
|
"learning_rate": 3.822903363133248e-06, |
|
"loss": 0.0009, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 32.01670498084291, |
|
"grad_norm": 0.037321824580430984, |
|
"learning_rate": 3.8143891017454237e-06, |
|
"loss": 0.6105, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 32.01747126436781, |
|
"grad_norm": 0.0009216446196660399, |
|
"learning_rate": 3.805874840357599e-06, |
|
"loss": 0.0083, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 32.01823754789272, |
|
"grad_norm": 0.04349252209067345, |
|
"learning_rate": 3.797360578969775e-06, |
|
"loss": 0.232, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 32.01900383141763, |
|
"grad_norm": 0.0021178857423365116, |
|
"learning_rate": 3.78884631758195e-06, |
|
"loss": 0.1235, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 32.01977011494253, |
|
"grad_norm": 416.1335754394531, |
|
"learning_rate": 3.7803320561941254e-06, |
|
"loss": 0.2823, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 32.02, |
|
"eval_accuracy": 0.5909090909090909, |
|
"eval_loss": 2.8994028568267822, |
|
"eval_runtime": 15.0524, |
|
"eval_samples_per_second": 2.923, |
|
"eval_steps_per_second": 2.923, |
|
"step": 8613 |
|
}, |
|
{ |
|
"epoch": 33.00053639846743, |
|
"grad_norm": 0.0009653361630626023, |
|
"learning_rate": 3.7718177948063004e-06, |
|
"loss": 0.0964, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 33.001302681992335, |
|
"grad_norm": 302.6064147949219, |
|
"learning_rate": 3.7633035334184762e-06, |
|
"loss": 0.5987, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 33.00206896551724, |
|
"grad_norm": 0.0064612445421516895, |
|
"learning_rate": 3.7547892720306517e-06, |
|
"loss": 0.0008, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 33.00283524904214, |
|
"grad_norm": 0.004648339003324509, |
|
"learning_rate": 3.746275010642827e-06, |
|
"loss": 0.5332, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 33.00360153256705, |
|
"grad_norm": 0.0010578223736956716, |
|
"learning_rate": 3.737760749255002e-06, |
|
"loss": 0.2155, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 33.00436781609196, |
|
"grad_norm": 0.0008020292734727263, |
|
"learning_rate": 3.729246487867178e-06, |
|
"loss": 0.0013, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 33.00513409961686, |
|
"grad_norm": 81.35603332519531, |
|
"learning_rate": 3.7207322264793533e-06, |
|
"loss": 0.9455, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 33.005900383141764, |
|
"grad_norm": 27.300270080566406, |
|
"learning_rate": 3.7122179650915287e-06, |
|
"loss": 0.0075, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 33.00666666666667, |
|
"grad_norm": 0.00942432601004839, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0025, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 33.00743295019157, |
|
"grad_norm": 11.0194730758667, |
|
"learning_rate": 3.6951894423158796e-06, |
|
"loss": 0.528, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 33.008199233716475, |
|
"grad_norm": 0.0005607404164038599, |
|
"learning_rate": 3.686675180928055e-06, |
|
"loss": 0.6476, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 33.00896551724138, |
|
"grad_norm": 0.010962043888866901, |
|
"learning_rate": 3.67816091954023e-06, |
|
"loss": 0.0002, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 33.00973180076628, |
|
"grad_norm": 0.0008337310864590108, |
|
"learning_rate": 3.6696466581524054e-06, |
|
"loss": 0.0007, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 33.010498084291186, |
|
"grad_norm": 0.001953879836946726, |
|
"learning_rate": 3.6611323967645812e-06, |
|
"loss": 0.0006, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 33.01126436781609, |
|
"grad_norm": 0.0012560301693156362, |
|
"learning_rate": 3.6526181353767567e-06, |
|
"loss": 0.8756, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 33.01203065134099, |
|
"grad_norm": 0.01747513934969902, |
|
"learning_rate": 3.6441038739889317e-06, |
|
"loss": 0.0016, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 33.012796934865904, |
|
"grad_norm": 0.0007940547657199204, |
|
"learning_rate": 3.635589612601107e-06, |
|
"loss": 0.0186, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 33.01356321839081, |
|
"grad_norm": 0.0010784586193040013, |
|
"learning_rate": 3.627075351213283e-06, |
|
"loss": 0.0004, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 33.01432950191571, |
|
"grad_norm": 0.001068047247827053, |
|
"learning_rate": 3.618561089825458e-06, |
|
"loss": 0.0262, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 33.015095785440614, |
|
"grad_norm": 0.0023100238759070635, |
|
"learning_rate": 3.6100468284376333e-06, |
|
"loss": 0.0006, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 33.01586206896552, |
|
"grad_norm": 0.0016799827571958303, |
|
"learning_rate": 3.6015325670498087e-06, |
|
"loss": 0.0138, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 33.01662835249042, |
|
"grad_norm": 0.01968042552471161, |
|
"learning_rate": 3.5930183056619837e-06, |
|
"loss": 0.7263, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 33.017394636015325, |
|
"grad_norm": 0.14012639224529266, |
|
"learning_rate": 3.5845040442741596e-06, |
|
"loss": 0.0032, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 33.01816091954023, |
|
"grad_norm": 0.0006995020085014403, |
|
"learning_rate": 3.575989782886335e-06, |
|
"loss": 0.0002, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 33.01892720306513, |
|
"grad_norm": 0.021627303212881088, |
|
"learning_rate": 3.56747552149851e-06, |
|
"loss": 0.4568, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 33.019693486590036, |
|
"grad_norm": 0.010454010218381882, |
|
"learning_rate": 3.5589612601106854e-06, |
|
"loss": 0.0009, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_accuracy": 0.5909090909090909, |
|
"eval_loss": 2.926067352294922, |
|
"eval_runtime": 14.8184, |
|
"eval_samples_per_second": 2.969, |
|
"eval_steps_per_second": 2.969, |
|
"step": 8874 |
|
}, |
|
{ |
|
"epoch": 34.000459770114944, |
|
"grad_norm": 0.010927160270512104, |
|
"learning_rate": 3.5504469987228612e-06, |
|
"loss": 0.0002, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 34.00122605363985, |
|
"grad_norm": 0.001091673388145864, |
|
"learning_rate": 3.5419327373350367e-06, |
|
"loss": 0.0003, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 34.00199233716475, |
|
"grad_norm": 0.03687361627817154, |
|
"learning_rate": 3.5334184759472117e-06, |
|
"loss": 0.7919, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 34.002758620689654, |
|
"grad_norm": 0.010946787893772125, |
|
"learning_rate": 3.524904214559387e-06, |
|
"loss": 0.134, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 34.00352490421456, |
|
"grad_norm": 0.002212031278759241, |
|
"learning_rate": 3.516389953171563e-06, |
|
"loss": 0.0009, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 34.00429118773946, |
|
"grad_norm": 0.005900637712329626, |
|
"learning_rate": 3.507875691783738e-06, |
|
"loss": 0.3398, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 34.005057471264365, |
|
"grad_norm": 0.005494630429893732, |
|
"learning_rate": 3.4993614303959133e-06, |
|
"loss": 0.7209, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 34.00582375478927, |
|
"grad_norm": 0.022435931488871574, |
|
"learning_rate": 3.4908471690080887e-06, |
|
"loss": 0.0001, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 34.00659003831418, |
|
"grad_norm": 0.002095250179991126, |
|
"learning_rate": 3.4823329076202646e-06, |
|
"loss": 0.5444, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 34.00735632183908, |
|
"grad_norm": 39.439178466796875, |
|
"learning_rate": 3.4738186462324396e-06, |
|
"loss": 1.3017, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 34.00812260536399, |
|
"grad_norm": 0.0027492980007082224, |
|
"learning_rate": 3.465304384844615e-06, |
|
"loss": 0.0001, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 34.00888888888889, |
|
"grad_norm": 0.0027229604311287403, |
|
"learning_rate": 3.4567901234567904e-06, |
|
"loss": 0.8991, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 34.009655172413794, |
|
"grad_norm": 0.009229590184986591, |
|
"learning_rate": 3.448275862068966e-06, |
|
"loss": 0.0009, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 34.0104214559387, |
|
"grad_norm": 0.0006710727466270328, |
|
"learning_rate": 3.4397616006811412e-06, |
|
"loss": 0.0011, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 34.0111877394636, |
|
"grad_norm": 0.007417478132992983, |
|
"learning_rate": 3.4312473392933167e-06, |
|
"loss": 0.0933, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 34.011954022988505, |
|
"grad_norm": 0.004090723115950823, |
|
"learning_rate": 3.4227330779054917e-06, |
|
"loss": 0.0008, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 34.01272030651341, |
|
"grad_norm": 196.004638671875, |
|
"learning_rate": 3.4142188165176675e-06, |
|
"loss": 0.2323, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 34.01348659003831, |
|
"grad_norm": 0.013877572491765022, |
|
"learning_rate": 3.405704555129843e-06, |
|
"loss": 0.0615, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 34.014252873563215, |
|
"grad_norm": 0.02762063406407833, |
|
"learning_rate": 3.3971902937420183e-06, |
|
"loss": 0.5957, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 34.01501915708812, |
|
"grad_norm": 0.0006761613767594099, |
|
"learning_rate": 3.3886760323541933e-06, |
|
"loss": 0.7228, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 34.01578544061303, |
|
"grad_norm": 562.878173828125, |
|
"learning_rate": 3.380161770966369e-06, |
|
"loss": 0.4888, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 34.01655172413793, |
|
"grad_norm": 0.0005810054135508835, |
|
"learning_rate": 3.3716475095785446e-06, |
|
"loss": 1.1959, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 34.01731800766284, |
|
"grad_norm": 0.07913737744092941, |
|
"learning_rate": 3.3631332481907196e-06, |
|
"loss": 0.3934, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 34.01808429118774, |
|
"grad_norm": 0.0006305016577243805, |
|
"learning_rate": 3.354618986802895e-06, |
|
"loss": 0.0003, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 34.018850574712644, |
|
"grad_norm": 0.0009238147758878767, |
|
"learning_rate": 3.3461047254150704e-06, |
|
"loss": 0.0002, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 34.01961685823755, |
|
"grad_norm": 0.0018586228834465146, |
|
"learning_rate": 3.3375904640272463e-06, |
|
"loss": 0.0064, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"eval_accuracy": 0.6818181818181818, |
|
"eval_loss": 2.40366530418396, |
|
"eval_runtime": 14.8033, |
|
"eval_samples_per_second": 2.972, |
|
"eval_steps_per_second": 2.972, |
|
"step": 9135 |
|
}, |
|
{ |
|
"epoch": 35.000383141762455, |
|
"grad_norm": 0.01388569362461567, |
|
"learning_rate": 3.3290762026394212e-06, |
|
"loss": 0.0007, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 35.00114942528736, |
|
"grad_norm": 0.108930304646492, |
|
"learning_rate": 3.3205619412515967e-06, |
|
"loss": 0.0008, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 35.00191570881226, |
|
"grad_norm": 0.00047900251229293644, |
|
"learning_rate": 3.3120476798637717e-06, |
|
"loss": 0.0002, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 35.002681992337166, |
|
"grad_norm": 0.0006218141061253846, |
|
"learning_rate": 3.3035334184759475e-06, |
|
"loss": 0.0003, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 35.00344827586207, |
|
"grad_norm": 0.002620828803628683, |
|
"learning_rate": 3.295019157088123e-06, |
|
"loss": 0.0001, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 35.00421455938697, |
|
"grad_norm": 0.0014196954434737563, |
|
"learning_rate": 3.2865048957002983e-06, |
|
"loss": 0.433, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 35.00498084291188, |
|
"grad_norm": 0.001321490271948278, |
|
"learning_rate": 3.2779906343124733e-06, |
|
"loss": 0.0004, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 35.00574712643678, |
|
"grad_norm": 0.001246229512616992, |
|
"learning_rate": 3.269476372924649e-06, |
|
"loss": 0.1408, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 35.006513409961684, |
|
"grad_norm": 0.04006018117070198, |
|
"learning_rate": 3.2609621115368246e-06, |
|
"loss": 0.0002, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 35.00727969348659, |
|
"grad_norm": 0.009740673005580902, |
|
"learning_rate": 3.2524478501489996e-06, |
|
"loss": 0.288, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 35.00804597701149, |
|
"grad_norm": 0.05743330344557762, |
|
"learning_rate": 3.243933588761175e-06, |
|
"loss": 0.6095, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 35.008812260536395, |
|
"grad_norm": 0.0009492093813605607, |
|
"learning_rate": 3.235419327373351e-06, |
|
"loss": 0.0006, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 35.009578544061306, |
|
"grad_norm": 134.01454162597656, |
|
"learning_rate": 3.2269050659855262e-06, |
|
"loss": 1.4099, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 35.01034482758621, |
|
"grad_norm": 0.0008109943591989577, |
|
"learning_rate": 3.2183908045977012e-06, |
|
"loss": 0.054, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 35.01111111111111, |
|
"grad_norm": 0.009571348316967487, |
|
"learning_rate": 3.2098765432098767e-06, |
|
"loss": 0.1329, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 35.01187739463602, |
|
"grad_norm": 0.006338722538203001, |
|
"learning_rate": 3.2013622818220525e-06, |
|
"loss": 0.3751, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 35.01264367816092, |
|
"grad_norm": 0.0013711826177313924, |
|
"learning_rate": 3.1928480204342275e-06, |
|
"loss": 0.4517, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 35.013409961685824, |
|
"grad_norm": 0.00042183598270639777, |
|
"learning_rate": 3.184333759046403e-06, |
|
"loss": 0.0005, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 35.01417624521073, |
|
"grad_norm": 0.0013564632972702384, |
|
"learning_rate": 3.1758194976585783e-06, |
|
"loss": 0.2219, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 35.01494252873563, |
|
"grad_norm": 2264.461669921875, |
|
"learning_rate": 3.167305236270754e-06, |
|
"loss": 0.3003, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 35.015708812260534, |
|
"grad_norm": 0.020562073215842247, |
|
"learning_rate": 3.158790974882929e-06, |
|
"loss": 0.0002, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 35.01647509578544, |
|
"grad_norm": 0.12569333612918854, |
|
"learning_rate": 3.1502767134951046e-06, |
|
"loss": 0.0003, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 35.01724137931034, |
|
"grad_norm": 1.020317554473877, |
|
"learning_rate": 3.14176245210728e-06, |
|
"loss": 0.7144, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 35.01800766283525, |
|
"grad_norm": 0.018321597948670387, |
|
"learning_rate": 3.133248190719455e-06, |
|
"loss": 0.0009, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 35.018773946360156, |
|
"grad_norm": 0.0006187857361510396, |
|
"learning_rate": 3.124733929331631e-06, |
|
"loss": 0.7581, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 35.01954022988506, |
|
"grad_norm": 0.0035878210328519344, |
|
"learning_rate": 3.1162196679438062e-06, |
|
"loss": 0.7506, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 2.8436357975006104, |
|
"eval_runtime": 14.538, |
|
"eval_samples_per_second": 3.027, |
|
"eval_steps_per_second": 3.027, |
|
"step": 9396 |
|
}, |
|
{ |
|
"epoch": 36.00030651340996, |
|
"grad_norm": 0.01877000741660595, |
|
"learning_rate": 3.1077054065559812e-06, |
|
"loss": 0.0032, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 36.001072796934864, |
|
"grad_norm": 0.0004208860918879509, |
|
"learning_rate": 3.0991911451681567e-06, |
|
"loss": 0.0, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 36.00183908045977, |
|
"grad_norm": 0.000519466761033982, |
|
"learning_rate": 3.0906768837803325e-06, |
|
"loss": 0.0003, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 36.00260536398467, |
|
"grad_norm": 0.01593741402029991, |
|
"learning_rate": 3.082162622392508e-06, |
|
"loss": 0.0006, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 36.00337164750958, |
|
"grad_norm": 0.0032676176633685827, |
|
"learning_rate": 3.073648361004683e-06, |
|
"loss": 0.0001, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 36.004137931034485, |
|
"grad_norm": 0.006009204778820276, |
|
"learning_rate": 3.0651340996168583e-06, |
|
"loss": 0.4098, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 36.00490421455939, |
|
"grad_norm": 0.002699817530810833, |
|
"learning_rate": 3.056619838229034e-06, |
|
"loss": 0.001, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 36.00567049808429, |
|
"grad_norm": 0.0004704708408098668, |
|
"learning_rate": 3.048105576841209e-06, |
|
"loss": 0.0692, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 36.006436781609196, |
|
"grad_norm": 0.0005243064952082932, |
|
"learning_rate": 3.0395913154533846e-06, |
|
"loss": 0.0009, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 36.0072030651341, |
|
"grad_norm": 0.0703183263540268, |
|
"learning_rate": 3.03107705406556e-06, |
|
"loss": 0.0007, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 36.007969348659, |
|
"grad_norm": 164.19235229492188, |
|
"learning_rate": 3.022562792677736e-06, |
|
"loss": 0.0289, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 36.00873563218391, |
|
"grad_norm": 0.0004233966174069792, |
|
"learning_rate": 3.014048531289911e-06, |
|
"loss": 0.0, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 36.00950191570881, |
|
"grad_norm": 0.0005149325006641448, |
|
"learning_rate": 3.0055342699020862e-06, |
|
"loss": 0.8486, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 36.010268199233714, |
|
"grad_norm": 0.0011353259906172752, |
|
"learning_rate": 2.9970200085142612e-06, |
|
"loss": 0.047, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 36.01103448275862, |
|
"grad_norm": 0.0030816805083304644, |
|
"learning_rate": 2.988505747126437e-06, |
|
"loss": 0.0001, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 36.01180076628353, |
|
"grad_norm": 0.0009453135426156223, |
|
"learning_rate": 2.9799914857386125e-06, |
|
"loss": 1.6255, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 36.01256704980843, |
|
"grad_norm": 0.0009770867181941867, |
|
"learning_rate": 2.971477224350788e-06, |
|
"loss": 0.0002, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 36.013333333333335, |
|
"grad_norm": 0.0007406068034470081, |
|
"learning_rate": 2.962962962962963e-06, |
|
"loss": 0.0004, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 36.01409961685824, |
|
"grad_norm": 0.0027752225287258625, |
|
"learning_rate": 2.9544487015751387e-06, |
|
"loss": 0.0006, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 36.01486590038314, |
|
"grad_norm": 0.0011974005028605461, |
|
"learning_rate": 2.945934440187314e-06, |
|
"loss": 0.001, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 36.015632183908046, |
|
"grad_norm": 0.0006551674450747669, |
|
"learning_rate": 2.9374201787994896e-06, |
|
"loss": 0.0004, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 36.01639846743295, |
|
"grad_norm": 0.004319522529840469, |
|
"learning_rate": 2.9289059174116646e-06, |
|
"loss": 0.0106, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 36.01716475095785, |
|
"grad_norm": 0.0014230869710445404, |
|
"learning_rate": 2.92039165602384e-06, |
|
"loss": 0.0003, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 36.01793103448276, |
|
"grad_norm": 0.08640120923519135, |
|
"learning_rate": 2.911877394636016e-06, |
|
"loss": 1.3173, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 36.01869731800766, |
|
"grad_norm": 0.13412199914455414, |
|
"learning_rate": 2.903363133248191e-06, |
|
"loss": 0.0011, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 36.019463601532564, |
|
"grad_norm": 0.026053734123706818, |
|
"learning_rate": 2.8948488718603662e-06, |
|
"loss": 0.6686, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 3.11977219581604, |
|
"eval_runtime": 14.8333, |
|
"eval_samples_per_second": 2.966, |
|
"eval_steps_per_second": 2.966, |
|
"step": 9657 |
|
}, |
|
{ |
|
"epoch": 37.00022988505747, |
|
"grad_norm": 0.023461876437067986, |
|
"learning_rate": 2.8863346104725417e-06, |
|
"loss": 0.4307, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 37.000996168582375, |
|
"grad_norm": 0.0005552047514356673, |
|
"learning_rate": 2.8778203490847175e-06, |
|
"loss": 0.5624, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 37.00176245210728, |
|
"grad_norm": 0.00047876007738523185, |
|
"learning_rate": 2.8693060876968925e-06, |
|
"loss": 0.1775, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 37.00252873563218, |
|
"grad_norm": 0.010084550827741623, |
|
"learning_rate": 2.860791826309068e-06, |
|
"loss": 0.001, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 37.003295019157086, |
|
"grad_norm": 3.9025561809539795, |
|
"learning_rate": 2.852277564921243e-06, |
|
"loss": 0.001, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 37.00406130268199, |
|
"grad_norm": 0.02816319279372692, |
|
"learning_rate": 2.8437633035334187e-06, |
|
"loss": 0.0001, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 37.00482758620689, |
|
"grad_norm": 0.00047449435805901885, |
|
"learning_rate": 2.835249042145594e-06, |
|
"loss": 0.0002, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 37.005593869731804, |
|
"grad_norm": 0.00288780708797276, |
|
"learning_rate": 2.8267347807577696e-06, |
|
"loss": 0.0063, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 37.00636015325671, |
|
"grad_norm": 0.006289138458669186, |
|
"learning_rate": 2.8182205193699446e-06, |
|
"loss": 0.0002, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 37.00712643678161, |
|
"grad_norm": 0.014909166842699051, |
|
"learning_rate": 2.8097062579821204e-06, |
|
"loss": 0.0001, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 37.007892720306515, |
|
"grad_norm": 0.5095618963241577, |
|
"learning_rate": 2.801191996594296e-06, |
|
"loss": 0.6749, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 37.00865900383142, |
|
"grad_norm": 0.003209776012226939, |
|
"learning_rate": 2.792677735206471e-06, |
|
"loss": 0.7102, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 37.00942528735632, |
|
"grad_norm": 0.0005697616725228727, |
|
"learning_rate": 2.7841634738186462e-06, |
|
"loss": 0.0001, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 37.010191570881226, |
|
"grad_norm": 0.11594537645578384, |
|
"learning_rate": 2.775649212430822e-06, |
|
"loss": 0.007, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 37.01095785440613, |
|
"grad_norm": 0.0036572450771927834, |
|
"learning_rate": 2.7671349510429975e-06, |
|
"loss": 0.0001, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 37.01172413793103, |
|
"grad_norm": 26.05764389038086, |
|
"learning_rate": 2.7586206896551725e-06, |
|
"loss": 0.0021, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 37.01249042145594, |
|
"grad_norm": 0.0010314063401892781, |
|
"learning_rate": 2.750106428267348e-06, |
|
"loss": 0.0002, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 37.01325670498084, |
|
"grad_norm": 0.0029162864666432142, |
|
"learning_rate": 2.7415921668795238e-06, |
|
"loss": 0.0001, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 37.014022988505744, |
|
"grad_norm": 0.002195713808760047, |
|
"learning_rate": 2.7330779054916987e-06, |
|
"loss": 0.1451, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 37.014789272030654, |
|
"grad_norm": 0.0009393435902893543, |
|
"learning_rate": 2.724563644103874e-06, |
|
"loss": 0.0001, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 37.01555555555556, |
|
"grad_norm": 0.014376325532793999, |
|
"learning_rate": 2.7160493827160496e-06, |
|
"loss": 0.0001, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 37.01632183908046, |
|
"grad_norm": 0.0007700129644945264, |
|
"learning_rate": 2.7075351213282254e-06, |
|
"loss": 0.0003, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 37.017088122605365, |
|
"grad_norm": 0.0006324906717054546, |
|
"learning_rate": 2.6990208599404004e-06, |
|
"loss": 0.0003, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 37.01785440613027, |
|
"grad_norm": 0.027618492022156715, |
|
"learning_rate": 2.690506598552576e-06, |
|
"loss": 0.0001, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 37.01862068965517, |
|
"grad_norm": 0.0297914557158947, |
|
"learning_rate": 2.6819923371647512e-06, |
|
"loss": 0.8707, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 37.019386973180076, |
|
"grad_norm": 0.0038593225181102753, |
|
"learning_rate": 2.6734780757769262e-06, |
|
"loss": 0.0089, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"eval_accuracy": 0.6590909090909091, |
|
"eval_loss": 2.2352752685546875, |
|
"eval_runtime": 13.822, |
|
"eval_samples_per_second": 3.183, |
|
"eval_steps_per_second": 3.183, |
|
"step": 9918 |
|
}, |
|
{ |
|
"epoch": 38.000153256704984, |
|
"grad_norm": 0.005155360326170921, |
|
"learning_rate": 2.664963814389102e-06, |
|
"loss": 0.0002, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 38.00091954022989, |
|
"grad_norm": 0.008852887898683548, |
|
"learning_rate": 2.6564495530012775e-06, |
|
"loss": 0.7052, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 38.00168582375479, |
|
"grad_norm": 0.06385504454374313, |
|
"learning_rate": 2.6479352916134525e-06, |
|
"loss": 1.4949, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 38.002452107279694, |
|
"grad_norm": 0.007622735574841499, |
|
"learning_rate": 2.639421030225628e-06, |
|
"loss": 0.0011, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 38.0032183908046, |
|
"grad_norm": 235.2069091796875, |
|
"learning_rate": 2.6309067688378037e-06, |
|
"loss": 0.7174, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 38.0039846743295, |
|
"grad_norm": 0.017189040780067444, |
|
"learning_rate": 2.622392507449979e-06, |
|
"loss": 0.0002, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 38.004750957854405, |
|
"grad_norm": 0.0074161868542432785, |
|
"learning_rate": 2.613878246062154e-06, |
|
"loss": 0.0003, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 38.00551724137931, |
|
"grad_norm": 0.0003068734076805413, |
|
"learning_rate": 2.6053639846743296e-06, |
|
"loss": 0.0002, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 38.00628352490421, |
|
"grad_norm": 0.0025288888718932867, |
|
"learning_rate": 2.5968497232865054e-06, |
|
"loss": 0.001, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 38.007049808429116, |
|
"grad_norm": 0.007924763485789299, |
|
"learning_rate": 2.5883354618986804e-06, |
|
"loss": 0.6423, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 38.00781609195402, |
|
"grad_norm": 0.6689996719360352, |
|
"learning_rate": 2.579821200510856e-06, |
|
"loss": 0.0012, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 38.00858237547893, |
|
"grad_norm": 0.00040578548214398324, |
|
"learning_rate": 2.5713069391230312e-06, |
|
"loss": 0.7107, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 38.009348659003834, |
|
"grad_norm": 0.0012143438216298819, |
|
"learning_rate": 2.562792677735207e-06, |
|
"loss": 0.0005, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 38.01011494252874, |
|
"grad_norm": 0.0007456608582288027, |
|
"learning_rate": 2.554278416347382e-06, |
|
"loss": 0.0001, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 38.01088122605364, |
|
"grad_norm": 0.0021871451754122972, |
|
"learning_rate": 2.5457641549595575e-06, |
|
"loss": 0.0003, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 38.011647509578545, |
|
"grad_norm": 0.008283521980047226, |
|
"learning_rate": 2.5372498935717325e-06, |
|
"loss": 0.0001, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 38.01241379310345, |
|
"grad_norm": 0.027728542685508728, |
|
"learning_rate": 2.5287356321839083e-06, |
|
"loss": 0.0001, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 38.01318007662835, |
|
"grad_norm": 0.034006163477897644, |
|
"learning_rate": 2.5202213707960837e-06, |
|
"loss": 1.8379, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 38.013946360153255, |
|
"grad_norm": 0.000460324517916888, |
|
"learning_rate": 2.511707109408259e-06, |
|
"loss": 0.0003, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 38.01471264367816, |
|
"grad_norm": 0.07976827770471573, |
|
"learning_rate": 2.503192848020434e-06, |
|
"loss": 0.541, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 38.01547892720306, |
|
"grad_norm": 0.0009657596237957478, |
|
"learning_rate": 2.49467858663261e-06, |
|
"loss": 0.0003, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 38.016245210727966, |
|
"grad_norm": 0.024574127048254013, |
|
"learning_rate": 2.4861643252447854e-06, |
|
"loss": 0.0002, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 38.01701149425288, |
|
"grad_norm": 1094.8077392578125, |
|
"learning_rate": 2.4776500638569604e-06, |
|
"loss": 0.2895, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 38.01777777777778, |
|
"grad_norm": 0.0004095015465281904, |
|
"learning_rate": 2.469135802469136e-06, |
|
"loss": 0.0001, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 38.018544061302684, |
|
"grad_norm": 0.0006595210870727897, |
|
"learning_rate": 2.4606215410813112e-06, |
|
"loss": 0.7965, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 38.01931034482759, |
|
"grad_norm": 7.768756866455078, |
|
"learning_rate": 2.4521072796934867e-06, |
|
"loss": 0.6753, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 38.02, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 3.0287606716156006, |
|
"eval_runtime": 14.8343, |
|
"eval_samples_per_second": 2.966, |
|
"eval_steps_per_second": 2.966, |
|
"step": 10179 |
|
}, |
|
{ |
|
"epoch": 39.00007662835249, |
|
"grad_norm": 0.0009040410513989627, |
|
"learning_rate": 2.443593018305662e-06, |
|
"loss": 0.0003, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 39.00084291187739, |
|
"grad_norm": 0.0036912732757627964, |
|
"learning_rate": 2.4350787569178375e-06, |
|
"loss": 0.0001, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 39.001609195402295, |
|
"grad_norm": 0.2800818681716919, |
|
"learning_rate": 2.426564495530013e-06, |
|
"loss": 0.0003, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 39.002375478927206, |
|
"grad_norm": 0.0013043698854744434, |
|
"learning_rate": 2.4180502341421883e-06, |
|
"loss": 0.0001, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 39.00314176245211, |
|
"grad_norm": 0.0003887113998644054, |
|
"learning_rate": 2.4095359727543637e-06, |
|
"loss": 0.4938, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 39.00390804597701, |
|
"grad_norm": 0.06424582749605179, |
|
"learning_rate": 2.401021711366539e-06, |
|
"loss": 0.0002, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 39.00467432950192, |
|
"grad_norm": 0.0008459803648293018, |
|
"learning_rate": 2.3925074499787146e-06, |
|
"loss": 0.0001, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 39.00544061302682, |
|
"grad_norm": 1107.911376953125, |
|
"learning_rate": 2.38399318859089e-06, |
|
"loss": 0.3945, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 39.006206896551724, |
|
"grad_norm": 0.04933220148086548, |
|
"learning_rate": 2.3754789272030654e-06, |
|
"loss": 0.0005, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 39.00697318007663, |
|
"grad_norm": 0.8757120966911316, |
|
"learning_rate": 2.366964665815241e-06, |
|
"loss": 0.9736, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 39.00773946360153, |
|
"grad_norm": 0.1119384616613388, |
|
"learning_rate": 2.3584504044274162e-06, |
|
"loss": 0.5556, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 39.008505747126435, |
|
"grad_norm": 0.032142117619514465, |
|
"learning_rate": 2.3499361430395912e-06, |
|
"loss": 0.0001, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 39.00927203065134, |
|
"grad_norm": 0.011488020420074463, |
|
"learning_rate": 2.341421881651767e-06, |
|
"loss": 0.0763, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 39.01003831417624, |
|
"grad_norm": 8.727746963500977, |
|
"learning_rate": 2.332907620263942e-06, |
|
"loss": 0.001, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 39.01080459770115, |
|
"grad_norm": 0.001796330907382071, |
|
"learning_rate": 2.324393358876118e-06, |
|
"loss": 0.2818, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 39.011570881226056, |
|
"grad_norm": 0.0008893462363630533, |
|
"learning_rate": 2.315879097488293e-06, |
|
"loss": 1.393, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 39.01233716475096, |
|
"grad_norm": 0.00048230242100544274, |
|
"learning_rate": 2.3073648361004688e-06, |
|
"loss": 0.7123, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 39.013103448275864, |
|
"grad_norm": 0.002747567603364587, |
|
"learning_rate": 2.2988505747126437e-06, |
|
"loss": 0.2786, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 39.01386973180077, |
|
"grad_norm": 0.05197069048881531, |
|
"learning_rate": 2.290336313324819e-06, |
|
"loss": 0.0002, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 39.01463601532567, |
|
"grad_norm": 0.004026700276881456, |
|
"learning_rate": 2.2818220519369946e-06, |
|
"loss": 0.0028, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 39.015402298850574, |
|
"grad_norm": 0.0009501639869995415, |
|
"learning_rate": 2.27330779054917e-06, |
|
"loss": 0.0001, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 39.01616858237548, |
|
"grad_norm": 0.0010376839200034738, |
|
"learning_rate": 2.2647935291613454e-06, |
|
"loss": 0.0003, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 39.01693486590038, |
|
"grad_norm": 0.38573241233825684, |
|
"learning_rate": 2.256279267773521e-06, |
|
"loss": 0.0002, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 39.017701149425285, |
|
"grad_norm": 0.0010412068804726005, |
|
"learning_rate": 2.2477650063856962e-06, |
|
"loss": 0.6516, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 39.01846743295019, |
|
"grad_norm": 0.0006415279931388795, |
|
"learning_rate": 2.2392507449978717e-06, |
|
"loss": 0.0004, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 39.01923371647509, |
|
"grad_norm": 0.0005771737196482718, |
|
"learning_rate": 2.230736483610047e-06, |
|
"loss": 0.185, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"grad_norm": 0.000372162350686267, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0003, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"eval_accuracy": 0.6590909090909091, |
|
"eval_loss": 2.4052083492279053, |
|
"eval_runtime": 13.8264, |
|
"eval_samples_per_second": 3.182, |
|
"eval_steps_per_second": 3.182, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 40.000766283524904, |
|
"grad_norm": 0.00799707230180502, |
|
"learning_rate": 2.213707960834398e-06, |
|
"loss": 0.0002, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 40.00153256704981, |
|
"grad_norm": 0.0005616048001684248, |
|
"learning_rate": 2.205193699446573e-06, |
|
"loss": 0.0004, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 40.00229885057471, |
|
"grad_norm": 0.0007245043525472283, |
|
"learning_rate": 2.1966794380587487e-06, |
|
"loss": 1.0726, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 40.003065134099614, |
|
"grad_norm": 0.0015607347013428807, |
|
"learning_rate": 2.1881651766709237e-06, |
|
"loss": 0.0002, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 40.00383141762452, |
|
"grad_norm": 0.0011406068224459887, |
|
"learning_rate": 2.1796509152830996e-06, |
|
"loss": 0.0001, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 40.00459770114943, |
|
"grad_norm": 0.009210841730237007, |
|
"learning_rate": 2.1711366538952746e-06, |
|
"loss": 0.0007, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 40.00536398467433, |
|
"grad_norm": 0.011264988221228123, |
|
"learning_rate": 2.1626223925074504e-06, |
|
"loss": 0.6471, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 40.006130268199236, |
|
"grad_norm": 0.006953706499189138, |
|
"learning_rate": 2.1541081311196254e-06, |
|
"loss": 0.0002, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 40.00689655172414, |
|
"grad_norm": 0.021138427779078484, |
|
"learning_rate": 2.145593869731801e-06, |
|
"loss": 0.0002, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 40.00766283524904, |
|
"grad_norm": 0.3621610105037689, |
|
"learning_rate": 2.1370796083439762e-06, |
|
"loss": 0.0006, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 40.00842911877395, |
|
"grad_norm": 0.008800797164440155, |
|
"learning_rate": 2.1285653469561517e-06, |
|
"loss": 0.0025, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 40.00919540229885, |
|
"grad_norm": 0.0007413265993818641, |
|
"learning_rate": 2.120051085568327e-06, |
|
"loss": 0.0003, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 40.009961685823754, |
|
"grad_norm": 0.0016993769677355886, |
|
"learning_rate": 2.1115368241805025e-06, |
|
"loss": 0.0002, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 40.01072796934866, |
|
"grad_norm": 0.00036552909296005964, |
|
"learning_rate": 2.103022562792678e-06, |
|
"loss": 0.0021, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 40.01149425287356, |
|
"grad_norm": 0.0012887638295069337, |
|
"learning_rate": 2.0945083014048533e-06, |
|
"loss": 0.5497, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 40.012260536398465, |
|
"grad_norm": 0.00035430127172730863, |
|
"learning_rate": 2.0859940400170287e-06, |
|
"loss": 0.0001, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 40.01302681992337, |
|
"grad_norm": 0.08609393984079361, |
|
"learning_rate": 2.077479778629204e-06, |
|
"loss": 0.0007, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 40.01379310344828, |
|
"grad_norm": 0.0021943659521639347, |
|
"learning_rate": 2.0689655172413796e-06, |
|
"loss": 0.0001, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 40.01455938697318, |
|
"grad_norm": 0.006592462304979563, |
|
"learning_rate": 2.060451255853555e-06, |
|
"loss": 0.0002, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 40.015325670498086, |
|
"grad_norm": 0.0006286811549216509, |
|
"learning_rate": 2.0519369944657304e-06, |
|
"loss": 0.0002, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 40.01609195402299, |
|
"grad_norm": 0.0024540943559259176, |
|
"learning_rate": 2.043422733077906e-06, |
|
"loss": 0.4088, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 40.01685823754789, |
|
"grad_norm": 0.004602407105267048, |
|
"learning_rate": 2.0349084716900813e-06, |
|
"loss": 0.555, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 40.0176245210728, |
|
"grad_norm": 0.0005967610632069409, |
|
"learning_rate": 2.0263942103022567e-06, |
|
"loss": 0.0, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 40.0183908045977, |
|
"grad_norm": 0.0055085900239646435, |
|
"learning_rate": 2.0178799489144317e-06, |
|
"loss": 0.7712, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 40.019157088122604, |
|
"grad_norm": 0.00604881439357996, |
|
"learning_rate": 2.009365687526607e-06, |
|
"loss": 0.0002, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 40.01992337164751, |
|
"grad_norm": 0.001953615341335535, |
|
"learning_rate": 2.0008514261387825e-06, |
|
"loss": 0.295, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 40.02, |
|
"eval_accuracy": 0.5681818181818182, |
|
"eval_loss": 3.7578535079956055, |
|
"eval_runtime": 13.8322, |
|
"eval_samples_per_second": 3.181, |
|
"eval_steps_per_second": 3.181, |
|
"step": 10701 |
|
}, |
|
{ |
|
"epoch": 41.000689655172415, |
|
"grad_norm": 0.005290044937282801, |
|
"learning_rate": 1.992337164750958e-06, |
|
"loss": 0.0001, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 41.00145593869732, |
|
"grad_norm": 0.0006823335424996912, |
|
"learning_rate": 1.9838229033631333e-06, |
|
"loss": 0.0035, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 41.00222222222222, |
|
"grad_norm": 0.00927961990237236, |
|
"learning_rate": 1.9753086419753087e-06, |
|
"loss": 0.0002, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 41.002988505747126, |
|
"grad_norm": 0.005429677199572325, |
|
"learning_rate": 1.966794380587484e-06, |
|
"loss": 0.0022, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 41.00375478927203, |
|
"grad_norm": 0.0110420947894454, |
|
"learning_rate": 1.9582801191996596e-06, |
|
"loss": 0.0001, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 41.00452107279693, |
|
"grad_norm": 0.0007418534951284528, |
|
"learning_rate": 1.949765857811835e-06, |
|
"loss": 0.0011, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 41.00528735632184, |
|
"grad_norm": 0.006928546354174614, |
|
"learning_rate": 1.9412515964240104e-06, |
|
"loss": 0.6895, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 41.00605363984674, |
|
"grad_norm": 1.4589892625808716, |
|
"learning_rate": 1.932737335036186e-06, |
|
"loss": 0.0004, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 41.006819923371644, |
|
"grad_norm": 0.007447612006217241, |
|
"learning_rate": 1.9242230736483612e-06, |
|
"loss": 0.0004, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 41.007586206896555, |
|
"grad_norm": 0.004844400566071272, |
|
"learning_rate": 1.9157088122605367e-06, |
|
"loss": 0.0001, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 41.00835249042146, |
|
"grad_norm": 0.00038913957541808486, |
|
"learning_rate": 1.9071945508727119e-06, |
|
"loss": 0.0589, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 41.00911877394636, |
|
"grad_norm": 0.09327152371406555, |
|
"learning_rate": 1.8986802894848875e-06, |
|
"loss": 0.0005, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 41.009885057471266, |
|
"grad_norm": 0.0005804190295748413, |
|
"learning_rate": 1.8901660280970627e-06, |
|
"loss": 0.0001, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 41.01065134099617, |
|
"grad_norm": 0.00033549295039847493, |
|
"learning_rate": 1.8816517667092381e-06, |
|
"loss": 0.0, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 41.01141762452107, |
|
"grad_norm": 0.002924927743151784, |
|
"learning_rate": 1.8731375053214135e-06, |
|
"loss": 0.1175, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 41.01218390804598, |
|
"grad_norm": 0.043191149830818176, |
|
"learning_rate": 1.864623243933589e-06, |
|
"loss": 0.0001, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 41.01295019157088, |
|
"grad_norm": 0.02755994163453579, |
|
"learning_rate": 1.8561089825457644e-06, |
|
"loss": 0.0002, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 41.013716475095784, |
|
"grad_norm": 0.01996181160211563, |
|
"learning_rate": 1.8475947211579398e-06, |
|
"loss": 0.0001, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 41.01448275862069, |
|
"grad_norm": 0.0004897011676803231, |
|
"learning_rate": 1.839080459770115e-06, |
|
"loss": 0.0564, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 41.01524904214559, |
|
"grad_norm": 0.0007953636813908815, |
|
"learning_rate": 1.8305661983822906e-06, |
|
"loss": 0.0001, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 41.0160153256705, |
|
"grad_norm": 0.007019116543233395, |
|
"learning_rate": 1.8220519369944658e-06, |
|
"loss": 0.0002, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 41.016781609195405, |
|
"grad_norm": 0.042223602533340454, |
|
"learning_rate": 1.8135376756066415e-06, |
|
"loss": 0.0002, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 41.01754789272031, |
|
"grad_norm": 0.0006034268299117684, |
|
"learning_rate": 1.8050234142188167e-06, |
|
"loss": 0.4816, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 41.01831417624521, |
|
"grad_norm": 0.09731455892324448, |
|
"learning_rate": 1.7965091528309919e-06, |
|
"loss": 0.0001, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 41.019080459770116, |
|
"grad_norm": 0.05873502045869827, |
|
"learning_rate": 1.7879948914431675e-06, |
|
"loss": 0.4643, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 41.01984674329502, |
|
"grad_norm": 0.0003686108975671232, |
|
"learning_rate": 1.7794806300553427e-06, |
|
"loss": 0.0002, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"eval_accuracy": 0.5909090909090909, |
|
"eval_loss": 3.3830528259277344, |
|
"eval_runtime": 14.8411, |
|
"eval_samples_per_second": 2.965, |
|
"eval_steps_per_second": 2.965, |
|
"step": 10962 |
|
}, |
|
{ |
|
"epoch": 42.00061302681992, |
|
"grad_norm": 0.0015631462447345257, |
|
"learning_rate": 1.7709663686675183e-06, |
|
"loss": 0.1334, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 42.00137931034483, |
|
"grad_norm": 0.009292828850448132, |
|
"learning_rate": 1.7624521072796935e-06, |
|
"loss": 0.0002, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 42.002145593869734, |
|
"grad_norm": 0.0009322819532826543, |
|
"learning_rate": 1.753937845891869e-06, |
|
"loss": 0.0001, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 42.00291187739464, |
|
"grad_norm": 0.0006600533379241824, |
|
"learning_rate": 1.7454235845040444e-06, |
|
"loss": 0.0004, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 42.00367816091954, |
|
"grad_norm": 0.0004522402014117688, |
|
"learning_rate": 1.7369093231162198e-06, |
|
"loss": 0.0001, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 42.004444444444445, |
|
"grad_norm": 0.0035208070185035467, |
|
"learning_rate": 1.7283950617283952e-06, |
|
"loss": 0.02, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 42.00521072796935, |
|
"grad_norm": 0.0003420313587412238, |
|
"learning_rate": 1.7198808003405706e-06, |
|
"loss": 0.0, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 42.00597701149425, |
|
"grad_norm": 0.0042352150194346905, |
|
"learning_rate": 1.7113665389527458e-06, |
|
"loss": 0.0002, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 42.006743295019156, |
|
"grad_norm": 0.005759659223258495, |
|
"learning_rate": 1.7028522775649215e-06, |
|
"loss": 0.0001, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 42.00750957854406, |
|
"grad_norm": 0.0006591933779418468, |
|
"learning_rate": 1.6943380161770967e-06, |
|
"loss": 0.0002, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 42.00827586206896, |
|
"grad_norm": 0.0005406893906183541, |
|
"learning_rate": 1.6858237547892723e-06, |
|
"loss": 0.0001, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 42.00904214559387, |
|
"grad_norm": 0.003154515055939555, |
|
"learning_rate": 1.6773094934014475e-06, |
|
"loss": 0.8174, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 42.00980842911878, |
|
"grad_norm": 0.008111665025353432, |
|
"learning_rate": 1.6687952320136231e-06, |
|
"loss": 0.0001, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 42.01057471264368, |
|
"grad_norm": 0.0010925852693617344, |
|
"learning_rate": 1.6602809706257983e-06, |
|
"loss": 0.722, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 42.011340996168585, |
|
"grad_norm": 0.000787044526077807, |
|
"learning_rate": 1.6517667092379737e-06, |
|
"loss": 0.4857, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 42.01210727969349, |
|
"grad_norm": 0.0003349155012983829, |
|
"learning_rate": 1.6432524478501492e-06, |
|
"loss": 0.0001, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 42.01287356321839, |
|
"grad_norm": 201.89450073242188, |
|
"learning_rate": 1.6347381864623246e-06, |
|
"loss": 0.611, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 42.013639846743295, |
|
"grad_norm": 0.0009962088661268353, |
|
"learning_rate": 1.6262239250744998e-06, |
|
"loss": 0.0002, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 42.0144061302682, |
|
"grad_norm": 0.0031512256246060133, |
|
"learning_rate": 1.6177096636866754e-06, |
|
"loss": 0.0002, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 42.0151724137931, |
|
"grad_norm": 0.0015748321311548352, |
|
"learning_rate": 1.6091954022988506e-06, |
|
"loss": 0.7718, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 42.015938697318006, |
|
"grad_norm": 0.0007163421832956374, |
|
"learning_rate": 1.6006811409110262e-06, |
|
"loss": 0.005, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 42.01670498084291, |
|
"grad_norm": 0.05585160851478577, |
|
"learning_rate": 1.5921668795232015e-06, |
|
"loss": 0.0001, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 42.01747126436781, |
|
"grad_norm": 0.000383546925149858, |
|
"learning_rate": 1.583652618135377e-06, |
|
"loss": 0.0001, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 42.01823754789272, |
|
"grad_norm": 0.0015097754076123238, |
|
"learning_rate": 1.5751383567475523e-06, |
|
"loss": 0.0001, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 42.01900383141763, |
|
"grad_norm": 1035.0009765625, |
|
"learning_rate": 1.5666240953597275e-06, |
|
"loss": 0.4727, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 42.01977011494253, |
|
"grad_norm": 0.0004107044078409672, |
|
"learning_rate": 1.5581098339719031e-06, |
|
"loss": 0.5379, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 42.02, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 3.5118601322174072, |
|
"eval_runtime": 15.0397, |
|
"eval_samples_per_second": 2.926, |
|
"eval_steps_per_second": 2.926, |
|
"step": 11223 |
|
}, |
|
{ |
|
"epoch": 43.00053639846743, |
|
"grad_norm": 0.0003969682438764721, |
|
"learning_rate": 1.5495955725840783e-06, |
|
"loss": 0.0, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 43.001302681992335, |
|
"grad_norm": 0.01822790317237377, |
|
"learning_rate": 1.541081311196254e-06, |
|
"loss": 0.5557, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 43.00206896551724, |
|
"grad_norm": 0.00035006855614483356, |
|
"learning_rate": 1.5325670498084292e-06, |
|
"loss": 0.0003, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 43.00283524904214, |
|
"grad_norm": 0.00039158156141638756, |
|
"learning_rate": 1.5240527884206046e-06, |
|
"loss": 0.0003, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 43.00360153256705, |
|
"grad_norm": 0.000356638862285763, |
|
"learning_rate": 1.51553852703278e-06, |
|
"loss": 0.0, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 43.00436781609196, |
|
"grad_norm": 0.0004496400069911033, |
|
"learning_rate": 1.5070242656449554e-06, |
|
"loss": 0.0486, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 43.00513409961686, |
|
"grad_norm": 0.0011731594568118453, |
|
"learning_rate": 1.4985100042571306e-06, |
|
"loss": 0.0001, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 43.005900383141764, |
|
"grad_norm": 0.0004121040110476315, |
|
"learning_rate": 1.4899957428693062e-06, |
|
"loss": 0.0001, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 43.00666666666667, |
|
"grad_norm": 288.9941711425781, |
|
"learning_rate": 1.4814814814814815e-06, |
|
"loss": 0.8195, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 43.00743295019157, |
|
"grad_norm": 0.00861730519682169, |
|
"learning_rate": 1.472967220093657e-06, |
|
"loss": 0.0001, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 43.008199233716475, |
|
"grad_norm": 0.0015749471494928002, |
|
"learning_rate": 1.4644529587058323e-06, |
|
"loss": 0.0001, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 43.00896551724138, |
|
"grad_norm": 0.0002915385121013969, |
|
"learning_rate": 1.455938697318008e-06, |
|
"loss": 0.0, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 43.00973180076628, |
|
"grad_norm": 0.03495306149125099, |
|
"learning_rate": 1.4474244359301831e-06, |
|
"loss": 0.0104, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 43.010498084291186, |
|
"grad_norm": 0.0007649486069567502, |
|
"learning_rate": 1.4389101745423588e-06, |
|
"loss": 0.0002, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 43.01126436781609, |
|
"grad_norm": 0.006255231332033873, |
|
"learning_rate": 1.430395913154534e-06, |
|
"loss": 0.6895, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 43.01203065134099, |
|
"grad_norm": 0.9659698605537415, |
|
"learning_rate": 1.4218816517667094e-06, |
|
"loss": 0.0003, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 43.012796934865904, |
|
"grad_norm": 0.005589112639427185, |
|
"learning_rate": 1.4133673903788848e-06, |
|
"loss": 0.0003, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 43.01356321839081, |
|
"grad_norm": 0.0013983509270474315, |
|
"learning_rate": 1.4048531289910602e-06, |
|
"loss": 0.0001, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 43.01432950191571, |
|
"grad_norm": 0.03766027092933655, |
|
"learning_rate": 1.3963388676032354e-06, |
|
"loss": 0.6466, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 43.015095785440614, |
|
"grad_norm": 0.001255474635399878, |
|
"learning_rate": 1.387824606215411e-06, |
|
"loss": 0.0056, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 43.01586206896552, |
|
"grad_norm": 1.1729211807250977, |
|
"learning_rate": 1.3793103448275862e-06, |
|
"loss": 0.0003, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 43.01662835249042, |
|
"grad_norm": 0.0005780119099654257, |
|
"learning_rate": 1.3707960834397619e-06, |
|
"loss": 0.0001, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 43.017394636015325, |
|
"grad_norm": 0.0005708260578103364, |
|
"learning_rate": 1.362281822051937e-06, |
|
"loss": 0.0003, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 43.01816091954023, |
|
"grad_norm": 0.0004368394147604704, |
|
"learning_rate": 1.3537675606641127e-06, |
|
"loss": 0.0002, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 43.01892720306513, |
|
"grad_norm": 0.00039488793117925525, |
|
"learning_rate": 1.345253299276288e-06, |
|
"loss": 0.0, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 43.019693486590036, |
|
"grad_norm": 0.00838773138821125, |
|
"learning_rate": 1.3367390378884631e-06, |
|
"loss": 0.0001, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"eval_accuracy": 0.5909090909090909, |
|
"eval_loss": 3.3207497596740723, |
|
"eval_runtime": 16.1106, |
|
"eval_samples_per_second": 2.731, |
|
"eval_steps_per_second": 2.731, |
|
"step": 11484 |
|
}, |
|
{ |
|
"epoch": 44.000459770114944, |
|
"grad_norm": 5.722577095031738, |
|
"learning_rate": 1.3282247765006387e-06, |
|
"loss": 0.0005, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 44.00122605363985, |
|
"grad_norm": 0.007740352768450975, |
|
"learning_rate": 1.319710515112814e-06, |
|
"loss": 0.0001, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 44.00199233716475, |
|
"grad_norm": 0.006107802968472242, |
|
"learning_rate": 1.3111962537249896e-06, |
|
"loss": 0.0001, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 44.002758620689654, |
|
"grad_norm": 0.03945707157254219, |
|
"learning_rate": 1.3026819923371648e-06, |
|
"loss": 0.9321, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 44.00352490421456, |
|
"grad_norm": 0.0008566186879761517, |
|
"learning_rate": 1.2941677309493402e-06, |
|
"loss": 0.001, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 44.00429118773946, |
|
"grad_norm": 0.0006140118348412216, |
|
"learning_rate": 1.2856534695615156e-06, |
|
"loss": 0.4337, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 44.005057471264365, |
|
"grad_norm": 0.002904461231082678, |
|
"learning_rate": 1.277139208173691e-06, |
|
"loss": 0.0007, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 44.00582375478927, |
|
"grad_norm": 0.0010221564443781972, |
|
"learning_rate": 1.2686249467858662e-06, |
|
"loss": 0.5956, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 44.00659003831418, |
|
"grad_norm": 0.05578969419002533, |
|
"learning_rate": 1.2601106853980419e-06, |
|
"loss": 0.001, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 44.00735632183908, |
|
"grad_norm": 0.0007205125293694437, |
|
"learning_rate": 1.251596424010217e-06, |
|
"loss": 0.0001, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 44.00812260536399, |
|
"grad_norm": 0.007460341323167086, |
|
"learning_rate": 1.2430821626223927e-06, |
|
"loss": 1.5103, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 44.00888888888889, |
|
"grad_norm": 0.002121605910360813, |
|
"learning_rate": 1.234567901234568e-06, |
|
"loss": 0.0001, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 44.009655172413794, |
|
"grad_norm": 0.00040120750782079995, |
|
"learning_rate": 1.2260536398467433e-06, |
|
"loss": 0.0001, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 44.0104214559387, |
|
"grad_norm": 0.001008983119390905, |
|
"learning_rate": 1.2175393784589187e-06, |
|
"loss": 0.5407, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 44.0111877394636, |
|
"grad_norm": 0.00028601608937606215, |
|
"learning_rate": 1.2090251170710942e-06, |
|
"loss": 0.0314, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 44.011954022988505, |
|
"grad_norm": 0.00034253779449500144, |
|
"learning_rate": 1.2005108556832696e-06, |
|
"loss": 0.0002, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 44.01272030651341, |
|
"grad_norm": 0.0008278365130536258, |
|
"learning_rate": 1.191996594295445e-06, |
|
"loss": 0.0004, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 44.01348659003831, |
|
"grad_norm": 0.0008784251403994858, |
|
"learning_rate": 1.1834823329076204e-06, |
|
"loss": 0.0002, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 44.014252873563215, |
|
"grad_norm": 0.000691076391376555, |
|
"learning_rate": 1.1749680715197956e-06, |
|
"loss": 0.0, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 44.01501915708812, |
|
"grad_norm": 0.0015078171854838729, |
|
"learning_rate": 1.166453810131971e-06, |
|
"loss": 0.0004, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 44.01578544061303, |
|
"grad_norm": 0.023979652673006058, |
|
"learning_rate": 1.1579395487441465e-06, |
|
"loss": 0.0002, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 44.01655172413793, |
|
"grad_norm": 0.0004299211723264307, |
|
"learning_rate": 1.1494252873563219e-06, |
|
"loss": 0.0001, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 44.01731800766284, |
|
"grad_norm": 0.007529785390943289, |
|
"learning_rate": 1.1409110259684973e-06, |
|
"loss": 0.4831, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 44.01808429118774, |
|
"grad_norm": 0.0003040138108190149, |
|
"learning_rate": 1.1323967645806727e-06, |
|
"loss": 0.0003, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 44.018850574712644, |
|
"grad_norm": 0.006258353590965271, |
|
"learning_rate": 1.1238825031928481e-06, |
|
"loss": 0.0001, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 44.01961685823755, |
|
"grad_norm": 0.0002640557650011033, |
|
"learning_rate": 1.1153682418050235e-06, |
|
"loss": 0.0001, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"eval_accuracy": 0.6136363636363636, |
|
"eval_loss": 3.1331329345703125, |
|
"eval_runtime": 14.8664, |
|
"eval_samples_per_second": 2.96, |
|
"eval_steps_per_second": 2.96, |
|
"step": 11745 |
|
}, |
|
{ |
|
"epoch": 45.000383141762455, |
|
"grad_norm": 0.0033771779853850603, |
|
"learning_rate": 1.106853980417199e-06, |
|
"loss": 0.0005, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 45.00114942528736, |
|
"grad_norm": 0.0010386168723925948, |
|
"learning_rate": 1.0983397190293744e-06, |
|
"loss": 0.0809, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 45.00191570881226, |
|
"grad_norm": 0.000448176811914891, |
|
"learning_rate": 1.0898254576415498e-06, |
|
"loss": 0.4986, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 45.002681992337166, |
|
"grad_norm": 0.0009056306444108486, |
|
"learning_rate": 1.0813111962537252e-06, |
|
"loss": 0.0003, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 45.00344827586207, |
|
"grad_norm": 0.00048185919877141714, |
|
"learning_rate": 1.0727969348659004e-06, |
|
"loss": 0.0005, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 45.00421455938697, |
|
"grad_norm": 0.03918066993355751, |
|
"learning_rate": 1.0642826734780758e-06, |
|
"loss": 0.0001, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 45.00498084291188, |
|
"grad_norm": 0.000294065335765481, |
|
"learning_rate": 1.0557684120902512e-06, |
|
"loss": 0.7863, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 45.00574712643678, |
|
"grad_norm": 0.009170363657176495, |
|
"learning_rate": 1.0472541507024267e-06, |
|
"loss": 0.0001, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 45.006513409961684, |
|
"grad_norm": 0.03107430972158909, |
|
"learning_rate": 1.038739889314602e-06, |
|
"loss": 0.0003, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 45.00727969348659, |
|
"grad_norm": 0.00028556439792737365, |
|
"learning_rate": 1.0302256279267775e-06, |
|
"loss": 0.0019, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 45.00804597701149, |
|
"grad_norm": 0.0008216087589971721, |
|
"learning_rate": 1.021711366538953e-06, |
|
"loss": 0.0336, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 45.008812260536395, |
|
"grad_norm": 0.00033497792901471257, |
|
"learning_rate": 1.0131971051511283e-06, |
|
"loss": 0.0002, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 45.009578544061306, |
|
"grad_norm": 1009.4758911132812, |
|
"learning_rate": 1.0046828437633035e-06, |
|
"loss": 0.6736, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 45.01034482758621, |
|
"grad_norm": 0.013901930302381516, |
|
"learning_rate": 9.96168582375479e-07, |
|
"loss": 0.0002, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 45.01111111111111, |
|
"grad_norm": 0.012286617420613766, |
|
"learning_rate": 9.876543209876544e-07, |
|
"loss": 0.5899, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 45.01187739463602, |
|
"grad_norm": 0.001657485612668097, |
|
"learning_rate": 9.791400595998298e-07, |
|
"loss": 0.0001, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 45.01264367816092, |
|
"grad_norm": 0.00191357487346977, |
|
"learning_rate": 9.706257982120052e-07, |
|
"loss": 0.0001, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 45.013409961685824, |
|
"grad_norm": 0.00033402678673155606, |
|
"learning_rate": 9.621115368241806e-07, |
|
"loss": 0.0001, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 45.01417624521073, |
|
"grad_norm": 1.9644687175750732, |
|
"learning_rate": 9.535972754363559e-07, |
|
"loss": 0.0004, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 45.01494252873563, |
|
"grad_norm": 0.0003822579456027597, |
|
"learning_rate": 9.450830140485314e-07, |
|
"loss": 0.0002, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 45.015708812260534, |
|
"grad_norm": 0.0069314162246882915, |
|
"learning_rate": 9.365687526607068e-07, |
|
"loss": 0.0002, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 45.01647509578544, |
|
"grad_norm": 0.18637993931770325, |
|
"learning_rate": 9.280544912728822e-07, |
|
"loss": 0.0002, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 45.01724137931034, |
|
"grad_norm": 0.0011075153015553951, |
|
"learning_rate": 9.195402298850575e-07, |
|
"loss": 0.0, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 45.01800766283525, |
|
"grad_norm": 0.008211519569158554, |
|
"learning_rate": 9.110259684972329e-07, |
|
"loss": 0.0002, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 45.018773946360156, |
|
"grad_norm": 0.0005213989643380046, |
|
"learning_rate": 9.025117071094083e-07, |
|
"loss": 0.3504, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 45.01954022988506, |
|
"grad_norm": 0.012262558564543724, |
|
"learning_rate": 8.939974457215837e-07, |
|
"loss": 0.0002, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 45.02, |
|
"eval_accuracy": 0.5909090909090909, |
|
"eval_loss": 3.193819999694824, |
|
"eval_runtime": 14.8317, |
|
"eval_samples_per_second": 2.967, |
|
"eval_steps_per_second": 2.967, |
|
"step": 12006 |
|
}, |
|
{ |
|
"epoch": 46.00030651340996, |
|
"grad_norm": 0.00048760254867374897, |
|
"learning_rate": 8.854831843337592e-07, |
|
"loss": 0.8275, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 46.001072796934864, |
|
"grad_norm": 0.00048131306539289653, |
|
"learning_rate": 8.769689229459345e-07, |
|
"loss": 0.0001, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 46.00183908045977, |
|
"grad_norm": 0.011210956610739231, |
|
"learning_rate": 8.684546615581099e-07, |
|
"loss": 0.0001, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 46.00260536398467, |
|
"grad_norm": 0.00035722830216400325, |
|
"learning_rate": 8.599404001702853e-07, |
|
"loss": 0.0008, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 46.00337164750958, |
|
"grad_norm": 0.535866379737854, |
|
"learning_rate": 8.514261387824607e-07, |
|
"loss": 0.0002, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 46.004137931034485, |
|
"grad_norm": 0.00033111704397015274, |
|
"learning_rate": 8.429118773946361e-07, |
|
"loss": 0.0021, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 46.00490421455939, |
|
"grad_norm": 0.0002754300076048821, |
|
"learning_rate": 8.343976160068116e-07, |
|
"loss": 0.0, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 46.00567049808429, |
|
"grad_norm": 0.226227268576622, |
|
"learning_rate": 8.258833546189869e-07, |
|
"loss": 0.6565, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 46.006436781609196, |
|
"grad_norm": 0.0003193597658537328, |
|
"learning_rate": 8.173690932311623e-07, |
|
"loss": 0.6703, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 46.0072030651341, |
|
"grad_norm": 0.0008009669254533947, |
|
"learning_rate": 8.088548318433377e-07, |
|
"loss": 0.0006, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 46.007969348659, |
|
"grad_norm": 0.0027845597360283136, |
|
"learning_rate": 8.003405704555131e-07, |
|
"loss": 0.0, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 46.00873563218391, |
|
"grad_norm": 0.028570132330060005, |
|
"learning_rate": 7.918263090676885e-07, |
|
"loss": 0.0008, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 46.00950191570881, |
|
"grad_norm": 0.0005044966819696128, |
|
"learning_rate": 7.833120476798637e-07, |
|
"loss": 0.0021, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 46.010268199233714, |
|
"grad_norm": 0.011037427000701427, |
|
"learning_rate": 7.747977862920392e-07, |
|
"loss": 0.0015, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 46.01103448275862, |
|
"grad_norm": 0.010065269656479359, |
|
"learning_rate": 7.662835249042146e-07, |
|
"loss": 0.0002, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 46.01180076628353, |
|
"grad_norm": 0.0005655083805322647, |
|
"learning_rate": 7.5776926351639e-07, |
|
"loss": 0.0002, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 46.01256704980843, |
|
"grad_norm": 0.019746648147702217, |
|
"learning_rate": 7.492550021285653e-07, |
|
"loss": 0.0001, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 46.013333333333335, |
|
"grad_norm": 0.0004695019160863012, |
|
"learning_rate": 7.407407407407407e-07, |
|
"loss": 0.0171, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 46.01409961685824, |
|
"grad_norm": 0.0005349956336431205, |
|
"learning_rate": 7.322264793529161e-07, |
|
"loss": 0.0001, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 46.01486590038314, |
|
"grad_norm": 0.00024051878426689655, |
|
"learning_rate": 7.237122179650916e-07, |
|
"loss": 0.0001, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 46.015632183908046, |
|
"grad_norm": 0.0030741856899112463, |
|
"learning_rate": 7.15197956577267e-07, |
|
"loss": 0.372, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 46.01639846743295, |
|
"grad_norm": 0.0024755203630775213, |
|
"learning_rate": 7.066836951894424e-07, |
|
"loss": 0.0966, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 46.01716475095785, |
|
"grad_norm": 0.0027682564686983824, |
|
"learning_rate": 6.981694338016177e-07, |
|
"loss": 0.3224, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 46.01793103448276, |
|
"grad_norm": 0.000893780030310154, |
|
"learning_rate": 6.896551724137931e-07, |
|
"loss": 0.7237, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 46.01869731800766, |
|
"grad_norm": 0.0004804047057405114, |
|
"learning_rate": 6.811409110259685e-07, |
|
"loss": 0.0, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 46.019463601532564, |
|
"grad_norm": 0.00019813906692434102, |
|
"learning_rate": 6.72626649638144e-07, |
|
"loss": 0.0001, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"eval_accuracy": 0.5909090909090909, |
|
"eval_loss": 3.2386581897735596, |
|
"eval_runtime": 13.7788, |
|
"eval_samples_per_second": 3.193, |
|
"eval_steps_per_second": 3.193, |
|
"step": 12267 |
|
}, |
|
{ |
|
"epoch": 47.00022988505747, |
|
"grad_norm": 0.019910290837287903, |
|
"learning_rate": 6.641123882503194e-07, |
|
"loss": 0.0008, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 47.000996168582375, |
|
"grad_norm": 0.0022589366417378187, |
|
"learning_rate": 6.555981268624948e-07, |
|
"loss": 0.0004, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 47.00176245210728, |
|
"grad_norm": 0.0002697378513403237, |
|
"learning_rate": 6.470838654746701e-07, |
|
"loss": 0.0069, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 47.00252873563218, |
|
"grad_norm": 0.00024318839132320136, |
|
"learning_rate": 6.385696040868455e-07, |
|
"loss": 0.0001, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 47.003295019157086, |
|
"grad_norm": 0.00026064953999593854, |
|
"learning_rate": 6.300553426990209e-07, |
|
"loss": 0.0001, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 47.00406130268199, |
|
"grad_norm": 0.0005039444076828659, |
|
"learning_rate": 6.215410813111964e-07, |
|
"loss": 0.0, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 47.00482758620689, |
|
"grad_norm": 0.0006692575407214463, |
|
"learning_rate": 6.130268199233717e-07, |
|
"loss": 0.0001, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 47.005593869731804, |
|
"grad_norm": 0.0037658934015780687, |
|
"learning_rate": 6.045125585355471e-07, |
|
"loss": 0.3377, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 47.00636015325671, |
|
"grad_norm": 0.00023556100495625287, |
|
"learning_rate": 5.959982971477225e-07, |
|
"loss": 0.7458, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 47.00712643678161, |
|
"grad_norm": 0.04193214327096939, |
|
"learning_rate": 5.874840357598978e-07, |
|
"loss": 0.7215, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 47.007892720306515, |
|
"grad_norm": 0.0002495369117241353, |
|
"learning_rate": 5.789697743720732e-07, |
|
"loss": 0.0399, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 47.00865900383142, |
|
"grad_norm": 0.000457854475826025, |
|
"learning_rate": 5.704555129842486e-07, |
|
"loss": 0.0, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 47.00942528735632, |
|
"grad_norm": 0.0003802287392318249, |
|
"learning_rate": 5.619412515964241e-07, |
|
"loss": 0.0001, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 47.010191570881226, |
|
"grad_norm": 213.5444793701172, |
|
"learning_rate": 5.534269902085995e-07, |
|
"loss": 0.0226, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 47.01095785440613, |
|
"grad_norm": 0.00043641228694468737, |
|
"learning_rate": 5.449127288207749e-07, |
|
"loss": 0.0, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 47.01172413793103, |
|
"grad_norm": 0.0003507279616314918, |
|
"learning_rate": 5.363984674329502e-07, |
|
"loss": 0.19, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 47.01249042145594, |
|
"grad_norm": 0.0006008325144648552, |
|
"learning_rate": 5.278842060451256e-07, |
|
"loss": 0.0, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 47.01325670498084, |
|
"grad_norm": 0.00023644456814508885, |
|
"learning_rate": 5.19369944657301e-07, |
|
"loss": 0.13, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 47.014022988505744, |
|
"grad_norm": 0.0004428470565471798, |
|
"learning_rate": 5.108556832694765e-07, |
|
"loss": 0.0001, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 47.014789272030654, |
|
"grad_norm": 0.0005785804823972285, |
|
"learning_rate": 5.023414218816518e-07, |
|
"loss": 0.0001, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 47.01555555555556, |
|
"grad_norm": 0.04039114713668823, |
|
"learning_rate": 4.938271604938272e-07, |
|
"loss": 0.0001, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 47.01632183908046, |
|
"grad_norm": 0.015721915289759636, |
|
"learning_rate": 4.853128991060026e-07, |
|
"loss": 0.0001, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 47.017088122605365, |
|
"grad_norm": 0.013774341903626919, |
|
"learning_rate": 4.7679863771817797e-07, |
|
"loss": 0.0003, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 47.01785440613027, |
|
"grad_norm": 0.0004029707342851907, |
|
"learning_rate": 4.682843763303534e-07, |
|
"loss": 0.755, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 47.01862068965517, |
|
"grad_norm": 0.0003439601860009134, |
|
"learning_rate": 4.5977011494252875e-07, |
|
"loss": 0.0001, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 47.019386973180076, |
|
"grad_norm": 0.00022625003475695848, |
|
"learning_rate": 4.5125585355470417e-07, |
|
"loss": 0.6632, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"eval_accuracy": 0.5909090909090909, |
|
"eval_loss": 3.3888802528381348, |
|
"eval_runtime": 13.8122, |
|
"eval_samples_per_second": 3.186, |
|
"eval_steps_per_second": 3.186, |
|
"step": 12528 |
|
}, |
|
{ |
|
"epoch": 48.000153256704984, |
|
"grad_norm": 0.0002444499696139246, |
|
"learning_rate": 4.427415921668796e-07, |
|
"loss": 0.5532, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 48.00091954022989, |
|
"grad_norm": 0.0003611572610680014, |
|
"learning_rate": 4.3422733077905495e-07, |
|
"loss": 0.0001, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 48.00168582375479, |
|
"grad_norm": 0.0015018805861473083, |
|
"learning_rate": 4.2571306939123036e-07, |
|
"loss": 0.0002, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 48.002452107279694, |
|
"grad_norm": 0.00028550362912938, |
|
"learning_rate": 4.171988080034058e-07, |
|
"loss": 0.5263, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 48.0032183908046, |
|
"grad_norm": 0.00025121928774751723, |
|
"learning_rate": 4.0868454661558115e-07, |
|
"loss": 0.0001, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 48.0039846743295, |
|
"grad_norm": 2030.238037109375, |
|
"learning_rate": 4.0017028522775656e-07, |
|
"loss": 0.5624, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 48.004750957854405, |
|
"grad_norm": 2.1930882930755615, |
|
"learning_rate": 3.916560238399319e-07, |
|
"loss": 0.0006, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 48.00551724137931, |
|
"grad_norm": 0.0027892333455383778, |
|
"learning_rate": 3.831417624521073e-07, |
|
"loss": 0.0001, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 48.00628352490421, |
|
"grad_norm": 1.8781001567840576, |
|
"learning_rate": 3.7462750106428265e-07, |
|
"loss": 0.0005, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 48.007049808429116, |
|
"grad_norm": 0.014453647658228874, |
|
"learning_rate": 3.6611323967645807e-07, |
|
"loss": 0.0012, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 48.00781609195402, |
|
"grad_norm": 0.0002665473148226738, |
|
"learning_rate": 3.575989782886335e-07, |
|
"loss": 0.0, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 48.00858237547893, |
|
"grad_norm": 0.0005023235571570694, |
|
"learning_rate": 3.4908471690080885e-07, |
|
"loss": 0.0614, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 48.009348659003834, |
|
"grad_norm": 0.00046487923827953637, |
|
"learning_rate": 3.4057045551298427e-07, |
|
"loss": 0.0, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 48.01011494252874, |
|
"grad_norm": 0.0028797071427106857, |
|
"learning_rate": 3.320561941251597e-07, |
|
"loss": 0.0002, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 48.01088122605364, |
|
"grad_norm": 0.00073176936712116, |
|
"learning_rate": 3.2354193273733505e-07, |
|
"loss": 0.0, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 48.011647509578545, |
|
"grad_norm": 0.0032006646506488323, |
|
"learning_rate": 3.1502767134951047e-07, |
|
"loss": 0.0001, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 48.01241379310345, |
|
"grad_norm": 0.08220506459474564, |
|
"learning_rate": 3.0651340996168583e-07, |
|
"loss": 0.9449, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 48.01318007662835, |
|
"grad_norm": 0.00026570618501864374, |
|
"learning_rate": 2.9799914857386125e-07, |
|
"loss": 0.0001, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 48.013946360153255, |
|
"grad_norm": 0.00023152890207711607, |
|
"learning_rate": 2.894848871860366e-07, |
|
"loss": 0.0, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 48.01471264367816, |
|
"grad_norm": 0.00019304055604152381, |
|
"learning_rate": 2.8097062579821203e-07, |
|
"loss": 0.0, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 48.01547892720306, |
|
"grad_norm": 0.06435206532478333, |
|
"learning_rate": 2.7245636441038745e-07, |
|
"loss": 0.0003, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 48.016245210727966, |
|
"grad_norm": 0.0005736428429372609, |
|
"learning_rate": 2.639421030225628e-07, |
|
"loss": 0.7122, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 48.01701149425288, |
|
"grad_norm": 0.00026888441061601043, |
|
"learning_rate": 2.5542784163473823e-07, |
|
"loss": 0.0021, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 48.01777777777778, |
|
"grad_norm": 0.0007350470987148583, |
|
"learning_rate": 2.469135802469136e-07, |
|
"loss": 0.0001, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 48.018544061302684, |
|
"grad_norm": 0.00445356871932745, |
|
"learning_rate": 2.3839931885908898e-07, |
|
"loss": 0.0009, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 48.01931034482759, |
|
"grad_norm": 0.001289373030886054, |
|
"learning_rate": 2.2988505747126437e-07, |
|
"loss": 0.2849, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 3.3583779335021973, |
|
"eval_runtime": 13.8118, |
|
"eval_samples_per_second": 3.186, |
|
"eval_steps_per_second": 3.186, |
|
"step": 12789 |
|
}, |
|
{ |
|
"epoch": 49.00007662835249, |
|
"grad_norm": 0.003831675508990884, |
|
"learning_rate": 2.213707960834398e-07, |
|
"loss": 0.0002, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 49.00084291187739, |
|
"grad_norm": 0.014694935642182827, |
|
"learning_rate": 2.1285653469561518e-07, |
|
"loss": 0.0, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 49.001609195402295, |
|
"grad_norm": 0.008809147402644157, |
|
"learning_rate": 2.0434227330779057e-07, |
|
"loss": 0.0001, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 49.002375478927206, |
|
"grad_norm": 0.00024942102027125657, |
|
"learning_rate": 1.9582801191996594e-07, |
|
"loss": 0.0001, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 49.00314176245211, |
|
"grad_norm": 0.0008103514555841684, |
|
"learning_rate": 1.8731375053214133e-07, |
|
"loss": 0.0001, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 49.00390804597701, |
|
"grad_norm": 0.005583544261753559, |
|
"learning_rate": 1.7879948914431674e-07, |
|
"loss": 0.5833, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 49.00467432950192, |
|
"grad_norm": 0.0005416848580352962, |
|
"learning_rate": 1.7028522775649214e-07, |
|
"loss": 0.0002, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 49.00544061302682, |
|
"grad_norm": 0.0006115997675806284, |
|
"learning_rate": 1.6177096636866753e-07, |
|
"loss": 0.0001, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 49.006206896551724, |
|
"grad_norm": 0.0004344468470662832, |
|
"learning_rate": 1.5325670498084292e-07, |
|
"loss": 0.0001, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 49.00697318007663, |
|
"grad_norm": 0.015236585400998592, |
|
"learning_rate": 1.447424435930183e-07, |
|
"loss": 0.0002, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 49.00773946360153, |
|
"grad_norm": 0.0002661962644197047, |
|
"learning_rate": 1.3622818220519372e-07, |
|
"loss": 0.0, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 49.008505747126435, |
|
"grad_norm": 0.0023795650340616703, |
|
"learning_rate": 1.2771392081736911e-07, |
|
"loss": 0.0023, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 49.00927203065134, |
|
"grad_norm": 0.0002690415713004768, |
|
"learning_rate": 1.1919965942954449e-07, |
|
"loss": 0.0, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 49.01003831417624, |
|
"grad_norm": 0.0004219943657517433, |
|
"learning_rate": 1.106853980417199e-07, |
|
"loss": 0.3472, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 49.01080459770115, |
|
"grad_norm": 0.0008178652497008443, |
|
"learning_rate": 1.0217113665389529e-07, |
|
"loss": 0.0, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 49.011570881226056, |
|
"grad_norm": 0.0925702452659607, |
|
"learning_rate": 9.365687526607066e-08, |
|
"loss": 0.0005, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 49.01233716475096, |
|
"grad_norm": 0.00023567973403260112, |
|
"learning_rate": 8.514261387824607e-08, |
|
"loss": 0.0202, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 49.013103448275864, |
|
"grad_norm": 0.0007446402451023459, |
|
"learning_rate": 7.662835249042146e-08, |
|
"loss": 0.0001, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 49.01386973180077, |
|
"grad_norm": 0.00021899881539866328, |
|
"learning_rate": 6.811409110259686e-08, |
|
"loss": 0.0, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 49.01463601532567, |
|
"grad_norm": 0.005949971731752157, |
|
"learning_rate": 5.9599829714772246e-08, |
|
"loss": 0.0, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 49.015402298850574, |
|
"grad_norm": 0.0013950084103271365, |
|
"learning_rate": 5.108556832694764e-08, |
|
"loss": 0.0001, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 49.01616858237548, |
|
"grad_norm": 0.024892427027225494, |
|
"learning_rate": 4.2571306939123034e-08, |
|
"loss": 0.0001, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 49.01693486590038, |
|
"grad_norm": 0.009243225678801537, |
|
"learning_rate": 3.405704555129843e-08, |
|
"loss": 0.0001, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 49.017701149425285, |
|
"grad_norm": 0.001323187374509871, |
|
"learning_rate": 2.554278416347382e-08, |
|
"loss": 0.7682, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 49.01846743295019, |
|
"grad_norm": 0.0006840747082605958, |
|
"learning_rate": 1.7028522775649215e-08, |
|
"loss": 0.5822, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 49.01923371647509, |
|
"grad_norm": 0.00034900163882412016, |
|
"learning_rate": 8.514261387824608e-09, |
|
"loss": 0.0, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"grad_norm": 0.011186674237251282, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"eval_accuracy": 0.6136363636363636, |
|
"eval_loss": 3.2970285415649414, |
|
"eval_runtime": 16.6936, |
|
"eval_samples_per_second": 2.636, |
|
"eval_steps_per_second": 2.636, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"step": 13050, |
|
"total_flos": 5.730289341462282e+19, |
|
"train_loss": 0.5958610415218489, |
|
"train_runtime": 9712.6274, |
|
"train_samples_per_second": 1.344, |
|
"train_steps_per_second": 1.344 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 1.2006016969680786, |
|
"eval_runtime": 13.7389, |
|
"eval_samples_per_second": 3.203, |
|
"eval_steps_per_second": 3.203, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 1.200601577758789, |
|
"eval_runtime": 13.7456, |
|
"eval_samples_per_second": 3.201, |
|
"eval_steps_per_second": 3.201, |
|
"step": 13050 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 13050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.730289341462282e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |