{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9953917050691246, "eval_steps": 500, "global_step": 758, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0026333113890717576, "grad_norm": 31.375, "learning_rate": 3.947368421052631e-06, "loss": 2.687, "step": 1 }, { "epoch": 0.013166556945358789, "grad_norm": 11.375, "learning_rate": 1.9736842105263155e-05, "loss": 2.3265, "step": 5 }, { "epoch": 0.026333113890717578, "grad_norm": 3.5625, "learning_rate": 3.947368421052631e-05, "loss": 1.834, "step": 10 }, { "epoch": 0.03949967083607637, "grad_norm": 2.828125, "learning_rate": 5.921052631578947e-05, "loss": 1.6157, "step": 15 }, { "epoch": 0.052666227781435156, "grad_norm": 7.34375, "learning_rate": 7.894736842105262e-05, "loss": 1.5546, "step": 20 }, { "epoch": 0.06583278472679395, "grad_norm": 1.6796875, "learning_rate": 9.868421052631579e-05, "loss": 1.561, "step": 25 }, { "epoch": 0.07899934167215274, "grad_norm": 1.875, "learning_rate": 0.00011842105263157894, "loss": 1.4433, "step": 30 }, { "epoch": 0.09216589861751152, "grad_norm": 2.0, "learning_rate": 0.0001381578947368421, "loss": 1.5053, "step": 35 }, { "epoch": 0.10533245556287031, "grad_norm": 4.9375, "learning_rate": 0.00015789473684210524, "loss": 1.5204, "step": 40 }, { "epoch": 0.1184990125082291, "grad_norm": 2.03125, "learning_rate": 0.00017763157894736838, "loss": 1.5645, "step": 45 }, { "epoch": 0.1316655694535879, "grad_norm": 2.140625, "learning_rate": 0.00019736842105263157, "loss": 1.5742, "step": 50 }, { "epoch": 0.1448321263989467, "grad_norm": 1.9765625, "learning_rate": 0.00021710526315789472, "loss": 1.6198, "step": 55 }, { "epoch": 0.15799868334430547, "grad_norm": 2.125, "learning_rate": 0.00023684210526315788, "loss": 1.6436, "step": 60 }, { "epoch": 0.17116524028966426, "grad_norm": 2.125, "learning_rate": 0.00025657894736842105, "loss": 1.6867, "step": 65 }, { "epoch": 0.18433179723502305, "grad_norm": 2.359375, "learning_rate": 0.0002763157894736842, "loss": 1.7356, "step": 70 }, { "epoch": 0.19749835418038184, "grad_norm": 3.859375, "learning_rate": 0.00029605263157894733, "loss": 1.7819, "step": 75 }, { "epoch": 0.21066491112574062, "grad_norm": 5.40625, "learning_rate": 0.0002999745375637391, "loss": 1.9272, "step": 80 }, { "epoch": 0.2238314680710994, "grad_norm": 5.15625, "learning_rate": 0.00029987111123173417, "loss": 2.0363, "step": 85 }, { "epoch": 0.2369980250164582, "grad_norm": 3.109375, "learning_rate": 0.00029968818442293417, "loss": 1.8288, "step": 90 }, { "epoch": 0.250164581961817, "grad_norm": 2.84375, "learning_rate": 0.00029942585417250744, "loss": 1.8436, "step": 95 }, { "epoch": 0.2633311389071758, "grad_norm": 2.40625, "learning_rate": 0.00029908425963589115, "loss": 1.7724, "step": 100 }, { "epoch": 0.2764976958525346, "grad_norm": 1.875, "learning_rate": 0.00029866358201497474, "loss": 1.7534, "step": 105 }, { "epoch": 0.2896642527978934, "grad_norm": 1.9765625, "learning_rate": 0.0002981640444619799, "loss": 1.7532, "step": 110 }, { "epoch": 0.30283080974325216, "grad_norm": 2.796875, "learning_rate": 0.00029758591196108743, "loss": 1.7545, "step": 115 }, { "epoch": 0.31599736668861095, "grad_norm": 2.015625, "learning_rate": 0.00029692949118787415, "loss": 1.8269, "step": 120 }, { "epoch": 0.32916392363396973, "grad_norm": 2.125, "learning_rate": 0.0002961951303466338, "loss": 1.7823, "step": 125 }, { "epoch": 0.3423304805793285, "grad_norm": 2.71875, "learning_rate": 0.0002953832189856691, "loss": 1.7371, "step": 130 }, { "epoch": 0.3554970375246873, "grad_norm": 1.9921875, "learning_rate": 0.00029449418779065257, "loss": 1.7607, "step": 135 }, { "epoch": 0.3686635944700461, "grad_norm": 2.109375, "learning_rate": 0.00029352850835616504, "loss": 1.7956, "step": 140 }, { "epoch": 0.3818301514154049, "grad_norm": 2.0625, "learning_rate": 0.00029248669293553437, "loss": 1.7176, "step": 145 }, { "epoch": 0.39499670836076367, "grad_norm": 1.9453125, "learning_rate": 0.0002913692941691059, "loss": 1.843, "step": 150 }, { "epoch": 0.40816326530612246, "grad_norm": 1.828125, "learning_rate": 0.0002901769047910895, "loss": 1.7918, "step": 155 }, { "epoch": 0.42132982225148125, "grad_norm": 1.5625, "learning_rate": 0.0002889101573151384, "loss": 1.7714, "step": 160 }, { "epoch": 0.43449637919684003, "grad_norm": 2.671875, "learning_rate": 0.00028756972369882667, "loss": 1.8033, "step": 165 }, { "epoch": 0.4476629361421988, "grad_norm": 1.6015625, "learning_rate": 0.0002861563149872031, "loss": 1.8409, "step": 170 }, { "epoch": 0.4608294930875576, "grad_norm": 1.8203125, "learning_rate": 0.0002846706809356112, "loss": 1.8259, "step": 175 }, { "epoch": 0.4739960500329164, "grad_norm": 1.8125, "learning_rate": 0.0002831136096119747, "loss": 1.7612, "step": 180 }, { "epoch": 0.4871626069782752, "grad_norm": 1.796875, "learning_rate": 0.0002814859269787596, "loss": 1.7649, "step": 185 }, { "epoch": 0.500329163923634, "grad_norm": 1.8359375, "learning_rate": 0.0002797884964548353, "loss": 1.7443, "step": 190 }, { "epoch": 0.5134957208689928, "grad_norm": 1.5703125, "learning_rate": 0.0002780222184574662, "loss": 1.7219, "step": 195 }, { "epoch": 0.5266622778143516, "grad_norm": 1.6328125, "learning_rate": 0.0002761880299246772, "loss": 1.7409, "step": 200 }, { "epoch": 0.5398288347597103, "grad_norm": 1.59375, "learning_rate": 0.00027428690381824637, "loss": 1.7043, "step": 205 }, { "epoch": 0.5529953917050692, "grad_norm": 1.6171875, "learning_rate": 0.00027231984860758907, "loss": 1.6709, "step": 210 }, { "epoch": 0.5661619486504279, "grad_norm": 2.0, "learning_rate": 0.000270287907734806, "loss": 1.7417, "step": 215 }, { "epoch": 0.5793285055957867, "grad_norm": 1.65625, "learning_rate": 0.0002681921590611799, "loss": 1.66, "step": 220 }, { "epoch": 0.5924950625411455, "grad_norm": 1.8671875, "learning_rate": 0.0002660337142954145, "loss": 1.732, "step": 225 }, { "epoch": 0.6056616194865043, "grad_norm": 1.5234375, "learning_rate": 0.0002638137184039186, "loss": 1.6964, "step": 230 }, { "epoch": 0.618828176431863, "grad_norm": 1.625, "learning_rate": 0.00026153334900344853, "loss": 1.648, "step": 235 }, { "epoch": 0.6319947333772219, "grad_norm": 1.375, "learning_rate": 0.0002591938157364303, "loss": 1.6197, "step": 240 }, { "epoch": 0.6451612903225806, "grad_norm": 1.7109375, "learning_rate": 0.00025679635962929455, "loss": 1.701, "step": 245 }, { "epoch": 0.6583278472679395, "grad_norm": 1.640625, "learning_rate": 0.00025434225243416234, "loss": 1.7649, "step": 250 }, { "epoch": 0.6714944042132982, "grad_norm": 1.6328125, "learning_rate": 0.0002518327959542333, "loss": 1.712, "step": 255 }, { "epoch": 0.684660961158657, "grad_norm": 1.796875, "learning_rate": 0.0002492693213532321, "loss": 1.6628, "step": 260 }, { "epoch": 0.6978275181040158, "grad_norm": 2.015625, "learning_rate": 0.0002466531884492808, "loss": 1.6714, "step": 265 }, { "epoch": 0.7109940750493746, "grad_norm": 1.9921875, "learning_rate": 0.0002439857849935712, "loss": 1.6833, "step": 270 }, { "epoch": 0.7241606319947334, "grad_norm": 1.703125, "learning_rate": 0.00024126852593421967, "loss": 1.7174, "step": 275 }, { "epoch": 0.7373271889400922, "grad_norm": 1.6015625, "learning_rate": 0.0002385028526656952, "loss": 1.6437, "step": 280 }, { "epoch": 0.7504937458854509, "grad_norm": 1.7109375, "learning_rate": 0.00023569023226421883, "loss": 1.6515, "step": 285 }, { "epoch": 0.7636603028308098, "grad_norm": 1.6015625, "learning_rate": 0.0002328321567095398, "loss": 1.6352, "step": 290 }, { "epoch": 0.7768268597761685, "grad_norm": 1.625, "learning_rate": 0.00022993014209350167, "loss": 1.6205, "step": 295 }, { "epoch": 0.7899934167215273, "grad_norm": 1.546875, "learning_rate": 0.00022698572781581757, "loss": 1.6508, "step": 300 }, { "epoch": 0.8031599736668861, "grad_norm": 1.453125, "learning_rate": 0.0002240004757674819, "loss": 1.5989, "step": 305 }, { "epoch": 0.8163265306122449, "grad_norm": 1.8046875, "learning_rate": 0.00022097596950225134, "loss": 1.6176, "step": 310 }, { "epoch": 0.8294930875576036, "grad_norm": 1.3671875, "learning_rate": 0.00021791381339663423, "loss": 1.6204, "step": 315 }, { "epoch": 0.8426596445029625, "grad_norm": 1.5390625, "learning_rate": 0.00021481563179883502, "loss": 1.5592, "step": 320 }, { "epoch": 0.8558262014483212, "grad_norm": 1.3125, "learning_rate": 0.00021168306816710393, "loss": 1.5973, "step": 325 }, { "epoch": 0.8689927583936801, "grad_norm": 1.421875, "learning_rate": 0.0002085177841979498, "loss": 1.5367, "step": 330 }, { "epoch": 0.8821593153390388, "grad_norm": 1.6796875, "learning_rate": 0.00020532145894467828, "loss": 1.5283, "step": 335 }, { "epoch": 0.8953258722843976, "grad_norm": 1.46875, "learning_rate": 0.000202095787926723, "loss": 1.5374, "step": 340 }, { "epoch": 0.9084924292297564, "grad_norm": 1.515625, "learning_rate": 0.00019884248223024203, "loss": 1.5021, "step": 345 }, { "epoch": 0.9216589861751152, "grad_norm": 1.3046875, "learning_rate": 0.00019556326760045658, "loss": 1.5345, "step": 350 }, { "epoch": 0.934825543120474, "grad_norm": 1.328125, "learning_rate": 0.00019225988352621445, "loss": 1.5164, "step": 355 }, { "epoch": 0.9479921000658328, "grad_norm": 1.3984375, "learning_rate": 0.0001889340823172622, "loss": 1.4778, "step": 360 }, { "epoch": 0.9611586570111915, "grad_norm": 1.2734375, "learning_rate": 0.00018558762817471678, "loss": 1.5624, "step": 365 }, { "epoch": 0.9743252139565504, "grad_norm": 1.453125, "learning_rate": 0.00018222229625522928, "loss": 1.527, "step": 370 }, { "epoch": 0.9874917709019092, "grad_norm": 1.609375, "learning_rate": 0.00017883987172933707, "loss": 1.4608, "step": 375 }, { "epoch": 1.0, "grad_norm": 1.3671875, "learning_rate": 0.0001754421488345041, "loss": 1.4084, "step": 380 }, { "epoch": 1.0131665569453587, "grad_norm": 1.4375, "learning_rate": 0.00017203092992335137, "loss": 1.013, "step": 385 }, { "epoch": 1.0263331138907177, "grad_norm": 1.3984375, "learning_rate": 0.0001686080245075831, "loss": 1.0124, "step": 390 }, { "epoch": 1.0394996708360764, "grad_norm": 1.53125, "learning_rate": 0.0001651752482981148, "loss": 1.0275, "step": 395 }, { "epoch": 1.0526662277814351, "grad_norm": 1.2265625, "learning_rate": 0.00016173442224191309, "loss": 0.9538, "step": 400 }, { "epoch": 1.0658327847267939, "grad_norm": 1.2109375, "learning_rate": 0.00015828737155605804, "loss": 0.9683, "step": 405 }, { "epoch": 1.0789993416721528, "grad_norm": 1.21875, "learning_rate": 0.0001548359247595405, "loss": 1.0414, "step": 410 }, { "epoch": 1.0921658986175116, "grad_norm": 1.2578125, "learning_rate": 0.00015138191270330773, "loss": 0.9749, "step": 415 }, { "epoch": 1.1053324555628703, "grad_norm": 1.4921875, "learning_rate": 0.00014792716759907186, "loss": 0.9802, "step": 420 }, { "epoch": 1.118499012508229, "grad_norm": 1.34375, "learning_rate": 0.00014447352204739712, "loss": 0.9399, "step": 425 }, { "epoch": 1.131665569453588, "grad_norm": 1.21875, "learning_rate": 0.00014102280806558006, "loss": 1.0111, "step": 430 }, { "epoch": 1.1448321263989467, "grad_norm": 1.2890625, "learning_rate": 0.00013757685611583983, "loss": 0.9483, "step": 435 }, { "epoch": 1.1579986833443054, "grad_norm": 1.15625, "learning_rate": 0.00013413749413433273, "loss": 0.9546, "step": 440 }, { "epoch": 1.1711652402896642, "grad_norm": 1.2734375, "learning_rate": 0.0001307065465615073, "loss": 0.9294, "step": 445 }, { "epoch": 1.1843317972350231, "grad_norm": 1.2265625, "learning_rate": 0.00012728583337431353, "loss": 0.9498, "step": 450 }, { "epoch": 1.1974983541803819, "grad_norm": 1.296875, "learning_rate": 0.0001238771691207795, "loss": 0.942, "step": 455 }, { "epoch": 1.2106649111257406, "grad_norm": 1.4375, "learning_rate": 0.00012048236195746822, "loss": 0.9069, "step": 460 }, { "epoch": 1.2238314680710993, "grad_norm": 1.5078125, "learning_rate": 0.00011710321269032502, "loss": 0.9452, "step": 465 }, { "epoch": 1.2369980250164583, "grad_norm": 1.3984375, "learning_rate": 0.00011374151381942327, "loss": 0.9533, "step": 470 }, { "epoch": 1.250164581961817, "grad_norm": 1.375, "learning_rate": 0.00011039904858811712, "loss": 0.9229, "step": 475 }, { "epoch": 1.2633311389071757, "grad_norm": 1.1015625, "learning_rate": 0.00010707759003710384, "loss": 0.8528, "step": 480 }, { "epoch": 1.2764976958525347, "grad_norm": 1.328125, "learning_rate": 0.00010377890006389856, "loss": 0.8836, "step": 485 }, { "epoch": 1.2896642527978934, "grad_norm": 1.3203125, "learning_rate": 0.00010050472848821968, "loss": 0.9177, "step": 490 }, { "epoch": 1.3028308097432522, "grad_norm": 1.296875, "learning_rate": 9.725681212378167e-05, "loss": 0.8867, "step": 495 }, { "epoch": 1.315997366688611, "grad_norm": 1.2421875, "learning_rate": 9.403687385698632e-05, "loss": 0.9074, "step": 500 }, { "epoch": 1.3291639236339696, "grad_norm": 1.1796875, "learning_rate": 9.084662173300223e-05, "loss": 0.8652, "step": 505 }, { "epoch": 1.3423304805793286, "grad_norm": 1.1796875, "learning_rate": 8.768774804971705e-05, "loss": 0.8758, "step": 510 }, { "epoch": 1.3554970375246873, "grad_norm": 3.953125, "learning_rate": 8.456192846004275e-05, "loss": 0.8357, "step": 515 }, { "epoch": 1.368663594470046, "grad_norm": 1.0703125, "learning_rate": 8.147082108305058e-05, "loss": 0.8258, "step": 520 }, { "epoch": 1.381830151415405, "grad_norm": 1.1015625, "learning_rate": 7.84160656244067e-05, "loss": 0.906, "step": 525 }, { "epoch": 1.3949967083607637, "grad_norm": 1.1484375, "learning_rate": 7.539928250657594e-05, "loss": 0.809, "step": 530 }, { "epoch": 1.4081632653061225, "grad_norm": 1.15625, "learning_rate": 7.242207200925383e-05, "loss": 0.7685, "step": 535 }, { "epoch": 1.4213298222514812, "grad_norm": 1.1171875, "learning_rate": 6.948601342048397e-05, "loss": 0.8473, "step": 540 }, { "epoch": 1.43449637919684, "grad_norm": 1.1015625, "learning_rate": 6.65926641989106e-05, "loss": 0.8022, "step": 545 }, { "epoch": 1.4476629361421989, "grad_norm": 1.15625, "learning_rate": 6.374355914761062e-05, "loss": 0.7762, "step": 550 }, { "epoch": 1.4608294930875576, "grad_norm": 1.109375, "learning_rate": 6.094020959994336e-05, "loss": 0.862, "step": 555 }, { "epoch": 1.4739960500329163, "grad_norm": 1.0703125, "learning_rate": 5.818410261785056e-05, "loss": 0.793, "step": 560 }, { "epoch": 1.4871626069782753, "grad_norm": 1.0625, "learning_rate": 5.5476700203030643e-05, "loss": 0.7979, "step": 565 }, { "epoch": 1.500329163923634, "grad_norm": 1.28125, "learning_rate": 5.281943852140697e-05, "loss": 0.8223, "step": 570 }, { "epoch": 1.5134957208689928, "grad_norm": 1.0078125, "learning_rate": 5.021372714130087e-05, "loss": 0.84, "step": 575 }, { "epoch": 1.5266622778143515, "grad_norm": 1.21875, "learning_rate": 4.766094828571313e-05, "loss": 0.7897, "step": 580 }, { "epoch": 1.5398288347597102, "grad_norm": 1.0859375, "learning_rate": 4.516245609911161e-05, "loss": 0.7917, "step": 585 }, { "epoch": 1.5529953917050692, "grad_norm": 1.1015625, "learning_rate": 4.271957592911325e-05, "loss": 0.7691, "step": 590 }, { "epoch": 1.566161948650428, "grad_norm": 1.1875, "learning_rate": 4.033360362344117e-05, "loss": 0.8063, "step": 595 }, { "epoch": 1.5793285055957869, "grad_norm": 1.1640625, "learning_rate": 3.800580484253105e-05, "loss": 0.7744, "step": 600 }, { "epoch": 1.5924950625411456, "grad_norm": 1.1015625, "learning_rate": 3.5737414388149785e-05, "loss": 0.7701, "step": 605 }, { "epoch": 1.6056616194865043, "grad_norm": 1.046875, "learning_rate": 3.352963554838402e-05, "loss": 0.7414, "step": 610 }, { "epoch": 1.618828176431863, "grad_norm": 1.2109375, "learning_rate": 3.138363945934523e-05, "loss": 0.7739, "step": 615 }, { "epoch": 1.6319947333772218, "grad_norm": 1.0859375, "learning_rate": 2.9300564483929852e-05, "loss": 0.794, "step": 620 }, { "epoch": 1.6451612903225805, "grad_norm": 1.3203125, "learning_rate": 2.728151560796454e-05, "loss": 0.8121, "step": 625 }, { "epoch": 1.6583278472679395, "grad_norm": 1.0546875, "learning_rate": 2.5327563854056714e-05, "loss": 0.7925, "step": 630 }, { "epoch": 1.6714944042132982, "grad_norm": 1.2109375, "learning_rate": 2.3439745713460624e-05, "loss": 0.8124, "step": 635 }, { "epoch": 1.6846609611586572, "grad_norm": 1.109375, "learning_rate": 2.1619062596261583e-05, "loss": 0.7899, "step": 640 }, { "epoch": 1.6978275181040159, "grad_norm": 1.046875, "learning_rate": 1.9866480300168885e-05, "loss": 0.7489, "step": 645 }, { "epoch": 1.7109940750493746, "grad_norm": 1.03125, "learning_rate": 1.8182928498199634e-05, "loss": 0.7739, "step": 650 }, { "epoch": 1.7241606319947334, "grad_norm": 0.99609375, "learning_rate": 1.6569300245525457e-05, "loss": 0.7311, "step": 655 }, { "epoch": 1.737327188940092, "grad_norm": 1.15625, "learning_rate": 1.5026451505743408e-05, "loss": 0.7321, "step": 660 }, { "epoch": 1.7504937458854508, "grad_norm": 1.1171875, "learning_rate": 1.3555200696822232e-05, "loss": 0.7963, "step": 665 }, { "epoch": 1.7636603028308098, "grad_norm": 1.1171875, "learning_rate": 1.215632825696541e-05, "loss": 0.7587, "step": 670 }, { "epoch": 1.7768268597761685, "grad_norm": 1.171875, "learning_rate": 1.0830576230620492e-05, "loss": 0.7989, "step": 675 }, { "epoch": 1.7899934167215275, "grad_norm": 1.078125, "learning_rate": 9.578647874855095e-06, "loss": 0.8169, "step": 680 }, { "epoch": 1.8031599736668862, "grad_norm": 1.09375, "learning_rate": 8.401207286307881e-06, "loss": 0.7674, "step": 685 }, { "epoch": 1.816326530612245, "grad_norm": 1.0234375, "learning_rate": 7.2988790489124424e-06, "loss": 0.8234, "step": 690 }, { "epoch": 1.8294930875576036, "grad_norm": 1.15625, "learning_rate": 6.272247902581201e-06, "loss": 0.7603, "step": 695 }, { "epoch": 1.8426596445029624, "grad_norm": 1.015625, "learning_rate": 5.3218584330249e-06, "loss": 0.795, "step": 700 }, { "epoch": 1.8558262014483211, "grad_norm": 1.0546875, "learning_rate": 4.448214782872134e-06, "loss": 0.759, "step": 705 }, { "epoch": 1.86899275839368, "grad_norm": 1.125, "learning_rate": 3.6517803842424474e-06, "loss": 0.7344, "step": 710 }, { "epoch": 1.8821593153390388, "grad_norm": 1.0546875, "learning_rate": 2.932977712914586e-06, "loss": 0.7102, "step": 715 }, { "epoch": 1.8953258722843978, "grad_norm": 1.046875, "learning_rate": 2.292188064220374e-06, "loss": 0.7783, "step": 720 }, { "epoch": 1.9084924292297565, "grad_norm": 1.125, "learning_rate": 1.7297513507832927e-06, "loss": 0.7961, "step": 725 }, { "epoch": 1.9216589861751152, "grad_norm": 44.75, "learning_rate": 1.2459659222086304e-06, "loss": 0.7633, "step": 730 }, { "epoch": 1.934825543120474, "grad_norm": 1.0859375, "learning_rate": 8.410884068213941e-07, "loss": 0.7727, "step": 735 }, { "epoch": 1.9479921000658327, "grad_norm": 1.09375, "learning_rate": 5.153335755354038e-07, "loss": 0.7779, "step": 740 }, { "epoch": 1.9611586570111914, "grad_norm": 1.0234375, "learning_rate": 2.688742279261913e-07, "loss": 0.7058, "step": 745 }, { "epoch": 1.9743252139565504, "grad_norm": 2.1875, "learning_rate": 1.0184110056790651e-07, "loss": 0.8194, "step": 750 }, { "epoch": 1.9874917709019093, "grad_norm": 1.0546875, "learning_rate": 1.432279768290856e-08, "loss": 0.7634, "step": 755 }, { "epoch": 1.9953917050691246, "step": 758, "total_flos": 1.449790274661253e+17, "train_loss": 1.2643018703032924, "train_runtime": 2142.5178, "train_samples_per_second": 11.339, "train_steps_per_second": 0.354 } ], "logging_steps": 5, "max_steps": 758, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.449790274661253e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }