|
{ |
|
"best_metric": 1.3224910497665405, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-1050", |
|
"epoch": 0.9657392504023914, |
|
"eval_steps": 150, |
|
"global_step": 1050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009197516670498966, |
|
"grad_norm": 6.181552886962891, |
|
"learning_rate": 5e-06, |
|
"loss": 6.0754, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0009197516670498966, |
|
"eval_loss": 4.431160926818848, |
|
"eval_runtime": 49.8075, |
|
"eval_samples_per_second": 165.437, |
|
"eval_steps_per_second": 20.68, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0018395033340997931, |
|
"grad_norm": 6.504417896270752, |
|
"learning_rate": 1e-05, |
|
"loss": 5.7192, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0027592550011496897, |
|
"grad_norm": 7.00899076461792, |
|
"learning_rate": 1.5e-05, |
|
"loss": 5.533, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0036790066681995862, |
|
"grad_norm": 6.67568826675415, |
|
"learning_rate": 2e-05, |
|
"loss": 5.0143, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.004598758335249482, |
|
"grad_norm": 6.313548564910889, |
|
"learning_rate": 2.5e-05, |
|
"loss": 4.6726, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005518510002299379, |
|
"grad_norm": 5.2927422523498535, |
|
"learning_rate": 3e-05, |
|
"loss": 4.6566, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0064382616693492755, |
|
"grad_norm": 4.771329879760742, |
|
"learning_rate": 3.5e-05, |
|
"loss": 4.3112, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0073580133363991725, |
|
"grad_norm": 3.6339666843414307, |
|
"learning_rate": 4e-05, |
|
"loss": 4.1199, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.008277765003449069, |
|
"grad_norm": 2.8113648891448975, |
|
"learning_rate": 4.5e-05, |
|
"loss": 3.9369, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.009197516670498965, |
|
"grad_norm": 2.2301437854766846, |
|
"learning_rate": 5e-05, |
|
"loss": 3.7798, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010117268337548863, |
|
"grad_norm": 2.4432830810546875, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 3.8405, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.011037020004598759, |
|
"grad_norm": 1.870229721069336, |
|
"learning_rate": 6e-05, |
|
"loss": 3.6539, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.011956771671648655, |
|
"grad_norm": 1.9459682703018188, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 3.5456, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.012876523338698551, |
|
"grad_norm": 1.4028608798980713, |
|
"learning_rate": 7e-05, |
|
"loss": 3.4191, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.013796275005748447, |
|
"grad_norm": 1.9811638593673706, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 3.6083, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.014716026672798345, |
|
"grad_norm": 1.952579379081726, |
|
"learning_rate": 8e-05, |
|
"loss": 3.4243, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01563577833984824, |
|
"grad_norm": 1.5935711860656738, |
|
"learning_rate": 8.5e-05, |
|
"loss": 3.3783, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.016555530006898137, |
|
"grad_norm": 1.475130558013916, |
|
"learning_rate": 9e-05, |
|
"loss": 3.3419, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.017475281673948035, |
|
"grad_norm": 1.465334177017212, |
|
"learning_rate": 9.5e-05, |
|
"loss": 3.2841, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01839503334099793, |
|
"grad_norm": 1.5258549451828003, |
|
"learning_rate": 0.0001, |
|
"loss": 3.1315, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.019314785008047827, |
|
"grad_norm": 1.2697194814682007, |
|
"learning_rate": 9.999978367986987e-05, |
|
"loss": 3.1049, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.020234536675097725, |
|
"grad_norm": 1.0417594909667969, |
|
"learning_rate": 9.999913472135125e-05, |
|
"loss": 3.0702, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.02115428834214762, |
|
"grad_norm": 0.8249285221099854, |
|
"learning_rate": 9.999805313005946e-05, |
|
"loss": 3.0126, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.022074040009197517, |
|
"grad_norm": 0.8856204152107239, |
|
"learning_rate": 9.99965389153533e-05, |
|
"loss": 2.936, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.022993791676247412, |
|
"grad_norm": 1.0896774530410767, |
|
"learning_rate": 9.999459209033495e-05, |
|
"loss": 3.0088, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02391354334329731, |
|
"grad_norm": 0.878311276435852, |
|
"learning_rate": 9.999221267184993e-05, |
|
"loss": 2.8434, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.024833295010347207, |
|
"grad_norm": 0.6598113179206848, |
|
"learning_rate": 9.998940068048688e-05, |
|
"loss": 2.7397, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.025753046677397102, |
|
"grad_norm": 0.8144488334655762, |
|
"learning_rate": 9.998615614057742e-05, |
|
"loss": 2.7315, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.026672798344447, |
|
"grad_norm": 0.8650857210159302, |
|
"learning_rate": 9.998247908019593e-05, |
|
"loss": 2.8126, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.027592550011496894, |
|
"grad_norm": 0.6536254286766052, |
|
"learning_rate": 9.997836953115926e-05, |
|
"loss": 2.7479, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.028512301678546792, |
|
"grad_norm": 1.0240334272384644, |
|
"learning_rate": 9.997382752902657e-05, |
|
"loss": 2.7575, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.02943205334559669, |
|
"grad_norm": 0.6431864500045776, |
|
"learning_rate": 9.996885311309891e-05, |
|
"loss": 2.6497, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.030351805012646584, |
|
"grad_norm": 0.6775922179222107, |
|
"learning_rate": 9.996344632641894e-05, |
|
"loss": 2.6301, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.03127155667969648, |
|
"grad_norm": 0.9252532124519348, |
|
"learning_rate": 9.995760721577052e-05, |
|
"loss": 2.6123, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.032191308346746376, |
|
"grad_norm": 0.6126474738121033, |
|
"learning_rate": 9.995133583167832e-05, |
|
"loss": 2.5311, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.033111060013796274, |
|
"grad_norm": 0.7896717190742493, |
|
"learning_rate": 9.994463222840746e-05, |
|
"loss": 2.5242, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03403081168084617, |
|
"grad_norm": 0.6513295769691467, |
|
"learning_rate": 9.993749646396286e-05, |
|
"loss": 2.4802, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.03495056334789607, |
|
"grad_norm": 0.6320262551307678, |
|
"learning_rate": 9.992992860008892e-05, |
|
"loss": 2.5144, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03587031501494597, |
|
"grad_norm": 0.9524784684181213, |
|
"learning_rate": 9.992192870226889e-05, |
|
"loss": 2.5425, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03679006668199586, |
|
"grad_norm": 0.5857168436050415, |
|
"learning_rate": 9.991349683972434e-05, |
|
"loss": 2.447, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03770981834904576, |
|
"grad_norm": 0.7533925175666809, |
|
"learning_rate": 9.990463308541451e-05, |
|
"loss": 2.3431, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.038629570016095655, |
|
"grad_norm": 0.5700300931930542, |
|
"learning_rate": 9.989533751603577e-05, |
|
"loss": 2.3499, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03954932168314555, |
|
"grad_norm": 0.8808962106704712, |
|
"learning_rate": 9.988561021202083e-05, |
|
"loss": 2.3962, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04046907335019545, |
|
"grad_norm": 0.7045830488204956, |
|
"learning_rate": 9.987545125753819e-05, |
|
"loss": 2.2948, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.04138882501724534, |
|
"grad_norm": 0.8472001552581787, |
|
"learning_rate": 9.986486074049131e-05, |
|
"loss": 2.3045, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04230857668429524, |
|
"grad_norm": 0.7173092365264893, |
|
"learning_rate": 9.985383875251783e-05, |
|
"loss": 2.2929, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.04322832835134514, |
|
"grad_norm": 0.7962790727615356, |
|
"learning_rate": 9.984238538898891e-05, |
|
"loss": 2.327, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.044148080018395035, |
|
"grad_norm": 0.8624216318130493, |
|
"learning_rate": 9.983050074900824e-05, |
|
"loss": 2.187, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04506783168544493, |
|
"grad_norm": 0.917926549911499, |
|
"learning_rate": 9.98181849354113e-05, |
|
"loss": 2.1946, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.045987583352494824, |
|
"grad_norm": 0.8663224577903748, |
|
"learning_rate": 9.980543805476446e-05, |
|
"loss": 2.0988, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04690733501954472, |
|
"grad_norm": 11.713833808898926, |
|
"learning_rate": 9.979226021736396e-05, |
|
"loss": 4.3627, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.04782708668659462, |
|
"grad_norm": 5.917041301727295, |
|
"learning_rate": 9.977865153723507e-05, |
|
"loss": 3.7012, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.04874683835364452, |
|
"grad_norm": 3.241976499557495, |
|
"learning_rate": 9.976461213213104e-05, |
|
"loss": 3.1752, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.049666590020694415, |
|
"grad_norm": 3.6020805835723877, |
|
"learning_rate": 9.975014212353213e-05, |
|
"loss": 3.019, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.050586341687744306, |
|
"grad_norm": 3.3399133682250977, |
|
"learning_rate": 9.973524163664447e-05, |
|
"loss": 2.7453, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.051506093354794204, |
|
"grad_norm": 2.34346604347229, |
|
"learning_rate": 9.97199108003991e-05, |
|
"loss": 2.5133, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0524258450218441, |
|
"grad_norm": 1.2596639394760132, |
|
"learning_rate": 9.970414974745076e-05, |
|
"loss": 2.5255, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.053345596688894, |
|
"grad_norm": 2.061197519302368, |
|
"learning_rate": 9.968795861417676e-05, |
|
"loss": 2.4012, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0542653483559439, |
|
"grad_norm": 2.119333028793335, |
|
"learning_rate": 9.967133754067582e-05, |
|
"loss": 2.3668, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.05518510002299379, |
|
"grad_norm": 1.2170815467834473, |
|
"learning_rate": 9.965428667076686e-05, |
|
"loss": 2.4343, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.056104851690043686, |
|
"grad_norm": 1.0711098909378052, |
|
"learning_rate": 9.963680615198773e-05, |
|
"loss": 2.3052, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.057024603357093584, |
|
"grad_norm": 1.64667809009552, |
|
"learning_rate": 9.961889613559395e-05, |
|
"loss": 2.3781, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.05794435502414348, |
|
"grad_norm": 1.2105283737182617, |
|
"learning_rate": 9.960055677655742e-05, |
|
"loss": 2.357, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.05886410669119338, |
|
"grad_norm": 1.0943785905838013, |
|
"learning_rate": 9.958178823356503e-05, |
|
"loss": 2.2903, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.05978385835824328, |
|
"grad_norm": 1.5415120124816895, |
|
"learning_rate": 9.956259066901733e-05, |
|
"loss": 2.3312, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06070361002529317, |
|
"grad_norm": 0.8917611837387085, |
|
"learning_rate": 9.954296424902708e-05, |
|
"loss": 2.32, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.061623361692343066, |
|
"grad_norm": 0.7154043316841125, |
|
"learning_rate": 9.952290914341791e-05, |
|
"loss": 2.24, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.06254311335939296, |
|
"grad_norm": 1.1616435050964355, |
|
"learning_rate": 9.950242552572271e-05, |
|
"loss": 2.2741, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.06346286502644286, |
|
"grad_norm": 0.7844848036766052, |
|
"learning_rate": 9.948151357318228e-05, |
|
"loss": 2.2333, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.06438261669349275, |
|
"grad_norm": 0.7282043695449829, |
|
"learning_rate": 9.946017346674361e-05, |
|
"loss": 2.1664, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06530236836054265, |
|
"grad_norm": 0.7888442873954773, |
|
"learning_rate": 9.943840539105854e-05, |
|
"loss": 2.2735, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.06622212002759255, |
|
"grad_norm": 0.5766344666481018, |
|
"learning_rate": 9.941620953448194e-05, |
|
"loss": 2.1517, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.06714187169464245, |
|
"grad_norm": 0.7196112871170044, |
|
"learning_rate": 9.939358608907026e-05, |
|
"loss": 2.1162, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.06806162336169234, |
|
"grad_norm": 0.7088760137557983, |
|
"learning_rate": 9.937053525057977e-05, |
|
"loss": 2.1777, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.06898137502874224, |
|
"grad_norm": 0.5653288960456848, |
|
"learning_rate": 9.934705721846487e-05, |
|
"loss": 2.1762, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06990112669579214, |
|
"grad_norm": 0.8287534117698669, |
|
"learning_rate": 9.93231521958764e-05, |
|
"loss": 2.1711, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.07082087836284204, |
|
"grad_norm": 0.4657200872898102, |
|
"learning_rate": 9.929882038965989e-05, |
|
"loss": 2.1953, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.07174063002989194, |
|
"grad_norm": 0.47897377610206604, |
|
"learning_rate": 9.927406201035368e-05, |
|
"loss": 2.1214, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.07266038169694182, |
|
"grad_norm": 0.678236722946167, |
|
"learning_rate": 9.924887727218724e-05, |
|
"loss": 2.0763, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.07358013336399172, |
|
"grad_norm": 0.4475807249546051, |
|
"learning_rate": 9.922326639307917e-05, |
|
"loss": 2.16, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07449988503104162, |
|
"grad_norm": 0.49449393153190613, |
|
"learning_rate": 9.919722959463544e-05, |
|
"loss": 2.1382, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.07541963669809151, |
|
"grad_norm": 0.5139850974082947, |
|
"learning_rate": 9.917076710214739e-05, |
|
"loss": 2.1543, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.07633938836514141, |
|
"grad_norm": 0.5776795148849487, |
|
"learning_rate": 9.914387914458982e-05, |
|
"loss": 2.157, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.07725914003219131, |
|
"grad_norm": 0.573421061038971, |
|
"learning_rate": 9.911656595461898e-05, |
|
"loss": 2.0451, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.07817889169924121, |
|
"grad_norm": 0.6673893332481384, |
|
"learning_rate": 9.908882776857056e-05, |
|
"loss": 2.11, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0790986433662911, |
|
"grad_norm": 0.5322157740592957, |
|
"learning_rate": 9.906066482645772e-05, |
|
"loss": 2.0667, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.080018395033341, |
|
"grad_norm": 0.7134078741073608, |
|
"learning_rate": 9.903207737196891e-05, |
|
"loss": 2.0217, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0809381467003909, |
|
"grad_norm": 0.5911161303520203, |
|
"learning_rate": 9.900306565246578e-05, |
|
"loss": 2.0574, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.08185789836744078, |
|
"grad_norm": 0.6985921263694763, |
|
"learning_rate": 9.897362991898109e-05, |
|
"loss": 2.0796, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.08277765003449068, |
|
"grad_norm": 0.5797624588012695, |
|
"learning_rate": 9.894377042621655e-05, |
|
"loss": 2.0293, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08369740170154058, |
|
"grad_norm": 0.6441212296485901, |
|
"learning_rate": 9.891348743254046e-05, |
|
"loss": 2.0006, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.08461715336859048, |
|
"grad_norm": 0.5719017386436462, |
|
"learning_rate": 9.888278119998573e-05, |
|
"loss": 1.9847, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.08553690503564038, |
|
"grad_norm": 0.618574857711792, |
|
"learning_rate": 9.885165199424738e-05, |
|
"loss": 1.9194, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.08645665670269027, |
|
"grad_norm": 0.8214313983917236, |
|
"learning_rate": 9.882010008468036e-05, |
|
"loss": 1.8845, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.08737640836974017, |
|
"grad_norm": 0.6314259767532349, |
|
"learning_rate": 9.878812574429721e-05, |
|
"loss": 1.8474, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.08829616003679007, |
|
"grad_norm": 0.6584024429321289, |
|
"learning_rate": 9.875572924976568e-05, |
|
"loss": 1.8843, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.08921591170383997, |
|
"grad_norm": 0.7131389379501343, |
|
"learning_rate": 9.87229108814063e-05, |
|
"loss": 1.9198, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.09013566337088987, |
|
"grad_norm": 0.824288010597229, |
|
"learning_rate": 9.868967092319003e-05, |
|
"loss": 1.8658, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.09105541503793976, |
|
"grad_norm": 0.7455874681472778, |
|
"learning_rate": 9.865600966273575e-05, |
|
"loss": 1.7975, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.09197516670498965, |
|
"grad_norm": 1.2152295112609863, |
|
"learning_rate": 9.86219273913078e-05, |
|
"loss": 1.7226, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09289491837203954, |
|
"grad_norm": 5.640716552734375, |
|
"learning_rate": 9.858742440381343e-05, |
|
"loss": 3.5625, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.09381467003908944, |
|
"grad_norm": 3.7876381874084473, |
|
"learning_rate": 9.855250099880025e-05, |
|
"loss": 3.0309, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.09473442170613934, |
|
"grad_norm": 2.426966428756714, |
|
"learning_rate": 9.851715747845373e-05, |
|
"loss": 2.6085, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.09565417337318924, |
|
"grad_norm": 2.368666172027588, |
|
"learning_rate": 9.848139414859441e-05, |
|
"loss": 2.457, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.09657392504023914, |
|
"grad_norm": 1.607815146446228, |
|
"learning_rate": 9.844521131867546e-05, |
|
"loss": 2.2837, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.09749367670728903, |
|
"grad_norm": 1.2020126581192017, |
|
"learning_rate": 9.840860930177983e-05, |
|
"loss": 2.1918, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.09841342837433893, |
|
"grad_norm": 1.469667673110962, |
|
"learning_rate": 9.837158841461766e-05, |
|
"loss": 2.1856, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.09933318004138883, |
|
"grad_norm": 1.2101978063583374, |
|
"learning_rate": 9.833414897752347e-05, |
|
"loss": 2.1572, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.10025293170843873, |
|
"grad_norm": 1.0145184993743896, |
|
"learning_rate": 9.829629131445342e-05, |
|
"loss": 2.0651, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.10117268337548861, |
|
"grad_norm": 1.0942986011505127, |
|
"learning_rate": 9.825801575298248e-05, |
|
"loss": 2.1006, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.10209243504253851, |
|
"grad_norm": 0.812549889087677, |
|
"learning_rate": 9.821932262430165e-05, |
|
"loss": 2.0787, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.10301218670958841, |
|
"grad_norm": 0.9913772344589233, |
|
"learning_rate": 9.8180212263215e-05, |
|
"loss": 2.0555, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.1039319383766383, |
|
"grad_norm": 0.7573890686035156, |
|
"learning_rate": 9.814068500813692e-05, |
|
"loss": 2.022, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.1048516900436882, |
|
"grad_norm": 0.876980185508728, |
|
"learning_rate": 9.8100741201089e-05, |
|
"loss": 2.0677, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.1057714417107381, |
|
"grad_norm": 0.8768622875213623, |
|
"learning_rate": 9.806038118769723e-05, |
|
"loss": 2.0766, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.106691193377788, |
|
"grad_norm": 0.6824678182601929, |
|
"learning_rate": 9.801960531718896e-05, |
|
"loss": 2.1323, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.1076109450448379, |
|
"grad_norm": 0.9467669129371643, |
|
"learning_rate": 9.797841394238986e-05, |
|
"loss": 1.9414, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.1085306967118878, |
|
"grad_norm": 0.5850769281387329, |
|
"learning_rate": 9.793680741972084e-05, |
|
"loss": 1.9249, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.10945044837893769, |
|
"grad_norm": 0.8185686469078064, |
|
"learning_rate": 9.789478610919507e-05, |
|
"loss": 1.9541, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.11037020004598758, |
|
"grad_norm": 0.9609946608543396, |
|
"learning_rate": 9.785235037441474e-05, |
|
"loss": 1.943, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11128995171303747, |
|
"grad_norm": 0.6438754796981812, |
|
"learning_rate": 9.780950058256802e-05, |
|
"loss": 1.9613, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.11220970338008737, |
|
"grad_norm": 1.0584321022033691, |
|
"learning_rate": 9.776623710442579e-05, |
|
"loss": 1.9652, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.11312945504713727, |
|
"grad_norm": 0.5727084279060364, |
|
"learning_rate": 9.772256031433849e-05, |
|
"loss": 1.9769, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.11404920671418717, |
|
"grad_norm": 0.8819255828857422, |
|
"learning_rate": 9.767847059023291e-05, |
|
"loss": 2.0024, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.11496895838123707, |
|
"grad_norm": 0.8120801448822021, |
|
"learning_rate": 9.763396831360884e-05, |
|
"loss": 1.9066, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.11588871004828696, |
|
"grad_norm": 0.5545021891593933, |
|
"learning_rate": 9.758905386953579e-05, |
|
"loss": 1.9619, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.11680846171533686, |
|
"grad_norm": 1.0289326906204224, |
|
"learning_rate": 9.754372764664969e-05, |
|
"loss": 1.9098, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.11772821338238676, |
|
"grad_norm": 0.609516441822052, |
|
"learning_rate": 9.749799003714954e-05, |
|
"loss": 1.9147, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.11864796504943666, |
|
"grad_norm": 0.7941620945930481, |
|
"learning_rate": 9.745184143679397e-05, |
|
"loss": 1.8968, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.11956771671648656, |
|
"grad_norm": 0.787964940071106, |
|
"learning_rate": 9.74052822448978e-05, |
|
"loss": 1.9712, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.12048746838353644, |
|
"grad_norm": 0.730323314666748, |
|
"learning_rate": 9.735831286432868e-05, |
|
"loss": 1.8993, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.12140722005058634, |
|
"grad_norm": 0.8297889232635498, |
|
"learning_rate": 9.731093370150349e-05, |
|
"loss": 1.9682, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.12232697171763623, |
|
"grad_norm": 0.768775463104248, |
|
"learning_rate": 9.72631451663849e-05, |
|
"loss": 1.8542, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.12324672338468613, |
|
"grad_norm": 0.7137448787689209, |
|
"learning_rate": 9.721494767247779e-05, |
|
"loss": 1.8801, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.12416647505173603, |
|
"grad_norm": 0.6385506987571716, |
|
"learning_rate": 9.716634163682569e-05, |
|
"loss": 1.8384, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.12508622671878591, |
|
"grad_norm": 0.7410357594490051, |
|
"learning_rate": 9.71173274800072e-05, |
|
"loss": 1.8761, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.12600597838583583, |
|
"grad_norm": 0.7702000737190247, |
|
"learning_rate": 9.706790562613219e-05, |
|
"loss": 1.8183, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.1269257300528857, |
|
"grad_norm": 0.6795453429222107, |
|
"learning_rate": 9.701807650283839e-05, |
|
"loss": 1.8434, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.12784548171993562, |
|
"grad_norm": 0.8809398412704468, |
|
"learning_rate": 9.696784054128749e-05, |
|
"loss": 1.8462, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.1287652333869855, |
|
"grad_norm": 0.9881577491760254, |
|
"learning_rate": 9.691719817616147e-05, |
|
"loss": 1.7828, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12968498505403542, |
|
"grad_norm": 0.9603993892669678, |
|
"learning_rate": 9.686614984565887e-05, |
|
"loss": 1.8768, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1306047367210853, |
|
"grad_norm": 1.0421313047409058, |
|
"learning_rate": 9.681469599149092e-05, |
|
"loss": 1.8302, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.1315244883881352, |
|
"grad_norm": 0.8529607653617859, |
|
"learning_rate": 9.676283705887783e-05, |
|
"loss": 1.7531, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.1324442400551851, |
|
"grad_norm": 0.8817620277404785, |
|
"learning_rate": 9.67105734965448e-05, |
|
"loss": 1.7358, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.133363991722235, |
|
"grad_norm": 0.9506654739379883, |
|
"learning_rate": 9.665790575671829e-05, |
|
"loss": 1.7789, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1342837433892849, |
|
"grad_norm": 1.1102913618087769, |
|
"learning_rate": 9.660483429512199e-05, |
|
"loss": 1.7401, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.13520349505633478, |
|
"grad_norm": 0.7556246519088745, |
|
"learning_rate": 9.65513595709729e-05, |
|
"loss": 1.728, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.1361232467233847, |
|
"grad_norm": 1.1163665056228638, |
|
"learning_rate": 9.64974820469774e-05, |
|
"loss": 1.6618, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.13704299839043457, |
|
"grad_norm": 0.9814196228981018, |
|
"learning_rate": 9.644320218932722e-05, |
|
"loss": 1.616, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.13796275005748448, |
|
"grad_norm": 1.2995212078094482, |
|
"learning_rate": 9.638852046769539e-05, |
|
"loss": 1.6275, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13796275005748448, |
|
"eval_loss": 1.9198498725891113, |
|
"eval_runtime": 50.0535, |
|
"eval_samples_per_second": 164.624, |
|
"eval_steps_per_second": 20.578, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13888250172453437, |
|
"grad_norm": 3.668370485305786, |
|
"learning_rate": 9.633343735523219e-05, |
|
"loss": 2.841, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.13980225339158428, |
|
"grad_norm": 2.5073230266571045, |
|
"learning_rate": 9.627795332856107e-05, |
|
"loss": 2.3706, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.14072200505863416, |
|
"grad_norm": 1.542073130607605, |
|
"learning_rate": 9.622206886777448e-05, |
|
"loss": 2.1699, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.14164175672568408, |
|
"grad_norm": 1.3604127168655396, |
|
"learning_rate": 9.616578445642981e-05, |
|
"loss": 1.9859, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.14256150839273396, |
|
"grad_norm": 1.1186628341674805, |
|
"learning_rate": 9.61091005815451e-05, |
|
"loss": 1.9205, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.14348126005978387, |
|
"grad_norm": 1.1308863162994385, |
|
"learning_rate": 9.605201773359485e-05, |
|
"loss": 1.9819, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.14440101172683376, |
|
"grad_norm": 1.0661953687667847, |
|
"learning_rate": 9.599453640650585e-05, |
|
"loss": 1.9109, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.14532076339388364, |
|
"grad_norm": 0.7912338376045227, |
|
"learning_rate": 9.59366570976528e-05, |
|
"loss": 1.9331, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.14624051506093355, |
|
"grad_norm": 0.9056004881858826, |
|
"learning_rate": 9.587838030785413e-05, |
|
"loss": 1.9323, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.14716026672798344, |
|
"grad_norm": 1.0585856437683105, |
|
"learning_rate": 9.581970654136751e-05, |
|
"loss": 1.9443, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14808001839503335, |
|
"grad_norm": 1.0043240785598755, |
|
"learning_rate": 9.576063630588563e-05, |
|
"loss": 1.8468, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.14899977006208323, |
|
"grad_norm": 0.9187436699867249, |
|
"learning_rate": 9.570117011253174e-05, |
|
"loss": 1.9558, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.14991952172913314, |
|
"grad_norm": 0.862158477306366, |
|
"learning_rate": 9.56413084758552e-05, |
|
"loss": 1.851, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.15083927339618303, |
|
"grad_norm": 1.04788076877594, |
|
"learning_rate": 9.55810519138271e-05, |
|
"loss": 1.884, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.15175902506323294, |
|
"grad_norm": 0.807015597820282, |
|
"learning_rate": 9.552040094783574e-05, |
|
"loss": 1.8688, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.15267877673028282, |
|
"grad_norm": 0.8749469518661499, |
|
"learning_rate": 9.545935610268211e-05, |
|
"loss": 1.8487, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.1535985283973327, |
|
"grad_norm": 0.7388503551483154, |
|
"learning_rate": 9.539791790657538e-05, |
|
"loss": 1.8447, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.15451828006438262, |
|
"grad_norm": 0.8812807202339172, |
|
"learning_rate": 9.533608689112827e-05, |
|
"loss": 1.8848, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.1554380317314325, |
|
"grad_norm": 0.6926305890083313, |
|
"learning_rate": 9.527386359135253e-05, |
|
"loss": 1.824, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.15635778339848241, |
|
"grad_norm": 0.7211126089096069, |
|
"learning_rate": 9.521124854565425e-05, |
|
"loss": 1.8291, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1572775350655323, |
|
"grad_norm": 0.717591404914856, |
|
"learning_rate": 9.514824229582921e-05, |
|
"loss": 1.8463, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.1581972867325822, |
|
"grad_norm": 0.5658002495765686, |
|
"learning_rate": 9.508484538705824e-05, |
|
"loss": 1.8864, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1591170383996321, |
|
"grad_norm": 0.8670650720596313, |
|
"learning_rate": 9.50210583679024e-05, |
|
"loss": 1.8437, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.160036790066682, |
|
"grad_norm": 0.6736385822296143, |
|
"learning_rate": 9.495688179029838e-05, |
|
"loss": 1.8376, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.1609565417337319, |
|
"grad_norm": 0.7114839553833008, |
|
"learning_rate": 9.489231620955359e-05, |
|
"loss": 1.8259, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1618762934007818, |
|
"grad_norm": 0.8745600581169128, |
|
"learning_rate": 9.482736218434143e-05, |
|
"loss": 1.8571, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.16279604506783169, |
|
"grad_norm": 0.594724714756012, |
|
"learning_rate": 9.476202027669643e-05, |
|
"loss": 1.8385, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.16371579673488157, |
|
"grad_norm": 0.8559861183166504, |
|
"learning_rate": 9.469629105200937e-05, |
|
"loss": 1.805, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.16463554840193148, |
|
"grad_norm": 0.6145199537277222, |
|
"learning_rate": 9.463017507902244e-05, |
|
"loss": 1.8331, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.16555530006898136, |
|
"grad_norm": 1.0015912055969238, |
|
"learning_rate": 9.456367292982429e-05, |
|
"loss": 1.7974, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.16647505173603128, |
|
"grad_norm": 0.5909841060638428, |
|
"learning_rate": 9.449678517984502e-05, |
|
"loss": 1.787, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.16739480340308116, |
|
"grad_norm": 0.766480565071106, |
|
"learning_rate": 9.442951240785135e-05, |
|
"loss": 1.7213, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.16831455507013107, |
|
"grad_norm": 0.6516543626785278, |
|
"learning_rate": 9.436185519594145e-05, |
|
"loss": 1.7548, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.16923430673718096, |
|
"grad_norm": 0.7793421745300293, |
|
"learning_rate": 9.429381412953999e-05, |
|
"loss": 1.7481, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.17015405840423087, |
|
"grad_norm": 0.8920656442642212, |
|
"learning_rate": 9.422538979739307e-05, |
|
"loss": 1.805, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.17107381007128075, |
|
"grad_norm": 0.8302977085113525, |
|
"learning_rate": 9.415658279156311e-05, |
|
"loss": 1.7267, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.17199356173833066, |
|
"grad_norm": 0.8947249054908752, |
|
"learning_rate": 9.408739370742373e-05, |
|
"loss": 1.6794, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.17291331340538055, |
|
"grad_norm": 0.6332067251205444, |
|
"learning_rate": 9.401782314365457e-05, |
|
"loss": 1.7127, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.17383306507243043, |
|
"grad_norm": 0.830932080745697, |
|
"learning_rate": 9.39478717022362e-05, |
|
"loss": 1.6696, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.17475281673948034, |
|
"grad_norm": 0.6934016942977905, |
|
"learning_rate": 9.387753998844482e-05, |
|
"loss": 1.6327, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.17567256840653023, |
|
"grad_norm": 0.733917236328125, |
|
"learning_rate": 9.380682861084701e-05, |
|
"loss": 1.6992, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.17659232007358014, |
|
"grad_norm": 0.7675406336784363, |
|
"learning_rate": 9.373573818129458e-05, |
|
"loss": 1.6759, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.17751207174063002, |
|
"grad_norm": 0.8431460857391357, |
|
"learning_rate": 9.366426931491916e-05, |
|
"loss": 1.6044, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.17843182340767993, |
|
"grad_norm": 0.7542397975921631, |
|
"learning_rate": 9.359242263012693e-05, |
|
"loss": 1.6274, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.17935157507472982, |
|
"grad_norm": 0.8931959867477417, |
|
"learning_rate": 9.352019874859325e-05, |
|
"loss": 1.6006, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.18027132674177973, |
|
"grad_norm": 0.8215823769569397, |
|
"learning_rate": 9.344759829525733e-05, |
|
"loss": 1.5865, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.18119107840882961, |
|
"grad_norm": 0.7112393379211426, |
|
"learning_rate": 9.337462189831669e-05, |
|
"loss": 1.5478, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.18211083007587953, |
|
"grad_norm": 1.0283434391021729, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 1.5316, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.1830305817429294, |
|
"grad_norm": 0.9886683225631714, |
|
"learning_rate": 9.322754380267109e-05, |
|
"loss": 1.4653, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.1839503334099793, |
|
"grad_norm": 1.064937949180603, |
|
"learning_rate": 9.315344337660421e-05, |
|
"loss": 1.4673, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1848700850770292, |
|
"grad_norm": 3.375886917114258, |
|
"learning_rate": 9.307896955219786e-05, |
|
"loss": 2.5919, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.1857898367440791, |
|
"grad_norm": 2.260359764099121, |
|
"learning_rate": 9.300412297385954e-05, |
|
"loss": 2.1729, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.186709588411129, |
|
"grad_norm": 1.4669098854064941, |
|
"learning_rate": 9.292890428922209e-05, |
|
"loss": 1.9383, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.18762934007817889, |
|
"grad_norm": 1.037178635597229, |
|
"learning_rate": 9.285331414913815e-05, |
|
"loss": 1.9071, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.1885490917452288, |
|
"grad_norm": 1.154489517211914, |
|
"learning_rate": 9.277735320767449e-05, |
|
"loss": 1.8216, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.18946884341227868, |
|
"grad_norm": 1.0613019466400146, |
|
"learning_rate": 9.270102212210632e-05, |
|
"loss": 1.7831, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.1903885950793286, |
|
"grad_norm": 1.1248329877853394, |
|
"learning_rate": 9.262432155291167e-05, |
|
"loss": 1.8591, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.19130834674637848, |
|
"grad_norm": 0.8293649554252625, |
|
"learning_rate": 9.254725216376561e-05, |
|
"loss": 1.8205, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.19222809841342836, |
|
"grad_norm": 0.9506818652153015, |
|
"learning_rate": 9.246981462153456e-05, |
|
"loss": 1.8283, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.19314785008047827, |
|
"grad_norm": 0.8719251155853271, |
|
"learning_rate": 9.239200959627048e-05, |
|
"loss": 1.7719, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.19406760174752816, |
|
"grad_norm": 0.808614194393158, |
|
"learning_rate": 9.231383776120512e-05, |
|
"loss": 1.8825, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.19498735341457807, |
|
"grad_norm": 0.897612988948822, |
|
"learning_rate": 9.22352997927441e-05, |
|
"loss": 1.8061, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.19590710508162795, |
|
"grad_norm": 0.7289676070213318, |
|
"learning_rate": 9.215639637046121e-05, |
|
"loss": 1.8348, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.19682685674867786, |
|
"grad_norm": 0.8267980813980103, |
|
"learning_rate": 9.207712817709236e-05, |
|
"loss": 1.7645, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.19774660841572775, |
|
"grad_norm": 0.7317152619361877, |
|
"learning_rate": 9.19974958985298e-05, |
|
"loss": 1.7478, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.19866636008277766, |
|
"grad_norm": 0.6896607875823975, |
|
"learning_rate": 9.191750022381614e-05, |
|
"loss": 1.7699, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.19958611174982754, |
|
"grad_norm": 0.7086347937583923, |
|
"learning_rate": 9.183714184513832e-05, |
|
"loss": 1.7938, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.20050586341687746, |
|
"grad_norm": 0.6830713152885437, |
|
"learning_rate": 9.175642145782179e-05, |
|
"loss": 1.7568, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.20142561508392734, |
|
"grad_norm": 0.5826436281204224, |
|
"learning_rate": 9.167533976032429e-05, |
|
"loss": 1.7548, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.20234536675097722, |
|
"grad_norm": 0.669696569442749, |
|
"learning_rate": 9.159389745423002e-05, |
|
"loss": 1.8096, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.20326511841802714, |
|
"grad_norm": 0.6378855109214783, |
|
"learning_rate": 9.151209524424333e-05, |
|
"loss": 1.7248, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.20418487008507702, |
|
"grad_norm": 0.7418368458747864, |
|
"learning_rate": 9.142993383818283e-05, |
|
"loss": 1.6951, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.20510462175212693, |
|
"grad_norm": 0.6502818465232849, |
|
"learning_rate": 9.134741394697517e-05, |
|
"loss": 1.6809, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.20602437341917682, |
|
"grad_norm": 0.6646417379379272, |
|
"learning_rate": 9.126453628464888e-05, |
|
"loss": 1.7178, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.20694412508622673, |
|
"grad_norm": 0.7070106267929077, |
|
"learning_rate": 9.118130156832823e-05, |
|
"loss": 1.7629, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.2078638767532766, |
|
"grad_norm": 0.6244888305664062, |
|
"learning_rate": 9.109771051822702e-05, |
|
"loss": 1.763, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.20878362842032652, |
|
"grad_norm": 0.6641138195991516, |
|
"learning_rate": 9.10137638576423e-05, |
|
"loss": 1.7016, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.2097033800873764, |
|
"grad_norm": 0.7198558449745178, |
|
"learning_rate": 9.092946231294819e-05, |
|
"loss": 1.7247, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.21062313175442632, |
|
"grad_norm": 0.5700192451477051, |
|
"learning_rate": 9.084480661358953e-05, |
|
"loss": 1.6782, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.2115428834214762, |
|
"grad_norm": 0.8081958293914795, |
|
"learning_rate": 9.075979749207561e-05, |
|
"loss": 1.7437, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2124626350885261, |
|
"grad_norm": 0.7449802756309509, |
|
"learning_rate": 9.067443568397378e-05, |
|
"loss": 1.6924, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.213382386755576, |
|
"grad_norm": 0.8385685086250305, |
|
"learning_rate": 9.058872192790313e-05, |
|
"loss": 1.6572, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.21430213842262588, |
|
"grad_norm": 0.7077139616012573, |
|
"learning_rate": 9.050265696552812e-05, |
|
"loss": 1.6949, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.2152218900896758, |
|
"grad_norm": 0.7295122742652893, |
|
"learning_rate": 9.041624154155208e-05, |
|
"loss": 1.6745, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.21614164175672568, |
|
"grad_norm": 0.6347808241844177, |
|
"learning_rate": 9.032947640371086e-05, |
|
"loss": 1.6441, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.2170613934237756, |
|
"grad_norm": 0.8323748707771301, |
|
"learning_rate": 9.024236230276629e-05, |
|
"loss": 1.6198, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.21798114509082547, |
|
"grad_norm": 0.7440972328186035, |
|
"learning_rate": 9.01548999924997e-05, |
|
"loss": 1.6405, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.21890089675787539, |
|
"grad_norm": 0.7849915623664856, |
|
"learning_rate": 9.006709022970547e-05, |
|
"loss": 1.6361, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.21982064842492527, |
|
"grad_norm": 0.7478511929512024, |
|
"learning_rate": 8.997893377418432e-05, |
|
"loss": 1.543, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.22074040009197515, |
|
"grad_norm": 0.6225507259368896, |
|
"learning_rate": 8.98904313887369e-05, |
|
"loss": 1.6248, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.22166015175902506, |
|
"grad_norm": 0.6926827430725098, |
|
"learning_rate": 8.980158383915713e-05, |
|
"loss": 1.6449, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.22257990342607495, |
|
"grad_norm": 0.6942108869552612, |
|
"learning_rate": 8.971239189422555e-05, |
|
"loss": 1.5912, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.22349965509312486, |
|
"grad_norm": 0.623525857925415, |
|
"learning_rate": 8.962285632570267e-05, |
|
"loss": 1.5436, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.22441940676017474, |
|
"grad_norm": 0.5779447555541992, |
|
"learning_rate": 8.953297790832231e-05, |
|
"loss": 1.5747, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.22533915842722466, |
|
"grad_norm": 0.7703275680541992, |
|
"learning_rate": 8.944275741978493e-05, |
|
"loss": 1.5648, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.22625891009427454, |
|
"grad_norm": 0.7855743765830994, |
|
"learning_rate": 8.935219564075085e-05, |
|
"loss": 1.5246, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.22717866176132445, |
|
"grad_norm": 0.851977527141571, |
|
"learning_rate": 8.926129335483349e-05, |
|
"loss": 1.4777, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.22809841342837434, |
|
"grad_norm": 0.8636126518249512, |
|
"learning_rate": 8.917005134859263e-05, |
|
"loss": 1.5235, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.22901816509542425, |
|
"grad_norm": 1.055405616760254, |
|
"learning_rate": 8.907847041152756e-05, |
|
"loss": 1.5131, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.22993791676247413, |
|
"grad_norm": 1.2434190511703491, |
|
"learning_rate": 8.89865513360703e-05, |
|
"loss": 1.3169, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.23085766842952402, |
|
"grad_norm": 2.794989585876465, |
|
"learning_rate": 8.889429491757871e-05, |
|
"loss": 2.3149, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.23177742009657393, |
|
"grad_norm": 2.0627057552337646, |
|
"learning_rate": 8.88017019543296e-05, |
|
"loss": 2.0616, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.2326971717636238, |
|
"grad_norm": 1.3948839902877808, |
|
"learning_rate": 8.870877324751184e-05, |
|
"loss": 1.9026, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.23361692343067372, |
|
"grad_norm": 0.9678890109062195, |
|
"learning_rate": 8.861550960121945e-05, |
|
"loss": 1.8307, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.2345366750977236, |
|
"grad_norm": 1.0957893133163452, |
|
"learning_rate": 8.852191182244456e-05, |
|
"loss": 1.7364, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.23545642676477352, |
|
"grad_norm": 0.9677236676216125, |
|
"learning_rate": 8.842798072107054e-05, |
|
"loss": 1.762, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.2363761784318234, |
|
"grad_norm": 1.012479305267334, |
|
"learning_rate": 8.833371710986493e-05, |
|
"loss": 1.6711, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.23729593009887331, |
|
"grad_norm": 0.8846522569656372, |
|
"learning_rate": 8.823912180447236e-05, |
|
"loss": 1.8402, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.2382156817659232, |
|
"grad_norm": 1.0523695945739746, |
|
"learning_rate": 8.81441956234076e-05, |
|
"loss": 1.703, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.2391354334329731, |
|
"grad_norm": 1.0177359580993652, |
|
"learning_rate": 8.80489393880484e-05, |
|
"loss": 1.7218, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.240055185100023, |
|
"grad_norm": 0.8454842567443848, |
|
"learning_rate": 8.79533539226284e-05, |
|
"loss": 1.6839, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.24097493676707288, |
|
"grad_norm": 0.9161872863769531, |
|
"learning_rate": 8.785744005423002e-05, |
|
"loss": 1.7333, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.2418946884341228, |
|
"grad_norm": 0.7548457384109497, |
|
"learning_rate": 8.77611986127773e-05, |
|
"loss": 1.696, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.24281444010117267, |
|
"grad_norm": 0.9760596752166748, |
|
"learning_rate": 8.766463043102864e-05, |
|
"loss": 1.7102, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.24373419176822259, |
|
"grad_norm": 0.7247944474220276, |
|
"learning_rate": 8.756773634456975e-05, |
|
"loss": 1.7439, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.24465394343527247, |
|
"grad_norm": 0.7252097129821777, |
|
"learning_rate": 8.747051719180626e-05, |
|
"loss": 1.7811, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.24557369510232238, |
|
"grad_norm": 0.6071887016296387, |
|
"learning_rate": 8.737297381395657e-05, |
|
"loss": 1.6398, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.24649344676937227, |
|
"grad_norm": 0.7072895765304565, |
|
"learning_rate": 8.727510705504454e-05, |
|
"loss": 1.68, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.24741319843642218, |
|
"grad_norm": 0.7006264925003052, |
|
"learning_rate": 8.717691776189214e-05, |
|
"loss": 1.6814, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.24833295010347206, |
|
"grad_norm": 0.6832376718521118, |
|
"learning_rate": 8.707840678411224e-05, |
|
"loss": 1.6259, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.24925270177052197, |
|
"grad_norm": 0.5689120292663574, |
|
"learning_rate": 8.697957497410108e-05, |
|
"loss": 1.6786, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.25017245343757183, |
|
"grad_norm": 0.8517261743545532, |
|
"learning_rate": 8.688042318703111e-05, |
|
"loss": 1.6644, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.25109220510462177, |
|
"grad_norm": 0.5697482824325562, |
|
"learning_rate": 8.678095228084343e-05, |
|
"loss": 1.6705, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.25201195677167165, |
|
"grad_norm": 0.6067523956298828, |
|
"learning_rate": 8.66811631162404e-05, |
|
"loss": 1.7022, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.25293170843872154, |
|
"grad_norm": 0.6944383382797241, |
|
"learning_rate": 8.65810565566782e-05, |
|
"loss": 1.6235, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2538514601057714, |
|
"grad_norm": 0.5674624443054199, |
|
"learning_rate": 8.648063346835942e-05, |
|
"loss": 1.6757, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.25477121177282136, |
|
"grad_norm": 0.6712316274642944, |
|
"learning_rate": 8.637989472022549e-05, |
|
"loss": 1.627, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.25569096343987124, |
|
"grad_norm": 0.5806477069854736, |
|
"learning_rate": 8.627884118394913e-05, |
|
"loss": 1.6709, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.25661071510692113, |
|
"grad_norm": 0.5989074110984802, |
|
"learning_rate": 8.617747373392696e-05, |
|
"loss": 1.6802, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.257530466773971, |
|
"grad_norm": 0.6222725510597229, |
|
"learning_rate": 8.607579324727175e-05, |
|
"loss": 1.5823, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.25845021844102095, |
|
"grad_norm": 0.6905350685119629, |
|
"learning_rate": 8.597380060380493e-05, |
|
"loss": 1.5795, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.25936997010807084, |
|
"grad_norm": 0.9093815684318542, |
|
"learning_rate": 8.5871496686049e-05, |
|
"loss": 1.6131, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.2602897217751207, |
|
"grad_norm": 0.8468539714813232, |
|
"learning_rate": 8.576888237921983e-05, |
|
"loss": 1.5836, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.2612094734421706, |
|
"grad_norm": 0.8949149250984192, |
|
"learning_rate": 8.566595857121902e-05, |
|
"loss": 1.5574, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.2621292251092205, |
|
"grad_norm": 0.7991402745246887, |
|
"learning_rate": 8.556272615262622e-05, |
|
"loss": 1.5941, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.2630489767762704, |
|
"grad_norm": 1.0631219148635864, |
|
"learning_rate": 8.545918601669147e-05, |
|
"loss": 1.6469, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.2639687284433203, |
|
"grad_norm": 0.6237906217575073, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 1.5148, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.2648884801103702, |
|
"grad_norm": 0.9192318320274353, |
|
"learning_rate": 8.525118617910143e-05, |
|
"loss": 1.4909, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.2658082317774201, |
|
"grad_norm": 0.8480085134506226, |
|
"learning_rate": 8.514672827722824e-05, |
|
"loss": 1.4746, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.26672798344447, |
|
"grad_norm": 0.9110789895057678, |
|
"learning_rate": 8.504196625756166e-05, |
|
"loss": 1.5245, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2676477351115199, |
|
"grad_norm": 0.7915551066398621, |
|
"learning_rate": 8.493690102658703e-05, |
|
"loss": 1.4658, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2685674867785698, |
|
"grad_norm": 0.8689735531806946, |
|
"learning_rate": 8.483153349341335e-05, |
|
"loss": 1.5159, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.26948723844561967, |
|
"grad_norm": 0.966712474822998, |
|
"learning_rate": 8.472586456976535e-05, |
|
"loss": 1.4782, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.27040699011266955, |
|
"grad_norm": 0.8555867075920105, |
|
"learning_rate": 8.461989516997565e-05, |
|
"loss": 1.5046, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.2713267417797195, |
|
"grad_norm": 0.8497052192687988, |
|
"learning_rate": 8.45136262109768e-05, |
|
"loss": 1.3816, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2722464934467694, |
|
"grad_norm": 0.776263952255249, |
|
"learning_rate": 8.440705861229344e-05, |
|
"loss": 1.5065, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.27316624511381926, |
|
"grad_norm": 1.1991870403289795, |
|
"learning_rate": 8.430019329603422e-05, |
|
"loss": 1.4482, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.27408599678086915, |
|
"grad_norm": 0.9438532590866089, |
|
"learning_rate": 8.41930311868839e-05, |
|
"loss": 1.4023, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.2750057484479191, |
|
"grad_norm": 1.3889118432998657, |
|
"learning_rate": 8.408557321209534e-05, |
|
"loss": 1.3493, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.27592550011496897, |
|
"grad_norm": 1.7762432098388672, |
|
"learning_rate": 8.397782030148147e-05, |
|
"loss": 1.257, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.27592550011496897, |
|
"eval_loss": 1.6551681756973267, |
|
"eval_runtime": 50.0018, |
|
"eval_samples_per_second": 164.794, |
|
"eval_steps_per_second": 20.599, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.27684525178201885, |
|
"grad_norm": 2.846353530883789, |
|
"learning_rate": 8.386977338740724e-05, |
|
"loss": 2.0714, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.27776500344906874, |
|
"grad_norm": 2.5227103233337402, |
|
"learning_rate": 8.376143340478153e-05, |
|
"loss": 1.8748, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.2786847551161186, |
|
"grad_norm": 2.0501370429992676, |
|
"learning_rate": 8.365280129104912e-05, |
|
"loss": 1.7948, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.27960450678316856, |
|
"grad_norm": 1.0905100107192993, |
|
"learning_rate": 8.354387798618253e-05, |
|
"loss": 1.7508, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.28052425845021844, |
|
"grad_norm": 1.1486353874206543, |
|
"learning_rate": 8.343466443267391e-05, |
|
"loss": 1.7368, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.28144401011726833, |
|
"grad_norm": 1.1892223358154297, |
|
"learning_rate": 8.332516157552684e-05, |
|
"loss": 1.6652, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.2823637617843182, |
|
"grad_norm": 1.027815341949463, |
|
"learning_rate": 8.321537036224822e-05, |
|
"loss": 1.6847, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.28328351345136815, |
|
"grad_norm": 1.1536738872528076, |
|
"learning_rate": 8.310529174284004e-05, |
|
"loss": 1.7384, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.28420326511841804, |
|
"grad_norm": 0.8124598264694214, |
|
"learning_rate": 8.299492666979113e-05, |
|
"loss": 1.6906, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.2851230167854679, |
|
"grad_norm": 1.1598918437957764, |
|
"learning_rate": 8.2884276098069e-05, |
|
"loss": 1.7223, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2860427684525178, |
|
"grad_norm": 1.1664563417434692, |
|
"learning_rate": 8.277334098511147e-05, |
|
"loss": 1.6548, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.28696252011956774, |
|
"grad_norm": 0.6637358069419861, |
|
"learning_rate": 8.266212229081847e-05, |
|
"loss": 1.6638, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.2878822717866176, |
|
"grad_norm": 0.987754225730896, |
|
"learning_rate": 8.255062097754372e-05, |
|
"loss": 1.7133, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.2888020234536675, |
|
"grad_norm": 0.7713818550109863, |
|
"learning_rate": 8.243883801008632e-05, |
|
"loss": 1.6705, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.2897217751207174, |
|
"grad_norm": 1.0500911474227905, |
|
"learning_rate": 8.232677435568252e-05, |
|
"loss": 1.5651, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2906415267877673, |
|
"grad_norm": 0.7900861501693726, |
|
"learning_rate": 8.221443098399732e-05, |
|
"loss": 1.6276, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.2915612784548172, |
|
"grad_norm": 0.7363952994346619, |
|
"learning_rate": 8.210180886711602e-05, |
|
"loss": 1.5795, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.2924810301218671, |
|
"grad_norm": 0.895269513130188, |
|
"learning_rate": 8.198890897953586e-05, |
|
"loss": 1.6644, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.293400781788917, |
|
"grad_norm": 0.9014370441436768, |
|
"learning_rate": 8.187573229815758e-05, |
|
"loss": 1.619, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.29432053345596687, |
|
"grad_norm": 1.06600821018219, |
|
"learning_rate": 8.176227980227694e-05, |
|
"loss": 1.6779, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2952402851230168, |
|
"grad_norm": 1.0690526962280273, |
|
"learning_rate": 8.164855247357627e-05, |
|
"loss": 1.553, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.2961600367900667, |
|
"grad_norm": 0.8835525512695312, |
|
"learning_rate": 8.153455129611605e-05, |
|
"loss": 1.614, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.2970797884571166, |
|
"grad_norm": 1.1458913087844849, |
|
"learning_rate": 8.142027725632623e-05, |
|
"loss": 1.6015, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.29799954012416646, |
|
"grad_norm": 0.6511287093162537, |
|
"learning_rate": 8.130573134299782e-05, |
|
"loss": 1.6129, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.29891929179121635, |
|
"grad_norm": 1.1985218524932861, |
|
"learning_rate": 8.119091454727428e-05, |
|
"loss": 1.564, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2998390434582663, |
|
"grad_norm": 1.0999850034713745, |
|
"learning_rate": 8.107582786264299e-05, |
|
"loss": 1.6318, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.30075879512531617, |
|
"grad_norm": 0.664042055606842, |
|
"learning_rate": 8.09604722849266e-05, |
|
"loss": 1.6049, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.30167854679236605, |
|
"grad_norm": 0.9706513285636902, |
|
"learning_rate": 8.084484881227448e-05, |
|
"loss": 1.6157, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.30259829845941594, |
|
"grad_norm": 0.7374880909919739, |
|
"learning_rate": 8.072895844515398e-05, |
|
"loss": 1.573, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.3035180501264659, |
|
"grad_norm": 0.9631950855255127, |
|
"learning_rate": 8.061280218634192e-05, |
|
"loss": 1.5568, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.30443780179351576, |
|
"grad_norm": 0.9304092526435852, |
|
"learning_rate": 8.049638104091575e-05, |
|
"loss": 1.6135, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.30535755346056564, |
|
"grad_norm": 0.7095350027084351, |
|
"learning_rate": 8.037969601624495e-05, |
|
"loss": 1.5427, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.30627730512761553, |
|
"grad_norm": 1.130644679069519, |
|
"learning_rate": 8.026274812198234e-05, |
|
"loss": 1.5704, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.3071970567946654, |
|
"grad_norm": 0.6161345839500427, |
|
"learning_rate": 8.014553837005527e-05, |
|
"loss": 1.5705, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.30811680846171535, |
|
"grad_norm": 0.7174437046051025, |
|
"learning_rate": 8.002806777465685e-05, |
|
"loss": 1.599, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.30903656012876524, |
|
"grad_norm": 1.0651494264602661, |
|
"learning_rate": 7.991033735223729e-05, |
|
"loss": 1.538, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.3099563117958151, |
|
"grad_norm": 0.7327350974082947, |
|
"learning_rate": 7.979234812149501e-05, |
|
"loss": 1.4112, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.310876063462865, |
|
"grad_norm": 0.8603296279907227, |
|
"learning_rate": 7.967410110336782e-05, |
|
"loss": 1.4141, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.31179581512991494, |
|
"grad_norm": 0.7242352962493896, |
|
"learning_rate": 7.955559732102414e-05, |
|
"loss": 1.4316, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.31271556679696483, |
|
"grad_norm": 0.7651688456535339, |
|
"learning_rate": 7.943683779985413e-05, |
|
"loss": 1.5116, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3136353184640147, |
|
"grad_norm": 0.6736311316490173, |
|
"learning_rate": 7.931782356746076e-05, |
|
"loss": 1.4454, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.3145550701310646, |
|
"grad_norm": 0.6474123597145081, |
|
"learning_rate": 7.919855565365102e-05, |
|
"loss": 1.4616, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.31547482179811454, |
|
"grad_norm": 0.6624403595924377, |
|
"learning_rate": 7.907903509042696e-05, |
|
"loss": 1.4973, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.3163945734651644, |
|
"grad_norm": 0.6722452640533447, |
|
"learning_rate": 7.895926291197667e-05, |
|
"loss": 1.4452, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.3173143251322143, |
|
"grad_norm": 0.8001620769500732, |
|
"learning_rate": 7.883924015466553e-05, |
|
"loss": 1.4532, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.3182340767992642, |
|
"grad_norm": 0.8588351011276245, |
|
"learning_rate": 7.871896785702707e-05, |
|
"loss": 1.4036, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.31915382846631407, |
|
"grad_norm": 0.8040063977241516, |
|
"learning_rate": 7.859844705975404e-05, |
|
"loss": 1.3815, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.320073580133364, |
|
"grad_norm": 1.0031120777130127, |
|
"learning_rate": 7.847767880568945e-05, |
|
"loss": 1.3611, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.3209933318004139, |
|
"grad_norm": 0.8174616098403931, |
|
"learning_rate": 7.835666413981743e-05, |
|
"loss": 1.2897, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.3219130834674638, |
|
"grad_norm": 1.1649737358093262, |
|
"learning_rate": 7.823540410925435e-05, |
|
"loss": 1.22, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.32283283513451366, |
|
"grad_norm": 2.4392778873443604, |
|
"learning_rate": 7.811389976323961e-05, |
|
"loss": 1.9789, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.3237525868015636, |
|
"grad_norm": 1.9123626947402954, |
|
"learning_rate": 7.799215215312667e-05, |
|
"loss": 1.817, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.3246723384686135, |
|
"grad_norm": 1.556714653968811, |
|
"learning_rate": 7.787016233237387e-05, |
|
"loss": 1.6248, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.32559209013566337, |
|
"grad_norm": 1.0949770212173462, |
|
"learning_rate": 7.774793135653538e-05, |
|
"loss": 1.6925, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.32651184180271325, |
|
"grad_norm": 1.0330501794815063, |
|
"learning_rate": 7.7625460283252e-05, |
|
"loss": 1.6667, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.32743159346976314, |
|
"grad_norm": 1.113447666168213, |
|
"learning_rate": 7.750275017224207e-05, |
|
"loss": 1.6345, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.3283513451368131, |
|
"grad_norm": 1.0157980918884277, |
|
"learning_rate": 7.737980208529231e-05, |
|
"loss": 1.6047, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.32927109680386296, |
|
"grad_norm": 0.8798123598098755, |
|
"learning_rate": 7.725661708624853e-05, |
|
"loss": 1.5993, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.33019084847091285, |
|
"grad_norm": 0.9784142374992371, |
|
"learning_rate": 7.713319624100657e-05, |
|
"loss": 1.578, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.33111060013796273, |
|
"grad_norm": 0.9105007648468018, |
|
"learning_rate": 7.700954061750293e-05, |
|
"loss": 1.6108, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.33203035180501267, |
|
"grad_norm": 0.9545553922653198, |
|
"learning_rate": 7.688565128570564e-05, |
|
"loss": 1.6134, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.33295010347206255, |
|
"grad_norm": 0.8679737448692322, |
|
"learning_rate": 7.676152931760496e-05, |
|
"loss": 1.5928, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.33386985513911244, |
|
"grad_norm": 0.6711000204086304, |
|
"learning_rate": 7.663717578720411e-05, |
|
"loss": 1.6628, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.3347896068061623, |
|
"grad_norm": 0.7280721068382263, |
|
"learning_rate": 7.651259177050996e-05, |
|
"loss": 1.6265, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.33570935847321226, |
|
"grad_norm": 1.0024129152297974, |
|
"learning_rate": 7.63877783455237e-05, |
|
"loss": 1.6356, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.33662911014026214, |
|
"grad_norm": 0.7483541369438171, |
|
"learning_rate": 7.626273659223165e-05, |
|
"loss": 1.5906, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.33754886180731203, |
|
"grad_norm": 0.811964750289917, |
|
"learning_rate": 7.61374675925957e-05, |
|
"loss": 1.5831, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.3384686134743619, |
|
"grad_norm": 0.9911743998527527, |
|
"learning_rate": 7.60119724305441e-05, |
|
"loss": 1.5819, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.3393883651414118, |
|
"grad_norm": 0.6445810794830322, |
|
"learning_rate": 7.588625219196208e-05, |
|
"loss": 1.5991, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.34030811680846174, |
|
"grad_norm": 0.8051655888557434, |
|
"learning_rate": 7.576030796468233e-05, |
|
"loss": 1.5491, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3412278684755116, |
|
"grad_norm": 0.9976129531860352, |
|
"learning_rate": 7.563414083847573e-05, |
|
"loss": 1.5645, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.3421476201425615, |
|
"grad_norm": 0.7071700096130371, |
|
"learning_rate": 7.550775190504189e-05, |
|
"loss": 1.528, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.3430673718096114, |
|
"grad_norm": 0.7412607669830322, |
|
"learning_rate": 7.538114225799954e-05, |
|
"loss": 1.5505, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.3439871234766613, |
|
"grad_norm": 0.7667213082313538, |
|
"learning_rate": 7.525431299287738e-05, |
|
"loss": 1.525, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.3449068751437112, |
|
"grad_norm": 0.5956572890281677, |
|
"learning_rate": 7.51272652071043e-05, |
|
"loss": 1.5149, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3458266268107611, |
|
"grad_norm": 0.797289252281189, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.5407, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.346746378477811, |
|
"grad_norm": 0.7374883890151978, |
|
"learning_rate": 7.48725184727656e-05, |
|
"loss": 1.5777, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.34766613014486086, |
|
"grad_norm": 0.7943119406700134, |
|
"learning_rate": 7.47448217284739e-05, |
|
"loss": 1.5795, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.3485858818119108, |
|
"grad_norm": 0.6397266387939453, |
|
"learning_rate": 7.461691087205993e-05, |
|
"loss": 1.5687, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.3495056334789607, |
|
"grad_norm": 0.7197580337524414, |
|
"learning_rate": 7.448878701031142e-05, |
|
"loss": 1.4994, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.35042538514601057, |
|
"grad_norm": 0.614570677280426, |
|
"learning_rate": 7.436045125185922e-05, |
|
"loss": 1.5185, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.35134513681306045, |
|
"grad_norm": 0.766139566898346, |
|
"learning_rate": 7.423190470716761e-05, |
|
"loss": 1.5445, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3522648884801104, |
|
"grad_norm": 0.6843118667602539, |
|
"learning_rate": 7.410314848852483e-05, |
|
"loss": 1.4972, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.3531846401471603, |
|
"grad_norm": 0.6766433119773865, |
|
"learning_rate": 7.397418371003333e-05, |
|
"loss": 1.4285, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.35410439181421016, |
|
"grad_norm": 0.8003432154655457, |
|
"learning_rate": 7.384501148760024e-05, |
|
"loss": 1.5283, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.35502414348126005, |
|
"grad_norm": 0.8524566888809204, |
|
"learning_rate": 7.371563293892761e-05, |
|
"loss": 1.4922, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.35594389514830993, |
|
"grad_norm": 0.9243666529655457, |
|
"learning_rate": 7.358604918350288e-05, |
|
"loss": 1.4883, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.35686364681535987, |
|
"grad_norm": 0.7275565266609192, |
|
"learning_rate": 7.345626134258898e-05, |
|
"loss": 1.4268, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.35778339848240975, |
|
"grad_norm": 0.6936664581298828, |
|
"learning_rate": 7.332627053921482e-05, |
|
"loss": 1.3605, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.35870315014945964, |
|
"grad_norm": 0.7576991319656372, |
|
"learning_rate": 7.319607789816555e-05, |
|
"loss": 1.4222, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3596229018165095, |
|
"grad_norm": 0.7377772331237793, |
|
"learning_rate": 7.306568454597269e-05, |
|
"loss": 1.4681, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.36054265348355946, |
|
"grad_norm": 0.8987662196159363, |
|
"learning_rate": 7.293509161090452e-05, |
|
"loss": 1.4066, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.36146240515060934, |
|
"grad_norm": 0.7513107061386108, |
|
"learning_rate": 7.280430022295631e-05, |
|
"loss": 1.4134, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.36238215681765923, |
|
"grad_norm": 0.6676529049873352, |
|
"learning_rate": 7.267331151384039e-05, |
|
"loss": 1.4374, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.3633019084847091, |
|
"grad_norm": 0.8300096988677979, |
|
"learning_rate": 7.254212661697659e-05, |
|
"loss": 1.3849, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.36422166015175905, |
|
"grad_norm": 0.8758336901664734, |
|
"learning_rate": 7.241074666748227e-05, |
|
"loss": 1.3774, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.36514141181880894, |
|
"grad_norm": 0.8264380693435669, |
|
"learning_rate": 7.227917280216254e-05, |
|
"loss": 1.3575, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.3660611634858588, |
|
"grad_norm": 1.014760136604309, |
|
"learning_rate": 7.214740615950041e-05, |
|
"loss": 1.3026, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.3669809151529087, |
|
"grad_norm": 0.8453448414802551, |
|
"learning_rate": 7.201544787964698e-05, |
|
"loss": 1.3114, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.3679006668199586, |
|
"grad_norm": 1.1275343894958496, |
|
"learning_rate": 7.188329910441154e-05, |
|
"loss": 1.1734, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36882041848700853, |
|
"grad_norm": 2.2339935302734375, |
|
"learning_rate": 7.17509609772517e-05, |
|
"loss": 1.8776, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3697401701540584, |
|
"grad_norm": 1.5469164848327637, |
|
"learning_rate": 7.161843464326348e-05, |
|
"loss": 1.6876, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3706599218211083, |
|
"grad_norm": 1.2731298208236694, |
|
"learning_rate": 7.148572124917148e-05, |
|
"loss": 1.581, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.3715796734881582, |
|
"grad_norm": 0.9135886430740356, |
|
"learning_rate": 7.13528219433188e-05, |
|
"loss": 1.5912, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3724994251552081, |
|
"grad_norm": 0.8309260606765747, |
|
"learning_rate": 7.121973787565726e-05, |
|
"loss": 1.5825, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.373419176822258, |
|
"grad_norm": 0.8344767093658447, |
|
"learning_rate": 7.10864701977374e-05, |
|
"loss": 1.5724, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.3743389284893079, |
|
"grad_norm": 0.8113982081413269, |
|
"learning_rate": 7.095302006269842e-05, |
|
"loss": 1.5899, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.37525868015635777, |
|
"grad_norm": 0.8019097447395325, |
|
"learning_rate": 7.081938862525839e-05, |
|
"loss": 1.6347, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.37617843182340766, |
|
"grad_norm": 0.7903069257736206, |
|
"learning_rate": 7.06855770417041e-05, |
|
"loss": 1.5924, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.3770981834904576, |
|
"grad_norm": 0.7817911505699158, |
|
"learning_rate": 7.055158646988109e-05, |
|
"loss": 1.5705, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3780179351575075, |
|
"grad_norm": 0.7876037359237671, |
|
"learning_rate": 7.041741806918371e-05, |
|
"loss": 1.553, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.37893768682455736, |
|
"grad_norm": 0.8235687017440796, |
|
"learning_rate": 7.028307300054499e-05, |
|
"loss": 1.5954, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.37985743849160725, |
|
"grad_norm": 0.6427410244941711, |
|
"learning_rate": 7.014855242642662e-05, |
|
"loss": 1.5935, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.3807771901586572, |
|
"grad_norm": 0.6327434182167053, |
|
"learning_rate": 7.001385751080894e-05, |
|
"loss": 1.5992, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.38169694182570707, |
|
"grad_norm": 0.705020010471344, |
|
"learning_rate": 6.987898941918082e-05, |
|
"loss": 1.5326, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.38261669349275695, |
|
"grad_norm": 0.6907270550727844, |
|
"learning_rate": 6.974394931852956e-05, |
|
"loss": 1.543, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.38353644515980684, |
|
"grad_norm": 0.6643316745758057, |
|
"learning_rate": 6.960873837733088e-05, |
|
"loss": 1.501, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3844561968268567, |
|
"grad_norm": 0.6536545753479004, |
|
"learning_rate": 6.94733577655387e-05, |
|
"loss": 1.5498, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.38537594849390666, |
|
"grad_norm": 0.7011268138885498, |
|
"learning_rate": 6.933780865457508e-05, |
|
"loss": 1.6318, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.38629570016095655, |
|
"grad_norm": 0.6373593211174011, |
|
"learning_rate": 6.920209221732006e-05, |
|
"loss": 1.5523, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.38721545182800643, |
|
"grad_norm": 0.5898979902267456, |
|
"learning_rate": 6.90662096281016e-05, |
|
"loss": 1.5695, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3881352034950563, |
|
"grad_norm": 0.6590458750724792, |
|
"learning_rate": 6.893016206268518e-05, |
|
"loss": 1.4721, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.38905495516210625, |
|
"grad_norm": 0.6448785662651062, |
|
"learning_rate": 6.879395069826393e-05, |
|
"loss": 1.5485, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.38997470682915614, |
|
"grad_norm": 0.648471474647522, |
|
"learning_rate": 6.865757671344827e-05, |
|
"loss": 1.5469, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.390894458496206, |
|
"grad_norm": 0.8980266451835632, |
|
"learning_rate": 6.85210412882557e-05, |
|
"loss": 1.5831, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3918142101632559, |
|
"grad_norm": 0.6711221933364868, |
|
"learning_rate": 6.838434560410064e-05, |
|
"loss": 1.4341, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.39273396183030584, |
|
"grad_norm": 0.8187699317932129, |
|
"learning_rate": 6.824749084378428e-05, |
|
"loss": 1.4696, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.39365371349735573, |
|
"grad_norm": 0.8267800807952881, |
|
"learning_rate": 6.811047819148413e-05, |
|
"loss": 1.5041, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.3945734651644056, |
|
"grad_norm": 0.764512300491333, |
|
"learning_rate": 6.797330883274403e-05, |
|
"loss": 1.4774, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3954932168314555, |
|
"grad_norm": 0.8012046813964844, |
|
"learning_rate": 6.783598395446371e-05, |
|
"loss": 1.4947, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3964129684985054, |
|
"grad_norm": 0.5986045598983765, |
|
"learning_rate": 6.769850474488859e-05, |
|
"loss": 1.5161, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3973327201655553, |
|
"grad_norm": 0.8222801685333252, |
|
"learning_rate": 6.756087239359947e-05, |
|
"loss": 1.4726, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3982524718326052, |
|
"grad_norm": 0.6513310670852661, |
|
"learning_rate": 6.742308809150232e-05, |
|
"loss": 1.4894, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3991722234996551, |
|
"grad_norm": 0.6340191960334778, |
|
"learning_rate": 6.728515303081781e-05, |
|
"loss": 1.4616, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.40009197516670497, |
|
"grad_norm": 0.8488625288009644, |
|
"learning_rate": 6.714706840507121e-05, |
|
"loss": 1.4096, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.4010117268337549, |
|
"grad_norm": 0.6022557020187378, |
|
"learning_rate": 6.700883540908184e-05, |
|
"loss": 1.4149, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.4019314785008048, |
|
"grad_norm": 0.7043591141700745, |
|
"learning_rate": 6.687045523895293e-05, |
|
"loss": 1.492, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.4028512301678547, |
|
"grad_norm": 0.8003234267234802, |
|
"learning_rate": 6.673192909206108e-05, |
|
"loss": 1.3878, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.40377098183490456, |
|
"grad_norm": 0.6873340010643005, |
|
"learning_rate": 6.659325816704611e-05, |
|
"loss": 1.4326, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.40469073350195445, |
|
"grad_norm": 0.673957884311676, |
|
"learning_rate": 6.64544436638005e-05, |
|
"loss": 1.4086, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4056104851690044, |
|
"grad_norm": 0.7485764026641846, |
|
"learning_rate": 6.63154867834591e-05, |
|
"loss": 1.3967, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.40653023683605427, |
|
"grad_norm": 0.6807146072387695, |
|
"learning_rate": 6.617638872838874e-05, |
|
"loss": 1.3429, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.40744998850310415, |
|
"grad_norm": 0.6480006575584412, |
|
"learning_rate": 6.603715070217778e-05, |
|
"loss": 1.3968, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.40836974017015404, |
|
"grad_norm": 0.7995392084121704, |
|
"learning_rate": 6.589777390962575e-05, |
|
"loss": 1.4309, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.409289491837204, |
|
"grad_norm": 0.7234594821929932, |
|
"learning_rate": 6.57582595567329e-05, |
|
"loss": 1.2972, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.41020924350425386, |
|
"grad_norm": 0.9040266871452332, |
|
"learning_rate": 6.561860885068972e-05, |
|
"loss": 1.3339, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.41112899517130375, |
|
"grad_norm": 0.8719410300254822, |
|
"learning_rate": 6.547882299986658e-05, |
|
"loss": 1.2914, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.41204874683835363, |
|
"grad_norm": 0.964036226272583, |
|
"learning_rate": 6.533890321380319e-05, |
|
"loss": 1.2348, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.4129684985054035, |
|
"grad_norm": 1.0289238691329956, |
|
"learning_rate": 6.519885070319827e-05, |
|
"loss": 1.1747, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.41388825017245345, |
|
"grad_norm": 1.0722767114639282, |
|
"learning_rate": 6.505866667989884e-05, |
|
"loss": 1.1749, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.41388825017245345, |
|
"eval_loss": 1.5185648202896118, |
|
"eval_runtime": 49.961, |
|
"eval_samples_per_second": 164.929, |
|
"eval_steps_per_second": 20.616, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.41480800183950334, |
|
"grad_norm": 2.0002212524414062, |
|
"learning_rate": 6.491835235689e-05, |
|
"loss": 1.8527, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.4157277535065532, |
|
"grad_norm": 1.7632036209106445, |
|
"learning_rate": 6.477790894828421e-05, |
|
"loss": 1.6736, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.4166475051736031, |
|
"grad_norm": 1.2842786312103271, |
|
"learning_rate": 6.463733766931095e-05, |
|
"loss": 1.6531, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.41756725684065304, |
|
"grad_norm": 0.9530149698257446, |
|
"learning_rate": 6.449663973630613e-05, |
|
"loss": 1.5728, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.41848700850770293, |
|
"grad_norm": 0.9490489363670349, |
|
"learning_rate": 6.435581636670154e-05, |
|
"loss": 1.458, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4194067601747528, |
|
"grad_norm": 0.9226535558700562, |
|
"learning_rate": 6.421486877901437e-05, |
|
"loss": 1.477, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.4203265118418027, |
|
"grad_norm": 0.7617946267127991, |
|
"learning_rate": 6.407379819283661e-05, |
|
"loss": 1.4929, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.42124626350885264, |
|
"grad_norm": 0.7731391787528992, |
|
"learning_rate": 6.39326058288246e-05, |
|
"loss": 1.5828, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.4221660151759025, |
|
"grad_norm": 0.8461527824401855, |
|
"learning_rate": 6.379129290868837e-05, |
|
"loss": 1.558, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.4230857668429524, |
|
"grad_norm": 0.8030949234962463, |
|
"learning_rate": 6.364986065518106e-05, |
|
"loss": 1.5026, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4240055185100023, |
|
"grad_norm": 0.9712105989456177, |
|
"learning_rate": 6.350831029208844e-05, |
|
"loss": 1.5603, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.4249252701770522, |
|
"grad_norm": 0.936730146408081, |
|
"learning_rate": 6.336664304421818e-05, |
|
"loss": 1.5037, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.4258450218441021, |
|
"grad_norm": 0.6644638776779175, |
|
"learning_rate": 6.322486013738942e-05, |
|
"loss": 1.5632, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.426764773511152, |
|
"grad_norm": 0.8889780044555664, |
|
"learning_rate": 6.308296279842205e-05, |
|
"loss": 1.5392, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.4276845251782019, |
|
"grad_norm": 0.771960973739624, |
|
"learning_rate": 6.294095225512603e-05, |
|
"loss": 1.5013, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.42860427684525176, |
|
"grad_norm": 0.7682729363441467, |
|
"learning_rate": 6.2798829736291e-05, |
|
"loss": 1.4829, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.4295240285123017, |
|
"grad_norm": 0.9224911332130432, |
|
"learning_rate": 6.265659647167543e-05, |
|
"loss": 1.5283, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.4304437801793516, |
|
"grad_norm": 0.7462615370750427, |
|
"learning_rate": 6.251425369199599e-05, |
|
"loss": 1.4762, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.43136353184640147, |
|
"grad_norm": 0.7566426396369934, |
|
"learning_rate": 6.237180262891708e-05, |
|
"loss": 1.5537, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.43228328351345136, |
|
"grad_norm": 0.7278396487236023, |
|
"learning_rate": 6.222924451504001e-05, |
|
"loss": 1.4805, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.43320303518050124, |
|
"grad_norm": 0.6063376069068909, |
|
"learning_rate": 6.208658058389231e-05, |
|
"loss": 1.5403, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.4341227868475512, |
|
"grad_norm": 0.7265048623085022, |
|
"learning_rate": 6.194381206991722e-05, |
|
"loss": 1.5131, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.43504253851460106, |
|
"grad_norm": 0.6536186933517456, |
|
"learning_rate": 6.180094020846291e-05, |
|
"loss": 1.4777, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.43596229018165095, |
|
"grad_norm": 0.6153502464294434, |
|
"learning_rate": 6.165796623577171e-05, |
|
"loss": 1.4592, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.43688204184870083, |
|
"grad_norm": 0.7638461589813232, |
|
"learning_rate": 6.15148913889696e-05, |
|
"loss": 1.5779, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.43780179351575077, |
|
"grad_norm": 0.755756139755249, |
|
"learning_rate": 6.137171690605533e-05, |
|
"loss": 1.5246, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.43872154518280065, |
|
"grad_norm": 0.5608311295509338, |
|
"learning_rate": 6.122844402588982e-05, |
|
"loss": 1.4824, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.43964129684985054, |
|
"grad_norm": 0.7992551922798157, |
|
"learning_rate": 6.10850739881854e-05, |
|
"loss": 1.4434, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.4405610485169004, |
|
"grad_norm": 0.6986256241798401, |
|
"learning_rate": 6.094160803349508e-05, |
|
"loss": 1.4313, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.4414808001839503, |
|
"grad_norm": 0.6461309790611267, |
|
"learning_rate": 6.079804740320181e-05, |
|
"loss": 1.4743, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.44240055185100025, |
|
"grad_norm": 0.7250984311103821, |
|
"learning_rate": 6.0654393339507753e-05, |
|
"loss": 1.4551, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.44332030351805013, |
|
"grad_norm": 0.6796169281005859, |
|
"learning_rate": 6.051064708542357e-05, |
|
"loss": 1.485, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.4442400551851, |
|
"grad_norm": 0.7773648500442505, |
|
"learning_rate": 6.0366809884757556e-05, |
|
"loss": 1.4153, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.4451598068521499, |
|
"grad_norm": 0.9285596609115601, |
|
"learning_rate": 6.022288298210501e-05, |
|
"loss": 1.4624, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.44607955851919984, |
|
"grad_norm": 0.7707833051681519, |
|
"learning_rate": 6.0078867622837395e-05, |
|
"loss": 1.431, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4469993101862497, |
|
"grad_norm": 0.9251638650894165, |
|
"learning_rate": 5.993476505309155e-05, |
|
"loss": 1.406, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.4479190618532996, |
|
"grad_norm": 0.7242058515548706, |
|
"learning_rate": 5.979057651975892e-05, |
|
"loss": 1.3418, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.4488388135203495, |
|
"grad_norm": 0.6925553679466248, |
|
"learning_rate": 5.9646303270474845e-05, |
|
"loss": 1.3463, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.44975856518739943, |
|
"grad_norm": 0.779308021068573, |
|
"learning_rate": 5.9501946553607615e-05, |
|
"loss": 1.3228, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.4506783168544493, |
|
"grad_norm": 0.750455379486084, |
|
"learning_rate": 5.9357507618247764e-05, |
|
"loss": 1.3406, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4515980685214992, |
|
"grad_norm": 0.7992476224899292, |
|
"learning_rate": 5.921298771419731e-05, |
|
"loss": 1.375, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.4525178201885491, |
|
"grad_norm": 0.7606462240219116, |
|
"learning_rate": 5.9068388091958795e-05, |
|
"loss": 1.3066, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.45343757185559896, |
|
"grad_norm": 0.651400625705719, |
|
"learning_rate": 5.8923710002724594e-05, |
|
"loss": 1.3312, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.4543573235226489, |
|
"grad_norm": 0.7911424040794373, |
|
"learning_rate": 5.877895469836604e-05, |
|
"loss": 1.3228, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.4552770751896988, |
|
"grad_norm": 0.8071415424346924, |
|
"learning_rate": 5.863412343142258e-05, |
|
"loss": 1.3149, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.45619682685674867, |
|
"grad_norm": 1.001132845878601, |
|
"learning_rate": 5.848921745509094e-05, |
|
"loss": 1.2951, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.45711657852379856, |
|
"grad_norm": 0.9951808452606201, |
|
"learning_rate": 5.834423802321431e-05, |
|
"loss": 1.2331, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.4580363301908485, |
|
"grad_norm": 0.9824991822242737, |
|
"learning_rate": 5.8199186390271486e-05, |
|
"loss": 1.2146, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.4589560818578984, |
|
"grad_norm": 1.3014886379241943, |
|
"learning_rate": 5.805406381136598e-05, |
|
"loss": 1.2247, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.45987583352494826, |
|
"grad_norm": 1.4302425384521484, |
|
"learning_rate": 5.79088715422152e-05, |
|
"loss": 1.047, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.46079558519199815, |
|
"grad_norm": 1.9563382863998413, |
|
"learning_rate": 5.7763610839139594e-05, |
|
"loss": 1.6971, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.46171533685904803, |
|
"grad_norm": 1.5344587564468384, |
|
"learning_rate": 5.761828295905169e-05, |
|
"loss": 1.6824, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.46263508852609797, |
|
"grad_norm": 1.1466830968856812, |
|
"learning_rate": 5.747288915944533e-05, |
|
"loss": 1.5384, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.46355484019314785, |
|
"grad_norm": 1.1582822799682617, |
|
"learning_rate": 5.7327430698384775e-05, |
|
"loss": 1.6326, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.46447459186019774, |
|
"grad_norm": 1.1693201065063477, |
|
"learning_rate": 5.7181908834493726e-05, |
|
"loss": 1.5041, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4653943435272476, |
|
"grad_norm": 0.9729719758033752, |
|
"learning_rate": 5.703632482694453e-05, |
|
"loss": 1.5669, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.46631409519429756, |
|
"grad_norm": 0.9684829115867615, |
|
"learning_rate": 5.689067993544725e-05, |
|
"loss": 1.5907, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.46723384686134745, |
|
"grad_norm": 0.8785848021507263, |
|
"learning_rate": 5.6744975420238745e-05, |
|
"loss": 1.4962, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.46815359852839733, |
|
"grad_norm": 0.7249252796173096, |
|
"learning_rate": 5.6599212542071824e-05, |
|
"loss": 1.5372, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.4690733501954472, |
|
"grad_norm": 0.9696371555328369, |
|
"learning_rate": 5.645339256220426e-05, |
|
"loss": 1.4834, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.46999310186249715, |
|
"grad_norm": 0.9309729933738708, |
|
"learning_rate": 5.6307516742387955e-05, |
|
"loss": 1.6006, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.47091285352954704, |
|
"grad_norm": 0.8194191455841064, |
|
"learning_rate": 5.616158634485793e-05, |
|
"loss": 1.5423, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.4718326051965969, |
|
"grad_norm": 0.8985216617584229, |
|
"learning_rate": 5.601560263232153e-05, |
|
"loss": 1.4869, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.4727523568636468, |
|
"grad_norm": 0.8546054363250732, |
|
"learning_rate": 5.586956686794734e-05, |
|
"loss": 1.5534, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.4736721085306967, |
|
"grad_norm": 0.7134532332420349, |
|
"learning_rate": 5.572348031535441e-05, |
|
"loss": 1.465, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.47459186019774663, |
|
"grad_norm": 0.6382752656936646, |
|
"learning_rate": 5.557734423860123e-05, |
|
"loss": 1.4897, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4755116118647965, |
|
"grad_norm": 0.8380042314529419, |
|
"learning_rate": 5.543115990217478e-05, |
|
"loss": 1.4646, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.4764313635318464, |
|
"grad_norm": 0.8848815560340881, |
|
"learning_rate": 5.528492857097966e-05, |
|
"loss": 1.4903, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.4773511151988963, |
|
"grad_norm": 0.6244109272956848, |
|
"learning_rate": 5.5138651510327085e-05, |
|
"loss": 1.5031, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.4782708668659462, |
|
"grad_norm": 0.8367244601249695, |
|
"learning_rate": 5.499232998592399e-05, |
|
"loss": 1.4978, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4791906185329961, |
|
"grad_norm": 0.7362543344497681, |
|
"learning_rate": 5.484596526386198e-05, |
|
"loss": 1.529, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.480110370200046, |
|
"grad_norm": 0.579655647277832, |
|
"learning_rate": 5.469955861060653e-05, |
|
"loss": 1.4446, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.4810301218670959, |
|
"grad_norm": 0.7875382304191589, |
|
"learning_rate": 5.455311129298586e-05, |
|
"loss": 1.505, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.48194987353414576, |
|
"grad_norm": 0.7048112154006958, |
|
"learning_rate": 5.4406624578180096e-05, |
|
"loss": 1.4612, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.4828696252011957, |
|
"grad_norm": 0.6148046255111694, |
|
"learning_rate": 5.4260099733710255e-05, |
|
"loss": 1.4871, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4837893768682456, |
|
"grad_norm": 0.7813459038734436, |
|
"learning_rate": 5.4113538027427245e-05, |
|
"loss": 1.431, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.48470912853529546, |
|
"grad_norm": 0.6388234496116638, |
|
"learning_rate": 5.396694072750099e-05, |
|
"loss": 1.4811, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.48562888020234535, |
|
"grad_norm": 0.5977755784988403, |
|
"learning_rate": 5.382030910240936e-05, |
|
"loss": 1.4302, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.4865486318693953, |
|
"grad_norm": 0.6440762281417847, |
|
"learning_rate": 5.367364442092724e-05, |
|
"loss": 1.4468, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.48746838353644517, |
|
"grad_norm": 0.68966144323349, |
|
"learning_rate": 5.352694795211555e-05, |
|
"loss": 1.4563, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.48838813520349506, |
|
"grad_norm": 0.682101845741272, |
|
"learning_rate": 5.338022096531028e-05, |
|
"loss": 1.4953, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.48930788687054494, |
|
"grad_norm": 0.5871472954750061, |
|
"learning_rate": 5.3233464730111426e-05, |
|
"loss": 1.4285, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.4902276385375948, |
|
"grad_norm": 0.60948246717453, |
|
"learning_rate": 5.308668051637212e-05, |
|
"loss": 1.4083, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.49114739020464476, |
|
"grad_norm": 0.7118504047393799, |
|
"learning_rate": 5.2939869594187595e-05, |
|
"loss": 1.4257, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.49206714187169465, |
|
"grad_norm": 0.6763386726379395, |
|
"learning_rate": 5.2793033233884124e-05, |
|
"loss": 1.3886, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.49298689353874453, |
|
"grad_norm": 0.6314605474472046, |
|
"learning_rate": 5.2646172706008156e-05, |
|
"loss": 1.3105, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.4939066452057944, |
|
"grad_norm": 0.7385772466659546, |
|
"learning_rate": 5.249928928131523e-05, |
|
"loss": 1.3189, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.49482639687284435, |
|
"grad_norm": 0.6615415811538696, |
|
"learning_rate": 5.235238423075899e-05, |
|
"loss": 1.3235, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.49574614853989424, |
|
"grad_norm": 0.6805823445320129, |
|
"learning_rate": 5.220545882548023e-05, |
|
"loss": 1.3938, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.4966659002069441, |
|
"grad_norm": 0.8164578676223755, |
|
"learning_rate": 5.205851433679589e-05, |
|
"loss": 1.329, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.497585651873994, |
|
"grad_norm": 0.7139110565185547, |
|
"learning_rate": 5.191155203618796e-05, |
|
"loss": 1.2914, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.49850540354104395, |
|
"grad_norm": 0.6411809921264648, |
|
"learning_rate": 5.176457319529263e-05, |
|
"loss": 1.3289, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.49942515520809383, |
|
"grad_norm": 0.639995813369751, |
|
"learning_rate": 5.161757908588917e-05, |
|
"loss": 1.2874, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.5003449068751437, |
|
"grad_norm": 0.6557344794273376, |
|
"learning_rate": 5.1470570979888973e-05, |
|
"loss": 1.3043, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.5012646585421936, |
|
"grad_norm": 0.7925935387611389, |
|
"learning_rate": 5.132355014932455e-05, |
|
"loss": 1.2978, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5021844102092435, |
|
"grad_norm": 0.7339189052581787, |
|
"learning_rate": 5.117651786633849e-05, |
|
"loss": 1.2996, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.5031041618762934, |
|
"grad_norm": 0.805228054523468, |
|
"learning_rate": 5.102947540317253e-05, |
|
"loss": 1.2458, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.5040239135433433, |
|
"grad_norm": 0.7840575575828552, |
|
"learning_rate": 5.088242403215644e-05, |
|
"loss": 1.253, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.5049436652103932, |
|
"grad_norm": 1.0337337255477905, |
|
"learning_rate": 5.073536502569708e-05, |
|
"loss": 1.1262, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.5058634168774431, |
|
"grad_norm": 1.2608665227890015, |
|
"learning_rate": 5.0588299656267414e-05, |
|
"loss": 1.022, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.506783168544493, |
|
"grad_norm": 1.6019068956375122, |
|
"learning_rate": 5.044122919639541e-05, |
|
"loss": 1.6294, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.5077029202115428, |
|
"grad_norm": 1.4624245166778564, |
|
"learning_rate": 5.029415491865311e-05, |
|
"loss": 1.6211, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.5086226718785928, |
|
"grad_norm": 1.249880075454712, |
|
"learning_rate": 5.014707809564562e-05, |
|
"loss": 1.5335, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.5095424235456427, |
|
"grad_norm": 1.1160420179367065, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5818, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.5104621752126925, |
|
"grad_norm": 0.9601331353187561, |
|
"learning_rate": 4.98529219043544e-05, |
|
"loss": 1.5011, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5113819268797425, |
|
"grad_norm": 0.9078472852706909, |
|
"learning_rate": 4.9705845081346894e-05, |
|
"loss": 1.4804, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.5123016785467923, |
|
"grad_norm": 1.0430097579956055, |
|
"learning_rate": 4.9558770803604614e-05, |
|
"loss": 1.5421, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.5132214302138423, |
|
"grad_norm": 0.9206668138504028, |
|
"learning_rate": 4.94117003437326e-05, |
|
"loss": 1.5167, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.5141411818808922, |
|
"grad_norm": 0.7888804078102112, |
|
"learning_rate": 4.926463497430293e-05, |
|
"loss": 1.4761, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.515060933547942, |
|
"grad_norm": 0.7101994752883911, |
|
"learning_rate": 4.911757596784357e-05, |
|
"loss": 1.4642, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.515980685214992, |
|
"grad_norm": 0.8613134026527405, |
|
"learning_rate": 4.8970524596827486e-05, |
|
"loss": 1.5374, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.5169004368820419, |
|
"grad_norm": 0.7729939222335815, |
|
"learning_rate": 4.8823482133661516e-05, |
|
"loss": 1.4959, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.5178201885490917, |
|
"grad_norm": 0.9063132405281067, |
|
"learning_rate": 4.8676449850675475e-05, |
|
"loss": 1.5057, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.5187399402161417, |
|
"grad_norm": 0.9306026697158813, |
|
"learning_rate": 4.852942902011103e-05, |
|
"loss": 1.5544, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.5196596918831915, |
|
"grad_norm": 0.763334333896637, |
|
"learning_rate": 4.838242091411084e-05, |
|
"loss": 1.4385, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5205794435502414, |
|
"grad_norm": 0.7051974534988403, |
|
"learning_rate": 4.823542680470738e-05, |
|
"loss": 1.4612, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.5214991952172914, |
|
"grad_norm": 0.7262412905693054, |
|
"learning_rate": 4.808844796381205e-05, |
|
"loss": 1.4366, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.5224189468843412, |
|
"grad_norm": 0.7530311346054077, |
|
"learning_rate": 4.7941485663204125e-05, |
|
"loss": 1.4883, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.5233386985513911, |
|
"grad_norm": 0.653555691242218, |
|
"learning_rate": 4.779454117451977e-05, |
|
"loss": 1.3767, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.524258450218441, |
|
"grad_norm": 0.7212573289871216, |
|
"learning_rate": 4.7647615769241e-05, |
|
"loss": 1.3811, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5251782018854909, |
|
"grad_norm": 0.7534743547439575, |
|
"learning_rate": 4.750071071868478e-05, |
|
"loss": 1.4899, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.5260979535525409, |
|
"grad_norm": 0.6205776333808899, |
|
"learning_rate": 4.735382729399184e-05, |
|
"loss": 1.4294, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.5270177052195907, |
|
"grad_norm": 0.6632286906242371, |
|
"learning_rate": 4.720696676611589e-05, |
|
"loss": 1.4939, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.5279374568866406, |
|
"grad_norm": 0.7253984808921814, |
|
"learning_rate": 4.706013040581242e-05, |
|
"loss": 1.4342, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.5288572085536904, |
|
"grad_norm": 0.7158737778663635, |
|
"learning_rate": 4.691331948362789e-05, |
|
"loss": 1.4718, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5297769602207404, |
|
"grad_norm": 0.6117165088653564, |
|
"learning_rate": 4.676653526988858e-05, |
|
"loss": 1.4828, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.5306967118877903, |
|
"grad_norm": 0.6031986474990845, |
|
"learning_rate": 4.661977903468974e-05, |
|
"loss": 1.4493, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.5316164635548402, |
|
"grad_norm": 0.6613805890083313, |
|
"learning_rate": 4.647305204788445e-05, |
|
"loss": 1.4419, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.5325362152218901, |
|
"grad_norm": 0.6349487900733948, |
|
"learning_rate": 4.632635557907277e-05, |
|
"loss": 1.4213, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.53345596688894, |
|
"grad_norm": 0.5844326019287109, |
|
"learning_rate": 4.617969089759066e-05, |
|
"loss": 1.4505, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5343757185559899, |
|
"grad_norm": 0.7105299234390259, |
|
"learning_rate": 4.603305927249902e-05, |
|
"loss": 1.3974, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.5352954702230398, |
|
"grad_norm": 0.7277695536613464, |
|
"learning_rate": 4.588646197257277e-05, |
|
"loss": 1.371, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.5362152218900896, |
|
"grad_norm": 0.6246547698974609, |
|
"learning_rate": 4.5739900266289756e-05, |
|
"loss": 1.3747, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.5371349735571396, |
|
"grad_norm": 0.918038547039032, |
|
"learning_rate": 4.559337542181993e-05, |
|
"loss": 1.3068, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.5380547252241895, |
|
"grad_norm": 0.7304350733757019, |
|
"learning_rate": 4.544688870701415e-05, |
|
"loss": 1.3496, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5389744768912393, |
|
"grad_norm": 0.6852339506149292, |
|
"learning_rate": 4.53004413893935e-05, |
|
"loss": 1.3327, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.5398942285582893, |
|
"grad_norm": 0.7337968349456787, |
|
"learning_rate": 4.515403473613803e-05, |
|
"loss": 1.3756, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.5408139802253391, |
|
"grad_norm": 0.7710087895393372, |
|
"learning_rate": 4.5007670014076045e-05, |
|
"loss": 1.3611, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.541733731892389, |
|
"grad_norm": 0.6107405424118042, |
|
"learning_rate": 4.486134848967292e-05, |
|
"loss": 1.312, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.542653483559439, |
|
"grad_norm": 0.7013472318649292, |
|
"learning_rate": 4.471507142902036e-05, |
|
"loss": 1.3194, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5435732352264888, |
|
"grad_norm": 0.8323330283164978, |
|
"learning_rate": 4.4568840097825226e-05, |
|
"loss": 1.2888, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.5444929868935388, |
|
"grad_norm": 0.6520772576332092, |
|
"learning_rate": 4.442265576139878e-05, |
|
"loss": 1.2347, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.5454127385605887, |
|
"grad_norm": 0.7573135495185852, |
|
"learning_rate": 4.4276519684645585e-05, |
|
"loss": 1.316, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.5463324902276385, |
|
"grad_norm": 0.7183561325073242, |
|
"learning_rate": 4.4130433132052664e-05, |
|
"loss": 1.2999, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.5472522418946885, |
|
"grad_norm": 0.8150544762611389, |
|
"learning_rate": 4.398439736767847e-05, |
|
"loss": 1.2111, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5481719935617383, |
|
"grad_norm": 0.8062061071395874, |
|
"learning_rate": 4.383841365514208e-05, |
|
"loss": 1.2231, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.5490917452287882, |
|
"grad_norm": 0.8277079463005066, |
|
"learning_rate": 4.369248325761205e-05, |
|
"loss": 1.2266, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.5500114968958382, |
|
"grad_norm": 1.1290823221206665, |
|
"learning_rate": 4.354660743779574e-05, |
|
"loss": 1.1825, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.550931248562888, |
|
"grad_norm": 1.0019193887710571, |
|
"learning_rate": 4.340078745792818e-05, |
|
"loss": 1.103, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.5518510002299379, |
|
"grad_norm": 1.0963555574417114, |
|
"learning_rate": 4.325502457976126e-05, |
|
"loss": 1.031, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5518510002299379, |
|
"eval_loss": 1.4310619831085205, |
|
"eval_runtime": 49.9435, |
|
"eval_samples_per_second": 164.986, |
|
"eval_steps_per_second": 20.623, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5527707518969878, |
|
"grad_norm": 1.6411505937576294, |
|
"learning_rate": 4.310932006455276e-05, |
|
"loss": 1.6187, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.5536905035640377, |
|
"grad_norm": 1.455959677696228, |
|
"learning_rate": 4.296367517305549e-05, |
|
"loss": 1.5665, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.5546102552310876, |
|
"grad_norm": 1.3301597833633423, |
|
"learning_rate": 4.281809116550629e-05, |
|
"loss": 1.5417, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.5555300068981375, |
|
"grad_norm": 1.0796560049057007, |
|
"learning_rate": 4.267256930161523e-05, |
|
"loss": 1.5482, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.5564497585651874, |
|
"grad_norm": 0.842844545841217, |
|
"learning_rate": 4.252711084055467e-05, |
|
"loss": 1.4583, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5573695102322372, |
|
"grad_norm": 0.7908689379692078, |
|
"learning_rate": 4.2381717040948325e-05, |
|
"loss": 1.4621, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.5582892618992872, |
|
"grad_norm": 0.9240807890892029, |
|
"learning_rate": 4.223638916086043e-05, |
|
"loss": 1.4843, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.5592090135663371, |
|
"grad_norm": 0.9389266967773438, |
|
"learning_rate": 4.209112845778481e-05, |
|
"loss": 1.4186, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.560128765233387, |
|
"grad_norm": 0.7683906555175781, |
|
"learning_rate": 4.194593618863404e-05, |
|
"loss": 1.4541, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.5610485169004369, |
|
"grad_norm": 0.6913854479789734, |
|
"learning_rate": 4.1800813609728526e-05, |
|
"loss": 1.4815, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5619682685674868, |
|
"grad_norm": 0.7714055776596069, |
|
"learning_rate": 4.1655761976785705e-05, |
|
"loss": 1.4577, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.5628880202345367, |
|
"grad_norm": 0.7735984921455383, |
|
"learning_rate": 4.1510782544909075e-05, |
|
"loss": 1.5057, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.5638077719015866, |
|
"grad_norm": 0.8532646298408508, |
|
"learning_rate": 4.136587656857744e-05, |
|
"loss": 1.4917, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.5647275235686364, |
|
"grad_norm": 0.7896936535835266, |
|
"learning_rate": 4.122104530163397e-05, |
|
"loss": 1.5009, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.5656472752356864, |
|
"grad_norm": 0.6928205490112305, |
|
"learning_rate": 4.107628999727542e-05, |
|
"loss": 1.4733, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5665670269027363, |
|
"grad_norm": 0.728251576423645, |
|
"learning_rate": 4.09316119080412e-05, |
|
"loss": 1.4508, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.5674867785697861, |
|
"grad_norm": 0.6070961356163025, |
|
"learning_rate": 4.078701228580269e-05, |
|
"loss": 1.5002, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.5684065302368361, |
|
"grad_norm": 0.7009554505348206, |
|
"learning_rate": 4.064249238175223e-05, |
|
"loss": 1.5289, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.5693262819038859, |
|
"grad_norm": 0.6865770220756531, |
|
"learning_rate": 4.0498053446392403e-05, |
|
"loss": 1.4876, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.5702460335709358, |
|
"grad_norm": 0.6156379580497742, |
|
"learning_rate": 4.035369672952516e-05, |
|
"loss": 1.4032, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5711657852379858, |
|
"grad_norm": 0.5818307995796204, |
|
"learning_rate": 4.020942348024108e-05, |
|
"loss": 1.4421, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.5720855369050356, |
|
"grad_norm": 0.5913554430007935, |
|
"learning_rate": 4.0065234946908456e-05, |
|
"loss": 1.4527, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.5730052885720855, |
|
"grad_norm": 0.5924707651138306, |
|
"learning_rate": 3.992113237716261e-05, |
|
"loss": 1.4692, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.5739250402391355, |
|
"grad_norm": 0.6369109749794006, |
|
"learning_rate": 3.977711701789499e-05, |
|
"loss": 1.4541, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.5748447919061853, |
|
"grad_norm": 0.5432732701301575, |
|
"learning_rate": 3.9633190115242456e-05, |
|
"loss": 1.3981, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5757645435732353, |
|
"grad_norm": 0.6044031977653503, |
|
"learning_rate": 3.948935291457644e-05, |
|
"loss": 1.4086, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5766842952402851, |
|
"grad_norm": 0.5974178314208984, |
|
"learning_rate": 3.934560666049226e-05, |
|
"loss": 1.448, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.577604046907335, |
|
"grad_norm": 0.6302614212036133, |
|
"learning_rate": 3.920195259679822e-05, |
|
"loss": 1.4095, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.578523798574385, |
|
"grad_norm": 0.6615459322929382, |
|
"learning_rate": 3.905839196650493e-05, |
|
"loss": 1.5048, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5794435502414348, |
|
"grad_norm": 0.5650434494018555, |
|
"learning_rate": 3.8914926011814626e-05, |
|
"loss": 1.4093, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5803633019084847, |
|
"grad_norm": 0.5881006121635437, |
|
"learning_rate": 3.8771555974110194e-05, |
|
"loss": 1.3783, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.5812830535755346, |
|
"grad_norm": 0.6607415676116943, |
|
"learning_rate": 3.8628283093944686e-05, |
|
"loss": 1.4406, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5822028052425845, |
|
"grad_norm": 0.6574285626411438, |
|
"learning_rate": 3.8485108611030415e-05, |
|
"loss": 1.3927, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5831225569096344, |
|
"grad_norm": 0.7541502714157104, |
|
"learning_rate": 3.834203376422831e-05, |
|
"loss": 1.374, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.5840423085766843, |
|
"grad_norm": 0.6834109425544739, |
|
"learning_rate": 3.81990597915371e-05, |
|
"loss": 1.3459, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5849620602437342, |
|
"grad_norm": 0.649935781955719, |
|
"learning_rate": 3.805618793008279e-05, |
|
"loss": 1.3314, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.585881811910784, |
|
"grad_norm": 0.6892503499984741, |
|
"learning_rate": 3.7913419416107694e-05, |
|
"loss": 1.3958, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.586801563577834, |
|
"grad_norm": 0.6689726710319519, |
|
"learning_rate": 3.7770755484960004e-05, |
|
"loss": 1.3384, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5877213152448839, |
|
"grad_norm": 0.5913270711898804, |
|
"learning_rate": 3.762819737108291e-05, |
|
"loss": 1.3169, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.5886410669119337, |
|
"grad_norm": 0.6090061068534851, |
|
"learning_rate": 3.748574630800401e-05, |
|
"loss": 1.2413, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5895608185789837, |
|
"grad_norm": 0.7058801651000977, |
|
"learning_rate": 3.734340352832457e-05, |
|
"loss": 1.289, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5904805702460336, |
|
"grad_norm": 0.7695034146308899, |
|
"learning_rate": 3.7201170263709e-05, |
|
"loss": 1.3332, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5914003219130834, |
|
"grad_norm": 0.6559154987335205, |
|
"learning_rate": 3.705904774487396e-05, |
|
"loss": 1.2992, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5923200735801334, |
|
"grad_norm": 0.7140766382217407, |
|
"learning_rate": 3.691703720157798e-05, |
|
"loss": 1.2247, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.5932398252471832, |
|
"grad_norm": 0.7867764830589294, |
|
"learning_rate": 3.6775139862610574e-05, |
|
"loss": 1.2409, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5941595769142332, |
|
"grad_norm": 0.9307761788368225, |
|
"learning_rate": 3.663335695578183e-05, |
|
"loss": 1.1696, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.5950793285812831, |
|
"grad_norm": 0.8968107104301453, |
|
"learning_rate": 3.649168970791157e-05, |
|
"loss": 1.1511, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5959990802483329, |
|
"grad_norm": 0.9723992943763733, |
|
"learning_rate": 3.635013934481895e-05, |
|
"loss": 1.1133, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.5969188319153829, |
|
"grad_norm": 1.1764365434646606, |
|
"learning_rate": 3.6208707091311626e-05, |
|
"loss": 1.1247, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.5978385835824327, |
|
"grad_norm": 1.0631630420684814, |
|
"learning_rate": 3.6067394171175394e-05, |
|
"loss": 1.0094, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5987583352494826, |
|
"grad_norm": 1.4610891342163086, |
|
"learning_rate": 3.592620180716338e-05, |
|
"loss": 1.635, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.5996780869165326, |
|
"grad_norm": 1.4560317993164062, |
|
"learning_rate": 3.578513122098566e-05, |
|
"loss": 1.5683, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.6005978385835824, |
|
"grad_norm": 1.250054955482483, |
|
"learning_rate": 3.564418363329848e-05, |
|
"loss": 1.4994, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.6015175902506323, |
|
"grad_norm": 1.0758668184280396, |
|
"learning_rate": 3.5503360263693886e-05, |
|
"loss": 1.4581, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.6024373419176823, |
|
"grad_norm": 0.9774999022483826, |
|
"learning_rate": 3.5362662330689064e-05, |
|
"loss": 1.4609, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6033570935847321, |
|
"grad_norm": 0.8008742332458496, |
|
"learning_rate": 3.52220910517158e-05, |
|
"loss": 1.4672, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.604276845251782, |
|
"grad_norm": 0.7127364873886108, |
|
"learning_rate": 3.5081647643110024e-05, |
|
"loss": 1.4948, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.6051965969188319, |
|
"grad_norm": 0.76557457447052, |
|
"learning_rate": 3.494133332010117e-05, |
|
"loss": 1.4609, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.6061163485858818, |
|
"grad_norm": 0.8269351124763489, |
|
"learning_rate": 3.480114929680176e-05, |
|
"loss": 1.5268, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.6070361002529318, |
|
"grad_norm": 0.810955286026001, |
|
"learning_rate": 3.466109678619681e-05, |
|
"loss": 1.523, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6079558519199816, |
|
"grad_norm": 0.6712583303451538, |
|
"learning_rate": 3.452117700013345e-05, |
|
"loss": 1.4676, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.6088756035870315, |
|
"grad_norm": 0.828484058380127, |
|
"learning_rate": 3.43813911493103e-05, |
|
"loss": 1.5116, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.6097953552540814, |
|
"grad_norm": 0.7789233922958374, |
|
"learning_rate": 3.424174044326711e-05, |
|
"loss": 1.445, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.6107151069211313, |
|
"grad_norm": 0.7635114789009094, |
|
"learning_rate": 3.4102226090374246e-05, |
|
"loss": 1.5681, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.6116348585881812, |
|
"grad_norm": 0.6956825256347656, |
|
"learning_rate": 3.3962849297822226e-05, |
|
"loss": 1.4877, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.6125546102552311, |
|
"grad_norm": 0.6926284432411194, |
|
"learning_rate": 3.382361127161127e-05, |
|
"loss": 1.4282, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.613474361922281, |
|
"grad_norm": 0.8702225089073181, |
|
"learning_rate": 3.368451321654091e-05, |
|
"loss": 1.4773, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.6143941135893308, |
|
"grad_norm": 0.7277842164039612, |
|
"learning_rate": 3.35455563361995e-05, |
|
"loss": 1.3959, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.6153138652563808, |
|
"grad_norm": 0.6363296508789062, |
|
"learning_rate": 3.340674183295389e-05, |
|
"loss": 1.4747, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.6162336169234307, |
|
"grad_norm": 0.6425765156745911, |
|
"learning_rate": 3.326807090793891e-05, |
|
"loss": 1.4423, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6171533685904805, |
|
"grad_norm": 0.6721304059028625, |
|
"learning_rate": 3.312954476104709e-05, |
|
"loss": 1.4241, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.6180731202575305, |
|
"grad_norm": 0.6218870878219604, |
|
"learning_rate": 3.299116459091816e-05, |
|
"loss": 1.4644, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.6189928719245804, |
|
"grad_norm": 0.6951906681060791, |
|
"learning_rate": 3.2852931594928807e-05, |
|
"loss": 1.452, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.6199126235916302, |
|
"grad_norm": 0.6208174824714661, |
|
"learning_rate": 3.271484696918218e-05, |
|
"loss": 1.415, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.6208323752586802, |
|
"grad_norm": 0.5596356391906738, |
|
"learning_rate": 3.257691190849769e-05, |
|
"loss": 1.4708, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.62175212692573, |
|
"grad_norm": 0.6394990682601929, |
|
"learning_rate": 3.243912760640054e-05, |
|
"loss": 1.4522, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.62267187859278, |
|
"grad_norm": 0.6112094521522522, |
|
"learning_rate": 3.2301495255111425e-05, |
|
"loss": 1.3607, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.6235916302598299, |
|
"grad_norm": 0.645779013633728, |
|
"learning_rate": 3.2164016045536304e-05, |
|
"loss": 1.4282, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.6245113819268797, |
|
"grad_norm": 0.6169288754463196, |
|
"learning_rate": 3.202669116725598e-05, |
|
"loss": 1.4052, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.6254311335939297, |
|
"grad_norm": 0.6002304553985596, |
|
"learning_rate": 3.188952180851589e-05, |
|
"loss": 1.419, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6263508852609795, |
|
"grad_norm": 0.6018975377082825, |
|
"learning_rate": 3.1752509156215734e-05, |
|
"loss": 1.3685, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.6272706369280294, |
|
"grad_norm": 0.6559040546417236, |
|
"learning_rate": 3.1615654395899375e-05, |
|
"loss": 1.3657, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.6281903885950794, |
|
"grad_norm": 0.6393570899963379, |
|
"learning_rate": 3.147895871174432e-05, |
|
"loss": 1.405, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.6291101402621292, |
|
"grad_norm": 0.6094779968261719, |
|
"learning_rate": 3.134242328655175e-05, |
|
"loss": 1.3179, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.6300298919291791, |
|
"grad_norm": 0.6581336855888367, |
|
"learning_rate": 3.120604930173608e-05, |
|
"loss": 1.3276, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6309496435962291, |
|
"grad_norm": 0.6599423289299011, |
|
"learning_rate": 3.106983793731484e-05, |
|
"loss": 1.2805, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.6318693952632789, |
|
"grad_norm": 0.683204710483551, |
|
"learning_rate": 3.093379037189842e-05, |
|
"loss": 1.3557, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.6327891469303288, |
|
"grad_norm": 0.6180110573768616, |
|
"learning_rate": 3.079790778267994e-05, |
|
"loss": 1.2668, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.6337088985973787, |
|
"grad_norm": 0.7273058891296387, |
|
"learning_rate": 3.066219134542492e-05, |
|
"loss": 1.2852, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.6346286502644286, |
|
"grad_norm": 0.6892321705818176, |
|
"learning_rate": 3.052664223446131e-05, |
|
"loss": 1.2997, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6355484019314785, |
|
"grad_norm": 0.694174587726593, |
|
"learning_rate": 3.039126162266912e-05, |
|
"loss": 1.2398, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.6364681535985284, |
|
"grad_norm": 0.7471473217010498, |
|
"learning_rate": 3.0256050681470444e-05, |
|
"loss": 1.1879, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.6373879052655783, |
|
"grad_norm": 0.7812895178794861, |
|
"learning_rate": 3.012101058081919e-05, |
|
"loss": 1.2826, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.6383076569326281, |
|
"grad_norm": 0.7405266761779785, |
|
"learning_rate": 2.998614248919107e-05, |
|
"loss": 1.1937, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.6392274085996781, |
|
"grad_norm": 0.7346695065498352, |
|
"learning_rate": 2.9851447573573384e-05, |
|
"loss": 1.2364, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.640147160266728, |
|
"grad_norm": 0.7376750707626343, |
|
"learning_rate": 2.971692699945502e-05, |
|
"loss": 1.222, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.6410669119337778, |
|
"grad_norm": 0.7857553362846375, |
|
"learning_rate": 2.9582581930816288e-05, |
|
"loss": 1.1532, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.6419866636008278, |
|
"grad_norm": 1.1139256954193115, |
|
"learning_rate": 2.9448413530118914e-05, |
|
"loss": 1.0823, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.6429064152678776, |
|
"grad_norm": 0.9734514355659485, |
|
"learning_rate": 2.9314422958295907e-05, |
|
"loss": 1.0059, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.6438261669349276, |
|
"grad_norm": 1.195755124092102, |
|
"learning_rate": 2.9180611374741623e-05, |
|
"loss": 1.0146, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6447459186019775, |
|
"grad_norm": 1.1521427631378174, |
|
"learning_rate": 2.9046979937301588e-05, |
|
"loss": 1.5188, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.6456656702690273, |
|
"grad_norm": 1.0498712062835693, |
|
"learning_rate": 2.8913529802262617e-05, |
|
"loss": 1.5642, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.6465854219360773, |
|
"grad_norm": 1.004340410232544, |
|
"learning_rate": 2.8780262124342755e-05, |
|
"loss": 1.4869, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.6475051736031272, |
|
"grad_norm": 0.9507954716682434, |
|
"learning_rate": 2.8647178056681194e-05, |
|
"loss": 1.5128, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.648424925270177, |
|
"grad_norm": 0.8366132974624634, |
|
"learning_rate": 2.8514278750828536e-05, |
|
"loss": 1.4907, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.649344676937227, |
|
"grad_norm": 0.8227055072784424, |
|
"learning_rate": 2.838156535673652e-05, |
|
"loss": 1.5356, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.6502644286042768, |
|
"grad_norm": 0.7174684405326843, |
|
"learning_rate": 2.8249039022748313e-05, |
|
"loss": 1.4349, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.6511841802713267, |
|
"grad_norm": 0.6819536089897156, |
|
"learning_rate": 2.8116700895588472e-05, |
|
"loss": 1.4133, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.6521039319383767, |
|
"grad_norm": 0.7197076082229614, |
|
"learning_rate": 2.7984552120353046e-05, |
|
"loss": 1.4284, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.6530236836054265, |
|
"grad_norm": 0.7833074331283569, |
|
"learning_rate": 2.785259384049959e-05, |
|
"loss": 1.5066, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6539434352724764, |
|
"grad_norm": 0.7236879467964172, |
|
"learning_rate": 2.7720827197837472e-05, |
|
"loss": 1.3815, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.6548631869395263, |
|
"grad_norm": 0.6463202238082886, |
|
"learning_rate": 2.7589253332517734e-05, |
|
"loss": 1.4513, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.6557829386065762, |
|
"grad_norm": 0.7177314758300781, |
|
"learning_rate": 2.745787338302341e-05, |
|
"loss": 1.4443, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.6567026902736262, |
|
"grad_norm": 0.7721028327941895, |
|
"learning_rate": 2.7326688486159613e-05, |
|
"loss": 1.4899, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.657622441940676, |
|
"grad_norm": 0.6830793023109436, |
|
"learning_rate": 2.719569977704372e-05, |
|
"loss": 1.5052, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.6585421936077259, |
|
"grad_norm": 0.6752369403839111, |
|
"learning_rate": 2.7064908389095468e-05, |
|
"loss": 1.5062, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.6594619452747759, |
|
"grad_norm": 0.6267321109771729, |
|
"learning_rate": 2.693431545402732e-05, |
|
"loss": 1.5125, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.6603816969418257, |
|
"grad_norm": 0.6160003542900085, |
|
"learning_rate": 2.6803922101834454e-05, |
|
"loss": 1.4609, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.6613014486088756, |
|
"grad_norm": 0.5926380157470703, |
|
"learning_rate": 2.6673729460785176e-05, |
|
"loss": 1.415, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.6622212002759255, |
|
"grad_norm": 0.6655170321464539, |
|
"learning_rate": 2.6543738657411034e-05, |
|
"loss": 1.372, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6631409519429754, |
|
"grad_norm": 0.6094529628753662, |
|
"learning_rate": 2.6413950816497147e-05, |
|
"loss": 1.4037, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.6640607036100253, |
|
"grad_norm": 0.6568109393119812, |
|
"learning_rate": 2.6284367061072378e-05, |
|
"loss": 1.458, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.6649804552770752, |
|
"grad_norm": 0.5817413330078125, |
|
"learning_rate": 2.615498851239978e-05, |
|
"loss": 1.4009, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.6659002069441251, |
|
"grad_norm": 0.6216491460800171, |
|
"learning_rate": 2.6025816289966704e-05, |
|
"loss": 1.4178, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.6668199586111749, |
|
"grad_norm": 0.6176545023918152, |
|
"learning_rate": 2.5896851511475186e-05, |
|
"loss": 1.4191, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.6677397102782249, |
|
"grad_norm": 0.5803206562995911, |
|
"learning_rate": 2.576809529283241e-05, |
|
"loss": 1.415, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.6686594619452748, |
|
"grad_norm": 0.5935968160629272, |
|
"learning_rate": 2.5639548748140802e-05, |
|
"loss": 1.3797, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.6695792136123246, |
|
"grad_norm": 0.6356935501098633, |
|
"learning_rate": 2.5511212989688586e-05, |
|
"loss": 1.4948, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.6704989652793746, |
|
"grad_norm": 0.5835620760917664, |
|
"learning_rate": 2.5383089127940086e-05, |
|
"loss": 1.4203, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.6714187169464245, |
|
"grad_norm": 0.687403678894043, |
|
"learning_rate": 2.5255178271526137e-05, |
|
"loss": 1.3661, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6723384686134743, |
|
"grad_norm": 0.6388825178146362, |
|
"learning_rate": 2.51274815272344e-05, |
|
"loss": 1.4157, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.6732582202805243, |
|
"grad_norm": 0.6280670762062073, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 1.3854, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.6741779719475741, |
|
"grad_norm": 0.6690565943717957, |
|
"learning_rate": 2.4872734792895734e-05, |
|
"loss": 1.3974, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.6750977236146241, |
|
"grad_norm": 0.6328375339508057, |
|
"learning_rate": 2.4745687007122636e-05, |
|
"loss": 1.3462, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.676017475281674, |
|
"grad_norm": 0.6421682834625244, |
|
"learning_rate": 2.4618857742000463e-05, |
|
"loss": 1.2237, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.6769372269487238, |
|
"grad_norm": 0.6286811828613281, |
|
"learning_rate": 2.4492248094958147e-05, |
|
"loss": 1.3481, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.6778569786157738, |
|
"grad_norm": 0.61008220911026, |
|
"learning_rate": 2.4365859161524258e-05, |
|
"loss": 1.2088, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.6787767302828236, |
|
"grad_norm": 0.6456345915794373, |
|
"learning_rate": 2.4239692035317678e-05, |
|
"loss": 1.1997, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.6796964819498735, |
|
"grad_norm": 0.8082221746444702, |
|
"learning_rate": 2.411374780803793e-05, |
|
"loss": 1.2172, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.6806162336169235, |
|
"grad_norm": 0.6706709861755371, |
|
"learning_rate": 2.3988027569455895e-05, |
|
"loss": 1.211, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6815359852839733, |
|
"grad_norm": 0.6545360088348389, |
|
"learning_rate": 2.3862532407404303e-05, |
|
"loss": 1.3001, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.6824557369510232, |
|
"grad_norm": 0.8686853051185608, |
|
"learning_rate": 2.373726340776837e-05, |
|
"loss": 1.2328, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.6833754886180731, |
|
"grad_norm": 0.668156087398529, |
|
"learning_rate": 2.361222165447628e-05, |
|
"loss": 1.2011, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.684295240285123, |
|
"grad_norm": 0.685393750667572, |
|
"learning_rate": 2.348740822949006e-05, |
|
"loss": 1.2309, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.685214991952173, |
|
"grad_norm": 0.6708635687828064, |
|
"learning_rate": 2.3362824212795898e-05, |
|
"loss": 1.1972, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.6861347436192228, |
|
"grad_norm": 0.8381814360618591, |
|
"learning_rate": 2.3238470682395037e-05, |
|
"loss": 1.2545, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.6870544952862727, |
|
"grad_norm": 0.7803678512573242, |
|
"learning_rate": 2.3114348714294354e-05, |
|
"loss": 1.1471, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.6879742469533227, |
|
"grad_norm": 0.8974632024765015, |
|
"learning_rate": 2.2990459382497088e-05, |
|
"loss": 1.1145, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.6888939986203725, |
|
"grad_norm": 1.0532459020614624, |
|
"learning_rate": 2.2866803758993445e-05, |
|
"loss": 1.0573, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.6898137502874224, |
|
"grad_norm": 1.208759069442749, |
|
"learning_rate": 2.274338291375147e-05, |
|
"loss": 0.9195, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6898137502874224, |
|
"eval_loss": 1.3665193319320679, |
|
"eval_runtime": 50.0048, |
|
"eval_samples_per_second": 164.784, |
|
"eval_steps_per_second": 20.598, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6907335019544723, |
|
"grad_norm": 1.253531575202942, |
|
"learning_rate": 2.2620197914707718e-05, |
|
"loss": 1.602, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6916532536215222, |
|
"grad_norm": 1.2635823488235474, |
|
"learning_rate": 2.2497249827757933e-05, |
|
"loss": 1.5615, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.6925730052885721, |
|
"grad_norm": 1.0416873693466187, |
|
"learning_rate": 2.2374539716748032e-05, |
|
"loss": 1.4779, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.693492756955622, |
|
"grad_norm": 0.9805805087089539, |
|
"learning_rate": 2.225206864346465e-05, |
|
"loss": 1.4272, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.6944125086226719, |
|
"grad_norm": 0.9023362398147583, |
|
"learning_rate": 2.2129837667626145e-05, |
|
"loss": 1.4208, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6953322602897217, |
|
"grad_norm": 1.0136377811431885, |
|
"learning_rate": 2.200784784687334e-05, |
|
"loss": 1.4692, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6962520119567717, |
|
"grad_norm": 0.9673015475273132, |
|
"learning_rate": 2.188610023676041e-05, |
|
"loss": 1.4966, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.6971717636238216, |
|
"grad_norm": 0.8694583177566528, |
|
"learning_rate": 2.176459589074566e-05, |
|
"loss": 1.4035, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6980915152908714, |
|
"grad_norm": 0.7423250675201416, |
|
"learning_rate": 2.164333586018259e-05, |
|
"loss": 1.4623, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.6990112669579214, |
|
"grad_norm": 0.7796162366867065, |
|
"learning_rate": 2.1522321194310574e-05, |
|
"loss": 1.466, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6999310186249713, |
|
"grad_norm": 0.9312780499458313, |
|
"learning_rate": 2.1401552940245962e-05, |
|
"loss": 1.3982, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.7008507702920211, |
|
"grad_norm": 0.7841870784759521, |
|
"learning_rate": 2.1281032142972933e-05, |
|
"loss": 1.505, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.7017705219590711, |
|
"grad_norm": 0.6561142206192017, |
|
"learning_rate": 2.1160759845334484e-05, |
|
"loss": 1.4446, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.7026902736261209, |
|
"grad_norm": 0.6478760242462158, |
|
"learning_rate": 2.1040737088023323e-05, |
|
"loss": 1.4218, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.7036100252931708, |
|
"grad_norm": 0.8280866146087646, |
|
"learning_rate": 2.0920964909573066e-05, |
|
"loss": 1.4915, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.7045297769602208, |
|
"grad_norm": 0.8623349666595459, |
|
"learning_rate": 2.080144434634898e-05, |
|
"loss": 1.3761, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.7054495286272706, |
|
"grad_norm": 0.7455824613571167, |
|
"learning_rate": 2.0682176432539246e-05, |
|
"loss": 1.39, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.7063692802943206, |
|
"grad_norm": 0.6684551239013672, |
|
"learning_rate": 2.056316220014588e-05, |
|
"loss": 1.4599, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.7072890319613704, |
|
"grad_norm": 0.6949120759963989, |
|
"learning_rate": 2.0444402678975877e-05, |
|
"loss": 1.4068, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.7082087836284203, |
|
"grad_norm": 0.698066771030426, |
|
"learning_rate": 2.0325898896632177e-05, |
|
"loss": 1.4451, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7091285352954703, |
|
"grad_norm": 0.6923701167106628, |
|
"learning_rate": 2.0207651878505e-05, |
|
"loss": 1.4183, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.7100482869625201, |
|
"grad_norm": 0.6396070718765259, |
|
"learning_rate": 2.0089662647762715e-05, |
|
"loss": 1.4079, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.71096803862957, |
|
"grad_norm": 0.5608759522438049, |
|
"learning_rate": 1.997193222534316e-05, |
|
"loss": 1.3507, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.7118877902966199, |
|
"grad_norm": 0.6374341249465942, |
|
"learning_rate": 1.9854461629944763e-05, |
|
"loss": 1.395, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.7128075419636698, |
|
"grad_norm": 0.5628088116645813, |
|
"learning_rate": 1.9737251878017678e-05, |
|
"loss": 1.3779, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.7137272936307197, |
|
"grad_norm": 0.6205474138259888, |
|
"learning_rate": 1.962030398375506e-05, |
|
"loss": 1.3974, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.7146470452977696, |
|
"grad_norm": 0.5789771676063538, |
|
"learning_rate": 1.950361895908427e-05, |
|
"loss": 1.331, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.7155667969648195, |
|
"grad_norm": 0.636550784111023, |
|
"learning_rate": 1.9387197813658092e-05, |
|
"loss": 1.3799, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.7164865486318694, |
|
"grad_norm": 0.6165384650230408, |
|
"learning_rate": 1.927104155484602e-05, |
|
"loss": 1.3579, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.7174063002989193, |
|
"grad_norm": 0.6170758008956909, |
|
"learning_rate": 1.9155151187725552e-05, |
|
"loss": 1.349, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7183260519659692, |
|
"grad_norm": 0.5404320359230042, |
|
"learning_rate": 1.9039527715073424e-05, |
|
"loss": 1.364, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.719245803633019, |
|
"grad_norm": 0.5796113014221191, |
|
"learning_rate": 1.892417213735704e-05, |
|
"loss": 1.2893, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.720165555300069, |
|
"grad_norm": 0.6280906796455383, |
|
"learning_rate": 1.8809085452725746e-05, |
|
"loss": 1.3598, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.7210853069671189, |
|
"grad_norm": 0.6569982171058655, |
|
"learning_rate": 1.8694268657002194e-05, |
|
"loss": 1.3006, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.7220050586341688, |
|
"grad_norm": 0.6892338991165161, |
|
"learning_rate": 1.8579722743673773e-05, |
|
"loss": 1.3557, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.7229248103012187, |
|
"grad_norm": 0.6984684467315674, |
|
"learning_rate": 1.8465448703883958e-05, |
|
"loss": 1.3506, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.7238445619682685, |
|
"grad_norm": 0.65283203125, |
|
"learning_rate": 1.8351447526423727e-05, |
|
"loss": 1.3009, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.7247643136353185, |
|
"grad_norm": 0.7025482654571533, |
|
"learning_rate": 1.8237720197723075e-05, |
|
"loss": 1.1886, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.7256840653023684, |
|
"grad_norm": 0.6791706085205078, |
|
"learning_rate": 1.812426770184243e-05, |
|
"loss": 1.2081, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.7266038169694182, |
|
"grad_norm": 0.6996423602104187, |
|
"learning_rate": 1.801109102046414e-05, |
|
"loss": 1.2468, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7275235686364682, |
|
"grad_norm": 0.722210705280304, |
|
"learning_rate": 1.7898191132883968e-05, |
|
"loss": 1.196, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.7284433203035181, |
|
"grad_norm": 0.6527461409568787, |
|
"learning_rate": 1.7785569016002685e-05, |
|
"loss": 1.2516, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.7293630719705679, |
|
"grad_norm": 0.6403821110725403, |
|
"learning_rate": 1.7673225644317486e-05, |
|
"loss": 1.1883, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.7302828236376179, |
|
"grad_norm": 0.7447903156280518, |
|
"learning_rate": 1.7561161989913698e-05, |
|
"loss": 1.2232, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.7312025753046677, |
|
"grad_norm": 0.8253830671310425, |
|
"learning_rate": 1.7449379022456295e-05, |
|
"loss": 1.2144, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.7321223269717176, |
|
"grad_norm": 0.8268104791641235, |
|
"learning_rate": 1.7337877709181526e-05, |
|
"loss": 1.1443, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.7330420786387676, |
|
"grad_norm": 0.8768870830535889, |
|
"learning_rate": 1.7226659014888546e-05, |
|
"loss": 1.0736, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.7339618303058174, |
|
"grad_norm": 0.8852882981300354, |
|
"learning_rate": 1.711572390193102e-05, |
|
"loss": 1.1051, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.7348815819728673, |
|
"grad_norm": 1.0162791013717651, |
|
"learning_rate": 1.7005073330208883e-05, |
|
"loss": 1.0043, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.7358013336399172, |
|
"grad_norm": 1.2660006284713745, |
|
"learning_rate": 1.689470825715998e-05, |
|
"loss": 1.0243, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7367210853069671, |
|
"grad_norm": 1.007739543914795, |
|
"learning_rate": 1.6784629637751815e-05, |
|
"loss": 1.5297, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.7376408369740171, |
|
"grad_norm": 0.9282512664794922, |
|
"learning_rate": 1.6674838424473173e-05, |
|
"loss": 1.5234, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.7385605886410669, |
|
"grad_norm": 0.8745155334472656, |
|
"learning_rate": 1.656533556732611e-05, |
|
"loss": 1.4494, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.7394803403081168, |
|
"grad_norm": 0.941735565662384, |
|
"learning_rate": 1.6456122013817476e-05, |
|
"loss": 1.5395, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.7404000919751667, |
|
"grad_norm": 0.9213740825653076, |
|
"learning_rate": 1.6347198708950882e-05, |
|
"loss": 1.4104, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.7413198436422166, |
|
"grad_norm": 0.8986393809318542, |
|
"learning_rate": 1.6238566595218473e-05, |
|
"loss": 1.4004, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.7422395953092665, |
|
"grad_norm": 1.212737798690796, |
|
"learning_rate": 1.6130226612592786e-05, |
|
"loss": 1.4478, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.7431593469763164, |
|
"grad_norm": 0.8150504231452942, |
|
"learning_rate": 1.6022179698518523e-05, |
|
"loss": 1.4197, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.7440790986433663, |
|
"grad_norm": 0.7515584826469421, |
|
"learning_rate": 1.591442678790467e-05, |
|
"loss": 1.454, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.7449988503104162, |
|
"grad_norm": 0.6738887429237366, |
|
"learning_rate": 1.5806968813116107e-05, |
|
"loss": 1.46, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7459186019774661, |
|
"grad_norm": 0.8340874314308167, |
|
"learning_rate": 1.5699806703965787e-05, |
|
"loss": 1.4261, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.746838353644516, |
|
"grad_norm": 0.7794579863548279, |
|
"learning_rate": 1.559294138770656e-05, |
|
"loss": 1.4964, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.7477581053115658, |
|
"grad_norm": 0.7533066868782043, |
|
"learning_rate": 1.5486373789023205e-05, |
|
"loss": 1.4325, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.7486778569786158, |
|
"grad_norm": 0.643245279788971, |
|
"learning_rate": 1.538010483002435e-05, |
|
"loss": 1.4201, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.7495976086456657, |
|
"grad_norm": 0.6805441379547119, |
|
"learning_rate": 1.5274135430234654e-05, |
|
"loss": 1.4768, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.7505173603127155, |
|
"grad_norm": 0.7012439966201782, |
|
"learning_rate": 1.5168466506586654e-05, |
|
"loss": 1.3795, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.7514371119797655, |
|
"grad_norm": 0.6986867189407349, |
|
"learning_rate": 1.506309897341297e-05, |
|
"loss": 1.3924, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.7523568636468153, |
|
"grad_norm": 0.7575457692146301, |
|
"learning_rate": 1.495803374243835e-05, |
|
"loss": 1.4462, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.7532766153138652, |
|
"grad_norm": 0.6013389229774475, |
|
"learning_rate": 1.4853271722771772e-05, |
|
"loss": 1.3786, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.7541963669809152, |
|
"grad_norm": 0.596037745475769, |
|
"learning_rate": 1.4748813820898554e-05, |
|
"loss": 1.3483, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.755116118647965, |
|
"grad_norm": 0.6031373739242554, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 1.364, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.756035870315015, |
|
"grad_norm": 0.6841591000556946, |
|
"learning_rate": 1.4540813983308548e-05, |
|
"loss": 1.4468, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.7569556219820649, |
|
"grad_norm": 0.7204717993736267, |
|
"learning_rate": 1.4437273847373777e-05, |
|
"loss": 1.3843, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.7578753736491147, |
|
"grad_norm": 0.6169053912162781, |
|
"learning_rate": 1.4334041428781003e-05, |
|
"loss": 1.3776, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.7587951253161647, |
|
"grad_norm": 0.5684770941734314, |
|
"learning_rate": 1.4231117620780188e-05, |
|
"loss": 1.4011, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7597148769832145, |
|
"grad_norm": 0.5605279207229614, |
|
"learning_rate": 1.4128503313951009e-05, |
|
"loss": 1.4227, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.7606346286502644, |
|
"grad_norm": 0.6137314438819885, |
|
"learning_rate": 1.4026199396195077e-05, |
|
"loss": 1.4014, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.7615543803173144, |
|
"grad_norm": 0.6102471351623535, |
|
"learning_rate": 1.3924206752728281e-05, |
|
"loss": 1.2759, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.7624741319843642, |
|
"grad_norm": 0.6177085638046265, |
|
"learning_rate": 1.3822526266073043e-05, |
|
"loss": 1.3204, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.7633938836514141, |
|
"grad_norm": 0.5692439675331116, |
|
"learning_rate": 1.3721158816050873e-05, |
|
"loss": 1.3467, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.764313635318464, |
|
"grad_norm": 0.6170715689659119, |
|
"learning_rate": 1.362010527977453e-05, |
|
"loss": 1.2864, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.7652333869855139, |
|
"grad_norm": 0.6100102066993713, |
|
"learning_rate": 1.3519366531640587e-05, |
|
"loss": 1.331, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.7661531386525638, |
|
"grad_norm": 0.6240009069442749, |
|
"learning_rate": 1.3418943443321807e-05, |
|
"loss": 1.2976, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.7670728903196137, |
|
"grad_norm": 0.5838286876678467, |
|
"learning_rate": 1.3318836883759634e-05, |
|
"loss": 1.2843, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.7679926419866636, |
|
"grad_norm": 0.6636451482772827, |
|
"learning_rate": 1.3219047719156575e-05, |
|
"loss": 1.2261, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.7689123936537134, |
|
"grad_norm": 0.6104261875152588, |
|
"learning_rate": 1.3119576812968892e-05, |
|
"loss": 1.2723, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.7698321453207634, |
|
"grad_norm": 0.7110616564750671, |
|
"learning_rate": 1.3020425025898925e-05, |
|
"loss": 1.295, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.7707518969878133, |
|
"grad_norm": 0.6308919191360474, |
|
"learning_rate": 1.292159321588778e-05, |
|
"loss": 1.225, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.7716716486548632, |
|
"grad_norm": 0.6422338485717773, |
|
"learning_rate": 1.2823082238107858e-05, |
|
"loss": 1.2812, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.7725914003219131, |
|
"grad_norm": 0.7281700372695923, |
|
"learning_rate": 1.272489294495548e-05, |
|
"loss": 1.2313, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.773511151988963, |
|
"grad_norm": 0.6761153340339661, |
|
"learning_rate": 1.2627026186043422e-05, |
|
"loss": 1.2118, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.7744309036560129, |
|
"grad_norm": 0.6714473366737366, |
|
"learning_rate": 1.2529482808193749e-05, |
|
"loss": 1.2265, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.7753506553230628, |
|
"grad_norm": 0.6813847422599792, |
|
"learning_rate": 1.243226365543026e-05, |
|
"loss": 1.2408, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.7762704069901126, |
|
"grad_norm": 0.6646814346313477, |
|
"learning_rate": 1.233536956897136e-05, |
|
"loss": 1.1755, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.7771901586571626, |
|
"grad_norm": 0.6985054612159729, |
|
"learning_rate": 1.2238801387222714e-05, |
|
"loss": 1.155, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.7781099103242125, |
|
"grad_norm": 0.6989067196846008, |
|
"learning_rate": 1.2142559945769993e-05, |
|
"loss": 1.1747, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.7790296619912623, |
|
"grad_norm": 0.8439406156539917, |
|
"learning_rate": 1.2046646077371615e-05, |
|
"loss": 1.1648, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.7799494136583123, |
|
"grad_norm": 0.8463898301124573, |
|
"learning_rate": 1.1951060611951615e-05, |
|
"loss": 1.1043, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.7808691653253621, |
|
"grad_norm": 0.9298079013824463, |
|
"learning_rate": 1.185580437659241e-05, |
|
"loss": 1.0148, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.781788916992412, |
|
"grad_norm": 1.260094404220581, |
|
"learning_rate": 1.1760878195527642e-05, |
|
"loss": 0.9653, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.782708668659462, |
|
"grad_norm": 1.080349326133728, |
|
"learning_rate": 1.1666282890135082e-05, |
|
"loss": 1.4973, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.7836284203265118, |
|
"grad_norm": 1.0160036087036133, |
|
"learning_rate": 1.1572019278929458e-05, |
|
"loss": 1.4835, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.7845481719935617, |
|
"grad_norm": 1.0411534309387207, |
|
"learning_rate": 1.1478088177555441e-05, |
|
"loss": 1.4388, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.7854679236606117, |
|
"grad_norm": 0.8667961359024048, |
|
"learning_rate": 1.1384490398780562e-05, |
|
"loss": 1.4592, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.7863876753276615, |
|
"grad_norm": 0.7747707366943359, |
|
"learning_rate": 1.129122675248816e-05, |
|
"loss": 1.4124, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.7873074269947115, |
|
"grad_norm": 0.9287156462669373, |
|
"learning_rate": 1.1198298045670402e-05, |
|
"loss": 1.4827, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.7882271786617613, |
|
"grad_norm": 1.0620696544647217, |
|
"learning_rate": 1.1105705082421303e-05, |
|
"loss": 1.4392, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.7891469303288112, |
|
"grad_norm": 1.099214792251587, |
|
"learning_rate": 1.1013448663929705e-05, |
|
"loss": 1.4812, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.7900666819958612, |
|
"grad_norm": 0.9307000637054443, |
|
"learning_rate": 1.0921529588472445e-05, |
|
"loss": 1.4939, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.790986433662911, |
|
"grad_norm": 0.7514574527740479, |
|
"learning_rate": 1.0829948651407374e-05, |
|
"loss": 1.4117, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7919061853299609, |
|
"grad_norm": 0.6653128862380981, |
|
"learning_rate": 1.0738706645166508e-05, |
|
"loss": 1.4885, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.7928259369970108, |
|
"grad_norm": 0.7091299295425415, |
|
"learning_rate": 1.0647804359249142e-05, |
|
"loss": 1.4785, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.7937456886640607, |
|
"grad_norm": 0.7756891250610352, |
|
"learning_rate": 1.0557242580215066e-05, |
|
"loss": 1.499, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.7946654403311106, |
|
"grad_norm": 0.7706134915351868, |
|
"learning_rate": 1.0467022091677691e-05, |
|
"loss": 1.3828, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.7955851919981605, |
|
"grad_norm": 0.6963340044021606, |
|
"learning_rate": 1.037714367429734e-05, |
|
"loss": 1.415, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.7965049436652104, |
|
"grad_norm": 0.683591365814209, |
|
"learning_rate": 1.0287608105774454e-05, |
|
"loss": 1.4614, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.7974246953322602, |
|
"grad_norm": 0.6579643487930298, |
|
"learning_rate": 1.019841616084286e-05, |
|
"loss": 1.4229, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.7983444469993102, |
|
"grad_norm": 0.655005156993866, |
|
"learning_rate": 1.0109568611263093e-05, |
|
"loss": 1.3674, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.7992641986663601, |
|
"grad_norm": 0.6061270236968994, |
|
"learning_rate": 1.0021066225815689e-05, |
|
"loss": 1.4522, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.8001839503334099, |
|
"grad_norm": 0.6729152798652649, |
|
"learning_rate": 9.932909770294541e-06, |
|
"loss": 1.3665, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8011037020004599, |
|
"grad_norm": 0.6866083145141602, |
|
"learning_rate": 9.84510000750029e-06, |
|
"loss": 1.341, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.8020234536675098, |
|
"grad_norm": 0.6673592329025269, |
|
"learning_rate": 9.757637697233723e-06, |
|
"loss": 1.4353, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.8029432053345597, |
|
"grad_norm": 0.6237421035766602, |
|
"learning_rate": 9.670523596289138e-06, |
|
"loss": 1.4077, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.8038629570016096, |
|
"grad_norm": 0.6855435967445374, |
|
"learning_rate": 9.583758458447927e-06, |
|
"loss": 1.4204, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.8047827086686594, |
|
"grad_norm": 0.6294743418693542, |
|
"learning_rate": 9.497343034471895e-06, |
|
"loss": 1.4306, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.8057024603357094, |
|
"grad_norm": 0.5920624136924744, |
|
"learning_rate": 9.41127807209688e-06, |
|
"loss": 1.4342, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.8066222120027593, |
|
"grad_norm": 0.5831781625747681, |
|
"learning_rate": 9.325564316026237e-06, |
|
"loss": 1.3581, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.8075419636698091, |
|
"grad_norm": 0.6441843509674072, |
|
"learning_rate": 9.240202507924412e-06, |
|
"loss": 1.3834, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.8084617153368591, |
|
"grad_norm": 0.8426811099052429, |
|
"learning_rate": 9.155193386410465e-06, |
|
"loss": 1.4059, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.8093814670039089, |
|
"grad_norm": 0.7335101366043091, |
|
"learning_rate": 9.070537687051817e-06, |
|
"loss": 1.3253, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.8103012186709588, |
|
"grad_norm": 0.6380130052566528, |
|
"learning_rate": 8.986236142357708e-06, |
|
"loss": 1.368, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.8112209703380088, |
|
"grad_norm": 0.6573965549468994, |
|
"learning_rate": 8.902289481772997e-06, |
|
"loss": 1.2883, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.8121407220050586, |
|
"grad_norm": 0.658258855342865, |
|
"learning_rate": 8.818698431671773e-06, |
|
"loss": 1.3068, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.8130604736721085, |
|
"grad_norm": 0.5781223773956299, |
|
"learning_rate": 8.735463715351139e-06, |
|
"loss": 1.2877, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.8139802253391585, |
|
"grad_norm": 0.7181767225265503, |
|
"learning_rate": 8.652586053024836e-06, |
|
"loss": 1.2878, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.8148999770062083, |
|
"grad_norm": 0.6754813194274902, |
|
"learning_rate": 8.570066161817176e-06, |
|
"loss": 1.2296, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.8158197286732582, |
|
"grad_norm": 0.655967652797699, |
|
"learning_rate": 8.487904755756677e-06, |
|
"loss": 1.2901, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.8167394803403081, |
|
"grad_norm": 0.6471141576766968, |
|
"learning_rate": 8.406102545769989e-06, |
|
"loss": 1.1674, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.817659232007358, |
|
"grad_norm": 0.615079939365387, |
|
"learning_rate": 8.324660239675696e-06, |
|
"loss": 1.2264, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.818578983674408, |
|
"grad_norm": 0.671017587184906, |
|
"learning_rate": 8.243578542178226e-06, |
|
"loss": 1.2746, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8194987353414578, |
|
"grad_norm": 0.6405725479125977, |
|
"learning_rate": 8.16285815486168e-06, |
|
"loss": 1.26, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.8204184870085077, |
|
"grad_norm": 0.7116778492927551, |
|
"learning_rate": 8.082499776183883e-06, |
|
"loss": 1.2526, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.8213382386755576, |
|
"grad_norm": 0.6701216697692871, |
|
"learning_rate": 8.002504101470204e-06, |
|
"loss": 1.1883, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.8222579903426075, |
|
"grad_norm": 0.7331655025482178, |
|
"learning_rate": 7.92287182290764e-06, |
|
"loss": 1.2322, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.8231777420096574, |
|
"grad_norm": 0.7266958951950073, |
|
"learning_rate": 7.843603629538804e-06, |
|
"loss": 1.1902, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.8240974936767073, |
|
"grad_norm": 0.7101981043815613, |
|
"learning_rate": 7.764700207255903e-06, |
|
"loss": 1.0998, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.8250172453437572, |
|
"grad_norm": 0.7413234114646912, |
|
"learning_rate": 7.686162238794897e-06, |
|
"loss": 1.1047, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.825936997010807, |
|
"grad_norm": 0.8715062141418457, |
|
"learning_rate": 7.607990403729526e-06, |
|
"loss": 1.1146, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.826856748677857, |
|
"grad_norm": 0.9183730483055115, |
|
"learning_rate": 7.5301853784654595e-06, |
|
"loss": 1.0057, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.8277765003449069, |
|
"grad_norm": 1.0864571332931519, |
|
"learning_rate": 7.452747836234392e-06, |
|
"loss": 0.978, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8277765003449069, |
|
"eval_loss": 1.3344465494155884, |
|
"eval_runtime": 49.9437, |
|
"eval_samples_per_second": 164.986, |
|
"eval_steps_per_second": 20.623, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8286962520119567, |
|
"grad_norm": 0.8766337037086487, |
|
"learning_rate": 7.375678447088347e-06, |
|
"loss": 1.5154, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.8296160036790067, |
|
"grad_norm": 0.8737375140190125, |
|
"learning_rate": 7.298977877893687e-06, |
|
"loss": 1.4447, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.8305357553460566, |
|
"grad_norm": 0.9431170225143433, |
|
"learning_rate": 7.222646792325516e-06, |
|
"loss": 1.4588, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.8314555070131064, |
|
"grad_norm": 0.9367691874504089, |
|
"learning_rate": 7.146685850861851e-06, |
|
"loss": 1.4205, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.8323752586801564, |
|
"grad_norm": 0.812258780002594, |
|
"learning_rate": 7.071095710777925e-06, |
|
"loss": 1.4177, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.8332950103472062, |
|
"grad_norm": 0.7034198045730591, |
|
"learning_rate": 6.995877026140468e-06, |
|
"loss": 1.4146, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.8342147620142562, |
|
"grad_norm": 0.7884905934333801, |
|
"learning_rate": 6.921030447802146e-06, |
|
"loss": 1.4616, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.8351345136813061, |
|
"grad_norm": 0.8112537860870361, |
|
"learning_rate": 6.8465566233957945e-06, |
|
"loss": 1.3435, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.8360542653483559, |
|
"grad_norm": 0.7667593955993652, |
|
"learning_rate": 6.772456197328919e-06, |
|
"loss": 1.464, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.8369740170154059, |
|
"grad_norm": 0.762269914150238, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 1.4473, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8378937686824557, |
|
"grad_norm": 0.852673351764679, |
|
"learning_rate": 6.625378101683316e-06, |
|
"loss": 1.4215, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.8388135203495056, |
|
"grad_norm": 0.7429057359695435, |
|
"learning_rate": 6.552401704742678e-06, |
|
"loss": 1.4426, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.8397332720165556, |
|
"grad_norm": 0.6884950995445251, |
|
"learning_rate": 6.4798012514067475e-06, |
|
"loss": 1.4016, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.8406530236836054, |
|
"grad_norm": 0.6550636291503906, |
|
"learning_rate": 6.407577369873069e-06, |
|
"loss": 1.4468, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.8415727753506553, |
|
"grad_norm": 0.5837852358818054, |
|
"learning_rate": 6.335730685080837e-06, |
|
"loss": 1.4036, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.8424925270177053, |
|
"grad_norm": 0.5570608377456665, |
|
"learning_rate": 6.264261818705419e-06, |
|
"loss": 1.3483, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.8434122786847551, |
|
"grad_norm": 0.7056939005851746, |
|
"learning_rate": 6.193171389152997e-06, |
|
"loss": 1.3397, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.844332030351805, |
|
"grad_norm": 0.623600423336029, |
|
"learning_rate": 6.122460011555187e-06, |
|
"loss": 1.4304, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.8452517820188549, |
|
"grad_norm": 0.6012278199195862, |
|
"learning_rate": 6.052128297763804e-06, |
|
"loss": 1.3684, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.8461715336859048, |
|
"grad_norm": 0.582744836807251, |
|
"learning_rate": 5.982176856345445e-06, |
|
"loss": 1.4205, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.8470912853529547, |
|
"grad_norm": 0.5616964101791382, |
|
"learning_rate": 5.912606292576283e-06, |
|
"loss": 1.3209, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.8480110370200046, |
|
"grad_norm": 0.5474282503128052, |
|
"learning_rate": 5.843417208436908e-06, |
|
"loss": 1.4125, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.8489307886870545, |
|
"grad_norm": 0.533388614654541, |
|
"learning_rate": 5.774610202606939e-06, |
|
"loss": 1.4116, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.8498505403541043, |
|
"grad_norm": 0.5694478154182434, |
|
"learning_rate": 5.706185870460018e-06, |
|
"loss": 1.509, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.8507702920211543, |
|
"grad_norm": 0.5748287439346313, |
|
"learning_rate": 5.638144804058559e-06, |
|
"loss": 1.3528, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8516900436882042, |
|
"grad_norm": 0.6192615032196045, |
|
"learning_rate": 5.5704875921486655e-06, |
|
"loss": 1.3098, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.852609795355254, |
|
"grad_norm": 0.6460704207420349, |
|
"learning_rate": 5.503214820154978e-06, |
|
"loss": 1.3839, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.853529547022304, |
|
"grad_norm": 0.620794951915741, |
|
"learning_rate": 5.436327070175728e-06, |
|
"loss": 1.4197, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.8544492986893538, |
|
"grad_norm": 0.6275455355644226, |
|
"learning_rate": 5.369824920977568e-06, |
|
"loss": 1.2891, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.8553690503564038, |
|
"grad_norm": 0.5857694149017334, |
|
"learning_rate": 5.303708947990637e-06, |
|
"loss": 1.3334, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8562888020234537, |
|
"grad_norm": 0.6003711819648743, |
|
"learning_rate": 5.2379797233035824e-06, |
|
"loss": 1.395, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.8572085536905035, |
|
"grad_norm": 0.6273806095123291, |
|
"learning_rate": 5.1726378156585816e-06, |
|
"loss": 1.2778, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.8581283053575535, |
|
"grad_norm": 0.6366182565689087, |
|
"learning_rate": 5.10768379044641e-06, |
|
"loss": 1.3508, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.8590480570246034, |
|
"grad_norm": 0.6845077872276306, |
|
"learning_rate": 5.043118209701631e-06, |
|
"loss": 1.2843, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.8599678086916532, |
|
"grad_norm": 0.6707909107208252, |
|
"learning_rate": 4.978941632097611e-06, |
|
"loss": 1.3239, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.8608875603587032, |
|
"grad_norm": 0.7041406631469727, |
|
"learning_rate": 4.9151546129417804e-06, |
|
"loss": 1.2556, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.861807312025753, |
|
"grad_norm": 0.6683023571968079, |
|
"learning_rate": 4.8517577041707955e-06, |
|
"loss": 1.289, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.8627270636928029, |
|
"grad_norm": 0.6463608741760254, |
|
"learning_rate": 4.788751454345763e-06, |
|
"loss": 1.225, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.8636468153598529, |
|
"grad_norm": 0.6901978254318237, |
|
"learning_rate": 4.726136408647464e-06, |
|
"loss": 1.2177, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.8645665670269027, |
|
"grad_norm": 0.6679742336273193, |
|
"learning_rate": 4.663913108871726e-06, |
|
"loss": 1.2586, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8654863186939526, |
|
"grad_norm": 0.6778735518455505, |
|
"learning_rate": 4.60208209342462e-06, |
|
"loss": 1.183, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.8664060703610025, |
|
"grad_norm": 0.6251430511474609, |
|
"learning_rate": 4.540643897317887e-06, |
|
"loss": 1.2523, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.8673258220280524, |
|
"grad_norm": 0.6894196271896362, |
|
"learning_rate": 4.479599052164268e-06, |
|
"loss": 1.183, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.8682455736951024, |
|
"grad_norm": 0.6839209198951721, |
|
"learning_rate": 4.418948086172914e-06, |
|
"loss": 1.1992, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.8691653253621522, |
|
"grad_norm": 0.7572594285011292, |
|
"learning_rate": 4.35869152414482e-06, |
|
"loss": 1.1731, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.8700850770292021, |
|
"grad_norm": 0.7147699594497681, |
|
"learning_rate": 4.298829887468275e-06, |
|
"loss": 1.1665, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.8710048286962521, |
|
"grad_norm": 0.7666782736778259, |
|
"learning_rate": 4.2393636941143675e-06, |
|
"loss": 1.149, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.8719245803633019, |
|
"grad_norm": 0.7843433022499084, |
|
"learning_rate": 4.180293458632489e-06, |
|
"loss": 1.0903, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.8728443320303518, |
|
"grad_norm": 0.958113431930542, |
|
"learning_rate": 4.121619692145878e-06, |
|
"loss": 1.118, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.8737640836974017, |
|
"grad_norm": 1.1284202337265015, |
|
"learning_rate": 4.0633429023472e-06, |
|
"loss": 0.9711, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8746838353644516, |
|
"grad_norm": 0.8368450403213501, |
|
"learning_rate": 4.005463593494163e-06, |
|
"loss": 1.4433, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.8756035870315015, |
|
"grad_norm": 0.6638758182525635, |
|
"learning_rate": 3.947982266405159e-06, |
|
"loss": 1.4285, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.8765233386985514, |
|
"grad_norm": 0.8789987564086914, |
|
"learning_rate": 3.890899418454913e-06, |
|
"loss": 1.4212, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.8774430903656013, |
|
"grad_norm": 0.847080409526825, |
|
"learning_rate": 3.834215543570191e-06, |
|
"loss": 1.4124, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.8783628420326511, |
|
"grad_norm": 0.9596214890480042, |
|
"learning_rate": 3.777931132225526e-06, |
|
"loss": 1.3723, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.8792825936997011, |
|
"grad_norm": 0.9075647592544556, |
|
"learning_rate": 3.72204667143895e-06, |
|
"loss": 1.493, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.880202345366751, |
|
"grad_norm": 0.780536413192749, |
|
"learning_rate": 3.6665626447678237e-06, |
|
"loss": 1.4126, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.8811220970338008, |
|
"grad_norm": 0.6997688412666321, |
|
"learning_rate": 3.611479532304618e-06, |
|
"loss": 1.389, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.8820418487008508, |
|
"grad_norm": 0.620875358581543, |
|
"learning_rate": 3.556797810672785e-06, |
|
"loss": 1.3514, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.8829616003679006, |
|
"grad_norm": 0.6854445338249207, |
|
"learning_rate": 3.5025179530225994e-06, |
|
"loss": 1.4661, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8838813520349506, |
|
"grad_norm": 0.7020566463470459, |
|
"learning_rate": 3.4486404290271113e-06, |
|
"loss": 1.4115, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.8848011037020005, |
|
"grad_norm": 0.6943616271018982, |
|
"learning_rate": 3.3951657048780227e-06, |
|
"loss": 1.4774, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.8857208553690503, |
|
"grad_norm": 0.7479608654975891, |
|
"learning_rate": 3.3420942432817127e-06, |
|
"loss": 1.4625, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.8866406070361003, |
|
"grad_norm": 0.7025173902511597, |
|
"learning_rate": 3.289426503455201e-06, |
|
"loss": 1.4019, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.8875603587031502, |
|
"grad_norm": 0.673040509223938, |
|
"learning_rate": 3.2371629411221848e-06, |
|
"loss": 1.4343, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.8884801103702, |
|
"grad_norm": 0.728541910648346, |
|
"learning_rate": 3.185304008509077e-06, |
|
"loss": 1.5093, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.88939986203725, |
|
"grad_norm": 0.6773453950881958, |
|
"learning_rate": 3.133850154341139e-06, |
|
"loss": 1.4002, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.8903196137042998, |
|
"grad_norm": 0.6363242864608765, |
|
"learning_rate": 3.082801823838527e-06, |
|
"loss": 1.4272, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.8912393653713497, |
|
"grad_norm": 0.5722589492797852, |
|
"learning_rate": 3.032159458712508e-06, |
|
"loss": 1.3557, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.8921591170383997, |
|
"grad_norm": 0.5886601209640503, |
|
"learning_rate": 2.981923497161615e-06, |
|
"loss": 1.3874, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.8930788687054495, |
|
"grad_norm": 0.6230661273002625, |
|
"learning_rate": 2.9320943738678107e-06, |
|
"loss": 1.3784, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.8939986203724994, |
|
"grad_norm": 0.5844275951385498, |
|
"learning_rate": 2.882672519992824e-06, |
|
"loss": 1.4153, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.8949183720395493, |
|
"grad_norm": 0.6414538621902466, |
|
"learning_rate": 2.833658363174302e-06, |
|
"loss": 1.3611, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.8958381237065992, |
|
"grad_norm": 0.6074815392494202, |
|
"learning_rate": 2.785052327522214e-06, |
|
"loss": 1.3607, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.8967578753736491, |
|
"grad_norm": 0.5938957333564758, |
|
"learning_rate": 2.73685483361511e-06, |
|
"loss": 1.3765, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.897677627040699, |
|
"grad_norm": 0.5869003534317017, |
|
"learning_rate": 2.6890662984965232e-06, |
|
"loss": 1.392, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.8985973787077489, |
|
"grad_norm": 0.5588386654853821, |
|
"learning_rate": 2.6416871356713224e-06, |
|
"loss": 1.3047, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.8995171303747989, |
|
"grad_norm": 0.5922186970710754, |
|
"learning_rate": 2.594717755102205e-06, |
|
"loss": 1.3928, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.9004368820418487, |
|
"grad_norm": 0.5693724155426025, |
|
"learning_rate": 2.548158563206038e-06, |
|
"loss": 1.347, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.9013566337088986, |
|
"grad_norm": 0.6117263436317444, |
|
"learning_rate": 2.50200996285046e-06, |
|
"loss": 1.3568, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.9022763853759485, |
|
"grad_norm": 0.5885259509086609, |
|
"learning_rate": 2.4562723533503083e-06, |
|
"loss": 1.4184, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.9031961370429984, |
|
"grad_norm": 0.6112256646156311, |
|
"learning_rate": 2.4109461304642256e-06, |
|
"loss": 1.3344, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.9041158887100483, |
|
"grad_norm": 0.6500238180160522, |
|
"learning_rate": 2.366031686391168e-06, |
|
"loss": 1.3372, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.9050356403770982, |
|
"grad_norm": 0.6185190677642822, |
|
"learning_rate": 2.3215294097670925e-06, |
|
"loss": 1.2273, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.9059553920441481, |
|
"grad_norm": 0.6523995995521545, |
|
"learning_rate": 2.277439685661509e-06, |
|
"loss": 1.2538, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.9068751437111979, |
|
"grad_norm": 0.7136437296867371, |
|
"learning_rate": 2.2337628955742264e-06, |
|
"loss": 1.3739, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.9077948953782479, |
|
"grad_norm": 0.6043840050697327, |
|
"learning_rate": 2.1904994174319905e-06, |
|
"loss": 1.2184, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.9087146470452978, |
|
"grad_norm": 0.6362565159797668, |
|
"learning_rate": 2.1476496255852683e-06, |
|
"loss": 1.1398, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.9096343987123476, |
|
"grad_norm": 0.6597528457641602, |
|
"learning_rate": 2.1052138908049303e-06, |
|
"loss": 1.1972, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.9105541503793976, |
|
"grad_norm": 0.679057240486145, |
|
"learning_rate": 2.0631925802791606e-06, |
|
"loss": 1.2572, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9114739020464474, |
|
"grad_norm": 0.6650072336196899, |
|
"learning_rate": 2.021586057610153e-06, |
|
"loss": 1.1868, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.9123936537134973, |
|
"grad_norm": 0.6258329749107361, |
|
"learning_rate": 1.9803946828110375e-06, |
|
"loss": 1.209, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.9133134053805473, |
|
"grad_norm": 0.6818736791610718, |
|
"learning_rate": 1.9396188123027737e-06, |
|
"loss": 1.2432, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.9142331570475971, |
|
"grad_norm": 0.7300404906272888, |
|
"learning_rate": 1.8992587989110134e-06, |
|
"loss": 1.2549, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.915152908714647, |
|
"grad_norm": 0.7216602563858032, |
|
"learning_rate": 1.8593149918630925e-06, |
|
"loss": 1.1911, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.916072660381697, |
|
"grad_norm": 0.7485631704330444, |
|
"learning_rate": 1.8197877367849947e-06, |
|
"loss": 1.1326, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.9169924120487468, |
|
"grad_norm": 0.8240882158279419, |
|
"learning_rate": 1.7806773756983642e-06, |
|
"loss": 1.1299, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.9179121637157968, |
|
"grad_norm": 0.9147471189498901, |
|
"learning_rate": 1.7419842470175195e-06, |
|
"loss": 1.1179, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.9188319153828466, |
|
"grad_norm": 0.9360700249671936, |
|
"learning_rate": 1.70370868554659e-06, |
|
"loss": 1.0562, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.9197516670498965, |
|
"grad_norm": 1.174989104270935, |
|
"learning_rate": 1.6658510224765333e-06, |
|
"loss": 0.9121, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9206714187169465, |
|
"grad_norm": 0.8917292952537537, |
|
"learning_rate": 1.6284115853823445e-06, |
|
"loss": 1.4961, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.9215911703839963, |
|
"grad_norm": 0.6432257890701294, |
|
"learning_rate": 1.5913906982201742e-06, |
|
"loss": 1.488, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.9225109220510462, |
|
"grad_norm": 0.7689481973648071, |
|
"learning_rate": 1.5547886813245539e-06, |
|
"loss": 1.4265, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.9234306737180961, |
|
"grad_norm": 0.7164052128791809, |
|
"learning_rate": 1.5186058514055912e-06, |
|
"loss": 1.4054, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.924350425385146, |
|
"grad_norm": 0.8932134509086609, |
|
"learning_rate": 1.4828425215462848e-06, |
|
"loss": 1.403, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.9252701770521959, |
|
"grad_norm": 0.8750680685043335, |
|
"learning_rate": 1.447499001199748e-06, |
|
"loss": 1.3956, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.9261899287192458, |
|
"grad_norm": 0.7176107168197632, |
|
"learning_rate": 1.4125755961865827e-06, |
|
"loss": 1.4235, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.9271096803862957, |
|
"grad_norm": 0.7204969525337219, |
|
"learning_rate": 1.3780726086922103e-06, |
|
"loss": 1.3773, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.9280294320533456, |
|
"grad_norm": 0.6472546458244324, |
|
"learning_rate": 1.3439903372642615e-06, |
|
"loss": 1.4734, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.9289491837203955, |
|
"grad_norm": 0.679750919342041, |
|
"learning_rate": 1.3103290768099797e-06, |
|
"loss": 1.5028, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9298689353874454, |
|
"grad_norm": 0.6491613984107971, |
|
"learning_rate": 1.2770891185937105e-06, |
|
"loss": 1.403, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.9307886870544952, |
|
"grad_norm": 0.6442059278488159, |
|
"learning_rate": 1.2442707502343332e-06, |
|
"loss": 1.4124, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.9317084387215452, |
|
"grad_norm": 0.5981637835502625, |
|
"learning_rate": 1.2118742557027884e-06, |
|
"loss": 1.459, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.9326281903885951, |
|
"grad_norm": 0.5459677577018738, |
|
"learning_rate": 1.1798999153196433e-06, |
|
"loss": 1.4171, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.933547942055645, |
|
"grad_norm": 0.5810702443122864, |
|
"learning_rate": 1.1483480057526363e-06, |
|
"loss": 1.3995, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.9344676937226949, |
|
"grad_norm": 0.5334146022796631, |
|
"learning_rate": 1.1172188000142802e-06, |
|
"loss": 1.4004, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.9353874453897447, |
|
"grad_norm": 0.5717347860336304, |
|
"learning_rate": 1.0865125674595466e-06, |
|
"loss": 1.3843, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.9363071970567947, |
|
"grad_norm": 0.5235407948493958, |
|
"learning_rate": 1.0562295737834737e-06, |
|
"loss": 1.3558, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.9372269487238446, |
|
"grad_norm": 0.5573782324790955, |
|
"learning_rate": 1.026370081018907e-06, |
|
"loss": 1.4016, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.9381467003908944, |
|
"grad_norm": 0.5528433322906494, |
|
"learning_rate": 9.969343475342285e-07, |
|
"loss": 1.3298, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9390664520579444, |
|
"grad_norm": 0.573993980884552, |
|
"learning_rate": 9.679226280310982e-07, |
|
"loss": 1.3674, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.9399862037249943, |
|
"grad_norm": 0.5446662902832031, |
|
"learning_rate": 9.393351735422773e-07, |
|
"loss": 1.3571, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.9409059553920441, |
|
"grad_norm": 0.5892913937568665, |
|
"learning_rate": 9.111722314294358e-07, |
|
"loss": 1.3471, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.9418257070590941, |
|
"grad_norm": 0.6275593638420105, |
|
"learning_rate": 8.834340453810375e-07, |
|
"loss": 1.3269, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.9427454587261439, |
|
"grad_norm": 0.6341751217842102, |
|
"learning_rate": 8.561208554101863e-07, |
|
"loss": 1.3899, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.9436652103931938, |
|
"grad_norm": 0.6272470951080322, |
|
"learning_rate": 8.292328978526109e-07, |
|
"loss": 1.3545, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.9445849620602438, |
|
"grad_norm": 0.6651190519332886, |
|
"learning_rate": 8.027704053645613e-07, |
|
"loss": 1.3397, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.9455047137272936, |
|
"grad_norm": 0.6504070162773132, |
|
"learning_rate": 7.76733606920832e-07, |
|
"loss": 1.3889, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.9464244653943436, |
|
"grad_norm": 0.639077365398407, |
|
"learning_rate": 7.511227278127697e-07, |
|
"loss": 1.3159, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.9473442170613934, |
|
"grad_norm": 0.685070812702179, |
|
"learning_rate": 7.259379896463247e-07, |
|
"loss": 1.312, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.9482639687284433, |
|
"grad_norm": 0.705894947052002, |
|
"learning_rate": 7.011796103401191e-07, |
|
"loss": 1.325, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.9491837203954933, |
|
"grad_norm": 0.6670310497283936, |
|
"learning_rate": 6.768478041236037e-07, |
|
"loss": 1.3582, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.9501034720625431, |
|
"grad_norm": 0.7927426695823669, |
|
"learning_rate": 6.529427815351374e-07, |
|
"loss": 1.3767, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.951023223729593, |
|
"grad_norm": 0.6605473160743713, |
|
"learning_rate": 6.294647494202444e-07, |
|
"loss": 1.2937, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.9519429753966429, |
|
"grad_norm": 0.599684476852417, |
|
"learning_rate": 6.064139109297485e-07, |
|
"loss": 1.2802, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.9528627270636928, |
|
"grad_norm": 0.6753445267677307, |
|
"learning_rate": 5.837904655180748e-07, |
|
"loss": 1.297, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.9537824787307427, |
|
"grad_norm": 0.6682940125465393, |
|
"learning_rate": 5.615946089414736e-07, |
|
"loss": 1.3073, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.9547022303977926, |
|
"grad_norm": 0.6744109392166138, |
|
"learning_rate": 5.398265332563934e-07, |
|
"loss": 1.1858, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.9556219820648425, |
|
"grad_norm": 0.6154145002365112, |
|
"learning_rate": 5.184864268177325e-07, |
|
"loss": 1.1648, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.9565417337318924, |
|
"grad_norm": 0.6836906671524048, |
|
"learning_rate": 4.975744742772848e-07, |
|
"loss": 1.2518, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9574614853989423, |
|
"grad_norm": 0.6386029720306396, |
|
"learning_rate": 4.770908565820964e-07, |
|
"loss": 1.2142, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.9583812370659922, |
|
"grad_norm": 0.6528066992759705, |
|
"learning_rate": 4.5703575097292286e-07, |
|
"loss": 1.1931, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.959300988733042, |
|
"grad_norm": 0.665433406829834, |
|
"learning_rate": 4.37409330982691e-07, |
|
"loss": 1.202, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.960220740400092, |
|
"grad_norm": 0.7009211182594299, |
|
"learning_rate": 4.182117664349783e-07, |
|
"loss": 1.2317, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.9611404920671419, |
|
"grad_norm": 0.7533866167068481, |
|
"learning_rate": 3.99443223442586e-07, |
|
"loss": 1.2128, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.9620602437341917, |
|
"grad_norm": 0.7658700942993164, |
|
"learning_rate": 3.8110386440605164e-07, |
|
"loss": 1.1474, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.9629799954012417, |
|
"grad_norm": 0.7905300259590149, |
|
"learning_rate": 3.6319384801227763e-07, |
|
"loss": 1.1075, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.9638997470682915, |
|
"grad_norm": 0.9083186388015747, |
|
"learning_rate": 3.4571332923314936e-07, |
|
"loss": 1.1094, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.9648194987353415, |
|
"grad_norm": 0.9923297762870789, |
|
"learning_rate": 3.2866245932418604e-07, |
|
"loss": 1.0341, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.9657392504023914, |
|
"grad_norm": 1.4956581592559814, |
|
"learning_rate": 3.120413858232474e-07, |
|
"loss": 0.9236, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9657392504023914, |
|
"eval_loss": 1.3224910497665405, |
|
"eval_runtime": 49.9198, |
|
"eval_samples_per_second": 165.065, |
|
"eval_steps_per_second": 20.633, |
|
"step": 1050 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1088, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 150, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.9092013631668224e+17, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|