{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 13863,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002164033758926639,
      "grad_norm": 1.1804725541377281,
      "learning_rate": 5e-06,
      "loss": 0.7931,
      "step": 10
    },
    {
      "epoch": 0.004328067517853278,
      "grad_norm": 1.0142507971826196,
      "learning_rate": 5e-06,
      "loss": 0.7085,
      "step": 20
    },
    {
      "epoch": 0.006492101276779918,
      "grad_norm": 1.0822486668830535,
      "learning_rate": 5e-06,
      "loss": 0.6971,
      "step": 30
    },
    {
      "epoch": 0.008656135035706556,
      "grad_norm": 0.9775955015066439,
      "learning_rate": 5e-06,
      "loss": 0.6931,
      "step": 40
    },
    {
      "epoch": 0.010820168794633196,
      "grad_norm": 1.0533969471863989,
      "learning_rate": 5e-06,
      "loss": 0.6748,
      "step": 50
    },
    {
      "epoch": 0.012984202553559835,
      "grad_norm": 0.9409532123493094,
      "learning_rate": 5e-06,
      "loss": 0.6582,
      "step": 60
    },
    {
      "epoch": 0.015148236312486475,
      "grad_norm": 0.8701409472453093,
      "learning_rate": 5e-06,
      "loss": 0.6612,
      "step": 70
    },
    {
      "epoch": 0.017312270071413113,
      "grad_norm": 0.8629522430209661,
      "learning_rate": 5e-06,
      "loss": 0.667,
      "step": 80
    },
    {
      "epoch": 0.019476303830339752,
      "grad_norm": 0.9005724399072088,
      "learning_rate": 5e-06,
      "loss": 0.6664,
      "step": 90
    },
    {
      "epoch": 0.02164033758926639,
      "grad_norm": 0.9031490400120155,
      "learning_rate": 5e-06,
      "loss": 0.6494,
      "step": 100
    },
    {
      "epoch": 0.02380437134819303,
      "grad_norm": 0.8491980446537303,
      "learning_rate": 5e-06,
      "loss": 0.6611,
      "step": 110
    },
    {
      "epoch": 0.02596840510711967,
      "grad_norm": 0.8544450914337819,
      "learning_rate": 5e-06,
      "loss": 0.6475,
      "step": 120
    },
    {
      "epoch": 0.02813243886604631,
      "grad_norm": 0.8940642349549588,
      "learning_rate": 5e-06,
      "loss": 0.6676,
      "step": 130
    },
    {
      "epoch": 0.03029647262497295,
      "grad_norm": 0.8624378682337847,
      "learning_rate": 5e-06,
      "loss": 0.6362,
      "step": 140
    },
    {
      "epoch": 0.032460506383899586,
      "grad_norm": 0.7937661473250804,
      "learning_rate": 5e-06,
      "loss": 0.6502,
      "step": 150
    },
    {
      "epoch": 0.034624540142826225,
      "grad_norm": 0.8511299785366766,
      "learning_rate": 5e-06,
      "loss": 0.6397,
      "step": 160
    },
    {
      "epoch": 0.036788573901752865,
      "grad_norm": 0.8247509378529784,
      "learning_rate": 5e-06,
      "loss": 0.6381,
      "step": 170
    },
    {
      "epoch": 0.038952607660679504,
      "grad_norm": 0.8606650790301921,
      "learning_rate": 5e-06,
      "loss": 0.6507,
      "step": 180
    },
    {
      "epoch": 0.041116641419606144,
      "grad_norm": 0.9000039308059945,
      "learning_rate": 5e-06,
      "loss": 0.6517,
      "step": 190
    },
    {
      "epoch": 0.04328067517853278,
      "grad_norm": 0.874046131263459,
      "learning_rate": 5e-06,
      "loss": 0.6372,
      "step": 200
    },
    {
      "epoch": 0.04544470893745942,
      "grad_norm": 0.826945338144314,
      "learning_rate": 5e-06,
      "loss": 0.6331,
      "step": 210
    },
    {
      "epoch": 0.04760874269638606,
      "grad_norm": 0.8647577576035517,
      "learning_rate": 5e-06,
      "loss": 0.6388,
      "step": 220
    },
    {
      "epoch": 0.0497727764553127,
      "grad_norm": 0.8357940501078124,
      "learning_rate": 5e-06,
      "loss": 0.6468,
      "step": 230
    },
    {
      "epoch": 0.05193681021423934,
      "grad_norm": 0.8205082815962632,
      "learning_rate": 5e-06,
      "loss": 0.6206,
      "step": 240
    },
    {
      "epoch": 0.05410084397316598,
      "grad_norm": 0.8029856499548153,
      "learning_rate": 5e-06,
      "loss": 0.6252,
      "step": 250
    },
    {
      "epoch": 0.05626487773209262,
      "grad_norm": 0.8266962137493432,
      "learning_rate": 5e-06,
      "loss": 0.6267,
      "step": 260
    },
    {
      "epoch": 0.05842891149101926,
      "grad_norm": 0.7760155385414025,
      "learning_rate": 5e-06,
      "loss": 0.6233,
      "step": 270
    },
    {
      "epoch": 0.0605929452499459,
      "grad_norm": 0.857439553904383,
      "learning_rate": 5e-06,
      "loss": 0.6247,
      "step": 280
    },
    {
      "epoch": 0.06275697900887253,
      "grad_norm": 0.8571645215071354,
      "learning_rate": 5e-06,
      "loss": 0.6291,
      "step": 290
    },
    {
      "epoch": 0.06492101276779917,
      "grad_norm": 0.8324700525072513,
      "learning_rate": 5e-06,
      "loss": 0.6313,
      "step": 300
    },
    {
      "epoch": 0.06708504652672581,
      "grad_norm": 0.7832243300229523,
      "learning_rate": 5e-06,
      "loss": 0.6329,
      "step": 310
    },
    {
      "epoch": 0.06924908028565245,
      "grad_norm": 0.7493686602077012,
      "learning_rate": 5e-06,
      "loss": 0.6198,
      "step": 320
    },
    {
      "epoch": 0.07141311404457909,
      "grad_norm": 0.8110396523299721,
      "learning_rate": 5e-06,
      "loss": 0.6268,
      "step": 330
    },
    {
      "epoch": 0.07357714780350573,
      "grad_norm": 0.7729143403225553,
      "learning_rate": 5e-06,
      "loss": 0.6131,
      "step": 340
    },
    {
      "epoch": 0.07574118156243237,
      "grad_norm": 0.844299608680804,
      "learning_rate": 5e-06,
      "loss": 0.628,
      "step": 350
    },
    {
      "epoch": 0.07790521532135901,
      "grad_norm": 0.8311658262532373,
      "learning_rate": 5e-06,
      "loss": 0.6284,
      "step": 360
    },
    {
      "epoch": 0.08006924908028565,
      "grad_norm": 0.7958716863199661,
      "learning_rate": 5e-06,
      "loss": 0.6166,
      "step": 370
    },
    {
      "epoch": 0.08223328283921229,
      "grad_norm": 0.8412185496400149,
      "learning_rate": 5e-06,
      "loss": 0.6458,
      "step": 380
    },
    {
      "epoch": 0.08439731659813893,
      "grad_norm": 0.7950152179064658,
      "learning_rate": 5e-06,
      "loss": 0.625,
      "step": 390
    },
    {
      "epoch": 0.08656135035706557,
      "grad_norm": 0.7728715222121862,
      "learning_rate": 5e-06,
      "loss": 0.6246,
      "step": 400
    },
    {
      "epoch": 0.0887253841159922,
      "grad_norm": 0.806073545694218,
      "learning_rate": 5e-06,
      "loss": 0.6315,
      "step": 410
    },
    {
      "epoch": 0.09088941787491885,
      "grad_norm": 0.8489148782720433,
      "learning_rate": 5e-06,
      "loss": 0.6145,
      "step": 420
    },
    {
      "epoch": 0.09305345163384549,
      "grad_norm": 0.7918376736911522,
      "learning_rate": 5e-06,
      "loss": 0.6192,
      "step": 430
    },
    {
      "epoch": 0.09521748539277212,
      "grad_norm": 0.8400111831435656,
      "learning_rate": 5e-06,
      "loss": 0.621,
      "step": 440
    },
    {
      "epoch": 0.09738151915169876,
      "grad_norm": 0.7614028740715489,
      "learning_rate": 5e-06,
      "loss": 0.605,
      "step": 450
    },
    {
      "epoch": 0.0995455529106254,
      "grad_norm": 0.8577505860556417,
      "learning_rate": 5e-06,
      "loss": 0.607,
      "step": 460
    },
    {
      "epoch": 0.10170958666955204,
      "grad_norm": 0.8729565243092611,
      "learning_rate": 5e-06,
      "loss": 0.6229,
      "step": 470
    },
    {
      "epoch": 0.10387362042847868,
      "grad_norm": 0.822547387381443,
      "learning_rate": 5e-06,
      "loss": 0.6213,
      "step": 480
    },
    {
      "epoch": 0.10603765418740532,
      "grad_norm": 0.7797820836510193,
      "learning_rate": 5e-06,
      "loss": 0.6055,
      "step": 490
    },
    {
      "epoch": 0.10820168794633196,
      "grad_norm": 0.8039389365692253,
      "learning_rate": 5e-06,
      "loss": 0.6158,
      "step": 500
    },
    {
      "epoch": 0.1103657217052586,
      "grad_norm": 0.8014536678260006,
      "learning_rate": 5e-06,
      "loss": 0.6078,
      "step": 510
    },
    {
      "epoch": 0.11252975546418524,
      "grad_norm": 0.8655111256073293,
      "learning_rate": 5e-06,
      "loss": 0.6279,
      "step": 520
    },
    {
      "epoch": 0.11469378922311188,
      "grad_norm": 0.872957811479719,
      "learning_rate": 5e-06,
      "loss": 0.6172,
      "step": 530
    },
    {
      "epoch": 0.11685782298203852,
      "grad_norm": 0.8819039368989797,
      "learning_rate": 5e-06,
      "loss": 0.6121,
      "step": 540
    },
    {
      "epoch": 0.11902185674096516,
      "grad_norm": 0.8640564878023476,
      "learning_rate": 5e-06,
      "loss": 0.5962,
      "step": 550
    },
    {
      "epoch": 0.1211858904998918,
      "grad_norm": 0.8207671083993288,
      "learning_rate": 5e-06,
      "loss": 0.6061,
      "step": 560
    },
    {
      "epoch": 0.12334992425881844,
      "grad_norm": 0.8639259231144906,
      "learning_rate": 5e-06,
      "loss": 0.6102,
      "step": 570
    },
    {
      "epoch": 0.12551395801774506,
      "grad_norm": 0.8252682175137697,
      "learning_rate": 5e-06,
      "loss": 0.5919,
      "step": 580
    },
    {
      "epoch": 0.12767799177667172,
      "grad_norm": 0.8178974092958998,
      "learning_rate": 5e-06,
      "loss": 0.6166,
      "step": 590
    },
    {
      "epoch": 0.12984202553559834,
      "grad_norm": 0.8895228285922248,
      "learning_rate": 5e-06,
      "loss": 0.6098,
      "step": 600
    },
    {
      "epoch": 0.132006059294525,
      "grad_norm": 0.7969967604154639,
      "learning_rate": 5e-06,
      "loss": 0.6077,
      "step": 610
    },
    {
      "epoch": 0.13417009305345162,
      "grad_norm": 0.8683433820973785,
      "learning_rate": 5e-06,
      "loss": 0.6099,
      "step": 620
    },
    {
      "epoch": 0.13633412681237828,
      "grad_norm": 0.8063824032551594,
      "learning_rate": 5e-06,
      "loss": 0.6127,
      "step": 630
    },
    {
      "epoch": 0.1384981605713049,
      "grad_norm": 0.8136662571064663,
      "learning_rate": 5e-06,
      "loss": 0.5968,
      "step": 640
    },
    {
      "epoch": 0.14066219433023155,
      "grad_norm": 0.822893712320517,
      "learning_rate": 5e-06,
      "loss": 0.6038,
      "step": 650
    },
    {
      "epoch": 0.14282622808915818,
      "grad_norm": 0.8485980406618856,
      "learning_rate": 5e-06,
      "loss": 0.6127,
      "step": 660
    },
    {
      "epoch": 0.14499026184808483,
      "grad_norm": 0.8396390754266708,
      "learning_rate": 5e-06,
      "loss": 0.6105,
      "step": 670
    },
    {
      "epoch": 0.14715429560701146,
      "grad_norm": 0.863650354540203,
      "learning_rate": 5e-06,
      "loss": 0.5947,
      "step": 680
    },
    {
      "epoch": 0.1493183293659381,
      "grad_norm": 0.8767137343250653,
      "learning_rate": 5e-06,
      "loss": 0.5944,
      "step": 690
    },
    {
      "epoch": 0.15148236312486474,
      "grad_norm": 0.7785697177833184,
      "learning_rate": 5e-06,
      "loss": 0.5957,
      "step": 700
    },
    {
      "epoch": 0.1536463968837914,
      "grad_norm": 0.7903385744648517,
      "learning_rate": 5e-06,
      "loss": 0.5887,
      "step": 710
    },
    {
      "epoch": 0.15581043064271802,
      "grad_norm": 0.8304334914378555,
      "learning_rate": 5e-06,
      "loss": 0.5972,
      "step": 720
    },
    {
      "epoch": 0.15797446440164467,
      "grad_norm": 0.8032146774897264,
      "learning_rate": 5e-06,
      "loss": 0.6055,
      "step": 730
    },
    {
      "epoch": 0.1601384981605713,
      "grad_norm": 0.818462114006868,
      "learning_rate": 5e-06,
      "loss": 0.6086,
      "step": 740
    },
    {
      "epoch": 0.16230253191949795,
      "grad_norm": 0.7999227700105283,
      "learning_rate": 5e-06,
      "loss": 0.6,
      "step": 750
    },
    {
      "epoch": 0.16446656567842458,
      "grad_norm": 0.779725395098173,
      "learning_rate": 5e-06,
      "loss": 0.581,
      "step": 760
    },
    {
      "epoch": 0.16663059943735123,
      "grad_norm": 0.8501680445359217,
      "learning_rate": 5e-06,
      "loss": 0.6034,
      "step": 770
    },
    {
      "epoch": 0.16879463319627785,
      "grad_norm": 0.7955262980950765,
      "learning_rate": 5e-06,
      "loss": 0.591,
      "step": 780
    },
    {
      "epoch": 0.1709586669552045,
      "grad_norm": 0.7969375284741476,
      "learning_rate": 5e-06,
      "loss": 0.5917,
      "step": 790
    },
    {
      "epoch": 0.17312270071413113,
      "grad_norm": 0.7651500787357158,
      "learning_rate": 5e-06,
      "loss": 0.5925,
      "step": 800
    },
    {
      "epoch": 0.1752867344730578,
      "grad_norm": 0.7869498336245857,
      "learning_rate": 5e-06,
      "loss": 0.5919,
      "step": 810
    },
    {
      "epoch": 0.1774507682319844,
      "grad_norm": 0.8171995660498751,
      "learning_rate": 5e-06,
      "loss": 0.5938,
      "step": 820
    },
    {
      "epoch": 0.17961480199091107,
      "grad_norm": 0.7836587740083709,
      "learning_rate": 5e-06,
      "loss": 0.5786,
      "step": 830
    },
    {
      "epoch": 0.1817788357498377,
      "grad_norm": 0.8784878650037878,
      "learning_rate": 5e-06,
      "loss": 0.6044,
      "step": 840
    },
    {
      "epoch": 0.18394286950876435,
      "grad_norm": 0.800243869063219,
      "learning_rate": 5e-06,
      "loss": 0.5889,
      "step": 850
    },
    {
      "epoch": 0.18610690326769097,
      "grad_norm": 0.8018420320157924,
      "learning_rate": 5e-06,
      "loss": 0.6087,
      "step": 860
    },
    {
      "epoch": 0.18827093702661762,
      "grad_norm": 0.8180751714275638,
      "learning_rate": 5e-06,
      "loss": 0.6037,
      "step": 870
    },
    {
      "epoch": 0.19043497078554425,
      "grad_norm": 0.785600856617542,
      "learning_rate": 5e-06,
      "loss": 0.5837,
      "step": 880
    },
    {
      "epoch": 0.1925990045444709,
      "grad_norm": 0.8412200127939977,
      "learning_rate": 5e-06,
      "loss": 0.5896,
      "step": 890
    },
    {
      "epoch": 0.19476303830339753,
      "grad_norm": 0.8025538669907342,
      "learning_rate": 5e-06,
      "loss": 0.5991,
      "step": 900
    },
    {
      "epoch": 0.19692707206232418,
      "grad_norm": 0.7720798100780025,
      "learning_rate": 5e-06,
      "loss": 0.5891,
      "step": 910
    },
    {
      "epoch": 0.1990911058212508,
      "grad_norm": 0.8661764118040983,
      "learning_rate": 5e-06,
      "loss": 0.5931,
      "step": 920
    },
    {
      "epoch": 0.20125513958017746,
      "grad_norm": 0.8120743515574185,
      "learning_rate": 5e-06,
      "loss": 0.5957,
      "step": 930
    },
    {
      "epoch": 0.2034191733391041,
      "grad_norm": 0.8345160783897795,
      "learning_rate": 5e-06,
      "loss": 0.5984,
      "step": 940
    },
    {
      "epoch": 0.20558320709803074,
      "grad_norm": 0.811502391003602,
      "learning_rate": 5e-06,
      "loss": 0.6081,
      "step": 950
    },
    {
      "epoch": 0.20774724085695737,
      "grad_norm": 0.7825461582026372,
      "learning_rate": 5e-06,
      "loss": 0.5948,
      "step": 960
    },
    {
      "epoch": 0.20991127461588402,
      "grad_norm": 0.8055902800373363,
      "learning_rate": 5e-06,
      "loss": 0.5982,
      "step": 970
    },
    {
      "epoch": 0.21207530837481064,
      "grad_norm": 0.8388084051375468,
      "learning_rate": 5e-06,
      "loss": 0.5893,
      "step": 980
    },
    {
      "epoch": 0.2142393421337373,
      "grad_norm": 0.7679133259037194,
      "learning_rate": 5e-06,
      "loss": 0.5907,
      "step": 990
    },
    {
      "epoch": 0.21640337589266392,
      "grad_norm": 0.8624733193059415,
      "learning_rate": 5e-06,
      "loss": 0.5814,
      "step": 1000
    },
    {
      "epoch": 0.21856740965159058,
      "grad_norm": 0.7984486425858306,
      "learning_rate": 5e-06,
      "loss": 0.5913,
      "step": 1010
    },
    {
      "epoch": 0.2207314434105172,
      "grad_norm": 0.8510028087137639,
      "learning_rate": 5e-06,
      "loss": 0.5949,
      "step": 1020
    },
    {
      "epoch": 0.22289547716944386,
      "grad_norm": 0.7996439613799075,
      "learning_rate": 5e-06,
      "loss": 0.601,
      "step": 1030
    },
    {
      "epoch": 0.22505951092837048,
      "grad_norm": 0.8088661553876958,
      "learning_rate": 5e-06,
      "loss": 0.6074,
      "step": 1040
    },
    {
      "epoch": 0.22722354468729714,
      "grad_norm": 0.8163892550046303,
      "learning_rate": 5e-06,
      "loss": 0.5863,
      "step": 1050
    },
    {
      "epoch": 0.22938757844622376,
      "grad_norm": 0.9418880337035802,
      "learning_rate": 5e-06,
      "loss": 0.6033,
      "step": 1060
    },
    {
      "epoch": 0.2315516122051504,
      "grad_norm": 0.8308760617654204,
      "learning_rate": 5e-06,
      "loss": 0.603,
      "step": 1070
    },
    {
      "epoch": 0.23371564596407704,
      "grad_norm": 0.8145198410135479,
      "learning_rate": 5e-06,
      "loss": 0.5947,
      "step": 1080
    },
    {
      "epoch": 0.23587967972300367,
      "grad_norm": 0.8623357006048706,
      "learning_rate": 5e-06,
      "loss": 0.6056,
      "step": 1090
    },
    {
      "epoch": 0.23804371348193032,
      "grad_norm": 0.7965551896548404,
      "learning_rate": 5e-06,
      "loss": 0.5923,
      "step": 1100
    },
    {
      "epoch": 0.24020774724085694,
      "grad_norm": 0.8052327006114539,
      "learning_rate": 5e-06,
      "loss": 0.6007,
      "step": 1110
    },
    {
      "epoch": 0.2423717809997836,
      "grad_norm": 0.8213993699298745,
      "learning_rate": 5e-06,
      "loss": 0.5797,
      "step": 1120
    },
    {
      "epoch": 0.24453581475871022,
      "grad_norm": 0.8149866187723401,
      "learning_rate": 5e-06,
      "loss": 0.5941,
      "step": 1130
    },
    {
      "epoch": 0.24669984851763688,
      "grad_norm": 0.7837216156150254,
      "learning_rate": 5e-06,
      "loss": 0.59,
      "step": 1140
    },
    {
      "epoch": 0.2488638822765635,
      "grad_norm": 0.8126197328739624,
      "learning_rate": 5e-06,
      "loss": 0.5952,
      "step": 1150
    },
    {
      "epoch": 0.25102791603549013,
      "grad_norm": 0.8221669781147581,
      "learning_rate": 5e-06,
      "loss": 0.6005,
      "step": 1160
    },
    {
      "epoch": 0.2531919497944168,
      "grad_norm": 0.7963300719869713,
      "learning_rate": 5e-06,
      "loss": 0.5877,
      "step": 1170
    },
    {
      "epoch": 0.25535598355334344,
      "grad_norm": 0.788058435618901,
      "learning_rate": 5e-06,
      "loss": 0.591,
      "step": 1180
    },
    {
      "epoch": 0.2575200173122701,
      "grad_norm": 0.7519308880120247,
      "learning_rate": 5e-06,
      "loss": 0.5928,
      "step": 1190
    },
    {
      "epoch": 0.2596840510711967,
      "grad_norm": 0.8266442618730233,
      "learning_rate": 5e-06,
      "loss": 0.5839,
      "step": 1200
    },
    {
      "epoch": 0.26184808483012334,
      "grad_norm": 0.8292557026765464,
      "learning_rate": 5e-06,
      "loss": 0.5865,
      "step": 1210
    },
    {
      "epoch": 0.26401211858905,
      "grad_norm": 0.8853558777310622,
      "learning_rate": 5e-06,
      "loss": 0.5983,
      "step": 1220
    },
    {
      "epoch": 0.26617615234797665,
      "grad_norm": 0.9109356382425066,
      "learning_rate": 5e-06,
      "loss": 0.5977,
      "step": 1230
    },
    {
      "epoch": 0.26834018610690324,
      "grad_norm": 0.89504318082267,
      "learning_rate": 5e-06,
      "loss": 0.592,
      "step": 1240
    },
    {
      "epoch": 0.2705042198658299,
      "grad_norm": 0.8160219829275853,
      "learning_rate": 5e-06,
      "loss": 0.593,
      "step": 1250
    },
    {
      "epoch": 0.27266825362475655,
      "grad_norm": 0.8076508336570862,
      "learning_rate": 5e-06,
      "loss": 0.5856,
      "step": 1260
    },
    {
      "epoch": 0.2748322873836832,
      "grad_norm": 0.8022139372756553,
      "learning_rate": 5e-06,
      "loss": 0.5747,
      "step": 1270
    },
    {
      "epoch": 0.2769963211426098,
      "grad_norm": 0.8145806416794947,
      "learning_rate": 5e-06,
      "loss": 0.5887,
      "step": 1280
    },
    {
      "epoch": 0.27916035490153646,
      "grad_norm": 0.7591150136319158,
      "learning_rate": 5e-06,
      "loss": 0.5732,
      "step": 1290
    },
    {
      "epoch": 0.2813243886604631,
      "grad_norm": 0.8140968921103322,
      "learning_rate": 5e-06,
      "loss": 0.5912,
      "step": 1300
    },
    {
      "epoch": 0.28348842241938976,
      "grad_norm": 0.7944048590141184,
      "learning_rate": 5e-06,
      "loss": 0.5895,
      "step": 1310
    },
    {
      "epoch": 0.28565245617831636,
      "grad_norm": 0.8789105404070925,
      "learning_rate": 5e-06,
      "loss": 0.5747,
      "step": 1320
    },
    {
      "epoch": 0.287816489937243,
      "grad_norm": 0.7796055521428117,
      "learning_rate": 5e-06,
      "loss": 0.5881,
      "step": 1330
    },
    {
      "epoch": 0.28998052369616967,
      "grad_norm": 0.817568265263925,
      "learning_rate": 5e-06,
      "loss": 0.5818,
      "step": 1340
    },
    {
      "epoch": 0.2921445574550963,
      "grad_norm": 0.7781293420250672,
      "learning_rate": 5e-06,
      "loss": 0.5912,
      "step": 1350
    },
    {
      "epoch": 0.2943085912140229,
      "grad_norm": 0.8504639244998208,
      "learning_rate": 5e-06,
      "loss": 0.5914,
      "step": 1360
    },
    {
      "epoch": 0.29647262497294957,
      "grad_norm": 0.848929905244684,
      "learning_rate": 5e-06,
      "loss": 0.5705,
      "step": 1370
    },
    {
      "epoch": 0.2986366587318762,
      "grad_norm": 0.8401835594137422,
      "learning_rate": 5e-06,
      "loss": 0.5859,
      "step": 1380
    },
    {
      "epoch": 0.3008006924908029,
      "grad_norm": 0.8430038720121172,
      "learning_rate": 5e-06,
      "loss": 0.5909,
      "step": 1390
    },
    {
      "epoch": 0.3029647262497295,
      "grad_norm": 0.7925617711932909,
      "learning_rate": 5e-06,
      "loss": 0.5822,
      "step": 1400
    },
    {
      "epoch": 0.30512876000865613,
      "grad_norm": 0.8081455173268478,
      "learning_rate": 5e-06,
      "loss": 0.5745,
      "step": 1410
    },
    {
      "epoch": 0.3072927937675828,
      "grad_norm": 0.7903132825985777,
      "learning_rate": 5e-06,
      "loss": 0.5759,
      "step": 1420
    },
    {
      "epoch": 0.30945682752650944,
      "grad_norm": 0.8243849826447428,
      "learning_rate": 5e-06,
      "loss": 0.5968,
      "step": 1430
    },
    {
      "epoch": 0.31162086128543603,
      "grad_norm": 0.7603966355823882,
      "learning_rate": 5e-06,
      "loss": 0.5873,
      "step": 1440
    },
    {
      "epoch": 0.3137848950443627,
      "grad_norm": 0.801455870233685,
      "learning_rate": 5e-06,
      "loss": 0.5786,
      "step": 1450
    },
    {
      "epoch": 0.31594892880328934,
      "grad_norm": 0.7686291740407882,
      "learning_rate": 5e-06,
      "loss": 0.5789,
      "step": 1460
    },
    {
      "epoch": 0.318112962562216,
      "grad_norm": 0.8554910506261316,
      "learning_rate": 5e-06,
      "loss": 0.5795,
      "step": 1470
    },
    {
      "epoch": 0.3202769963211426,
      "grad_norm": 0.820274584453091,
      "learning_rate": 5e-06,
      "loss": 0.5756,
      "step": 1480
    },
    {
      "epoch": 0.32244103008006925,
      "grad_norm": 0.835330743611247,
      "learning_rate": 5e-06,
      "loss": 0.5848,
      "step": 1490
    },
    {
      "epoch": 0.3246050638389959,
      "grad_norm": 0.8067787217591124,
      "learning_rate": 5e-06,
      "loss": 0.5651,
      "step": 1500
    },
    {
      "epoch": 0.32676909759792255,
      "grad_norm": 0.8055298656551628,
      "learning_rate": 5e-06,
      "loss": 0.5806,
      "step": 1510
    },
    {
      "epoch": 0.32893313135684915,
      "grad_norm": 0.7643337696325749,
      "learning_rate": 5e-06,
      "loss": 0.5836,
      "step": 1520
    },
    {
      "epoch": 0.3310971651157758,
      "grad_norm": 0.802754989395744,
      "learning_rate": 5e-06,
      "loss": 0.577,
      "step": 1530
    },
    {
      "epoch": 0.33326119887470246,
      "grad_norm": 0.8227460460988874,
      "learning_rate": 5e-06,
      "loss": 0.5781,
      "step": 1540
    },
    {
      "epoch": 0.3354252326336291,
      "grad_norm": 0.8157555450231291,
      "learning_rate": 5e-06,
      "loss": 0.5915,
      "step": 1550
    },
    {
      "epoch": 0.3375892663925557,
      "grad_norm": 0.8178511979744351,
      "learning_rate": 5e-06,
      "loss": 0.5844,
      "step": 1560
    },
    {
      "epoch": 0.33975330015148236,
      "grad_norm": 0.8640976250921749,
      "learning_rate": 5e-06,
      "loss": 0.5867,
      "step": 1570
    },
    {
      "epoch": 0.341917333910409,
      "grad_norm": 0.8225622251467227,
      "learning_rate": 5e-06,
      "loss": 0.5626,
      "step": 1580
    },
    {
      "epoch": 0.34408136766933567,
      "grad_norm": 0.809101202716206,
      "learning_rate": 5e-06,
      "loss": 0.5841,
      "step": 1590
    },
    {
      "epoch": 0.34624540142826227,
      "grad_norm": 0.8007366862509202,
      "learning_rate": 5e-06,
      "loss": 0.6002,
      "step": 1600
    },
    {
      "epoch": 0.3484094351871889,
      "grad_norm": 0.80921682472726,
      "learning_rate": 5e-06,
      "loss": 0.5756,
      "step": 1610
    },
    {
      "epoch": 0.3505734689461156,
      "grad_norm": 0.7919027933880338,
      "learning_rate": 5e-06,
      "loss": 0.5811,
      "step": 1620
    },
    {
      "epoch": 0.35273750270504217,
      "grad_norm": 0.8578709720525358,
      "learning_rate": 5e-06,
      "loss": 0.5844,
      "step": 1630
    },
    {
      "epoch": 0.3549015364639688,
      "grad_norm": 0.8919305010438147,
      "learning_rate": 5e-06,
      "loss": 0.5867,
      "step": 1640
    },
    {
      "epoch": 0.3570655702228955,
      "grad_norm": 0.8360856901044694,
      "learning_rate": 5e-06,
      "loss": 0.5959,
      "step": 1650
    },
    {
      "epoch": 0.35922960398182213,
      "grad_norm": 0.8675129695665484,
      "learning_rate": 5e-06,
      "loss": 0.5761,
      "step": 1660
    },
    {
      "epoch": 0.36139363774074873,
      "grad_norm": 0.7689087827995047,
      "learning_rate": 5e-06,
      "loss": 0.5861,
      "step": 1670
    },
    {
      "epoch": 0.3635576714996754,
      "grad_norm": 0.7720960735276896,
      "learning_rate": 5e-06,
      "loss": 0.5743,
      "step": 1680
    },
    {
      "epoch": 0.36572170525860204,
      "grad_norm": 0.7721866885573503,
      "learning_rate": 5e-06,
      "loss": 0.59,
      "step": 1690
    },
    {
      "epoch": 0.3678857390175287,
      "grad_norm": 0.7882290467361662,
      "learning_rate": 5e-06,
      "loss": 0.5755,
      "step": 1700
    },
    {
      "epoch": 0.3700497727764553,
      "grad_norm": 0.7556952201847303,
      "learning_rate": 5e-06,
      "loss": 0.5665,
      "step": 1710
    },
    {
      "epoch": 0.37221380653538194,
      "grad_norm": 0.791031843589153,
      "learning_rate": 5e-06,
      "loss": 0.5771,
      "step": 1720
    },
    {
      "epoch": 0.3743778402943086,
      "grad_norm": 0.7809696368341134,
      "learning_rate": 5e-06,
      "loss": 0.5653,
      "step": 1730
    },
    {
      "epoch": 0.37654187405323525,
      "grad_norm": 0.8629957045419663,
      "learning_rate": 5e-06,
      "loss": 0.5744,
      "step": 1740
    },
    {
      "epoch": 0.37870590781216185,
      "grad_norm": 0.735264733320356,
      "learning_rate": 5e-06,
      "loss": 0.568,
      "step": 1750
    },
    {
      "epoch": 0.3808699415710885,
      "grad_norm": 0.79820790046158,
      "learning_rate": 5e-06,
      "loss": 0.5756,
      "step": 1760
    },
    {
      "epoch": 0.38303397533001515,
      "grad_norm": 0.7990867854378455,
      "learning_rate": 5e-06,
      "loss": 0.5741,
      "step": 1770
    },
    {
      "epoch": 0.3851980090889418,
      "grad_norm": 0.8121981810970615,
      "learning_rate": 5e-06,
      "loss": 0.5784,
      "step": 1780
    },
    {
      "epoch": 0.3873620428478684,
      "grad_norm": 0.8133852852205034,
      "learning_rate": 5e-06,
      "loss": 0.5843,
      "step": 1790
    },
    {
      "epoch": 0.38952607660679506,
      "grad_norm": 0.8244168262600475,
      "learning_rate": 5e-06,
      "loss": 0.5711,
      "step": 1800
    },
    {
      "epoch": 0.3916901103657217,
      "grad_norm": 0.7650337026676514,
      "learning_rate": 5e-06,
      "loss": 0.567,
      "step": 1810
    },
    {
      "epoch": 0.39385414412464836,
      "grad_norm": 0.7884841014495525,
      "learning_rate": 5e-06,
      "loss": 0.587,
      "step": 1820
    },
    {
      "epoch": 0.39601817788357496,
      "grad_norm": 0.8218222575278361,
      "learning_rate": 5e-06,
      "loss": 0.5729,
      "step": 1830
    },
    {
      "epoch": 0.3981822116425016,
      "grad_norm": 0.7928094425107333,
      "learning_rate": 5e-06,
      "loss": 0.5677,
      "step": 1840
    },
    {
      "epoch": 0.40034624540142827,
      "grad_norm": 0.7768628011319236,
      "learning_rate": 5e-06,
      "loss": 0.5914,
      "step": 1850
    },
    {
      "epoch": 0.4025102791603549,
      "grad_norm": 0.7947010305390309,
      "learning_rate": 5e-06,
      "loss": 0.5748,
      "step": 1860
    },
    {
      "epoch": 0.4046743129192815,
      "grad_norm": 0.7697471130550405,
      "learning_rate": 5e-06,
      "loss": 0.5759,
      "step": 1870
    },
    {
      "epoch": 0.4068383466782082,
      "grad_norm": 0.796128902115479,
      "learning_rate": 5e-06,
      "loss": 0.5734,
      "step": 1880
    },
    {
      "epoch": 0.4090023804371348,
      "grad_norm": 0.7659679821804417,
      "learning_rate": 5e-06,
      "loss": 0.5631,
      "step": 1890
    },
    {
      "epoch": 0.4111664141960615,
      "grad_norm": 0.8580458852557561,
      "learning_rate": 5e-06,
      "loss": 0.5765,
      "step": 1900
    },
    {
      "epoch": 0.4133304479549881,
      "grad_norm": 0.7957186485957107,
      "learning_rate": 5e-06,
      "loss": 0.5658,
      "step": 1910
    },
    {
      "epoch": 0.41549448171391473,
      "grad_norm": 0.7853295564565586,
      "learning_rate": 5e-06,
      "loss": 0.5621,
      "step": 1920
    },
    {
      "epoch": 0.4176585154728414,
      "grad_norm": 0.720431628977774,
      "learning_rate": 5e-06,
      "loss": 0.565,
      "step": 1930
    },
    {
      "epoch": 0.41982254923176804,
      "grad_norm": 0.7826448016952756,
      "learning_rate": 5e-06,
      "loss": 0.5677,
      "step": 1940
    },
    {
      "epoch": 0.42198658299069464,
      "grad_norm": 0.7904824818785057,
      "learning_rate": 5e-06,
      "loss": 0.567,
      "step": 1950
    },
    {
      "epoch": 0.4241506167496213,
      "grad_norm": 0.7871987194350701,
      "learning_rate": 5e-06,
      "loss": 0.5812,
      "step": 1960
    },
    {
      "epoch": 0.42631465050854794,
      "grad_norm": 0.7967155088578817,
      "learning_rate": 5e-06,
      "loss": 0.559,
      "step": 1970
    },
    {
      "epoch": 0.4284786842674746,
      "grad_norm": 0.8505915926157366,
      "learning_rate": 5e-06,
      "loss": 0.5757,
      "step": 1980
    },
    {
      "epoch": 0.4306427180264012,
      "grad_norm": 0.7667510030206882,
      "learning_rate": 5e-06,
      "loss": 0.557,
      "step": 1990
    },
    {
      "epoch": 0.43280675178532785,
      "grad_norm": 0.8161411756414166,
      "learning_rate": 5e-06,
      "loss": 0.5886,
      "step": 2000
    },
    {
      "epoch": 0.4349707855442545,
      "grad_norm": 0.8557821729072761,
      "learning_rate": 5e-06,
      "loss": 0.5815,
      "step": 2010
    },
    {
      "epoch": 0.43713481930318115,
      "grad_norm": 0.7980272351612475,
      "learning_rate": 5e-06,
      "loss": 0.5782,
      "step": 2020
    },
    {
      "epoch": 0.43929885306210775,
      "grad_norm": 0.7855484364592483,
      "learning_rate": 5e-06,
      "loss": 0.565,
      "step": 2030
    },
    {
      "epoch": 0.4414628868210344,
      "grad_norm": 0.742503374275201,
      "learning_rate": 5e-06,
      "loss": 0.5726,
      "step": 2040
    },
    {
      "epoch": 0.44362692057996106,
      "grad_norm": 0.8265176138612144,
      "learning_rate": 5e-06,
      "loss": 0.565,
      "step": 2050
    },
    {
      "epoch": 0.4457909543388877,
      "grad_norm": 0.7910125817205834,
      "learning_rate": 5e-06,
      "loss": 0.5738,
      "step": 2060
    },
    {
      "epoch": 0.4479549880978143,
      "grad_norm": 0.7624732216492297,
      "learning_rate": 5e-06,
      "loss": 0.5539,
      "step": 2070
    },
    {
      "epoch": 0.45011902185674096,
      "grad_norm": 0.8024443330805618,
      "learning_rate": 5e-06,
      "loss": 0.5684,
      "step": 2080
    },
    {
      "epoch": 0.4522830556156676,
      "grad_norm": 0.8252242841372383,
      "learning_rate": 5e-06,
      "loss": 0.5638,
      "step": 2090
    },
    {
      "epoch": 0.45444708937459427,
      "grad_norm": 0.7918799931003818,
      "learning_rate": 5e-06,
      "loss": 0.5656,
      "step": 2100
    },
    {
      "epoch": 0.45661112313352087,
      "grad_norm": 0.8503297988500775,
      "learning_rate": 5e-06,
      "loss": 0.5878,
      "step": 2110
    },
    {
      "epoch": 0.4587751568924475,
      "grad_norm": 0.7881054994019235,
      "learning_rate": 5e-06,
      "loss": 0.5688,
      "step": 2120
    },
    {
      "epoch": 0.4609391906513742,
      "grad_norm": 0.8298283284928384,
      "learning_rate": 5e-06,
      "loss": 0.5662,
      "step": 2130
    },
    {
      "epoch": 0.4631032244103008,
      "grad_norm": 2.6984894013557374,
      "learning_rate": 5e-06,
      "loss": 0.5691,
      "step": 2140
    },
    {
      "epoch": 0.4652672581692274,
      "grad_norm": 0.8293615927885423,
      "learning_rate": 5e-06,
      "loss": 0.5624,
      "step": 2150
    },
    {
      "epoch": 0.4674312919281541,
      "grad_norm": 0.7908261626595905,
      "learning_rate": 5e-06,
      "loss": 0.5601,
      "step": 2160
    },
    {
      "epoch": 0.46959532568708073,
      "grad_norm": 0.8407395655282288,
      "learning_rate": 5e-06,
      "loss": 0.5789,
      "step": 2170
    },
    {
      "epoch": 0.47175935944600733,
      "grad_norm": 0.7855312267072224,
      "learning_rate": 5e-06,
      "loss": 0.5592,
      "step": 2180
    },
    {
      "epoch": 0.473923393204934,
      "grad_norm": 0.8177276022998019,
      "learning_rate": 5e-06,
      "loss": 0.5642,
      "step": 2190
    },
    {
      "epoch": 0.47608742696386064,
      "grad_norm": 0.7674903085772374,
      "learning_rate": 5e-06,
      "loss": 0.559,
      "step": 2200
    },
    {
      "epoch": 0.4782514607227873,
      "grad_norm": 0.8138473478647174,
      "learning_rate": 5e-06,
      "loss": 0.5744,
      "step": 2210
    },
    {
      "epoch": 0.4804154944817139,
      "grad_norm": 0.7985572678524506,
      "learning_rate": 5e-06,
      "loss": 0.5608,
      "step": 2220
    },
    {
      "epoch": 0.48257952824064054,
      "grad_norm": 0.7978238400519106,
      "learning_rate": 5e-06,
      "loss": 0.56,
      "step": 2230
    },
    {
      "epoch": 0.4847435619995672,
      "grad_norm": 0.786485961749514,
      "learning_rate": 5e-06,
      "loss": 0.5806,
      "step": 2240
    },
    {
      "epoch": 0.48690759575849385,
      "grad_norm": 0.7835696169083621,
      "learning_rate": 5e-06,
      "loss": 0.5813,
      "step": 2250
    },
    {
      "epoch": 0.48907162951742045,
      "grad_norm": 0.8038286577487808,
      "learning_rate": 5e-06,
      "loss": 0.5787,
      "step": 2260
    },
    {
      "epoch": 0.4912356632763471,
      "grad_norm": 0.7949579187720448,
      "learning_rate": 5e-06,
      "loss": 0.5603,
      "step": 2270
    },
    {
      "epoch": 0.49339969703527375,
      "grad_norm": 0.7996876857887395,
      "learning_rate": 5e-06,
      "loss": 0.564,
      "step": 2280
    },
    {
      "epoch": 0.4955637307942004,
      "grad_norm": 0.7523979963095976,
      "learning_rate": 5e-06,
      "loss": 0.572,
      "step": 2290
    },
    {
      "epoch": 0.497727764553127,
      "grad_norm": 0.7799060855112318,
      "learning_rate": 5e-06,
      "loss": 0.5512,
      "step": 2300
    },
    {
      "epoch": 0.49989179831205366,
      "grad_norm": 0.8354130726326037,
      "learning_rate": 5e-06,
      "loss": 0.5672,
      "step": 2310
    },
    {
      "epoch": 0.5020558320709803,
      "grad_norm": 0.8174289167975042,
      "learning_rate": 5e-06,
      "loss": 0.5745,
      "step": 2320
    },
    {
      "epoch": 0.504219865829907,
      "grad_norm": 0.7797704636452041,
      "learning_rate": 5e-06,
      "loss": 0.5785,
      "step": 2330
    },
    {
      "epoch": 0.5063838995888336,
      "grad_norm": 0.8049184278922541,
      "learning_rate": 5e-06,
      "loss": 0.5663,
      "step": 2340
    },
    {
      "epoch": 0.5085479333477603,
      "grad_norm": 0.79714737630827,
      "learning_rate": 5e-06,
      "loss": 0.5696,
      "step": 2350
    },
    {
      "epoch": 0.5107119671066869,
      "grad_norm": 0.8079273656180676,
      "learning_rate": 5e-06,
      "loss": 0.5611,
      "step": 2360
    },
    {
      "epoch": 0.5128760008656135,
      "grad_norm": 0.7996864982834264,
      "learning_rate": 5e-06,
      "loss": 0.5634,
      "step": 2370
    },
    {
      "epoch": 0.5150400346245402,
      "grad_norm": 0.7392249752105758,
      "learning_rate": 5e-06,
      "loss": 0.5441,
      "step": 2380
    },
    {
      "epoch": 0.5172040683834668,
      "grad_norm": 0.8235210489123114,
      "learning_rate": 5e-06,
      "loss": 0.5616,
      "step": 2390
    },
    {
      "epoch": 0.5193681021423934,
      "grad_norm": 0.7859990977815138,
      "learning_rate": 5e-06,
      "loss": 0.5738,
      "step": 2400
    },
    {
      "epoch": 0.5215321359013201,
      "grad_norm": 0.8006842059420661,
      "learning_rate": 5e-06,
      "loss": 0.5669,
      "step": 2410
    },
    {
      "epoch": 0.5236961696602467,
      "grad_norm": 0.8301226308008953,
      "learning_rate": 5e-06,
      "loss": 0.5585,
      "step": 2420
    },
    {
      "epoch": 0.5258602034191734,
      "grad_norm": 0.8116242656076512,
      "learning_rate": 5e-06,
      "loss": 0.5575,
      "step": 2430
    },
    {
      "epoch": 0.5280242371781,
      "grad_norm": 0.7951327769075789,
      "learning_rate": 5e-06,
      "loss": 0.5623,
      "step": 2440
    },
    {
      "epoch": 0.5301882709370266,
      "grad_norm": 0.7632924959396983,
      "learning_rate": 5e-06,
      "loss": 0.5557,
      "step": 2450
    },
    {
      "epoch": 0.5323523046959533,
      "grad_norm": 0.7703765895080564,
      "learning_rate": 5e-06,
      "loss": 0.5674,
      "step": 2460
    },
    {
      "epoch": 0.5345163384548799,
      "grad_norm": 0.7519955078038866,
      "learning_rate": 5e-06,
      "loss": 0.5517,
      "step": 2470
    },
    {
      "epoch": 0.5366803722138065,
      "grad_norm": 0.777945289031897,
      "learning_rate": 5e-06,
      "loss": 0.5503,
      "step": 2480
    },
    {
      "epoch": 0.5388444059727332,
      "grad_norm": 0.7925318160395382,
      "learning_rate": 5e-06,
      "loss": 0.5657,
      "step": 2490
    },
    {
      "epoch": 0.5410084397316598,
      "grad_norm": 0.8170912915049142,
      "learning_rate": 5e-06,
      "loss": 0.5666,
      "step": 2500
    },
    {
      "epoch": 0.5431724734905865,
      "grad_norm": 0.7848624682678103,
      "learning_rate": 5e-06,
      "loss": 0.5644,
      "step": 2510
    },
    {
      "epoch": 0.5453365072495131,
      "grad_norm": 0.8305872418987535,
      "learning_rate": 5e-06,
      "loss": 0.5579,
      "step": 2520
    },
    {
      "epoch": 0.5475005410084397,
      "grad_norm": 0.754778805221588,
      "learning_rate": 5e-06,
      "loss": 0.5447,
      "step": 2530
    },
    {
      "epoch": 0.5496645747673664,
      "grad_norm": 0.8433164922898455,
      "learning_rate": 5e-06,
      "loss": 0.577,
      "step": 2540
    },
    {
      "epoch": 0.551828608526293,
      "grad_norm": 0.7364207026132309,
      "learning_rate": 5e-06,
      "loss": 0.5502,
      "step": 2550
    },
    {
      "epoch": 0.5539926422852196,
      "grad_norm": 0.7703003076672548,
      "learning_rate": 5e-06,
      "loss": 0.5391,
      "step": 2560
    },
    {
      "epoch": 0.5561566760441463,
      "grad_norm": 0.7874453563557188,
      "learning_rate": 5e-06,
      "loss": 0.5579,
      "step": 2570
    },
    {
      "epoch": 0.5583207098030729,
      "grad_norm": 0.7946629512850162,
      "learning_rate": 5e-06,
      "loss": 0.5667,
      "step": 2580
    },
    {
      "epoch": 0.5604847435619996,
      "grad_norm": 0.7841342441931893,
      "learning_rate": 5e-06,
      "loss": 0.5552,
      "step": 2590
    },
    {
      "epoch": 0.5626487773209262,
      "grad_norm": 0.8770518493960907,
      "learning_rate": 5e-06,
      "loss": 0.5654,
      "step": 2600
    },
    {
      "epoch": 0.5648128110798528,
      "grad_norm": 0.7612803825837043,
      "learning_rate": 5e-06,
      "loss": 0.5481,
      "step": 2610
    },
    {
      "epoch": 0.5669768448387795,
      "grad_norm": 0.8112957896812497,
      "learning_rate": 5e-06,
      "loss": 0.5697,
      "step": 2620
    },
    {
      "epoch": 0.5691408785977061,
      "grad_norm": 0.8292780888969601,
      "learning_rate": 5e-06,
      "loss": 0.5631,
      "step": 2630
    },
    {
      "epoch": 0.5713049123566327,
      "grad_norm": 0.8534427911951067,
      "learning_rate": 5e-06,
      "loss": 0.5675,
      "step": 2640
    },
    {
      "epoch": 0.5734689461155594,
      "grad_norm": 0.7983159182220968,
      "learning_rate": 5e-06,
      "loss": 0.5651,
      "step": 2650
    },
    {
      "epoch": 0.575632979874486,
      "grad_norm": 0.8286911243296953,
      "learning_rate": 5e-06,
      "loss": 0.5661,
      "step": 2660
    },
    {
      "epoch": 0.5777970136334126,
      "grad_norm": 0.7873673354292886,
      "learning_rate": 5e-06,
      "loss": 0.5568,
      "step": 2670
    },
    {
      "epoch": 0.5799610473923393,
      "grad_norm": 0.8643973853008751,
      "learning_rate": 5e-06,
      "loss": 0.5529,
      "step": 2680
    },
    {
      "epoch": 0.5821250811512659,
      "grad_norm": 0.8297018704245965,
      "learning_rate": 5e-06,
      "loss": 0.5646,
      "step": 2690
    },
    {
      "epoch": 0.5842891149101926,
      "grad_norm": 0.8282391086423111,
      "learning_rate": 5e-06,
      "loss": 0.5704,
      "step": 2700
    },
    {
      "epoch": 0.5864531486691192,
      "grad_norm": 0.8185226749510477,
      "learning_rate": 5e-06,
      "loss": 0.5732,
      "step": 2710
    },
    {
      "epoch": 0.5886171824280458,
      "grad_norm": 0.7837221330454157,
      "learning_rate": 5e-06,
      "loss": 0.5668,
      "step": 2720
    },
    {
      "epoch": 0.5907812161869725,
      "grad_norm": 0.7830075768638615,
      "learning_rate": 5e-06,
      "loss": 0.5614,
      "step": 2730
    },
    {
      "epoch": 0.5929452499458991,
      "grad_norm": 0.7669203988650726,
      "learning_rate": 5e-06,
      "loss": 0.5632,
      "step": 2740
    },
    {
      "epoch": 0.5951092837048257,
      "grad_norm": 0.8064484665186933,
      "learning_rate": 5e-06,
      "loss": 0.5599,
      "step": 2750
    },
    {
      "epoch": 0.5972733174637525,
      "grad_norm": 0.8468294496439435,
      "learning_rate": 5e-06,
      "loss": 0.5451,
      "step": 2760
    },
    {
      "epoch": 0.599437351222679,
      "grad_norm": 0.9077279068821958,
      "learning_rate": 5e-06,
      "loss": 0.5724,
      "step": 2770
    },
    {
      "epoch": 0.6016013849816058,
      "grad_norm": 0.7929898760133751,
      "learning_rate": 5e-06,
      "loss": 0.5616,
      "step": 2780
    },
    {
      "epoch": 0.6037654187405324,
      "grad_norm": 0.7971762324043354,
      "learning_rate": 5e-06,
      "loss": 0.563,
      "step": 2790
    },
    {
      "epoch": 0.605929452499459,
      "grad_norm": 0.8288075182953192,
      "learning_rate": 5e-06,
      "loss": 0.5579,
      "step": 2800
    },
    {
      "epoch": 0.6080934862583857,
      "grad_norm": 0.8498140525184206,
      "learning_rate": 5e-06,
      "loss": 0.564,
      "step": 2810
    },
    {
      "epoch": 0.6102575200173123,
      "grad_norm": 0.8102245476480473,
      "learning_rate": 5e-06,
      "loss": 0.5609,
      "step": 2820
    },
    {
      "epoch": 0.6124215537762389,
      "grad_norm": 0.7890358297228374,
      "learning_rate": 5e-06,
      "loss": 0.5665,
      "step": 2830
    },
    {
      "epoch": 0.6145855875351656,
      "grad_norm": 0.7979304789718528,
      "learning_rate": 5e-06,
      "loss": 0.5485,
      "step": 2840
    },
    {
      "epoch": 0.6167496212940922,
      "grad_norm": 0.8003173415042142,
      "learning_rate": 5e-06,
      "loss": 0.5598,
      "step": 2850
    },
    {
      "epoch": 0.6189136550530189,
      "grad_norm": 0.7767086997147948,
      "learning_rate": 5e-06,
      "loss": 0.5516,
      "step": 2860
    },
    {
      "epoch": 0.6210776888119455,
      "grad_norm": 0.8045021838341775,
      "learning_rate": 5e-06,
      "loss": 0.5368,
      "step": 2870
    },
    {
      "epoch": 0.6232417225708721,
      "grad_norm": 0.7498417279355564,
      "learning_rate": 5e-06,
      "loss": 0.5518,
      "step": 2880
    },
    {
      "epoch": 0.6254057563297988,
      "grad_norm": 0.8253099324603884,
      "learning_rate": 5e-06,
      "loss": 0.5581,
      "step": 2890
    },
    {
      "epoch": 0.6275697900887254,
      "grad_norm": 0.8079450289566134,
      "learning_rate": 5e-06,
      "loss": 0.5488,
      "step": 2900
    },
    {
      "epoch": 0.629733823847652,
      "grad_norm": 0.7952980563003963,
      "learning_rate": 5e-06,
      "loss": 0.5685,
      "step": 2910
    },
    {
      "epoch": 0.6318978576065787,
      "grad_norm": 0.7835163645285544,
      "learning_rate": 5e-06,
      "loss": 0.5477,
      "step": 2920
    },
    {
      "epoch": 0.6340618913655053,
      "grad_norm": 0.8048903722703916,
      "learning_rate": 5e-06,
      "loss": 0.5596,
      "step": 2930
    },
    {
      "epoch": 0.636225925124432,
      "grad_norm": 0.81044303853561,
      "learning_rate": 5e-06,
      "loss": 0.5598,
      "step": 2940
    },
    {
      "epoch": 0.6383899588833586,
      "grad_norm": 0.8174138765074157,
      "learning_rate": 5e-06,
      "loss": 0.5505,
      "step": 2950
    },
    {
      "epoch": 0.6405539926422852,
      "grad_norm": 0.8475108197008772,
      "learning_rate": 5e-06,
      "loss": 0.5424,
      "step": 2960
    },
    {
      "epoch": 0.6427180264012119,
      "grad_norm": 0.8042022600558663,
      "learning_rate": 5e-06,
      "loss": 0.5539,
      "step": 2970
    },
    {
      "epoch": 0.6448820601601385,
      "grad_norm": 0.7700287206840692,
      "learning_rate": 5e-06,
      "loss": 0.5513,
      "step": 2980
    },
    {
      "epoch": 0.6470460939190651,
      "grad_norm": 0.8281997779232809,
      "learning_rate": 5e-06,
      "loss": 0.5467,
      "step": 2990
    },
    {
      "epoch": 0.6492101276779918,
      "grad_norm": 0.813584320341026,
      "learning_rate": 5e-06,
      "loss": 0.5603,
      "step": 3000
    },
    {
      "epoch": 0.6513741614369184,
      "grad_norm": 0.8585543401025539,
      "learning_rate": 5e-06,
      "loss": 0.5701,
      "step": 3010
    },
    {
      "epoch": 0.6535381951958451,
      "grad_norm": 0.836920539360064,
      "learning_rate": 5e-06,
      "loss": 0.5504,
      "step": 3020
    },
    {
      "epoch": 0.6557022289547717,
      "grad_norm": 1.0694200561637055,
      "learning_rate": 5e-06,
      "loss": 0.5536,
      "step": 3030
    },
    {
      "epoch": 0.6578662627136983,
      "grad_norm": 0.7556155594948578,
      "learning_rate": 5e-06,
      "loss": 0.5421,
      "step": 3040
    },
    {
      "epoch": 0.660030296472625,
      "grad_norm": 0.8263966978271399,
      "learning_rate": 5e-06,
      "loss": 0.542,
      "step": 3050
    },
    {
      "epoch": 0.6621943302315516,
      "grad_norm": 0.7959474918960274,
      "learning_rate": 5e-06,
      "loss": 0.5544,
      "step": 3060
    },
    {
      "epoch": 0.6643583639904782,
      "grad_norm": 0.782166640538943,
      "learning_rate": 5e-06,
      "loss": 0.5581,
      "step": 3070
    },
    {
      "epoch": 0.6665223977494049,
      "grad_norm": 0.8119704290024918,
      "learning_rate": 5e-06,
      "loss": 0.5652,
      "step": 3080
    },
    {
      "epoch": 0.6686864315083315,
      "grad_norm": 0.8281560870508221,
      "learning_rate": 5e-06,
      "loss": 0.5634,
      "step": 3090
    },
    {
      "epoch": 0.6708504652672582,
      "grad_norm": 0.8095783840417501,
      "learning_rate": 5e-06,
      "loss": 0.5601,
      "step": 3100
    },
    {
      "epoch": 0.6730144990261848,
      "grad_norm": 0.795068179288108,
      "learning_rate": 5e-06,
      "loss": 0.5678,
      "step": 3110
    },
    {
      "epoch": 0.6751785327851114,
      "grad_norm": 0.7792728010855305,
      "learning_rate": 5e-06,
      "loss": 0.5607,
      "step": 3120
    },
    {
      "epoch": 0.6773425665440381,
      "grad_norm": 0.8179451854658257,
      "learning_rate": 5e-06,
      "loss": 0.5435,
      "step": 3130
    },
    {
      "epoch": 0.6795066003029647,
      "grad_norm": 0.7667511572553716,
      "learning_rate": 5e-06,
      "loss": 0.5596,
      "step": 3140
    },
    {
      "epoch": 0.6816706340618913,
      "grad_norm": 0.8029813372366518,
      "learning_rate": 5e-06,
      "loss": 0.55,
      "step": 3150
    },
    {
      "epoch": 0.683834667820818,
      "grad_norm": 0.7987681487999668,
      "learning_rate": 5e-06,
      "loss": 0.5451,
      "step": 3160
    },
    {
      "epoch": 0.6859987015797446,
      "grad_norm": 0.779467033707979,
      "learning_rate": 5e-06,
      "loss": 0.543,
      "step": 3170
    },
    {
      "epoch": 0.6881627353386713,
      "grad_norm": 0.7628766043647324,
      "learning_rate": 5e-06,
      "loss": 0.549,
      "step": 3180
    },
    {
      "epoch": 0.6903267690975979,
      "grad_norm": 0.7850057643186282,
      "learning_rate": 5e-06,
      "loss": 0.5668,
      "step": 3190
    },
    {
      "epoch": 0.6924908028565245,
      "grad_norm": 0.8407107964724864,
      "learning_rate": 5e-06,
      "loss": 0.5501,
      "step": 3200
    },
    {
      "epoch": 0.6946548366154512,
      "grad_norm": 0.7767999916714133,
      "learning_rate": 5e-06,
      "loss": 0.543,
      "step": 3210
    },
    {
      "epoch": 0.6968188703743778,
      "grad_norm": 0.7996258238832009,
      "learning_rate": 5e-06,
      "loss": 0.5483,
      "step": 3220
    },
    {
      "epoch": 0.6989829041333044,
      "grad_norm": 0.8377643899045712,
      "learning_rate": 5e-06,
      "loss": 0.5487,
      "step": 3230
    },
    {
      "epoch": 0.7011469378922311,
      "grad_norm": 0.7514752154537118,
      "learning_rate": 5e-06,
      "loss": 0.5597,
      "step": 3240
    },
    {
      "epoch": 0.7033109716511577,
      "grad_norm": 0.8416197122709502,
      "learning_rate": 5e-06,
      "loss": 0.5565,
      "step": 3250
    },
    {
      "epoch": 0.7054750054100843,
      "grad_norm": 0.7931722916017413,
      "learning_rate": 5e-06,
      "loss": 0.5546,
      "step": 3260
    },
    {
      "epoch": 0.707639039169011,
      "grad_norm": 0.8217577869277688,
      "learning_rate": 5e-06,
      "loss": 0.5653,
      "step": 3270
    },
    {
      "epoch": 0.7098030729279377,
      "grad_norm": 0.7118453738807855,
      "learning_rate": 5e-06,
      "loss": 0.5402,
      "step": 3280
    },
    {
      "epoch": 0.7119671066868644,
      "grad_norm": 0.7697241307191445,
      "learning_rate": 5e-06,
      "loss": 0.5589,
      "step": 3290
    },
    {
      "epoch": 0.714131140445791,
      "grad_norm": 0.802274974087682,
      "learning_rate": 5e-06,
      "loss": 0.5551,
      "step": 3300
    },
    {
      "epoch": 0.7162951742047176,
      "grad_norm": 0.792023853084678,
      "learning_rate": 5e-06,
      "loss": 0.5493,
      "step": 3310
    },
    {
      "epoch": 0.7184592079636443,
      "grad_norm": 0.7544132296053949,
      "learning_rate": 5e-06,
      "loss": 0.5393,
      "step": 3320
    },
    {
      "epoch": 0.7206232417225709,
      "grad_norm": 0.7980914984239874,
      "learning_rate": 5e-06,
      "loss": 0.5433,
      "step": 3330
    },
    {
      "epoch": 0.7227872754814975,
      "grad_norm": 0.784437501269065,
      "learning_rate": 5e-06,
      "loss": 0.5568,
      "step": 3340
    },
    {
      "epoch": 0.7249513092404242,
      "grad_norm": 0.7614522809136514,
      "learning_rate": 5e-06,
      "loss": 0.5455,
      "step": 3350
    },
    {
      "epoch": 0.7271153429993508,
      "grad_norm": 0.7734683988885462,
      "learning_rate": 5e-06,
      "loss": 0.5501,
      "step": 3360
    },
    {
      "epoch": 0.7292793767582775,
      "grad_norm": 0.7866152633301271,
      "learning_rate": 5e-06,
      "loss": 0.556,
      "step": 3370
    },
    {
      "epoch": 0.7314434105172041,
      "grad_norm": 0.7702246648713589,
      "learning_rate": 5e-06,
      "loss": 0.5497,
      "step": 3380
    },
    {
      "epoch": 0.7336074442761307,
      "grad_norm": 0.7600423928776777,
      "learning_rate": 5e-06,
      "loss": 0.5505,
      "step": 3390
    },
    {
      "epoch": 0.7357714780350574,
      "grad_norm": 0.807615131927307,
      "learning_rate": 5e-06,
      "loss": 0.5634,
      "step": 3400
    },
    {
      "epoch": 0.737935511793984,
      "grad_norm": 0.7922705128025508,
      "learning_rate": 5e-06,
      "loss": 0.558,
      "step": 3410
    },
    {
      "epoch": 0.7400995455529106,
      "grad_norm": 0.8339598322147505,
      "learning_rate": 5e-06,
      "loss": 0.552,
      "step": 3420
    },
    {
      "epoch": 0.7422635793118373,
      "grad_norm": 0.7464262500668064,
      "learning_rate": 5e-06,
      "loss": 0.5483,
      "step": 3430
    },
    {
      "epoch": 0.7444276130707639,
      "grad_norm": 0.7741739625171083,
      "learning_rate": 5e-06,
      "loss": 0.5573,
      "step": 3440
    },
    {
      "epoch": 0.7465916468296906,
      "grad_norm": 0.7453424498568156,
      "learning_rate": 5e-06,
      "loss": 0.5438,
      "step": 3450
    },
    {
      "epoch": 0.7487556805886172,
      "grad_norm": 0.754303311641887,
      "learning_rate": 5e-06,
      "loss": 0.5548,
      "step": 3460
    },
    {
      "epoch": 0.7509197143475438,
      "grad_norm": 0.8689734619851907,
      "learning_rate": 5e-06,
      "loss": 0.556,
      "step": 3470
    },
    {
      "epoch": 0.7530837481064705,
      "grad_norm": 0.7773889781541226,
      "learning_rate": 5e-06,
      "loss": 0.5494,
      "step": 3480
    },
    {
      "epoch": 0.7552477818653971,
      "grad_norm": 0.7849782050400904,
      "learning_rate": 5e-06,
      "loss": 0.5537,
      "step": 3490
    },
    {
      "epoch": 0.7574118156243237,
      "grad_norm": 0.7637907513233742,
      "learning_rate": 5e-06,
      "loss": 0.5559,
      "step": 3500
    },
    {
      "epoch": 0.7595758493832504,
      "grad_norm": 0.7679605914857784,
      "learning_rate": 5e-06,
      "loss": 0.5576,
      "step": 3510
    },
    {
      "epoch": 0.761739883142177,
      "grad_norm": 0.828819439384587,
      "learning_rate": 5e-06,
      "loss": 0.566,
      "step": 3520
    },
    {
      "epoch": 0.7639039169011037,
      "grad_norm": 0.8446403352206237,
      "learning_rate": 5e-06,
      "loss": 0.5583,
      "step": 3530
    },
    {
      "epoch": 0.7660679506600303,
      "grad_norm": 0.8075329194131221,
      "learning_rate": 5e-06,
      "loss": 0.5592,
      "step": 3540
    },
    {
      "epoch": 0.7682319844189569,
      "grad_norm": 0.8442605962725928,
      "learning_rate": 5e-06,
      "loss": 0.5439,
      "step": 3550
    },
    {
      "epoch": 0.7703960181778836,
      "grad_norm": 0.8588068255220394,
      "learning_rate": 5e-06,
      "loss": 0.5587,
      "step": 3560
    },
    {
      "epoch": 0.7725600519368102,
      "grad_norm": 0.8524091917859525,
      "learning_rate": 5e-06,
      "loss": 0.5517,
      "step": 3570
    },
    {
      "epoch": 0.7747240856957368,
      "grad_norm": 0.8244971263231196,
      "learning_rate": 5e-06,
      "loss": 0.561,
      "step": 3580
    },
    {
      "epoch": 0.7768881194546635,
      "grad_norm": 0.7627478595291142,
      "learning_rate": 5e-06,
      "loss": 0.5641,
      "step": 3590
    },
    {
      "epoch": 0.7790521532135901,
      "grad_norm": 0.8086037426148818,
      "learning_rate": 5e-06,
      "loss": 0.5621,
      "step": 3600
    },
    {
      "epoch": 0.7812161869725168,
      "grad_norm": 0.791930301424169,
      "learning_rate": 5e-06,
      "loss": 0.5471,
      "step": 3610
    },
    {
      "epoch": 0.7833802207314434,
      "grad_norm": 0.7878241907870572,
      "learning_rate": 5e-06,
      "loss": 0.5521,
      "step": 3620
    },
    {
      "epoch": 0.78554425449037,
      "grad_norm": 0.7830841775537941,
      "learning_rate": 5e-06,
      "loss": 0.5467,
      "step": 3630
    },
    {
      "epoch": 0.7877082882492967,
      "grad_norm": 0.7623641467313392,
      "learning_rate": 5e-06,
      "loss": 0.5336,
      "step": 3640
    },
    {
      "epoch": 0.7898723220082233,
      "grad_norm": 0.8232677684209438,
      "learning_rate": 5e-06,
      "loss": 0.5651,
      "step": 3650
    },
    {
      "epoch": 0.7920363557671499,
      "grad_norm": 0.761362883743628,
      "learning_rate": 5e-06,
      "loss": 0.5597,
      "step": 3660
    },
    {
      "epoch": 0.7942003895260766,
      "grad_norm": 0.8637874664285103,
      "learning_rate": 5e-06,
      "loss": 0.5602,
      "step": 3670
    },
    {
      "epoch": 0.7963644232850032,
      "grad_norm": 0.8267668782394713,
      "learning_rate": 5e-06,
      "loss": 0.5561,
      "step": 3680
    },
    {
      "epoch": 0.7985284570439299,
      "grad_norm": 0.817446940423478,
      "learning_rate": 5e-06,
      "loss": 0.5575,
      "step": 3690
    },
    {
      "epoch": 0.8006924908028565,
      "grad_norm": 0.7818597023986829,
      "learning_rate": 5e-06,
      "loss": 0.5604,
      "step": 3700
    },
    {
      "epoch": 0.8028565245617831,
      "grad_norm": 0.776499232049182,
      "learning_rate": 5e-06,
      "loss": 0.5643,
      "step": 3710
    },
    {
      "epoch": 0.8050205583207098,
      "grad_norm": 0.8364781671818916,
      "learning_rate": 5e-06,
      "loss": 0.5438,
      "step": 3720
    },
    {
      "epoch": 0.8071845920796364,
      "grad_norm": 0.8397874518563361,
      "learning_rate": 5e-06,
      "loss": 0.5513,
      "step": 3730
    },
    {
      "epoch": 0.809348625838563,
      "grad_norm": 0.7969750238399832,
      "learning_rate": 5e-06,
      "loss": 0.5402,
      "step": 3740
    },
    {
      "epoch": 0.8115126595974897,
      "grad_norm": 0.9068956852645959,
      "learning_rate": 5e-06,
      "loss": 0.5569,
      "step": 3750
    },
    {
      "epoch": 0.8136766933564163,
      "grad_norm": 0.7536728350124055,
      "learning_rate": 5e-06,
      "loss": 0.5419,
      "step": 3760
    },
    {
      "epoch": 0.815840727115343,
      "grad_norm": 0.7676812187895282,
      "learning_rate": 5e-06,
      "loss": 0.547,
      "step": 3770
    },
    {
      "epoch": 0.8180047608742697,
      "grad_norm": 0.762833586705676,
      "learning_rate": 5e-06,
      "loss": 0.5521,
      "step": 3780
    },
    {
      "epoch": 0.8201687946331963,
      "grad_norm": 0.756771887345399,
      "learning_rate": 5e-06,
      "loss": 0.5556,
      "step": 3790
    },
    {
      "epoch": 0.822332828392123,
      "grad_norm": 0.8734848583188696,
      "learning_rate": 5e-06,
      "loss": 0.5545,
      "step": 3800
    },
    {
      "epoch": 0.8244968621510496,
      "grad_norm": 0.8004103342026402,
      "learning_rate": 5e-06,
      "loss": 0.5513,
      "step": 3810
    },
    {
      "epoch": 0.8266608959099762,
      "grad_norm": 0.7719483146665875,
      "learning_rate": 5e-06,
      "loss": 0.5446,
      "step": 3820
    },
    {
      "epoch": 0.8288249296689029,
      "grad_norm": 0.7744925484130326,
      "learning_rate": 5e-06,
      "loss": 0.5635,
      "step": 3830
    },
    {
      "epoch": 0.8309889634278295,
      "grad_norm": 0.7562447275301897,
      "learning_rate": 5e-06,
      "loss": 0.5444,
      "step": 3840
    },
    {
      "epoch": 0.8331529971867561,
      "grad_norm": 0.7924604852102323,
      "learning_rate": 5e-06,
      "loss": 0.5421,
      "step": 3850
    },
    {
      "epoch": 0.8353170309456828,
      "grad_norm": 0.8520564532781372,
      "learning_rate": 5e-06,
      "loss": 0.5601,
      "step": 3860
    },
    {
      "epoch": 0.8374810647046094,
      "grad_norm": 0.8746743288593334,
      "learning_rate": 5e-06,
      "loss": 0.5509,
      "step": 3870
    },
    {
      "epoch": 0.8396450984635361,
      "grad_norm": 0.7873244885756802,
      "learning_rate": 5e-06,
      "loss": 0.5472,
      "step": 3880
    },
    {
      "epoch": 0.8418091322224627,
      "grad_norm": 0.8209663148355055,
      "learning_rate": 5e-06,
      "loss": 0.5385,
      "step": 3890
    },
    {
      "epoch": 0.8439731659813893,
      "grad_norm": 0.819629928526103,
      "learning_rate": 5e-06,
      "loss": 0.5392,
      "step": 3900
    },
    {
      "epoch": 0.846137199740316,
      "grad_norm": 0.8584352651397266,
      "learning_rate": 5e-06,
      "loss": 0.5554,
      "step": 3910
    },
    {
      "epoch": 0.8483012334992426,
      "grad_norm": 0.8503960560319187,
      "learning_rate": 5e-06,
      "loss": 0.5363,
      "step": 3920
    },
    {
      "epoch": 0.8504652672581692,
      "grad_norm": 0.8108480185136313,
      "learning_rate": 5e-06,
      "loss": 0.5495,
      "step": 3930
    },
    {
      "epoch": 0.8526293010170959,
      "grad_norm": 0.7373181469886592,
      "learning_rate": 5e-06,
      "loss": 0.5734,
      "step": 3940
    },
    {
      "epoch": 0.8547933347760225,
      "grad_norm": 0.8163364176974437,
      "learning_rate": 5e-06,
      "loss": 0.5599,
      "step": 3950
    },
    {
      "epoch": 0.8569573685349492,
      "grad_norm": 0.7959374117805932,
      "learning_rate": 5e-06,
      "loss": 0.5454,
      "step": 3960
    },
    {
      "epoch": 0.8591214022938758,
      "grad_norm": 0.812330127649017,
      "learning_rate": 5e-06,
      "loss": 0.5471,
      "step": 3970
    },
    {
      "epoch": 0.8612854360528024,
      "grad_norm": 0.7945522211940125,
      "learning_rate": 5e-06,
      "loss": 0.5403,
      "step": 3980
    },
    {
      "epoch": 0.8634494698117291,
      "grad_norm": 0.7696968400441544,
      "learning_rate": 5e-06,
      "loss": 0.5423,
      "step": 3990
    },
    {
      "epoch": 0.8656135035706557,
      "grad_norm": 0.7450513825177912,
      "learning_rate": 5e-06,
      "loss": 0.5405,
      "step": 4000
    },
    {
      "epoch": 0.8677775373295823,
      "grad_norm": 0.8008938478720109,
      "learning_rate": 5e-06,
      "loss": 0.5453,
      "step": 4010
    },
    {
      "epoch": 0.869941571088509,
      "grad_norm": 0.7624217719794756,
      "learning_rate": 5e-06,
      "loss": 0.536,
      "step": 4020
    },
    {
      "epoch": 0.8721056048474356,
      "grad_norm": 0.7876625538590819,
      "learning_rate": 5e-06,
      "loss": 0.55,
      "step": 4030
    },
    {
      "epoch": 0.8742696386063623,
      "grad_norm": 0.7667825013844164,
      "learning_rate": 5e-06,
      "loss": 0.5425,
      "step": 4040
    },
    {
      "epoch": 0.8764336723652889,
      "grad_norm": 0.8391975491875862,
      "learning_rate": 5e-06,
      "loss": 0.5619,
      "step": 4050
    },
    {
      "epoch": 0.8785977061242155,
      "grad_norm": 0.7580142563713078,
      "learning_rate": 5e-06,
      "loss": 0.5395,
      "step": 4060
    },
    {
      "epoch": 0.8807617398831422,
      "grad_norm": 0.873766541325802,
      "learning_rate": 5e-06,
      "loss": 0.5498,
      "step": 4070
    },
    {
      "epoch": 0.8829257736420688,
      "grad_norm": 0.8045714640840378,
|
"learning_rate": 5e-06, |
|
"loss": 0.5565, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.8850898074009954, |
|
"grad_norm": 0.7839455981681682, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5568, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.8872538411599221, |
|
"grad_norm": 0.7507205318651358, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5446, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.8894178749188487, |
|
"grad_norm": 0.7770875343736735, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5478, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.8915819086777754, |
|
"grad_norm": 0.739795465311441, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5488, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.893745942436702, |
|
"grad_norm": 0.8343986949867656, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5502, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.8959099761956286, |
|
"grad_norm": 0.8072361062581804, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5321, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.8980740099545553, |
|
"grad_norm": 0.7921719388331905, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5469, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9002380437134819, |
|
"grad_norm": 0.7996953628324828, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5557, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.9024020774724085, |
|
"grad_norm": 0.7272138954178266, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5472, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.9045661112313352, |
|
"grad_norm": 0.8812123709262756, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5349, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.9067301449902618, |
|
"grad_norm": 0.8356868824967147, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5362, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.9088941787491885, |
|
"grad_norm": 0.7972289983410239, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5493, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.9110582125081151, |
|
"grad_norm": 0.8872937838904918, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5406, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.9132222462670417, |
|
"grad_norm": 0.7649271319041719, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5508, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.9153862800259684, |
|
"grad_norm": 0.8237087753008616, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5448, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.917550313784895, |
|
"grad_norm": 0.7789310787368502, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5477, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.9197143475438216, |
|
"grad_norm": 0.8216295495680673, |
|
"learning_rate": 5e-06, |
|
"loss": 0.562, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.9218783813027484, |
|
"grad_norm": 0.8122833746800567, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5434, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.924042415061675, |
|
"grad_norm": 0.798727376063973, |
|
"learning_rate": 5e-06, |
|
"loss": 0.556, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.9262064488206015, |
|
"grad_norm": 0.7984668327444104, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5397, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.9283704825795283, |
|
"grad_norm": 0.7812328766656631, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5348, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.9305345163384549, |
|
"grad_norm": 0.7893412102379088, |
|
"learning_rate": 5e-06, |
|
"loss": 0.549, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.9326985500973816, |
|
"grad_norm": 0.7981646202301648, |
|
"learning_rate": 5e-06, |
|
"loss": 0.526, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.9348625838563082, |
|
"grad_norm": 0.7730443584141122, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5339, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.9370266176152348, |
|
"grad_norm": 0.844663764946019, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5458, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.9391906513741615, |
|
"grad_norm": 0.7794979343833512, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5505, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.9413546851330881, |
|
"grad_norm": 0.7837049408297568, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5497, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.9435187188920147, |
|
"grad_norm": 0.7854813181023578, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5563, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.9456827526509414, |
|
"grad_norm": 0.8390893115478139, |
|
"learning_rate": 5e-06, |
|
"loss": 0.55, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.947846786409868, |
|
"grad_norm": 0.7933589395245781, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5375, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.9500108201687947, |
|
"grad_norm": 0.7553926376395359, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5527, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.9521748539277213, |
|
"grad_norm": 0.745261362871144, |
|
"learning_rate": 5e-06, |
|
"loss": 0.539, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.9543388876866479, |
|
"grad_norm": 0.8391407287157648, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5523, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.9565029214455746, |
|
"grad_norm": 0.8243378126848461, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5445, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.9586669552045012, |
|
"grad_norm": 0.8075823666456592, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5377, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.9608309889634278, |
|
"grad_norm": 0.813436060772594, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5519, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.9629950227223545, |
|
"grad_norm": 0.7806826995434077, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5454, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.9651590564812811, |
|
"grad_norm": 0.7670805996089919, |
|
"learning_rate": 5e-06, |
|
"loss": 0.537, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.9673230902402078, |
|
"grad_norm": 0.82069231371166, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5457, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.9694871239991344, |
|
"grad_norm": 0.8237227003500424, |
|
"learning_rate": 5e-06, |
|
"loss": 0.556, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.971651157758061, |
|
"grad_norm": 0.8426211166204151, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5497, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.9738151915169877, |
|
"grad_norm": 0.7835202417017654, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5303, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.9759792252759143, |
|
"grad_norm": 0.8392317298621986, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5464, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.9781432590348409, |
|
"grad_norm": 0.7465598042444795, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5419, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.9803072927937676, |
|
"grad_norm": 0.7544743383197097, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5378, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.9824713265526942, |
|
"grad_norm": 0.764919840823264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5329, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.9846353603116209, |
|
"grad_norm": 0.7432743151815665, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5488, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.9867993940705475, |
|
"grad_norm": 0.8962154593923901, |
|
"learning_rate": 5e-06, |
|
"loss": 0.544, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.9889634278294741, |
|
"grad_norm": 0.8515589470297252, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5438, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.9911274615884008, |
|
"grad_norm": 0.7539916472553879, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5368, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.9932914953473274, |
|
"grad_norm": 0.7966250502268061, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5516, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.995455529106254, |
|
"grad_norm": 0.796275990986384, |
|
"learning_rate": 5e-06, |
|
"loss": 0.535, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9976195628651807, |
|
"grad_norm": 0.7572730283592524, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5399, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.9997835966241073, |
|
"grad_norm": 0.7866241644050972, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5396, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.5429847836494446, |
|
"eval_runtime": 588.4696, |
|
"eval_samples_per_second": 26.448, |
|
"eval_steps_per_second": 0.415, |
|
"step": 4621 |
|
}, |
|
{ |
|
"epoch": 1.001947630383034, |
|
"grad_norm": 0.8198402400349776, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4682, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.0041116641419605, |
|
"grad_norm": 0.7801283113390183, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4779, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.0062756979008873, |
|
"grad_norm": 0.7609745957390824, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4627, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.008439731659814, |
|
"grad_norm": 0.7821883017630472, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4713, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.0106037654187405, |
|
"grad_norm": 0.7260403031295831, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4684, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.0127677991776671, |
|
"grad_norm": 0.7812573688271381, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4736, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.0149318329365937, |
|
"grad_norm": 0.7822921675866278, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4548, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.0170958666955205, |
|
"grad_norm": 0.7694090971084214, |
|
"learning_rate": 5e-06, |
|
"loss": 0.46, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.0192599004544471, |
|
"grad_norm": 0.7459628030829183, |
|
"learning_rate": 5e-06, |
|
"loss": 0.464, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.0214239342133737, |
|
"grad_norm": 0.715134160634303, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4542, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.0235879679723003, |
|
"grad_norm": 0.7414824535187435, |
|
"learning_rate": 5e-06, |
|
"loss": 0.465, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.025752001731227, |
|
"grad_norm": 0.7453860186675011, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4605, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.0279160354901538, |
|
"grad_norm": 0.7718909976941406, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4632, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.0300800692490804, |
|
"grad_norm": 0.7645671037278959, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4736, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.032244103008007, |
|
"grad_norm": 0.7718550788244495, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4605, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.0344081367669336, |
|
"grad_norm": 0.7964499026649204, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4833, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.0365721705258601, |
|
"grad_norm": 0.834083497700352, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4766, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.0387362042847867, |
|
"grad_norm": 0.7319738782041046, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4755, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.0409002380437136, |
|
"grad_norm": 0.7462008777592158, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4688, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.0430642718026402, |
|
"grad_norm": 0.7694025080042929, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4731, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.0452283055615668, |
|
"grad_norm": 0.7712938874733282, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4783, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.0473923393204934, |
|
"grad_norm": 0.6854206720325331, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4496, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.04955637307942, |
|
"grad_norm": 0.7296190822576524, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4466, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.0517204068383468, |
|
"grad_norm": 0.8044988975714437, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4763, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.0538844405972734, |
|
"grad_norm": 0.7550484269456027, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4631, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.0560484743562, |
|
"grad_norm": 0.7716087062170465, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4638, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.0582125081151266, |
|
"grad_norm": 0.7069074185268656, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4592, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.0603765418740532, |
|
"grad_norm": 0.7406983646764749, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4661, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.0625405756329798, |
|
"grad_norm": 0.8513739665965723, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4779, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.0647046093919066, |
|
"grad_norm": 0.7741744083175378, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4629, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.0668686431508332, |
|
"grad_norm": 0.841842230732826, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4559, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.0690326769097598, |
|
"grad_norm": 0.7561799157279342, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4661, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.0711967106686864, |
|
"grad_norm": 0.7452446191339013, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4796, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.073360744427613, |
|
"grad_norm": 0.7219452780323502, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4693, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.0755247781865398, |
|
"grad_norm": 0.8011050940745716, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4625, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.0776888119454664, |
|
"grad_norm": 0.794254062662002, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4734, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.079852845704393, |
|
"grad_norm": 0.7841880358208203, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4735, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.0820168794633196, |
|
"grad_norm": 0.7541874362266996, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4719, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0841809132222462, |
|
"grad_norm": 0.7832163238689167, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4781, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.086344946981173, |
|
"grad_norm": 0.7612558574099242, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4677, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.0885089807400996, |
|
"grad_norm": 0.7457279545654795, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4568, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.0906730144990262, |
|
"grad_norm": 0.7802158241592927, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4695, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.0928370482579528, |
|
"grad_norm": 0.7523296252008893, |
|
"learning_rate": 5e-06, |
|
"loss": 0.459, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.0950010820168794, |
|
"grad_norm": 0.7317362216143073, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4594, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.097165115775806, |
|
"grad_norm": 0.8089179922502759, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4701, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.0993291495347328, |
|
"grad_norm": 0.7971117700530117, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4647, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.1014931832936594, |
|
"grad_norm": 0.7797006135536613, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4746, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.103657217052586, |
|
"grad_norm": 0.8125978920365499, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4679, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.1058212508115126, |
|
"grad_norm": 0.7785309067797123, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4621, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.1079852845704392, |
|
"grad_norm": 0.8022112834916565, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4547, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.110149318329366, |
|
"grad_norm": 0.7644085293828123, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4694, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.1123133520882926, |
|
"grad_norm": 0.7337804491792407, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4594, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.1144773858472192, |
|
"grad_norm": 0.7430781593703488, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4655, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.1166414196061458, |
|
"grad_norm": 0.7666150250001785, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4601, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.1188054533650724, |
|
"grad_norm": 0.7926818454734849, |
|
"learning_rate": 5e-06, |
|
"loss": 0.475, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.120969487123999, |
|
"grad_norm": 0.7761288586447196, |
|
"learning_rate": 5e-06, |
|
"loss": 0.464, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.1231335208829258, |
|
"grad_norm": 0.7715150614179276, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4683, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.1252975546418524, |
|
"grad_norm": 0.768487598686053, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4597, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.127461588400779, |
|
"grad_norm": 0.7780154395955231, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4815, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.1296256221597056, |
|
"grad_norm": 0.7754600935683129, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4691, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.1317896559186322, |
|
"grad_norm": 0.7168967688429924, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4576, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.133953689677559, |
|
"grad_norm": 0.7887975954709602, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4656, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.1361177234364856, |
|
"grad_norm": 0.7711181920453322, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4638, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.1382817571954122, |
|
"grad_norm": 0.8040928973948708, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4629, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.1404457909543388, |
|
"grad_norm": 0.7480038005652744, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4654, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.1426098247132654, |
|
"grad_norm": 0.7772151707383336, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4728, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.1447738584721923, |
|
"grad_norm": 0.7915680198334887, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4576, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.1469378922311189, |
|
"grad_norm": 0.7591374087955354, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4707, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.1491019259900455, |
|
"grad_norm": 0.7805726783738297, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4656, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.151265959748972, |
|
"grad_norm": 0.7721078057930081, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4711, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.1534299935078987, |
|
"grad_norm": 0.7400720348849441, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4702, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.1555940272668255, |
|
"grad_norm": 0.7435323729236517, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4778, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.157758061025752, |
|
"grad_norm": 0.7911198407125987, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4665, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.1599220947846787, |
|
"grad_norm": 0.7610463033369455, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4605, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.1620861285436053, |
|
"grad_norm": 0.7711693447277651, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4726, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.1642501623025319, |
|
"grad_norm": 0.7859946817663731, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4617, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.1664141960614585, |
|
"grad_norm": 0.7890418189022593, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4713, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.1685782298203853, |
|
"grad_norm": 0.7220427522209382, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4572, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.1707422635793119, |
|
"grad_norm": 0.7713515210815366, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4769, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.1729062973382385, |
|
"grad_norm": 0.77210689086503, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4796, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.175070331097165, |
|
"grad_norm": 0.741038220204444, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4574, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.1772343648560917, |
|
"grad_norm": 0.7831344931379454, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4566, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.1793983986150183, |
|
"grad_norm": 0.7345331592568521, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4626, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.181562432373945, |
|
"grad_norm": 0.7820999080210016, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4578, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.1837264661328717, |
|
"grad_norm": 0.7853647666817763, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4706, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.1858904998917983, |
|
"grad_norm": 0.7893513383734171, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4645, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.1880545336507249, |
|
"grad_norm": 0.7936708468940181, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4609, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.1902185674096515, |
|
"grad_norm": 0.7199140376687585, |
|
"learning_rate": 5e-06, |
|
"loss": 0.458, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.1923826011685783, |
|
"grad_norm": 0.7381096208206134, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4659, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.194546634927505, |
|
"grad_norm": 0.8083844653278189, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4683, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.1967106686864315, |
|
"grad_norm": 0.7791767856954264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4727, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.198874702445358, |
|
"grad_norm": 0.732693234829825, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4707, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.2010387362042847, |
|
"grad_norm": 0.7467511842772838, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4573, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.2032027699632115, |
|
"grad_norm": 0.7816698329179276, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4775, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.2053668037221381, |
|
"grad_norm": 0.7443727351984066, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4669, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.2075308374810647, |
|
"grad_norm": 0.7425842695291263, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4583, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.2096948712399913, |
|
"grad_norm": 0.7240789385956823, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4576, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.211858904998918, |
|
"grad_norm": 0.8030670523927637, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4684, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.2140229387578447, |
|
"grad_norm": 0.780934768631753, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4592, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.2161869725167713, |
|
"grad_norm": 0.7842677396611177, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4641, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.218351006275698, |
|
"grad_norm": 0.8130606689249475, |
|
"learning_rate": 5e-06, |
|
"loss": 0.466, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.2205150400346245, |
|
"grad_norm": 0.7815641772761741, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4741, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.2226790737935511, |
|
"grad_norm": 0.7490386272049809, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4645, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.224843107552478, |
|
"grad_norm": 0.8212925702802806, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4832, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.2270071413114045, |
|
"grad_norm": 0.7630549664820707, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4604, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.2291711750703311, |
|
"grad_norm": 0.8239583313017402, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4708, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.2313352088292577, |
|
"grad_norm": 0.760630893549288, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4694, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.2334992425881843, |
|
"grad_norm": 0.7748580962778236, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4676, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.235663276347111, |
|
"grad_norm": 0.7601867125563785, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4643, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.2378273101060377, |
|
"grad_norm": 0.7954825352678104, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4609, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.2399913438649643, |
|
"grad_norm": 0.7751059458000068, |
|
"learning_rate": 5e-06, |
|
"loss": 0.459, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.242155377623891, |
|
"grad_norm": 0.7718457396804564, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4626, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.2443194113828175, |
|
"grad_norm": 0.7754578421273243, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4638, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.2464834451417441, |
|
"grad_norm": 0.7548864065745818, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4584, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.2486474789006707, |
|
"grad_norm": 0.771889502196166, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4717, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.2508115126595976, |
|
"grad_norm": 0.7651949593744706, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4581, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.2529755464185242, |
|
"grad_norm": 0.7561690262824943, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4611, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.2551395801774508, |
|
"grad_norm": 0.800376700365697, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4522, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.2573036139363774, |
|
"grad_norm": 0.7501757361110241, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4702, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.259467647695304, |
|
"grad_norm": 0.7926927152467118, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4594, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.2616316814542308, |
|
"grad_norm": 0.7757167167590527, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4698, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.2637957152131574, |
|
"grad_norm": 0.7543427971274005, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4609, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.265959748972084, |
|
"grad_norm": 0.7029065517148244, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4576, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.2681237827310106, |
|
"grad_norm": 0.7949904996002711, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4728, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.2702878164899372, |
|
"grad_norm": 0.8061316601164296, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4683, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.272451850248864, |
|
"grad_norm": 0.7679124071082001, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4636, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.2746158840077906, |
|
"grad_norm": 0.7268154855749079, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4613, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.2767799177667172, |
|
"grad_norm": 0.7581450065958767, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4846, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.2789439515256438, |
|
"grad_norm": 0.7728758954937528, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4679, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.2811079852845704, |
|
"grad_norm": 0.7757132304247395, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4735, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.2832720190434972, |
|
"grad_norm": 0.7385618172633933, |
|
"learning_rate": 5e-06, |
|
"loss": 0.449, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.2854360528024238, |
|
"grad_norm": 0.7462092766571883, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4607, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.2876000865613504, |
|
"grad_norm": 0.7571240627556616, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4617, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.289764120320277, |
|
"grad_norm": 0.788671204299716, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4663, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.2919281540792036, |
|
"grad_norm": 0.7494295917052568, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4653, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.2940921878381304, |
|
"grad_norm": 0.7384360351296172, |
|
"learning_rate": 5e-06, |
|
"loss": 0.465, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.2962562215970568, |
|
"grad_norm": 0.7352800951816788, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4826, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.2984202553559836, |
|
"grad_norm": 0.7602237811687234, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4567, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3005842891149102, |
|
"grad_norm": 0.7530207047121187, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4684, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.3027483228738368, |
|
"grad_norm": 0.7356178071902953, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4651, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.3049123566327634, |
|
"grad_norm": 0.7797947935109668, |
|
"learning_rate": 5e-06, |
|
"loss": 0.466, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.30707639039169, |
|
"grad_norm": 0.7574370101991291, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4711, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.3092404241506168, |
|
"grad_norm": 0.7688758966039535, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4583, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.3114044579095434, |
|
"grad_norm": 0.7745547556636165, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4761, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.31356849166847, |
|
"grad_norm": 0.7728585651214986, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4749, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.3157325254273966, |
|
"grad_norm": 0.8170846148828539, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4657, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.3178965591863232, |
|
"grad_norm": 0.7264085797482112, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4582, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.32006059294525, |
|
"grad_norm": 0.719801823367032, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4538, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.3222246267041766, |
|
"grad_norm": 0.7607932021425476, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4715, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.3243886604631032, |
|
"grad_norm": 0.775171785274517, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4647, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.3265526942220298, |
|
"grad_norm": 0.7619426266950845, |
|
"learning_rate": 5e-06, |
|
"loss": 0.472, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.3287167279809564, |
|
"grad_norm": 0.7591540829634035, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4571, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.3308807617398832, |
|
"grad_norm": 0.7905338255727203, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4732, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.3330447954988098, |
|
"grad_norm": 0.8612647077572982, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4702, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.3352088292577364, |
|
"grad_norm": 0.7786711376086402, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4717, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.337372863016663, |
|
"grad_norm": 0.7846310010615345, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4625, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.3395368967755896, |
|
"grad_norm": 0.8811606228034371, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4702, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.3417009305345164, |
|
"grad_norm": 0.8146235287621797, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4596, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.343864964293443, |
|
"grad_norm": 0.7533772816124655, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4582, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.3460289980523696, |
|
"grad_norm": 0.7830512487247953, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4736, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.3481930318112962, |
|
"grad_norm": 0.7797798896935799, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4681, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.3503570655702228, |
|
"grad_norm": 0.7932078006037683, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4712, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.3525210993291497, |
|
"grad_norm": 0.8036931232108603, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4672, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.3546851330880763, |
|
"grad_norm": 0.7368268217791855, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4695, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.3568491668470029, |
|
"grad_norm": 0.7359272221589768, |
|
"learning_rate": 5e-06, |
|
"loss": 0.466, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.3590132006059295, |
|
"grad_norm": 0.8200622976978171, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4723, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.361177234364856, |
|
"grad_norm": 0.7939699615671225, |
|
"learning_rate": 5e-06, |
|
"loss": 0.468, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.3633412681237829, |
|
"grad_norm": 0.800957707217982, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4529, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.3655053018827092, |
|
"grad_norm": 0.80512867222786, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4659, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.367669335641636, |
|
"grad_norm": 0.7360035804063453, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4635, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.3698333694005627, |
|
"grad_norm": 0.7716321408040111, |
|
"learning_rate": 5e-06, |
|
"loss": 0.454, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.3719974031594893, |
|
"grad_norm": 0.776027207843901, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4613, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.3741614369184159, |
|
"grad_norm": 0.7658728763444741, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4713, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.3763254706773425, |
|
"grad_norm": 0.7884808842340179, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4746, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.3784895044362693, |
|
"grad_norm": 0.7775593156734237, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4677, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.3806535381951959, |
|
"grad_norm": 0.7949890338718408, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4635, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.3828175719541225, |
|
"grad_norm": 0.7828096111334193, |
|
"learning_rate": 5e-06, |
|
"loss": 0.462, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.384981605713049, |
|
"grad_norm": 0.7814745763059232, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4718, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.3871456394719757, |
|
"grad_norm": 0.854764057136108, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4734, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.3893096732309025, |
|
"grad_norm": 0.8266509384735298, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4698, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.391473706989829, |
|
"grad_norm": 0.7213913037415728, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4487, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.3936377407487557, |
|
"grad_norm": 0.7231480436825534, |
|
"learning_rate": 5e-06, |
|
"loss": 0.459, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.3958017745076823, |
|
"grad_norm": 0.7509805363624873, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4621, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.3979658082666089, |
|
"grad_norm": 0.737834748034084, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4695, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.4001298420255357, |
|
"grad_norm": 0.7620010469419662, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4623, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.4022938757844623, |
|
"grad_norm": 0.7323866973353454, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4765, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.404457909543389, |
|
"grad_norm": 0.7648536342318661, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4644, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.4066219433023155, |
|
"grad_norm": 0.7674198954299694, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4642, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.408785977061242, |
|
"grad_norm": 0.746120158345663, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4597, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.410950010820169, |
|
"grad_norm": 0.7650565264216188, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4594, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.4131140445790955, |
|
"grad_norm": 0.7369052966841463, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4599, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.415278078338022, |
|
"grad_norm": 0.7661658293815887, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4788, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.4174421120969487, |
|
"grad_norm": 0.8245544955088241, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4759, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.4196061458558753, |
|
"grad_norm": 0.7739803616267428, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4634, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.4217701796148021, |
|
"grad_norm": 0.7539199622313375, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4675, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.4239342133737285, |
|
"grad_norm": 0.7650706870481234, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4635, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.4260982471326553, |
|
"grad_norm": 0.7303061857380676, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4528, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.428262280891582, |
|
"grad_norm": 0.7448893505433495, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4664, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.4304263146505085, |
|
"grad_norm": 0.8093720993778148, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4847, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.432590348409435, |
|
"grad_norm": 0.7534159752837184, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4787, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.4347543821683617, |
|
"grad_norm": 0.7650400003197116, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4682, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.4369184159272885, |
|
"grad_norm": 0.7965801032383258, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4766, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.4390824496862151, |
|
"grad_norm": 0.8096153226598906, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4708, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.4412464834451417, |
|
"grad_norm": 0.7872971777972805, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4651, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.4434105172040683, |
|
"grad_norm": 0.7309523643397555, |
|
"learning_rate": 5e-06, |
|
"loss": 0.448, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.445574550962995, |
|
"grad_norm": 0.7739858235533356, |
|
"learning_rate": 5e-06, |
|
"loss": 0.463, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.4477385847219217, |
|
"grad_norm": 0.7215324022117396, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4534, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.4499026184808483, |
|
"grad_norm": 0.8295795696846178, |
|
"learning_rate": 5e-06, |
|
"loss": 0.455, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.452066652239775, |
|
"grad_norm": 0.796699047578508, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4675, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.4542306859987015, |
|
"grad_norm": 0.7661917101852191, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4612, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.4563947197576281, |
|
"grad_norm": 0.7292322135619679, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4723, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.458558753516555, |
|
"grad_norm": 0.7274092722777359, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4668, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.4607227872754815, |
|
"grad_norm": 0.7908003272383125, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4707, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.4628868210344081, |
|
"grad_norm": 0.7645468887204627, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4681, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.4650508547933347, |
|
"grad_norm": 0.770270828639935, |
|
"learning_rate": 5e-06, |
|
"loss": 0.471, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.4672148885522613, |
|
"grad_norm": 0.7506554829741943, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4792, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.4693789223111882, |
|
"grad_norm": 0.8131214331089905, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4661, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.4715429560701148, |
|
"grad_norm": 0.7015838324010227, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4711, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.4737069898290414, |
|
"grad_norm": 0.7861814435987762, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4613, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.475871023587968, |
|
"grad_norm": 0.7591866835488571, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4636, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.4780350573468946, |
|
"grad_norm": 0.7404595294517535, |
|
"learning_rate": 5e-06, |
|
"loss": 0.461, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.4801990911058214, |
|
"grad_norm": 0.7697305030650266, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4598, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.482363124864748, |
|
"grad_norm": 0.784141929033681, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4702, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.4845271586236746, |
|
"grad_norm": 0.8136152308153741, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4552, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.4866911923826012, |
|
"grad_norm": 0.7345552940901736, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4767, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.4888552261415278, |
|
"grad_norm": 0.7776472050120381, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4744, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.4910192599004546, |
|
"grad_norm": 0.751123599938991, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4678, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.493183293659381, |
|
"grad_norm": 0.7437167540645744, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4659, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.4953473274183078, |
|
"grad_norm": 0.7721598263875197, |
|
"learning_rate": 5e-06, |
|
"loss": 0.482, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.4975113611772344, |
|
"grad_norm": 0.7817667866593803, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4666, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.499675394936161, |
|
"grad_norm": 0.7694139195197549, |
|
"learning_rate": 5e-06, |
|
"loss": 0.466, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.5018394286950878, |
|
"grad_norm": 0.7795215699875335, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4632, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.5040034624540142, |
|
"grad_norm": 0.7378131555326808, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4703, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.506167496212941, |
|
"grad_norm": 0.7502895598759765, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4675, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.5083315299718676, |
|
"grad_norm": 0.8323332633475088, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4803, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.5104955637307942, |
|
"grad_norm": 0.7892580411151743, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4765, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.5126595974897208, |
|
"grad_norm": 0.7875592663001918, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4679, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.5148236312486474, |
|
"grad_norm": 0.7634571975953766, |
|
"learning_rate": 5e-06, |
|
"loss": 0.47, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.5169876650075742, |
|
"grad_norm": 0.7756829723486675, |
|
"learning_rate": 5e-06, |
|
"loss": 0.464, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.5191516987665008, |
|
"grad_norm": 0.7581561828919042, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4561, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.5213157325254274, |
|
"grad_norm": 0.8138028391909014, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4738, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.523479766284354, |
|
"grad_norm": 0.7713503967397953, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4536, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.5256438000432806, |
|
"grad_norm": 0.7721407676831082, |
|
"learning_rate": 5e-06, |
|
"loss": 0.477, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.5278078338022074, |
|
"grad_norm": 0.7835072836836937, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4781, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.5299718675611338, |
|
"grad_norm": 0.8062247813326742, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4729, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.5321359013200606, |
|
"grad_norm": 0.7136619017556197, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4531, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.5342999350789872, |
|
"grad_norm": 0.78043707583408, |
|
"learning_rate": 5e-06, |
|
"loss": 0.465, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.5364639688379138, |
|
"grad_norm": 0.7933315893100612, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4655, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.5386280025968406, |
|
"grad_norm": 0.7476138813332562, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4642, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.540792036355767, |
|
"grad_norm": 0.791596305147064, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4656, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.5429560701146938, |
|
"grad_norm": 0.7765122295933571, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4578, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.5451201038736204, |
|
"grad_norm": 0.7721038031057484, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4724, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.547284137632547, |
|
"grad_norm": 0.7889869463184822, |
|
"learning_rate": 5e-06, |
|
"loss": 0.472, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.5494481713914738, |
|
"grad_norm": 0.7326495041461524, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4587, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.5516122051504002, |
|
"grad_norm": 0.7713670879576949, |
|
"learning_rate": 5e-06, |
|
"loss": 0.472, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.553776238909327, |
|
"grad_norm": 0.7864345761195741, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4708, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.5559402726682536, |
|
"grad_norm": 0.7669143468828357, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4622, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.5581043064271802, |
|
"grad_norm": 0.8036141108986897, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4779, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.560268340186107, |
|
"grad_norm": 0.7606518551283948, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4677, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.5624323739450334, |
|
"grad_norm": 0.7411431196268208, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4659, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.5645964077039602, |
|
"grad_norm": 0.7463278194705449, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4636, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.5667604414628868, |
|
"grad_norm": 0.7916035934870831, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4844, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.5689244752218134, |
|
"grad_norm": 0.8240660319984418, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4683, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.5710885089807403, |
|
"grad_norm": 0.800318824146685, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4633, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.5732525427396666, |
|
"grad_norm": 0.8094080756414094, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4636, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.5754165764985935, |
|
"grad_norm": 0.774844371732236, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4587, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.57758061025752, |
|
"grad_norm": 0.8210197993957168, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4676, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.5797446440164467, |
|
"grad_norm": 0.7926225329081739, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4746, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.5819086777753733, |
|
"grad_norm": 0.7714683122387773, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4696, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.5840727115342998, |
|
"grad_norm": 0.7668988486606838, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4637, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.5862367452932267, |
|
"grad_norm": 0.8068671779651346, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4625, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.5884007790521533, |
|
"grad_norm": 0.7503714168675623, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4626, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.5905648128110799, |
|
"grad_norm": 0.766762311923028, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4607, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.5927288465700065, |
|
"grad_norm": 0.7400989288703183, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4586, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.594892880328933, |
|
"grad_norm": 0.7420053327580863, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4611, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.5970569140878599, |
|
"grad_norm": 0.7848315895928035, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4622, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.5992209478467863, |
|
"grad_norm": 0.7844109009233191, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4726, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.601384981605713, |
|
"grad_norm": 0.717574518313775, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4768, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.6035490153646397, |
|
"grad_norm": 0.7684187592976545, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4696, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.6057130491235663, |
|
"grad_norm": 0.7309659679104782, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4626, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.607877082882493, |
|
"grad_norm": 0.7644663148759375, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4772, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.6100411166414195, |
|
"grad_norm": 0.7764874598382566, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4646, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.6122051504003463, |
|
"grad_norm": 0.7620772054300151, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4586, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.6143691841592729, |
|
"grad_norm": 0.7820555180817107, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4636, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.6165332179181995, |
|
"grad_norm": 0.7494625835808725, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4585, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.6186972516771263, |
|
"grad_norm": 0.7521807522464258, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4584, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.6208612854360527, |
|
"grad_norm": 0.7581980433304982, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4588, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.6230253191949795, |
|
"grad_norm": 0.7627156140971617, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4741, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.625189352953906, |
|
"grad_norm": 0.7905461292081447, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4658, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.6273533867128327, |
|
"grad_norm": 0.8124704291753462, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4735, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.6295174204717595, |
|
"grad_norm": 0.749724335823781, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4629, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.631681454230686, |
|
"grad_norm": 0.7544817597703268, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4664, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.6338454879896127, |
|
"grad_norm": 0.7703639179989715, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4514, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.6360095217485393, |
|
"grad_norm": 0.790414972968879, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4647, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.638173555507466, |
|
"grad_norm": 0.777968424055043, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4687, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.6403375892663925, |
|
"grad_norm": 0.8168837011418277, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4748, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.642501623025319, |
|
"grad_norm": 0.7880218866379387, |
|
"learning_rate": 5e-06, |
|
"loss": 0.463, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.644665656784246, |
|
"grad_norm": 0.7622801106079738, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4687, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.6468296905431725, |
|
"grad_norm": 0.7536846157018885, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4646, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.6489937243020991, |
|
"grad_norm": 0.7798435120288673, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4709, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.6511577580610257, |
|
"grad_norm": 0.7550086843977464, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4641, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.6533217918199523, |
|
"grad_norm": 0.7528156940459134, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4668, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.6554858255788791, |
|
"grad_norm": 0.7718809368052605, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4616, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.6576498593378055, |
|
"grad_norm": 0.7784564880148578, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4667, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.6598138930967323, |
|
"grad_norm": 0.7397166601008868, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4691, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.661977926855659, |
|
"grad_norm": 0.7881087574473704, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4562, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.6641419606145855, |
|
"grad_norm": 0.7536684489497238, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4581, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.6663059943735123, |
|
"grad_norm": 0.7981078552903653, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4644, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.6684700281324387, |
|
"grad_norm": 0.7019669743910957, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4636, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.6706340618913655, |
|
"grad_norm": 0.7977655257782118, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4721, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.6727980956502921, |
|
"grad_norm": 0.8017386100927005, |
|
"learning_rate": 5e-06, |
|
"loss": 0.469, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.6749621294092187, |
|
"grad_norm": 0.771451526799465, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4596, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.6771261631681456, |
|
"grad_norm": 0.7227699836985734, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4546, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.679290196927072, |
|
"grad_norm": 0.7676454649666475, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4663, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.6814542306859988, |
|
"grad_norm": 0.8226610282436956, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4745, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.6836182644449253, |
|
"grad_norm": 0.7771996492106517, |
|
"learning_rate": 5e-06, |
|
"loss": 0.459, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.685782298203852, |
|
"grad_norm": 0.7845408388142724, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4662, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.6879463319627788, |
|
"grad_norm": 0.8358661114189557, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4616, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.6901103657217051, |
|
"grad_norm": 0.745445671430633, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4633, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.692274399480632, |
|
"grad_norm": 0.7806093109649215, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4713, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.6944384332395586, |
|
"grad_norm": 0.7931013296476084, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4518, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.6966024669984852, |
|
"grad_norm": 0.7523743415278165, |
|
"learning_rate": 5e-06, |
|
"loss": 0.468, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.698766500757412, |
|
"grad_norm": 0.7657925402229067, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4653, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.7009305345163384, |
|
"grad_norm": 0.7623478102612719, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4714, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.7030945682752652, |
|
"grad_norm": 0.7981733209185972, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4699, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.7052586020341918, |
|
"grad_norm": 0.7781587550663129, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4765, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.7074226357931184, |
|
"grad_norm": 0.7587942982310993, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4771, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.709586669552045, |
|
"grad_norm": 0.7698688590200006, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4647, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.7117507033109716, |
|
"grad_norm": 0.804203999005989, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4686, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.7139147370698984, |
|
"grad_norm": 0.7911772144270678, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4639, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.716078770828825, |
|
"grad_norm": 0.7836618578593015, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4666, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.7182428045877516, |
|
"grad_norm": 0.788575508265406, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4729, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.7204068383466782, |
|
"grad_norm": 0.7861934781471758, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4555, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.7225708721056048, |
|
"grad_norm": 0.7536239150523527, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4694, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.7247349058645316, |
|
"grad_norm": 0.7316152360900233, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4654, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.726898939623458, |
|
"grad_norm": 0.7959312951820343, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4742, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.7290629733823848, |
|
"grad_norm": 0.7328251440184733, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4636, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.7312270071413114, |
|
"grad_norm": 0.8721623540661089, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4727, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.733391040900238, |
|
"grad_norm": 0.7788156040311068, |
|
"learning_rate": 5e-06, |
|
"loss": 0.479, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.7355550746591648, |
|
"grad_norm": 0.7352632852244723, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4694, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.7377191084180912, |
|
"grad_norm": 0.8294281676756476, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4756, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.739883142177018, |
|
"grad_norm": 0.7872766681208978, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4714, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.7420471759359446, |
|
"grad_norm": 0.7549853443648625, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4793, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.7442112096948712, |
|
"grad_norm": 0.7752211889605723, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4568, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.746375243453798, |
|
"grad_norm": 0.7656375809085874, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4626, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.7485392772127244, |
|
"grad_norm": 0.7758127700797092, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4583, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.7507033109716512, |
|
"grad_norm": 0.7634613626649636, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4595, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.7528673447305778, |
|
"grad_norm": 0.7698077132883445, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4717, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.7550313784895044, |
|
"grad_norm": 0.7383242906768342, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4626, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.7571954122484312, |
|
"grad_norm": 0.823790637914916, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4617, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.7593594460073576, |
|
"grad_norm": 0.7581731295515508, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4599, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.7615234797662844, |
|
"grad_norm": 0.7863922041923643, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4633, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.763687513525211, |
|
"grad_norm": 0.7258530440371272, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4695, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.7658515472841376, |
|
"grad_norm": 0.7617292499788662, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4584, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.7680155810430642, |
|
"grad_norm": 0.7835216384518215, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4776, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.7701796148019908, |
|
"grad_norm": 0.7905903337960178, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4575, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.7723436485609176, |
|
"grad_norm": 0.7470273698032939, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4519, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.7745076823198442, |
|
"grad_norm": 0.7672368207649455, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4648, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.7766717160787708, |
|
"grad_norm": 0.7316674700923429, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4724, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.7788357498376974, |
|
"grad_norm": 0.7360630882150441, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4638, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.780999783596624, |
|
"grad_norm": 0.781016234417425, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4728, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.7831638173555509, |
|
"grad_norm": 0.7876473046986651, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4592, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.7853278511144772, |
|
"grad_norm": 0.7404645309296104, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4624, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.787491884873404, |
|
"grad_norm": 0.7684119788472853, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4635, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.7896559186323306, |
|
"grad_norm": 0.7663695712294315, |
|
"learning_rate": 5e-06, |
|
"loss": 0.465, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.7918199523912572, |
|
"grad_norm": 0.7590055968711897, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4643, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.793983986150184, |
|
"grad_norm": 0.7895074154532604, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4671, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.7961480199091104, |
|
"grad_norm": 0.7962575230042065, |
|
"learning_rate": 5e-06, |
|
"loss": 0.472, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.7983120536680373, |
|
"grad_norm": 0.7478573445116543, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4626, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.8004760874269639, |
|
"grad_norm": 0.762202720740558, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4515, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.8026401211858905, |
|
"grad_norm": 0.8404740937407859, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4674, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.8048041549448173, |
|
"grad_norm": 0.7952968170552206, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4619, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.8069681887037436, |
|
"grad_norm": 0.7820220237908069, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4667, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.8091322224626705, |
|
"grad_norm": 0.7888823174892542, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4732, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.811296256221597, |
|
"grad_norm": 0.7350957126888091, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4532, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.8134602899805237, |
|
"grad_norm": 0.6957708222610044, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4497, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.8156243237394505, |
|
"grad_norm": 0.8053515701021636, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4785, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.8177883574983769, |
|
"grad_norm": 0.6995897457091669, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4551, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.8199523912573037, |
|
"grad_norm": 0.7753814167641445, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4621, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 1.8221164250162303, |
|
"grad_norm": 0.7661880273668203, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4609, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.8242804587751569, |
|
"grad_norm": 0.7609799960654735, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4736, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 1.8264444925340837, |
|
"grad_norm": 0.7841320073323229, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4575, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.82860852629301, |
|
"grad_norm": 0.8063573574026484, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4617, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.830772560051937, |
|
"grad_norm": 0.7854293405760782, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4777, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.8329365938108635, |
|
"grad_norm": 0.7408682039776677, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4614, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 1.83510062756979, |
|
"grad_norm": 0.7231058234976815, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4624, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.8372646613287167, |
|
"grad_norm": 0.800682041278424, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4641, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 1.8394286950876433, |
|
"grad_norm": 0.7673472834532682, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4606, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.84159272884657, |
|
"grad_norm": 0.7619493537629672, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4706, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 1.8437567626054965, |
|
"grad_norm": 0.7972147582688511, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4592, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.8459207963644233, |
|
"grad_norm": 0.7625120492458264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4571, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 1.84808483012335, |
|
"grad_norm": 0.8283056055978331, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4739, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.8502488638822765, |
|
"grad_norm": 0.7573851435355785, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4671, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.8524128976412033, |
|
"grad_norm": 0.8293614574747807, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4652, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.8545769314001297, |
|
"grad_norm": 0.7412074084724187, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4592, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 1.8567409651590565, |
|
"grad_norm": 0.8475011905897842, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4588, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.858904998917983, |
|
"grad_norm": 0.7431837931733989, |
|
"learning_rate": 5e-06, |
|
"loss": 0.453, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 1.8610690326769097, |
|
"grad_norm": 0.743103879188485, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4685, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.8632330664358365, |
|
"grad_norm": 0.7512142776663842, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4685, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 1.865397100194763, |
|
"grad_norm": 0.733739499974568, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4625, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.8675611339536897, |
|
"grad_norm": 0.7976859851306687, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4676, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 1.8697251677126163, |
|
"grad_norm": 0.7661212684653242, |
|
"learning_rate": 5e-06, |
|
"loss": 0.475, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.871889201471543, |
|
"grad_norm": 0.771734215920023, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4621, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.8740532352304697, |
|
"grad_norm": 0.766268946678346, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4531, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.8762172689893961, |
|
"grad_norm": 0.7754183196315589, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4673, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 1.878381302748323, |
|
"grad_norm": 0.7534048500045719, |
|
"learning_rate": 5e-06, |
|
"loss": 0.467, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.8805453365072495, |
|
"grad_norm": 0.7764377429907858, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4579, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.8827093702661761, |
|
"grad_norm": 0.7698958247598046, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4786, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.884873404025103, |
|
"grad_norm": 0.7651418118029404, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4594, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 1.8870374377840293, |
|
"grad_norm": 0.7590832586194605, |
|
"learning_rate": 5e-06, |
|
"loss": 0.46, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.8892014715429561, |
|
"grad_norm": 0.7731735855097023, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4727, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 1.8913655053018827, |
|
"grad_norm": 0.7856091416226185, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4503, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.8935295390608093, |
|
"grad_norm": 0.7722457110486987, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4605, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.895693572819736, |
|
"grad_norm": 0.8198508677971693, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4673, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.8978576065786625, |
|
"grad_norm": 0.7910065087434878, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4619, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 1.9000216403375894, |
|
"grad_norm": 0.7485212969421532, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4687, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.902185674096516, |
|
"grad_norm": 0.8215859586315127, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4555, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 1.9043497078554426, |
|
"grad_norm": 0.7909926394009462, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4666, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.9065137416143692, |
|
"grad_norm": 0.7403806845841657, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4722, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 1.9086777753732957, |
|
"grad_norm": 0.7617013904099731, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4553, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.9108418091322226, |
|
"grad_norm": 0.7849518527403264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4612, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 1.913005842891149, |
|
"grad_norm": 0.7899431737849835, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4795, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.9151698766500758, |
|
"grad_norm": 0.7232707414732422, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4661, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.9173339104090024, |
|
"grad_norm": 0.7715859939569867, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4729, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.919497944167929, |
|
"grad_norm": 0.778365711933746, |
|
"learning_rate": 5e-06, |
|
"loss": 0.466, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.9216619779268558, |
|
"grad_norm": 0.7315745037821393, |
|
"learning_rate": 5e-06, |
|
"loss": 0.458, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.9238260116857822, |
|
"grad_norm": 0.7850569825810849, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4657, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.925990045444709, |
|
"grad_norm": 0.7935430629873113, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4704, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.9281540792036356, |
|
"grad_norm": 0.8082828639182642, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4694, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 1.9303181129625622, |
|
"grad_norm": 0.7982975795262971, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4595, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.932482146721489, |
|
"grad_norm": 0.7278908896393382, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4705, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.9346461804804154, |
|
"grad_norm": 0.7593550485177655, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4769, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.9368102142393422, |
|
"grad_norm": 0.7560080244005302, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4508, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.9389742479982688, |
|
"grad_norm": 0.7523700953082769, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4583, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.9411382817571954, |
|
"grad_norm": 0.7729932814097218, |
|
"learning_rate": 5e-06, |
|
"loss": 0.46, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 1.9433023155161222, |
|
"grad_norm": 0.7859581817688281, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4657, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.9454663492750486, |
|
"grad_norm": 0.7649621801554861, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4673, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 1.9476303830339754, |
|
"grad_norm": 0.7697491440260914, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4635, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.949794416792902, |
|
"grad_norm": 0.783274985348169, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4634, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 1.9519584505518286, |
|
"grad_norm": 0.715096846477804, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4693, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.9541224843107552, |
|
"grad_norm": 0.8477280634175208, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4597, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 1.9562865180696818, |
|
"grad_norm": 0.7592259661678222, |
|
"learning_rate": 5e-06, |
|
"loss": 0.459, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.9584505518286086, |
|
"grad_norm": 0.855762642211244, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4552, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.9606145855875352, |
|
"grad_norm": 0.7724079364010088, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4585, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.9627786193464618, |
|
"grad_norm": 0.7472414181252647, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4575, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 1.9649426531053884, |
|
"grad_norm": 0.77667179344492, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4702, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.967106686864315, |
|
"grad_norm": 0.7738412500478078, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4548, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 1.9692707206232418, |
|
"grad_norm": 0.6873551934444759, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4618, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.9714347543821682, |
|
"grad_norm": 0.7616982926840125, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4633, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 1.973598788141095, |
|
"grad_norm": 0.7359315671968003, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4507, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.9757628219000216, |
|
"grad_norm": 0.7862965542736634, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4752, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 1.9779268556589482, |
|
"grad_norm": 0.7833594304960272, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4665, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.980090889417875, |
|
"grad_norm": 0.7749375306947249, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4606, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.9822549231768014, |
|
"grad_norm": 0.7755483335751571, |
|
"learning_rate": 5e-06, |
|
"loss": 0.46, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.9844189569357282, |
|
"grad_norm": 0.7318259540031191, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4557, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 1.9865829906946548, |
|
"grad_norm": 0.7415991714178316, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4699, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.9887470244535814, |
|
"grad_norm": 0.8918970620944271, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4584, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 1.9909110582125082, |
|
"grad_norm": 0.7854257183159034, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4659, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.9930750919714346, |
|
"grad_norm": 0.743885959515272, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4614, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 1.9952391257303614, |
|
"grad_norm": 0.7564617143226638, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4629, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.997403159489288, |
|
"grad_norm": 0.7956935106295134, |
|
"learning_rate": 5e-06, |
|
"loss": 0.46, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 1.9995671932482146, |
|
"grad_norm": 0.7902840252478065, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4605, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5312886238098145, |
|
"eval_runtime": 585.8565, |
|
"eval_samples_per_second": 26.566, |
|
"eval_steps_per_second": 0.416, |
|
"step": 9242 |
|
}, |
|
{ |
|
"epoch": 2.0017312270071415, |
|
"grad_norm": 0.8374428329689654, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3932, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.003895260766068, |
|
"grad_norm": 0.7293070552754123, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3703, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 2.0060592945249947, |
|
"grad_norm": 0.7267288150501345, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3791, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 2.008223328283921, |
|
"grad_norm": 0.7328996332443125, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3682, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 2.010387362042848, |
|
"grad_norm": 0.769240951148092, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3771, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 2.0125513958017747, |
|
"grad_norm": 0.782608875094357, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3835, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.014715429560701, |
|
"grad_norm": 0.7343404957266344, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3672, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 2.016879463319628, |
|
"grad_norm": 0.7361975290672562, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3638, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 2.0190434970785542, |
|
"grad_norm": 0.8170669306278748, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3833, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 2.021207530837481, |
|
"grad_norm": 0.7599248424010396, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3799, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 2.023371564596408, |
|
"grad_norm": 0.7854230438584365, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3774, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.0255355983553343, |
|
"grad_norm": 0.747309033166393, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3837, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 2.027699632114261, |
|
"grad_norm": 0.7554391399636565, |
|
"learning_rate": 5e-06, |
|
"loss": 0.373, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 2.0298636658731875, |
|
"grad_norm": 0.7369701594292731, |
|
"learning_rate": 5e-06, |
|
"loss": 0.373, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 2.0320276996321143, |
|
"grad_norm": 0.7920170859213356, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3829, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 2.034191733391041, |
|
"grad_norm": 0.7385616800337693, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3732, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.0363557671499675, |
|
"grad_norm": 0.7370778661512036, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3634, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 2.0385198009088943, |
|
"grad_norm": 0.7064048496785564, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3725, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 2.0406838346678207, |
|
"grad_norm": 0.7588040574820386, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3803, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 2.0428478684267475, |
|
"grad_norm": 0.6837725115579112, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3711, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 2.0450119021856743, |
|
"grad_norm": 0.7072237203949913, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3737, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.0471759359446007, |
|
"grad_norm": 0.7715770726086181, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3779, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 2.0493399697035275, |
|
"grad_norm": 0.7603839434200408, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3785, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 2.051504003462454, |
|
"grad_norm": 0.7811463267122977, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3755, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 2.0536680372213807, |
|
"grad_norm": 0.7223273231471339, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3775, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 2.0558320709803075, |
|
"grad_norm": 0.7489458096255904, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3752, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.057996104739234, |
|
"grad_norm": 0.7169025560301021, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3838, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 2.0601601384981607, |
|
"grad_norm": 0.8241052771764841, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3763, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 2.062324172257087, |
|
"grad_norm": 0.7344203339500558, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3759, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 2.064488206016014, |
|
"grad_norm": 0.7469451548032326, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3768, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 2.0666522397749403, |
|
"grad_norm": 0.7084397676411069, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3764, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.068816273533867, |
|
"grad_norm": 0.7938766312720807, |
|
"learning_rate": 5e-06, |
|
"loss": 0.383, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 2.070980307292794, |
|
"grad_norm": 0.766097768652044, |
|
"learning_rate": 5e-06, |
|
"loss": 0.376, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 2.0731443410517203, |
|
"grad_norm": 0.7626098304896327, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3735, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 2.075308374810647, |
|
"grad_norm": 0.7506050168232735, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3763, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 2.0774724085695735, |
|
"grad_norm": 0.7505245897537008, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3781, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.0796364423285003, |
|
"grad_norm": 0.7826239773634133, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3756, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 2.081800476087427, |
|
"grad_norm": 0.7278522309360076, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3752, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 2.0839645098463535, |
|
"grad_norm": 0.7756181784692793, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3764, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 2.0861285436052803, |
|
"grad_norm": 0.7359055323127612, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3722, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 2.0882925773642067, |
|
"grad_norm": 0.7577112116476528, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3847, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.0904566111231335, |
|
"grad_norm": 0.8136878823112847, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3897, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.0926206448820603, |
|
"grad_norm": 0.7427782161695468, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3782, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 2.0947846786409867, |
|
"grad_norm": 0.8424273390908451, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3879, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 2.0969487123999135, |
|
"grad_norm": 0.7658654614761568, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3809, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 2.09911274615884, |
|
"grad_norm": 0.7636850827656342, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3802, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.1012767799177667, |
|
"grad_norm": 0.7406666012902577, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3737, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 2.1034408136766936, |
|
"grad_norm": 0.731920134607451, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3668, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 2.10560484743562, |
|
"grad_norm": 0.7932449176977977, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3793, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 2.1077688811945468, |
|
"grad_norm": 0.7397705079775352, |
|
"learning_rate": 5e-06, |
|
"loss": 0.379, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 2.109932914953473, |
|
"grad_norm": 0.8144870875402731, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3831, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.1120969487124, |
|
"grad_norm": 0.7451950427223708, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3762, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 2.1142609824713268, |
|
"grad_norm": 0.7355015739740769, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3778, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 2.116425016230253, |
|
"grad_norm": 0.7536357069669827, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3758, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 2.11858904998918, |
|
"grad_norm": 0.7708756762306975, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3738, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 2.1207530837481063, |
|
"grad_norm": 0.7626855696919476, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3847, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.122917117507033, |
|
"grad_norm": 0.7158584629999891, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3753, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 2.1250811512659595, |
|
"grad_norm": 0.7466487488136114, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3764, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 2.1272451850248864, |
|
"grad_norm": 0.7404349219412076, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3721, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 2.129409218783813, |
|
"grad_norm": 0.7668568690340375, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3768, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 2.1315732525427395, |
|
"grad_norm": 0.7268904811888163, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3712, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.1337372863016664, |
|
"grad_norm": 0.7719283305769358, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3902, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 2.1359013200605927, |
|
"grad_norm": 0.7079289824393394, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3779, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 2.1380653538195196, |
|
"grad_norm": 0.7571867101155465, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3836, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 2.1402293875784464, |
|
"grad_norm": 0.7549119684648362, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3851, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 2.1423934213373728, |
|
"grad_norm": 0.7941986493329641, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3776, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.1445574550962996, |
|
"grad_norm": 0.7402941631803586, |
|
"learning_rate": 5e-06, |
|
"loss": 0.376, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 2.146721488855226, |
|
"grad_norm": 0.7853141015981866, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3757, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 2.1488855226141528, |
|
"grad_norm": 0.7502860009884765, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3818, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 2.1510495563730796, |
|
"grad_norm": 0.760008223381561, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3849, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 2.153213590132006, |
|
"grad_norm": 0.7940769908926166, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3804, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.155377623890933, |
|
"grad_norm": 0.739081181426962, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3773, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 2.157541657649859, |
|
"grad_norm": 0.7503458217565563, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3712, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 2.159705691408786, |
|
"grad_norm": 0.7598931433539674, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3844, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 2.161869725167713, |
|
"grad_norm": 0.7494704002208226, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3895, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 2.164033758926639, |
|
"grad_norm": 0.7853450199962944, |
|
"learning_rate": 5e-06, |
|
"loss": 0.382, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.166197792685566, |
|
"grad_norm": 0.7578503693676106, |
|
"learning_rate": 5e-06, |
|
"loss": 0.383, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 2.1683618264444924, |
|
"grad_norm": 0.7484549962239924, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3855, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 2.170525860203419, |
|
"grad_norm": 0.7614013434918991, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3696, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 2.172689893962346, |
|
"grad_norm": 0.7392030074985807, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3729, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 2.1748539277212724, |
|
"grad_norm": 0.7415769297940891, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3767, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.177017961480199, |
|
"grad_norm": 0.744537273333133, |
|
"learning_rate": 5e-06, |
|
"loss": 0.376, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 2.1791819952391256, |
|
"grad_norm": 0.7295839586667225, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3824, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 2.1813460289980524, |
|
"grad_norm": 0.749316196787019, |
|
"learning_rate": 5e-06, |
|
"loss": 0.385, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 2.1835100627569792, |
|
"grad_norm": 0.8144715088789032, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3869, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 2.1856740965159056, |
|
"grad_norm": 0.7376466691462915, |
|
"learning_rate": 5e-06, |
|
"loss": 0.382, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.1878381302748324, |
|
"grad_norm": 0.7553394493000064, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3831, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 2.190002164033759, |
|
"grad_norm": 0.7571271156204371, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3851, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 2.1921661977926856, |
|
"grad_norm": 0.7574287608767191, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3783, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 2.194330231551612, |
|
"grad_norm": 0.7883853379655534, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3862, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 2.196494265310539, |
|
"grad_norm": 0.7491644336765493, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3824, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.1986582990694656, |
|
"grad_norm": 0.8176175667235511, |
|
"learning_rate": 5e-06, |
|
"loss": 0.382, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 2.200822332828392, |
|
"grad_norm": 0.7397237145396862, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3821, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 2.202986366587319, |
|
"grad_norm": 0.6965521397762487, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3696, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 2.205150400346245, |
|
"grad_norm": 0.7532643826436776, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3794, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 2.207314434105172, |
|
"grad_norm": 0.7654434113655321, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3777, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.209478467864099, |
|
"grad_norm": 0.7712364055070057, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3776, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 2.2116425016230252, |
|
"grad_norm": 0.731992016899879, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3867, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 2.213806535381952, |
|
"grad_norm": 0.7463982586618078, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3816, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 2.2159705691408784, |
|
"grad_norm": 0.7624502219381054, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3764, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 2.2181346028998052, |
|
"grad_norm": 0.7686079427165029, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3812, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.220298636658732, |
|
"grad_norm": 0.7572457495264667, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3823, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 2.2224626704176584, |
|
"grad_norm": 0.8311579714029802, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3894, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 2.2246267041765853, |
|
"grad_norm": 0.753836212037088, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3812, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 2.2267907379355116, |
|
"grad_norm": 0.7777095091483516, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3745, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 2.2289547716944385, |
|
"grad_norm": 0.7345531886014162, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3814, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.2311188054533653, |
|
"grad_norm": 0.7492651509334102, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3895, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 2.2332828392122916, |
|
"grad_norm": 0.7734399829479766, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3899, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 2.2354468729712185, |
|
"grad_norm": 0.8170059562572255, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3721, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 2.237610906730145, |
|
"grad_norm": 0.7418910648043867, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3679, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 2.2397749404890717, |
|
"grad_norm": 0.7707008155582549, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3785, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.241938974247998, |
|
"grad_norm": 0.7508948653468445, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3858, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 2.244103008006925, |
|
"grad_norm": 0.7280052352796699, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3812, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 2.2462670417658517, |
|
"grad_norm": 0.7361553110496917, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3848, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 2.248431075524778, |
|
"grad_norm": 0.8110758388068879, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3859, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 2.250595109283705, |
|
"grad_norm": 0.7585969507359466, |
|
"learning_rate": 5e-06, |
|
"loss": 0.37, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.2527591430426313, |
|
"grad_norm": 0.7530247958679976, |
|
"learning_rate": 5e-06, |
|
"loss": 0.387, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 2.254923176801558, |
|
"grad_norm": 0.7793241812111504, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3785, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 2.257087210560485, |
|
"grad_norm": 0.7359589759600907, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3818, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 2.2592512443194113, |
|
"grad_norm": 0.7074654913334124, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3814, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 2.261415278078338, |
|
"grad_norm": 0.8029074288686185, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3939, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.2635793118372645, |
|
"grad_norm": 0.7773602608387922, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3882, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 2.2657433455961913, |
|
"grad_norm": 0.7803799314229609, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3861, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 2.267907379355118, |
|
"grad_norm": 0.7317655840742071, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3846, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 2.2700714131140445, |
|
"grad_norm": 0.7535304850406851, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3842, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 2.2722354468729713, |
|
"grad_norm": 0.818389549198698, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3864, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.2743994806318977, |
|
"grad_norm": 0.7972883371387612, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3764, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 2.2765635143908245, |
|
"grad_norm": 0.7684174991828006, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3923, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 2.2787275481497513, |
|
"grad_norm": 0.7272379216618123, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3703, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 2.2808915819086777, |
|
"grad_norm": 0.7527601200579448, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3822, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 2.2830556156676045, |
|
"grad_norm": 0.7661450606626169, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3836, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.285219649426531, |
|
"grad_norm": 0.779962904036604, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3833, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 2.2873836831854577, |
|
"grad_norm": 0.7826624741492424, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3858, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 2.2895477169443845, |
|
"grad_norm": 0.745367739386281, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3814, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 2.291711750703311, |
|
"grad_norm": 0.730641892603881, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3772, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 2.2938757844622377, |
|
"grad_norm": 0.7669396242881692, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3839, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.296039818221164, |
|
"grad_norm": 0.7319224793844378, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3711, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 2.298203851980091, |
|
"grad_norm": 0.7954183700469217, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3861, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 2.3003678857390177, |
|
"grad_norm": 0.7564733111942853, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3716, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 2.302531919497944, |
|
"grad_norm": 0.7563469175859392, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3817, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 2.304695953256871, |
|
"grad_norm": 0.740370173672384, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3785, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.3068599870157973, |
|
"grad_norm": 0.8008691481523956, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3869, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 2.309024020774724, |
|
"grad_norm": 0.757170756724089, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3883, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 2.311188054533651, |
|
"grad_norm": 0.7445447389513807, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3739, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 2.3133520882925773, |
|
"grad_norm": 0.7614800896552029, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3835, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 2.315516122051504, |
|
"grad_norm": 0.7507557413618469, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3776, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.3176801558104305, |
|
"grad_norm": 0.7721474736234565, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3773, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 2.3198441895693573, |
|
"grad_norm": 0.7313755931401409, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3895, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 2.322008223328284, |
|
"grad_norm": 0.7814799741219387, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3889, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 2.3241722570872105, |
|
"grad_norm": 0.7774874848897156, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3791, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 2.3263362908461374, |
|
"grad_norm": 0.7708201057745674, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3888, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.3285003246050637, |
|
"grad_norm": 0.7317774627158627, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3715, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 2.3306643583639906, |
|
"grad_norm": 0.7271265535921656, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3776, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 2.332828392122917, |
|
"grad_norm": 0.8285018373906101, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3891, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 2.3349924258818437, |
|
"grad_norm": 0.8019035513607989, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3841, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 2.3371564596407706, |
|
"grad_norm": 0.7544365537673512, |
|
"learning_rate": 5e-06, |
|
"loss": 0.386, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.339320493399697, |
|
"grad_norm": 0.774623922213998, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3872, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 2.3414845271586238, |
|
"grad_norm": 0.7287036153983298, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3851, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.34364856091755, |
|
"grad_norm": 0.7831306302240653, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3852, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 2.345812594676477, |
|
"grad_norm": 0.7455400541087714, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3832, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.347976628435404, |
|
"grad_norm": 0.7911713615620226, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3942, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.35014066219433, |
|
"grad_norm": 0.8028233557037938, |
|
"learning_rate": 5e-06, |
|
"loss": 0.388, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.352304695953257, |
|
"grad_norm": 0.7864179290759362, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3824, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 2.3544687297121834, |
|
"grad_norm": 0.7554701896856139, |
|
"learning_rate": 5e-06, |
|
"loss": 0.393, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.35663276347111, |
|
"grad_norm": 0.723649579527871, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3848, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 2.3587967972300365, |
|
"grad_norm": 0.7542043146491088, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3772, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.3609608309889634, |
|
"grad_norm": 0.7831233351849556, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3796, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 2.36312486474789, |
|
"grad_norm": 0.7789860410163189, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3829, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.3652888985068166, |
|
"grad_norm": 0.7371403610126779, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3893, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 2.3674529322657434, |
|
"grad_norm": 0.7725505898139313, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3884, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.3696169660246698, |
|
"grad_norm": 0.7347104158979803, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3905, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.3717809997835966, |
|
"grad_norm": 0.7657492814282947, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3879, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.3739450335425234, |
|
"grad_norm": 0.7535828749091847, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3875, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 2.3761090673014498, |
|
"grad_norm": 0.7577252254306687, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3779, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.3782731010603766, |
|
"grad_norm": 0.754449061571296, |
|
"learning_rate": 5e-06, |
|
"loss": 0.384, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 2.380437134819303, |
|
"grad_norm": 0.7286408029795997, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3812, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.38260116857823, |
|
"grad_norm": 0.7630893647158181, |
|
"learning_rate": 5e-06, |
|
"loss": 0.381, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 2.3847652023371566, |
|
"grad_norm": 0.7962306496261992, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3876, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.386929236096083, |
|
"grad_norm": 0.7677708510965319, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3845, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 2.38909326985501, |
|
"grad_norm": 0.7570281432515957, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3867, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.391257303613936, |
|
"grad_norm": 0.8132940105852546, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3782, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.393421337372863, |
|
"grad_norm": 0.739800914008478, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3823, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.39558537113179, |
|
"grad_norm": 0.7714600709258795, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3934, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 2.397749404890716, |
|
"grad_norm": 0.8296198735762226, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3737, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.399913438649643, |
|
"grad_norm": 0.7783027532344778, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3898, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 2.4020774724085694, |
|
"grad_norm": 0.7605774127832313, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3786, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.404241506167496, |
|
"grad_norm": 0.7161203998165129, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3729, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 2.406405539926423, |
|
"grad_norm": 0.8001326738731909, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3895, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.4085695736853494, |
|
"grad_norm": 0.7852408542137601, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3921, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 2.4107336074442762, |
|
"grad_norm": 0.7641378077706924, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3881, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.4128976412032026, |
|
"grad_norm": 0.7369344114660852, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3848, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.4150616749621294, |
|
"grad_norm": 0.7728142277616102, |
|
"learning_rate": 5e-06, |
|
"loss": 0.388, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.4172257087210562, |
|
"grad_norm": 0.7938596540542148, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3754, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 2.4193897424799826, |
|
"grad_norm": 0.7277868215488438, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3803, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.4215537762389094, |
|
"grad_norm": 0.7682514933014444, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3789, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 2.423717809997836, |
|
"grad_norm": 0.769584672945386, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3844, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.4258818437567626, |
|
"grad_norm": 0.7295995976083359, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3795, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 2.4280458775156895, |
|
"grad_norm": 0.7547865237203139, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3828, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.430209911274616, |
|
"grad_norm": 0.8150532849289923, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3858, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 2.4323739450335427, |
|
"grad_norm": 0.8095778619727545, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3924, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.434537978792469, |
|
"grad_norm": 0.7612807753325169, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3825, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.436702012551396, |
|
"grad_norm": 0.84684876401696, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3912, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.4388660463103227, |
|
"grad_norm": 0.7505318974427521, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3821, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 2.441030080069249, |
|
"grad_norm": 0.7463667758343082, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3779, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.443194113828176, |
|
"grad_norm": 0.7930997256650184, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3824, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 2.4453581475871022, |
|
"grad_norm": 0.7232751240140999, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3712, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.447522181346029, |
|
"grad_norm": 0.7452902861534266, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3829, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 2.449686215104956, |
|
"grad_norm": 0.7894533038001047, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3901, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.4518502488638823, |
|
"grad_norm": 0.7822422085940205, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3894, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 2.454014282622809, |
|
"grad_norm": 0.7790104348646221, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3772, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.4561783163817354, |
|
"grad_norm": 0.7674597626994477, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3952, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.4583423501406623, |
|
"grad_norm": 0.7389051490728133, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3886, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.4605063838995886, |
|
"grad_norm": 0.7648071653406447, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3785, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 2.4626704176585155, |
|
"grad_norm": 0.7485263164164858, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3834, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.4648344514174423, |
|
"grad_norm": 0.7571813819723854, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3891, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 2.4669984851763687, |
|
"grad_norm": 0.79013779876267, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3787, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.4691625189352955, |
|
"grad_norm": 0.7899502403657093, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3792, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 2.471326552694222, |
|
"grad_norm": 0.79065022776973, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3849, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.4734905864531487, |
|
"grad_norm": 0.7579456308475134, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3906, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 2.4756546202120755, |
|
"grad_norm": 0.7684812839256481, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3846, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.477818653971002, |
|
"grad_norm": 0.7541428841884413, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3949, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.4799826877299287, |
|
"grad_norm": 0.773906564919006, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3906, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.482146721488855, |
|
"grad_norm": 0.8020255649891496, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3925, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 2.484310755247782, |
|
"grad_norm": 0.7615441009856345, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4048, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.4864747890067083, |
|
"grad_norm": 0.7653863910560466, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3849, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 2.488638822765635, |
|
"grad_norm": 0.7982102560926431, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3791, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.490802856524562, |
|
"grad_norm": 0.7646849476306503, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3915, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 2.4929668902834883, |
|
"grad_norm": 0.7926740161568403, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3847, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.495130924042415, |
|
"grad_norm": 0.7857256749815332, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3901, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 2.4972949578013415, |
|
"grad_norm": 0.7811301407036569, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3822, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.4994589915602683, |
|
"grad_norm": 0.7612258678572895, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3873, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.501623025319195, |
|
"grad_norm": 0.7510835321762522, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3907, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.5037870590781215, |
|
"grad_norm": 0.7672276862765546, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3828, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 2.5059510928370483, |
|
"grad_norm": 0.7628691005049446, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3837, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.5081151265959747, |
|
"grad_norm": 0.7471986630619987, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3863, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 2.5102791603549015, |
|
"grad_norm": 0.7706154307172854, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3896, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.5124431941138283, |
|
"grad_norm": 0.7343855290129636, |
|
"learning_rate": 5e-06, |
|
"loss": 0.387, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 2.5146072278727547, |
|
"grad_norm": 0.7503993070592432, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3947, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 2.5167712616316815, |
|
"grad_norm": 0.7894715279800384, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3896, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 2.518935295390608, |
|
"grad_norm": 0.7565826940704703, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3873, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 2.5210993291495347, |
|
"grad_norm": 0.7680776687871832, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3938, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.5232633629084615, |
|
"grad_norm": 0.7896385741117732, |
|
"learning_rate": 5e-06, |
|
"loss": 0.379, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 2.525427396667388, |
|
"grad_norm": 0.7482445016179938, |
|
"learning_rate": 5e-06, |
|
"loss": 0.387, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 2.5275914304263147, |
|
"grad_norm": 0.7788787667528324, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3912, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 2.529755464185241, |
|
"grad_norm": 0.7802367273135542, |
|
"learning_rate": 5e-06, |
|
"loss": 0.391, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 2.531919497944168, |
|
"grad_norm": 0.7907791607502596, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3889, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.5340835317030947, |
|
"grad_norm": 0.735159844361493, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3866, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 2.536247565462021, |
|
"grad_norm": 0.8002646416305854, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3797, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 2.538411599220948, |
|
"grad_norm": 0.744137757018372, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3851, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 2.5405756329798743, |
|
"grad_norm": 0.8089884009703747, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3876, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 2.542739666738801, |
|
"grad_norm": 0.7409059443870979, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3817, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.544903700497728, |
|
"grad_norm": 0.7474137046476967, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3809, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.5470677342566543, |
|
"grad_norm": 0.7328688526405038, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3847, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 2.549231768015581, |
|
"grad_norm": 0.7309917622922488, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3821, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 2.5513958017745075, |
|
"grad_norm": 0.760296512703151, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3847, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 2.5535598355334344, |
|
"grad_norm": 0.7675760208277899, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3784, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.555723869292361, |
|
"grad_norm": 0.7239541839213967, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3893, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 2.5578879030512875, |
|
"grad_norm": 0.7575635250945256, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3866, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 2.5600519368102144, |
|
"grad_norm": 0.8205016916935968, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3884, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 2.5622159705691407, |
|
"grad_norm": 0.7630887493655477, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3822, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 2.5643800043280676, |
|
"grad_norm": 0.7483344691244171, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3861, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.5665440380869944, |
|
"grad_norm": 0.7836573802364927, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3859, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 2.5687080718459208, |
|
"grad_norm": 0.7758135442923644, |
|
"learning_rate": 5e-06, |
|
"loss": 0.389, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 2.5708721056048476, |
|
"grad_norm": 0.8156880128058609, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3815, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 2.573036139363774, |
|
"grad_norm": 0.769477713621125, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3887, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 2.5752001731227008, |
|
"grad_norm": 0.7539503843193486, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3746, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.5773642068816276, |
|
"grad_norm": 0.7665614539752887, |
|
"learning_rate": 5e-06, |
|
"loss": 0.383, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 2.579528240640554, |
|
"grad_norm": 0.78863002604713, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3932, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 2.581692274399481, |
|
"grad_norm": 0.7860997141249391, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3822, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 2.583856308158407, |
|
"grad_norm": 0.7450886855994443, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3847, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 2.586020341917334, |
|
"grad_norm": 0.7184291709731961, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4006, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.588184375676261, |
|
"grad_norm": 0.7395095877340896, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3805, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 2.590348409435187, |
|
"grad_norm": 0.7326541647113222, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3926, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 2.5925124431941136, |
|
"grad_norm": 0.7590091456087876, |
|
"learning_rate": 5e-06, |
|
"loss": 0.39, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 2.5946764769530404, |
|
"grad_norm": 0.7514373484529209, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3805, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 2.596840510711967, |
|
"grad_norm": 0.7974717819972812, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3814, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.599004544470894, |
|
"grad_norm": 0.7663442947904777, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3919, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 2.6011685782298204, |
|
"grad_norm": 0.7869861761124946, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3883, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 2.6033326119887468, |
|
"grad_norm": 0.7503773304052421, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3877, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 2.6054966457476736, |
|
"grad_norm": 0.776303816000796, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3985, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 2.6076606795066004, |
|
"grad_norm": 0.759982286533669, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3899, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.609824713265527, |
|
"grad_norm": 0.8131552609328792, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3884, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 2.6119887470244536, |
|
"grad_norm": 0.7476915162359377, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3857, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 2.61415278078338, |
|
"grad_norm": 0.8030188195536405, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3848, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 2.616316814542307, |
|
"grad_norm": 0.7550164235383711, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3903, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 2.6184808483012336, |
|
"grad_norm": 0.759412823543429, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3832, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.62064488206016, |
|
"grad_norm": 0.7595851928907493, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3882, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 2.622808915819087, |
|
"grad_norm": 0.7557311993407309, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3927, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 2.624972949578013, |
|
"grad_norm": 0.7935821634398855, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3848, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 2.62713698333694, |
|
"grad_norm": 0.7407643308725581, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3871, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 2.629301017095867, |
|
"grad_norm": 0.729366375580239, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3873, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.631465050854793, |
|
"grad_norm": 0.7561366246431421, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3822, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 2.63362908461372, |
|
"grad_norm": 0.7667634155030516, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3809, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 2.6357931183726464, |
|
"grad_norm": 0.7885773621343677, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3817, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 2.6379571521315732, |
|
"grad_norm": 0.7440441002401869, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3987, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 2.6401211858905, |
|
"grad_norm": 0.7810250085742799, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3782, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.6422852196494264, |
|
"grad_norm": 0.8150657736573159, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3896, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 2.6444492534083532, |
|
"grad_norm": 0.7279096250776349, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3821, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 2.6466132871672796, |
|
"grad_norm": 0.766036376483687, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3853, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 2.6487773209262064, |
|
"grad_norm": 0.7629756533767561, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3848, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 2.6509413546851333, |
|
"grad_norm": 0.7258995252608367, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3873, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.6531053884440596, |
|
"grad_norm": 0.7487644012773005, |
|
"learning_rate": 5e-06, |
|
"loss": 0.389, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 2.6552694222029865, |
|
"grad_norm": 0.7730940322471979, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3919, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 2.657433455961913, |
|
"grad_norm": 0.7261832821091123, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3772, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 2.6595974897208396, |
|
"grad_norm": 0.7841634979405185, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3955, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 2.6617615234797665, |
|
"grad_norm": 0.7443154389593502, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3998, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.663925557238693, |
|
"grad_norm": 0.792617400623899, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3838, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 2.6660895909976197, |
|
"grad_norm": 0.7430565465776605, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3926, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 2.668253624756546, |
|
"grad_norm": 0.7601315336074875, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3935, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 2.670417658515473, |
|
"grad_norm": 0.7349512479796254, |
|
"learning_rate": 5e-06, |
|
"loss": 0.386, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 2.6725816922743997, |
|
"grad_norm": 0.7492793091950313, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3939, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.674745726033326, |
|
"grad_norm": 0.8020327135634121, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3933, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 2.676909759792253, |
|
"grad_norm": 0.7821248575887879, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3889, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 2.6790737935511793, |
|
"grad_norm": 0.7764574267508418, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3834, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 2.681237827310106, |
|
"grad_norm": 0.7581668979481178, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3894, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 2.683401861069033, |
|
"grad_norm": 0.7312153550603386, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3871, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.6855658948279593, |
|
"grad_norm": 0.8290124241375181, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3778, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 2.687729928586886, |
|
"grad_norm": 0.7317732999168243, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3863, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 2.6898939623458125, |
|
"grad_norm": 0.7638469327765998, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3829, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 2.6920579961047393, |
|
"grad_norm": 0.7949708235476729, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3926, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 2.694222029863666, |
|
"grad_norm": 0.7550935663260143, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3783, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.6963860636225925, |
|
"grad_norm": 0.8313055045861929, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3817, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 2.6985500973815193, |
|
"grad_norm": 0.8005354523317457, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3868, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 2.7007141311404457, |
|
"grad_norm": 0.7670070573325029, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3815, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 2.7028781648993725, |
|
"grad_norm": 0.7508083437598002, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3882, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 2.7050421986582993, |
|
"grad_norm": 0.7787188976232684, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3872, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.7072062324172257, |
|
"grad_norm": 0.7683661547819274, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3849, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 2.7093702661761525, |
|
"grad_norm": 0.7391247866655715, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3806, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 2.711534299935079, |
|
"grad_norm": 0.7565014644999606, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3733, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 2.7136983336940057, |
|
"grad_norm": 0.8085171262046021, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3737, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 2.7158623674529325, |
|
"grad_norm": 0.7858515305447751, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3875, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.718026401211859, |
|
"grad_norm": 0.7365212017152488, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3908, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 2.7201904349707853, |
|
"grad_norm": 0.7744330424464411, |
|
"learning_rate": 5e-06, |
|
"loss": 0.385, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 2.722354468729712, |
|
"grad_norm": 0.7564513841846556, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3867, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 2.724518502488639, |
|
"grad_norm": 0.7750108310504817, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3892, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 2.7266825362475657, |
|
"grad_norm": 0.7782061698199593, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3952, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.728846570006492, |
|
"grad_norm": 0.7822075760319362, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3981, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 2.7310106037654185, |
|
"grad_norm": 0.7764669704006216, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3873, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 2.7331746375243453, |
|
"grad_norm": 0.746571146392982, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3857, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 2.735338671283272, |
|
"grad_norm": 0.7727176603555808, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3849, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 2.7375027050421985, |
|
"grad_norm": 0.7716153482496758, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3942, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.7396667388011253, |
|
"grad_norm": 0.7506221057021802, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3828, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 2.7418307725600517, |
|
"grad_norm": 0.765605207662551, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3896, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 2.7439948063189785, |
|
"grad_norm": 0.7936436590710892, |
|
"learning_rate": 5e-06, |
|
"loss": 0.384, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 2.7461588400779053, |
|
"grad_norm": 0.7417315221453376, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3864, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 2.7483228738368317, |
|
"grad_norm": 0.7638299083549244, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3882, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.7504869075957585, |
|
"grad_norm": 0.7401370791792725, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3736, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 2.752650941354685, |
|
"grad_norm": 0.7604282539682512, |
|
"learning_rate": 5e-06, |
|
"loss": 0.386, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 2.7548149751136117, |
|
"grad_norm": 0.7593327654847288, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3874, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 2.7569790088725386, |
|
"grad_norm": 0.762010718640477, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3728, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 2.759143042631465, |
|
"grad_norm": 0.7464411042463334, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3853, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.7613070763903917, |
|
"grad_norm": 0.7527404376235197, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3805, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 2.763471110149318, |
|
"grad_norm": 0.7413553870174003, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3827, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 2.765635143908245, |
|
"grad_norm": 0.8112331079474291, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3912, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 2.7677991776671718, |
|
"grad_norm": 0.7606524264570311, |
|
"learning_rate": 5e-06, |
|
"loss": 0.384, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 2.769963211426098, |
|
"grad_norm": 0.7688787180412004, |
|
"learning_rate": 5e-06, |
|
"loss": 0.386, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.772127245185025, |
|
"grad_norm": 0.7709908015790655, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3911, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 2.7742912789439513, |
|
"grad_norm": 0.8256088144670327, |
|
"learning_rate": 5e-06, |
|
"loss": 0.394, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 2.776455312702878, |
|
"grad_norm": 0.76732285875986, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3825, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 2.778619346461805, |
|
"grad_norm": 0.7623048216172926, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3812, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 2.7807833802207313, |
|
"grad_norm": 0.7770120662585185, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3825, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.782947413979658, |
|
"grad_norm": 0.7659653162936821, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3802, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 2.7851114477385845, |
|
"grad_norm": 0.7708274852623006, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 2.7872754814975114, |
|
"grad_norm": 0.7743267205110804, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3833, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 2.789439515256438, |
|
"grad_norm": 0.7749387611421825, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3953, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 2.7916035490153646, |
|
"grad_norm": 0.7586126347403827, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3853, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.7937675827742914, |
|
"grad_norm": 0.7500835141793593, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3929, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 2.7959316165332178, |
|
"grad_norm": 0.74663392656643, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3795, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 2.7980956502921446, |
|
"grad_norm": 0.7279989402913795, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3934, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 2.8002596840510714, |
|
"grad_norm": 0.782757718266062, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3902, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 2.8024237178099978, |
|
"grad_norm": 0.7541794890470066, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3905, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.8045877515689246, |
|
"grad_norm": 0.7511833194905094, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3803, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 2.806751785327851, |
|
"grad_norm": 0.7345651454318538, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3838, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 2.808915819086778, |
|
"grad_norm": 0.7430756885822685, |
|
"learning_rate": 5e-06, |
|
"loss": 0.375, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 2.8110798528457046, |
|
"grad_norm": 0.7503404811162424, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3899, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 2.813243886604631, |
|
"grad_norm": 0.7839538272866499, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.815407920363558, |
|
"grad_norm": 0.8075664166536066, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3869, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 2.817571954122484, |
|
"grad_norm": 0.8026133483602639, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3957, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 2.819735987881411, |
|
"grad_norm": 0.744545263944405, |
|
"learning_rate": 5e-06, |
|
"loss": 0.386, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 2.821900021640338, |
|
"grad_norm": 0.7535839428276422, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3808, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 2.824064055399264, |
|
"grad_norm": 0.7957853276701218, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3793, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.826228089158191, |
|
"grad_norm": 0.7665463998428926, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3817, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 2.8283921229171174, |
|
"grad_norm": 0.7747068764554864, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3899, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 2.830556156676044, |
|
"grad_norm": 0.7529729147926931, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3986, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 2.832720190434971, |
|
"grad_norm": 0.7714949651199653, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3849, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 2.8348842241938974, |
|
"grad_norm": 0.757669054423436, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3973, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.8370482579528242, |
|
"grad_norm": 0.7109714152621212, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3951, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 2.8392122917117506, |
|
"grad_norm": 0.7676176881192389, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3878, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 2.8413763254706774, |
|
"grad_norm": 0.7850663677878515, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3932, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 2.8435403592296042, |
|
"grad_norm": 0.7225327094498046, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3831, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 2.8457043929885306, |
|
"grad_norm": 0.7817333715491703, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3895, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.847868426747457, |
|
"grad_norm": 0.7709262647174362, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3903, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 2.850032460506384, |
|
"grad_norm": 0.7521690368016893, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3841, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 2.8521964942653106, |
|
"grad_norm": 0.7654288882047556, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3965, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 2.8543605280242375, |
|
"grad_norm": 0.7797805693245492, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3882, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 2.856524561783164, |
|
"grad_norm": 0.767159401606924, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3967, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.85868859554209, |
|
"grad_norm": 0.7700593597456216, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3818, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 2.860852629301017, |
|
"grad_norm": 0.7699670336942503, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3981, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 2.863016663059944, |
|
"grad_norm": 0.7532067312686156, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3845, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.86518069681887, |
|
"grad_norm": 0.7700933480079477, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3916, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 2.867344730577797, |
|
"grad_norm": 0.7466578230564417, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3894, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.8695087643367234, |
|
"grad_norm": 0.7937501563778528, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3816, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 2.8716727980956502, |
|
"grad_norm": 0.7389001187669237, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3728, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 2.873836831854577, |
|
"grad_norm": 0.7578905999176232, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3812, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 2.8760008656135034, |
|
"grad_norm": 0.8151694337796392, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3861, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 2.8781648993724303, |
|
"grad_norm": 0.7471342559011176, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3888, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.8803289331313566, |
|
"grad_norm": 0.7422252462621732, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3922, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 2.8824929668902834, |
|
"grad_norm": 0.7615451869859242, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3829, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 2.8846570006492103, |
|
"grad_norm": 0.8256789814798213, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3952, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 2.8868210344081366, |
|
"grad_norm": 0.76680451948221, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3956, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 2.8889850681670635, |
|
"grad_norm": 0.7439841068189852, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3776, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.89114910192599, |
|
"grad_norm": 0.7398497595894248, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3877, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 2.8933131356849167, |
|
"grad_norm": 0.7739248139645774, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3929, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 2.8954771694438435, |
|
"grad_norm": 0.8440378872399906, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3876, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 2.89764120320277, |
|
"grad_norm": 0.7705375133560814, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3871, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 2.8998052369616967, |
|
"grad_norm": 0.7344474907711162, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3988, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.901969270720623, |
|
"grad_norm": 0.7691718865505679, |
|
"learning_rate": 5e-06, |
|
"loss": 0.392, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 2.90413330447955, |
|
"grad_norm": 0.7491038351612413, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3927, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 2.9062973382384767, |
|
"grad_norm": 0.7519583645050334, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3845, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 2.908461371997403, |
|
"grad_norm": 0.8244800954009015, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3832, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 2.91062540575633, |
|
"grad_norm": 0.8157251210241125, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3805, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.9127894395152563, |
|
"grad_norm": 0.7721623213908909, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3942, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 2.914953473274183, |
|
"grad_norm": 0.7726737654084024, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3931, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 2.91711750703311, |
|
"grad_norm": 0.7550960174025876, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3885, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 2.9192815407920363, |
|
"grad_norm": 0.7679327991153526, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3964, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 2.921445574550963, |
|
"grad_norm": 0.7823759384711927, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3809, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.9236096083098895, |
|
"grad_norm": 0.7512641558224188, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3842, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 2.9257736420688163, |
|
"grad_norm": 0.782519582204221, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3942, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 2.927937675827743, |
|
"grad_norm": 0.7935222646576868, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3871, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 2.9301017095866695, |
|
"grad_norm": 0.7490260485361179, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3924, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 2.9322657433455963, |
|
"grad_norm": 0.7829574236017007, |
|
"learning_rate": 5e-06, |
|
"loss": 0.387, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.9344297771045227, |
|
"grad_norm": 0.8053438714777432, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3997, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 2.9365938108634495, |
|
"grad_norm": 0.789271548816451, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3921, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 2.9387578446223763, |
|
"grad_norm": 0.7620667953803887, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3945, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 2.9409218783813027, |
|
"grad_norm": 0.8157019776942496, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3973, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 2.9430859121402295, |
|
"grad_norm": 0.7566983087966317, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3973, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.945249945899156, |
|
"grad_norm": 0.7850341888405364, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3852, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 2.9474139796580827, |
|
"grad_norm": 0.7756606362123769, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3873, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 2.9495780134170095, |
|
"grad_norm": 0.7582467951052595, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4002, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 2.951742047175936, |
|
"grad_norm": 0.7922689379335354, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3948, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 2.9539060809348627, |
|
"grad_norm": 0.7435887161247465, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3934, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.956070114693789, |
|
"grad_norm": 0.7677386746970113, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3842, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 2.958234148452716, |
|
"grad_norm": 0.7621270826776456, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3882, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 2.9603981822116427, |
|
"grad_norm": 0.7115127400494038, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3869, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 2.962562215970569, |
|
"grad_norm": 0.7875693326081818, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3818, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 2.964726249729496, |
|
"grad_norm": 0.7236217251683649, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3817, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.9668902834884223, |
|
"grad_norm": 0.8125742954355849, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3854, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 2.969054317247349, |
|
"grad_norm": 0.7524427643623132, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3903, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.971218351006276, |
|
"grad_norm": 0.7390063903999405, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3808, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 2.9733823847652023, |
|
"grad_norm": 0.7511379236710563, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3826, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 2.9755464185241287, |
|
"grad_norm": 0.8210016741195086, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3935, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.9777104522830555, |
|
"grad_norm": 0.7700571385479135, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3947, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 2.9798744860419824, |
|
"grad_norm": 0.7901788530732445, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3918, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 2.982038519800909, |
|
"grad_norm": 0.790326497189608, |
|
"learning_rate": 5e-06, |
|
"loss": 0.395, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 2.9842025535598355, |
|
"grad_norm": 0.7970458568075771, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3838, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 2.986366587318762, |
|
"grad_norm": 0.7937921564780092, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3871, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.9885306210776887, |
|
"grad_norm": 0.7869992321634875, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3861, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 2.9906946548366156, |
|
"grad_norm": 0.7507057758028739, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3863, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 2.992858688595542, |
|
"grad_norm": 0.8051759990871201, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3811, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 2.9950227223544688, |
|
"grad_norm": 0.7596345710239549, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3904, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 2.997186756113395, |
|
"grad_norm": 0.7917910564755264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3977, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.999350789872322, |
|
"grad_norm": 0.732648425467274, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3911, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.546510636806488, |
|
"eval_runtime": 589.5092, |
|
"eval_samples_per_second": 26.402, |
|
"eval_steps_per_second": 0.414, |
|
"step": 13863 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 13863, |
|
"total_flos": 3633869689454592.0, |
|
"train_loss": 0.474967997638056, |
|
"train_runtime": 94524.6869, |
|
"train_samples_per_second": 9.385, |
|
"train_steps_per_second": 0.147 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 13863, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3633869689454592.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|