|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.02622865022092052, |
|
"eval_steps": 326, |
|
"global_step": 1303, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.0129432249363408e-05, |
|
"grad_norm": 1.1866097338497639e-05, |
|
"learning_rate": 2e-05, |
|
"loss": 46.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.0129432249363408e-05, |
|
"eval_loss": 11.5, |
|
"eval_runtime": 126.1545, |
|
"eval_samples_per_second": 165.813, |
|
"eval_steps_per_second": 82.906, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 4.0258864498726816e-05, |
|
"grad_norm": 2.147201303159818e-05, |
|
"learning_rate": 4e-05, |
|
"loss": 46.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 6.038829674809022e-05, |
|
"grad_norm": 1.848486135713756e-05, |
|
"learning_rate": 6e-05, |
|
"loss": 46.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 8.051772899745363e-05, |
|
"grad_norm": 1.654278821661137e-05, |
|
"learning_rate": 8e-05, |
|
"loss": 46.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00010064716124681703, |
|
"grad_norm": 2.277838393638376e-05, |
|
"learning_rate": 0.0001, |
|
"loss": 46.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00012077659349618043, |
|
"grad_norm": 2.333819975319784e-05, |
|
"learning_rate": 0.00012, |
|
"loss": 46.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00014090602574554385, |
|
"grad_norm": 1.976581188500859e-05, |
|
"learning_rate": 0.00014, |
|
"loss": 46.0, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00016103545799490726, |
|
"grad_norm": 2.9277169232955202e-05, |
|
"learning_rate": 0.00016, |
|
"loss": 46.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00018116489024427065, |
|
"grad_norm": 1.2510759916040115e-05, |
|
"learning_rate": 0.00018, |
|
"loss": 46.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00020129432249363407, |
|
"grad_norm": 1.7789652702049352e-05, |
|
"learning_rate": 0.0002, |
|
"loss": 46.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00022142375474299748, |
|
"grad_norm": 2.230467725894414e-05, |
|
"learning_rate": 0.00019999970482981582, |
|
"loss": 46.0, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00024155318699236087, |
|
"grad_norm": 2.8929885957040824e-05, |
|
"learning_rate": 0.0001999988193210057, |
|
"loss": 46.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0002616826192417243, |
|
"grad_norm": 2.140910510206595e-05, |
|
"learning_rate": 0.00019999734347879723, |
|
"loss": 46.0, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0002818120514910877, |
|
"grad_norm": 1.3324294741323683e-05, |
|
"learning_rate": 0.0001999952773119029, |
|
"loss": 46.0, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0003019414837404511, |
|
"grad_norm": 6.112633127486333e-05, |
|
"learning_rate": 0.00019999262083252007, |
|
"loss": 46.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00032207091598981453, |
|
"grad_norm": 2.477996349625755e-05, |
|
"learning_rate": 0.00019998937405633105, |
|
"loss": 46.0, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0003422003482391779, |
|
"grad_norm": 2.2150932636577636e-05, |
|
"learning_rate": 0.00019998553700250284, |
|
"loss": 46.0, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0003623297804885413, |
|
"grad_norm": 1.1595971955102868e-05, |
|
"learning_rate": 0.00019998110969368717, |
|
"loss": 46.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.00038245921273790474, |
|
"grad_norm": 1.8772680050460622e-05, |
|
"learning_rate": 0.00019997609215602019, |
|
"loss": 46.0, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.00040258864498726813, |
|
"grad_norm": 1.745060035318602e-05, |
|
"learning_rate": 0.00019997048441912246, |
|
"loss": 46.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0004227180772366315, |
|
"grad_norm": 3.103197013842873e-05, |
|
"learning_rate": 0.0001999642865160987, |
|
"loss": 46.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.00044284750948599496, |
|
"grad_norm": 3.2184922019951046e-05, |
|
"learning_rate": 0.0001999574984835377, |
|
"loss": 46.0, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.00046297694173535835, |
|
"grad_norm": 2.257189953525085e-05, |
|
"learning_rate": 0.00019995012036151186, |
|
"loss": 46.0, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.00048310637398472174, |
|
"grad_norm": 3.554321301635355e-05, |
|
"learning_rate": 0.00019994215219357728, |
|
"loss": 46.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0005032358062340851, |
|
"grad_norm": 1.5587129382765852e-05, |
|
"learning_rate": 0.00019993359402677323, |
|
"loss": 46.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0005233652384834486, |
|
"grad_norm": 9.828573638515081e-06, |
|
"learning_rate": 0.00019992444591162206, |
|
"loss": 46.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.000543494670732812, |
|
"grad_norm": 1.708105810394045e-05, |
|
"learning_rate": 0.00019991470790212877, |
|
"loss": 46.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0005636241029821754, |
|
"grad_norm": 2.235212923551444e-05, |
|
"learning_rate": 0.00019990438005578075, |
|
"loss": 46.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0005837535352315388, |
|
"grad_norm": 2.0345447410363704e-05, |
|
"learning_rate": 0.00019989346243354746, |
|
"loss": 46.0, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0006038829674809022, |
|
"grad_norm": 2.3022035747999325e-05, |
|
"learning_rate": 0.00019988195509988005, |
|
"loss": 46.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0006240123997302656, |
|
"grad_norm": 2.097547439916525e-05, |
|
"learning_rate": 0.00019986985812271092, |
|
"loss": 46.0, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0006441418319796291, |
|
"grad_norm": 2.48163087235298e-05, |
|
"learning_rate": 0.00019985717157345345, |
|
"loss": 46.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0006642712642289924, |
|
"grad_norm": 1.3824127563566435e-05, |
|
"learning_rate": 0.00019984389552700144, |
|
"loss": 46.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0006844006964783558, |
|
"grad_norm": 5.524979133042507e-05, |
|
"learning_rate": 0.0001998300300617287, |
|
"loss": 46.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0007045301287277192, |
|
"grad_norm": 2.9547367375926115e-05, |
|
"learning_rate": 0.00019981557525948875, |
|
"loss": 46.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0007246595609770826, |
|
"grad_norm": 3.511565591907129e-05, |
|
"learning_rate": 0.00019980053120561411, |
|
"loss": 46.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0007447889932264461, |
|
"grad_norm": 1.500822963862447e-05, |
|
"learning_rate": 0.00019978489798891584, |
|
"loss": 46.0, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0007649184254758095, |
|
"grad_norm": 2.595680416561663e-05, |
|
"learning_rate": 0.00019976867570168318, |
|
"loss": 46.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0007850478577251729, |
|
"grad_norm": 2.681766818568576e-05, |
|
"learning_rate": 0.00019975186443968286, |
|
"loss": 46.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0008051772899745363, |
|
"grad_norm": 3.518196172080934e-05, |
|
"learning_rate": 0.0001997344643021585, |
|
"loss": 46.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0008253067222238997, |
|
"grad_norm": 2.3766757294652052e-05, |
|
"learning_rate": 0.00019971647539183013, |
|
"loss": 46.0, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.000845436154473263, |
|
"grad_norm": 1.9241595509811305e-05, |
|
"learning_rate": 0.00019969789781489362, |
|
"loss": 46.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0008655655867226265, |
|
"grad_norm": 2.352761111978907e-05, |
|
"learning_rate": 0.00019967873168101984, |
|
"loss": 46.0, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0008856950189719899, |
|
"grad_norm": 2.3743756173644215e-05, |
|
"learning_rate": 0.00019965897710335422, |
|
"loss": 46.0, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0009058244512213533, |
|
"grad_norm": 3.65232554031536e-05, |
|
"learning_rate": 0.00019963863419851605, |
|
"loss": 46.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0009259538834707167, |
|
"grad_norm": 2.59846947301412e-05, |
|
"learning_rate": 0.00019961770308659767, |
|
"loss": 46.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0009460833157200801, |
|
"grad_norm": 2.885664434870705e-05, |
|
"learning_rate": 0.00019959618389116387, |
|
"loss": 46.0, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0009662127479694435, |
|
"grad_norm": 2.3175163732958026e-05, |
|
"learning_rate": 0.0001995740767392512, |
|
"loss": 46.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.000986342180218807, |
|
"grad_norm": 4.130275920033455e-05, |
|
"learning_rate": 0.0001995513817613671, |
|
"loss": 46.0, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0010064716124681702, |
|
"grad_norm": 3.658945206552744e-05, |
|
"learning_rate": 0.00019952809909148914, |
|
"loss": 46.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0010266010447175337, |
|
"grad_norm": 2.976952600874938e-05, |
|
"learning_rate": 0.0001995042288670643, |
|
"loss": 46.0, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0010467304769668972, |
|
"grad_norm": 1.5616597011103295e-05, |
|
"learning_rate": 0.00019947977122900822, |
|
"loss": 46.0, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0010668599092162605, |
|
"grad_norm": 2.330297138541937e-05, |
|
"learning_rate": 0.0001994547263217042, |
|
"loss": 46.0, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.001086989341465624, |
|
"grad_norm": 2.5345374524476938e-05, |
|
"learning_rate": 0.00019942909429300238, |
|
"loss": 46.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0011071187737149873, |
|
"grad_norm": 2.747085818555206e-05, |
|
"learning_rate": 0.00019940287529421902, |
|
"loss": 46.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0011272482059643508, |
|
"grad_norm": 5.7161822041962296e-05, |
|
"learning_rate": 0.00019937606948013548, |
|
"loss": 46.0, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0011473776382137143, |
|
"grad_norm": 1.3162572031433228e-05, |
|
"learning_rate": 0.00019934867700899722, |
|
"loss": 46.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0011675070704630776, |
|
"grad_norm": 3.8153884815983474e-05, |
|
"learning_rate": 0.00019932069804251312, |
|
"loss": 46.0, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.001187636502712441, |
|
"grad_norm": 2.5788935090531595e-05, |
|
"learning_rate": 0.0001992921327458543, |
|
"loss": 46.0, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0012077659349618043, |
|
"grad_norm": 1.2793129826604854e-05, |
|
"learning_rate": 0.00019926298128765323, |
|
"loss": 46.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0012278953672111678, |
|
"grad_norm": 2.963062252092641e-05, |
|
"learning_rate": 0.00019923324384000276, |
|
"loss": 46.0, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0012480247994605311, |
|
"grad_norm": 1.7501424736110494e-05, |
|
"learning_rate": 0.00019920292057845499, |
|
"loss": 46.0, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0012681542317098946, |
|
"grad_norm": 2.3330876501859166e-05, |
|
"learning_rate": 0.00019917201168202043, |
|
"loss": 46.0, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0012882836639592581, |
|
"grad_norm": 1.3082960322208237e-05, |
|
"learning_rate": 0.00019914051733316678, |
|
"loss": 46.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0013084130962086214, |
|
"grad_norm": 2.3455559130525216e-05, |
|
"learning_rate": 0.00019910843771781783, |
|
"loss": 46.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0013285425284579849, |
|
"grad_norm": 1.9461349438643083e-05, |
|
"learning_rate": 0.00019907577302535255, |
|
"loss": 46.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0013486719607073482, |
|
"grad_norm": 3.472498428891413e-05, |
|
"learning_rate": 0.00019904252344860382, |
|
"loss": 46.0, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0013688013929567117, |
|
"grad_norm": 2.7159438104717992e-05, |
|
"learning_rate": 0.00019900868918385726, |
|
"loss": 46.0, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0013889308252060752, |
|
"grad_norm": 1.6992695236695e-05, |
|
"learning_rate": 0.00019897427043085022, |
|
"loss": 46.0, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0014090602574554384, |
|
"grad_norm": 2.162869532185141e-05, |
|
"learning_rate": 0.0001989392673927705, |
|
"loss": 46.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.001429189689704802, |
|
"grad_norm": 5.969742778688669e-05, |
|
"learning_rate": 0.00019890368027625517, |
|
"loss": 46.0, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0014493191219541652, |
|
"grad_norm": 2.1275785911711864e-05, |
|
"learning_rate": 0.00019886750929138934, |
|
"loss": 46.0, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0014694485542035287, |
|
"grad_norm": 2.3872542442404665e-05, |
|
"learning_rate": 0.0001988307546517049, |
|
"loss": 46.0, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0014895779864528922, |
|
"grad_norm": 5.359681381378323e-05, |
|
"learning_rate": 0.00019879341657417935, |
|
"loss": 46.0, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.0015097074187022555, |
|
"grad_norm": 2.5549368729116395e-05, |
|
"learning_rate": 0.00019875549527923449, |
|
"loss": 46.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.001529836850951619, |
|
"grad_norm": 2.281313754792791e-05, |
|
"learning_rate": 0.00019871699099073493, |
|
"loss": 46.0, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0015499662832009823, |
|
"grad_norm": 3.20350554829929e-05, |
|
"learning_rate": 0.0001986779039359871, |
|
"loss": 46.0, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0015700957154503458, |
|
"grad_norm": 3.160408232361078e-05, |
|
"learning_rate": 0.00019863823434573762, |
|
"loss": 46.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.001590225147699709, |
|
"grad_norm": 2.4337972718058154e-05, |
|
"learning_rate": 0.00019859798245417217, |
|
"loss": 46.0, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0016103545799490725, |
|
"grad_norm": 3.159191328450106e-05, |
|
"learning_rate": 0.0001985571484989138, |
|
"loss": 46.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.001630484012198436, |
|
"grad_norm": 2.5550882128300145e-05, |
|
"learning_rate": 0.00019851573272102195, |
|
"loss": 46.0, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0016506134444477993, |
|
"grad_norm": 1.8689172065933235e-05, |
|
"learning_rate": 0.0001984737353649906, |
|
"loss": 46.0, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0016707428766971628, |
|
"grad_norm": 2.9251643354655243e-05, |
|
"learning_rate": 0.00019843115667874707, |
|
"loss": 46.0, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.001690872308946526, |
|
"grad_norm": 3.018877760041505e-05, |
|
"learning_rate": 0.00019838799691365065, |
|
"loss": 46.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0017110017411958896, |
|
"grad_norm": 1.1726152479241136e-05, |
|
"learning_rate": 0.00019834425632449075, |
|
"loss": 46.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.001731131173445253, |
|
"grad_norm": 2.3671049348195083e-05, |
|
"learning_rate": 0.00019829993516948577, |
|
"loss": 46.0, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0017512606056946164, |
|
"grad_norm": 2.0576631868607365e-05, |
|
"learning_rate": 0.00019825503371028136, |
|
"loss": 46.0, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0017713900379439798, |
|
"grad_norm": 1.466808589611901e-05, |
|
"learning_rate": 0.000198209552211949, |
|
"loss": 46.0, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0017915194701933431, |
|
"grad_norm": 2.361923543503508e-05, |
|
"learning_rate": 0.00019816349094298427, |
|
"loss": 46.0, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0018116489024427066, |
|
"grad_norm": 1.9187695215805434e-05, |
|
"learning_rate": 0.0001981168501753055, |
|
"loss": 46.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0018317783346920701, |
|
"grad_norm": 2.630672861414496e-05, |
|
"learning_rate": 0.0001980696301842519, |
|
"loss": 46.0, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0018519077669414334, |
|
"grad_norm": 1.8121598259313032e-05, |
|
"learning_rate": 0.00019802183124858222, |
|
"loss": 46.0, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.001872037199190797, |
|
"grad_norm": 3.593276414903812e-05, |
|
"learning_rate": 0.00019797345365047284, |
|
"loss": 46.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0018921666314401602, |
|
"grad_norm": 2.5328612537123263e-05, |
|
"learning_rate": 0.0001979244976755162, |
|
"loss": 46.0, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.0019122960636895237, |
|
"grad_norm": 3.064305201405659e-05, |
|
"learning_rate": 0.00019787496361271925, |
|
"loss": 46.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.001932425495938887, |
|
"grad_norm": 2.1601079424726777e-05, |
|
"learning_rate": 0.00019782485175450155, |
|
"loss": 46.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0019525549281882504, |
|
"grad_norm": 1.7290110918111168e-05, |
|
"learning_rate": 0.0001977741623966936, |
|
"loss": 46.0, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.001972684360437614, |
|
"grad_norm": 1.116962175728986e-05, |
|
"learning_rate": 0.00019772289583853514, |
|
"loss": 46.0, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0019928137926869772, |
|
"grad_norm": 1.0275795830239076e-05, |
|
"learning_rate": 0.00019767105238267338, |
|
"loss": 46.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0020129432249363405, |
|
"grad_norm": 2.2131345758680254e-05, |
|
"learning_rate": 0.00019761863233516117, |
|
"loss": 46.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002033072657185704, |
|
"grad_norm": 3.4143031371058896e-05, |
|
"learning_rate": 0.0001975656360054552, |
|
"loss": 46.0, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0020532020894350675, |
|
"grad_norm": 3.857325282297097e-05, |
|
"learning_rate": 0.0001975120637064142, |
|
"loss": 46.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0020733315216844308, |
|
"grad_norm": 2.403794314886909e-05, |
|
"learning_rate": 0.00019745791575429705, |
|
"loss": 46.0, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.0020934609539337945, |
|
"grad_norm": 3.789052425418049e-05, |
|
"learning_rate": 0.00019740319246876106, |
|
"loss": 46.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.0021135903861831578, |
|
"grad_norm": 3.8589034375036135e-05, |
|
"learning_rate": 0.00019734789417285976, |
|
"loss": 46.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.002133719818432521, |
|
"grad_norm": 2.034025419561658e-05, |
|
"learning_rate": 0.0001972920211930414, |
|
"loss": 46.0, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.0021538492506818843, |
|
"grad_norm": 1.9496819732012227e-05, |
|
"learning_rate": 0.0001972355738591467, |
|
"loss": 46.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.002173978682931248, |
|
"grad_norm": 1.7886142813949846e-05, |
|
"learning_rate": 0.00019717855250440705, |
|
"loss": 46.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0021941081151806113, |
|
"grad_norm": 1.818929194996599e-05, |
|
"learning_rate": 0.00019712095746544255, |
|
"loss": 46.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0022142375474299746, |
|
"grad_norm": 2.199762820964679e-05, |
|
"learning_rate": 0.00019706278908225992, |
|
"loss": 46.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0022343669796793383, |
|
"grad_norm": 2.1755575289716944e-05, |
|
"learning_rate": 0.00019700404769825068, |
|
"loss": 46.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0022544964119287016, |
|
"grad_norm": 3.8793521525803953e-05, |
|
"learning_rate": 0.00019694473366018887, |
|
"loss": 46.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.002274625844178065, |
|
"grad_norm": 3.468850627541542e-05, |
|
"learning_rate": 0.00019688484731822923, |
|
"loss": 46.0, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0022947552764274286, |
|
"grad_norm": 2.4715391191421077e-05, |
|
"learning_rate": 0.00019682438902590498, |
|
"loss": 46.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.002314884708676792, |
|
"grad_norm": 3.426595503697172e-05, |
|
"learning_rate": 0.0001967633591401259, |
|
"loss": 46.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.002335014140926155, |
|
"grad_norm": 5.176919148652814e-05, |
|
"learning_rate": 0.000196701758021176, |
|
"loss": 46.0, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.0023551435731755184, |
|
"grad_norm": 2.376974771323148e-05, |
|
"learning_rate": 0.00019663958603271148, |
|
"loss": 46.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.002375273005424882, |
|
"grad_norm": 2.0293871784815565e-05, |
|
"learning_rate": 0.0001965768435417588, |
|
"loss": 46.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.0023954024376742454, |
|
"grad_norm": 4.838638415094465e-05, |
|
"learning_rate": 0.00019651353091871215, |
|
"loss": 46.0, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.0024155318699236087, |
|
"grad_norm": 2.106054307660088e-05, |
|
"learning_rate": 0.00019644964853733152, |
|
"loss": 46.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0024356613021729724, |
|
"grad_norm": 2.7618483727565035e-05, |
|
"learning_rate": 0.0001963851967747404, |
|
"loss": 46.0, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.0024557907344223357, |
|
"grad_norm": 1.421527485945262e-05, |
|
"learning_rate": 0.00019632017601142355, |
|
"loss": 46.0, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.002475920166671699, |
|
"grad_norm": 3.1367508199764416e-05, |
|
"learning_rate": 0.00019625458663122478, |
|
"loss": 46.0, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0024960495989210622, |
|
"grad_norm": 3.238041608710773e-05, |
|
"learning_rate": 0.00019618842902134465, |
|
"loss": 46.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.002516179031170426, |
|
"grad_norm": 2.0453908291528933e-05, |
|
"learning_rate": 0.00019612170357233836, |
|
"loss": 46.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0025363084634197892, |
|
"grad_norm": 1.5395889931824058e-05, |
|
"learning_rate": 0.00019605441067811302, |
|
"loss": 46.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.0025564378956691525, |
|
"grad_norm": 2.2598505893256515e-05, |
|
"learning_rate": 0.00019598655073592585, |
|
"loss": 46.0, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0025765673279185162, |
|
"grad_norm": 2.011835022130981e-05, |
|
"learning_rate": 0.0001959181241463814, |
|
"loss": 46.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.0025966967601678795, |
|
"grad_norm": 2.2615582565777004e-05, |
|
"learning_rate": 0.00019584913131342953, |
|
"loss": 46.0, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.0026168261924172428, |
|
"grad_norm": 2.472496998962015e-05, |
|
"learning_rate": 0.0001957795726443628, |
|
"loss": 46.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0026369556246666065, |
|
"grad_norm": 2.1229192498140037e-05, |
|
"learning_rate": 0.000195709448549814, |
|
"loss": 46.0, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.0026570850569159698, |
|
"grad_norm": 3.1881041650194675e-05, |
|
"learning_rate": 0.00019563875944375407, |
|
"loss": 46.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.002677214489165333, |
|
"grad_norm": 3.062764881178737e-05, |
|
"learning_rate": 0.0001955675057434893, |
|
"loss": 46.0, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.0026973439214146963, |
|
"grad_norm": 3.407730400795117e-05, |
|
"learning_rate": 0.00019549568786965903, |
|
"loss": 46.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.00271747335366406, |
|
"grad_norm": 2.335791396035347e-05, |
|
"learning_rate": 0.00019542330624623322, |
|
"loss": 46.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.0027376027859134233, |
|
"grad_norm": 2.1637504687532783e-05, |
|
"learning_rate": 0.00019535036130050975, |
|
"loss": 46.0, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.0027577322181627866, |
|
"grad_norm": 2.3219181457534432e-05, |
|
"learning_rate": 0.00019527685346311212, |
|
"loss": 46.0, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.0027778616504121503, |
|
"grad_norm": 1.165738285635598e-05, |
|
"learning_rate": 0.0001952027831679867, |
|
"loss": 46.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.0027979910826615136, |
|
"grad_norm": 2.6394216547487304e-05, |
|
"learning_rate": 0.00019512815085240046, |
|
"loss": 46.0, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.002818120514910877, |
|
"grad_norm": 2.7199243049835786e-05, |
|
"learning_rate": 0.000195052956956938, |
|
"loss": 46.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.00283824994716024, |
|
"grad_norm": 1.723020432109479e-05, |
|
"learning_rate": 0.00019497720192549926, |
|
"loss": 46.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.002858379379409604, |
|
"grad_norm": 2.4921268050093204e-05, |
|
"learning_rate": 0.00019490088620529678, |
|
"loss": 46.0, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.002878508811658967, |
|
"grad_norm": 2.3121931008063257e-05, |
|
"learning_rate": 0.00019482401024685308, |
|
"loss": 46.0, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.0028986382439083304, |
|
"grad_norm": 4.1502407839288935e-05, |
|
"learning_rate": 0.0001947465745039979, |
|
"loss": 46.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.002918767676157694, |
|
"grad_norm": 3.218562051188201e-05, |
|
"learning_rate": 0.0001946685794338658, |
|
"loss": 46.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.0029388971084070574, |
|
"grad_norm": 1.8879612980526872e-05, |
|
"learning_rate": 0.00019459002549689308, |
|
"loss": 46.0, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.0029590265406564207, |
|
"grad_norm": 2.8899030439788476e-05, |
|
"learning_rate": 0.0001945109131568154, |
|
"loss": 46.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.0029791559729057844, |
|
"grad_norm": 3.5309523809701204e-05, |
|
"learning_rate": 0.00019443124288066475, |
|
"loss": 46.0, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.0029992854051551477, |
|
"grad_norm": 4.7148212615866214e-05, |
|
"learning_rate": 0.00019435101513876703, |
|
"loss": 46.0, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.003019414837404511, |
|
"grad_norm": 3.963925701100379e-05, |
|
"learning_rate": 0.00019427023040473896, |
|
"loss": 46.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0030395442696538742, |
|
"grad_norm": 2.9483388061635196e-05, |
|
"learning_rate": 0.0001941888891554854, |
|
"loss": 46.0, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.003059673701903238, |
|
"grad_norm": 2.0797941033379175e-05, |
|
"learning_rate": 0.00019410699187119663, |
|
"loss": 46.0, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.0030798031341526012, |
|
"grad_norm": 2.525432500988245e-05, |
|
"learning_rate": 0.00019402453903534533, |
|
"loss": 46.0, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.0030999325664019645, |
|
"grad_norm": 1.9120217984891497e-05, |
|
"learning_rate": 0.0001939415311346839, |
|
"loss": 46.0, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.0031200619986513282, |
|
"grad_norm": 2.6778399842442013e-05, |
|
"learning_rate": 0.0001938579686592415, |
|
"loss": 46.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.0031401914309006915, |
|
"grad_norm": 2.4967603167169727e-05, |
|
"learning_rate": 0.00019377385210232113, |
|
"loss": 46.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.003160320863150055, |
|
"grad_norm": 2.38423963310197e-05, |
|
"learning_rate": 0.0001936891819604968, |
|
"loss": 46.0, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.003180450295399418, |
|
"grad_norm": 5.6928216508822516e-05, |
|
"learning_rate": 0.00019360395873361055, |
|
"loss": 46.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.0032005797276487818, |
|
"grad_norm": 4.014354999526404e-05, |
|
"learning_rate": 0.00019351818292476946, |
|
"loss": 46.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.003220709159898145, |
|
"grad_norm": 4.82712421217002e-05, |
|
"learning_rate": 0.00019343185504034277, |
|
"loss": 46.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0032408385921475083, |
|
"grad_norm": 3.384835144970566e-05, |
|
"learning_rate": 0.0001933449755899588, |
|
"loss": 46.0, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.003260968024396872, |
|
"grad_norm": 1.4583272786694579e-05, |
|
"learning_rate": 0.0001932575450865021, |
|
"loss": 46.0, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.0032810974566462353, |
|
"grad_norm": 4.5586399210151285e-05, |
|
"learning_rate": 0.00019316956404611012, |
|
"loss": 46.0, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.0033012268888955986, |
|
"grad_norm": 4.526826523942873e-05, |
|
"learning_rate": 0.00019308103298817052, |
|
"loss": 46.0, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.0033213563211449623, |
|
"grad_norm": 5.154962491360493e-05, |
|
"learning_rate": 0.00019299195243531792, |
|
"loss": 46.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.0033414857533943256, |
|
"grad_norm": 2.3496044377679937e-05, |
|
"learning_rate": 0.00019290232291343067, |
|
"loss": 46.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.003361615185643689, |
|
"grad_norm": 3.0550760129699484e-05, |
|
"learning_rate": 0.0001928121449516281, |
|
"loss": 46.0, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.003381744617893052, |
|
"grad_norm": 2.7053209123550914e-05, |
|
"learning_rate": 0.00019272141908226707, |
|
"loss": 46.0, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.003401874050142416, |
|
"grad_norm": 1.612185405974742e-05, |
|
"learning_rate": 0.0001926301458409391, |
|
"loss": 46.0, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.003422003482391779, |
|
"grad_norm": 1.803100349206943e-05, |
|
"learning_rate": 0.00019253832576646688, |
|
"loss": 46.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0034421329146411424, |
|
"grad_norm": 1.77473557414487e-05, |
|
"learning_rate": 0.00019244595940090143, |
|
"loss": 46.0, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.003462262346890506, |
|
"grad_norm": 2.4842493075993843e-05, |
|
"learning_rate": 0.00019235304728951866, |
|
"loss": 46.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.0034823917791398694, |
|
"grad_norm": 3.840986391878687e-05, |
|
"learning_rate": 0.00019225958998081633, |
|
"loss": 46.0, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.0035025212113892327, |
|
"grad_norm": 3.629952698247507e-05, |
|
"learning_rate": 0.0001921655880265106, |
|
"loss": 46.0, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.003522650643638596, |
|
"grad_norm": 3.082855619140901e-05, |
|
"learning_rate": 0.00019207104198153295, |
|
"loss": 46.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.0035427800758879597, |
|
"grad_norm": 8.436971984338015e-05, |
|
"learning_rate": 0.0001919759524040269, |
|
"loss": 46.0, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.003562909508137323, |
|
"grad_norm": 3.003582423843909e-05, |
|
"learning_rate": 0.0001918803198553446, |
|
"loss": 46.0, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.0035830389403866863, |
|
"grad_norm": 4.6667788410559297e-05, |
|
"learning_rate": 0.00019178414490004356, |
|
"loss": 46.0, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.00360316837263605, |
|
"grad_norm": 3.2573891076026484e-05, |
|
"learning_rate": 0.00019168742810588335, |
|
"loss": 46.0, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.0036232978048854132, |
|
"grad_norm": 2.6542162231635302e-05, |
|
"learning_rate": 0.00019159017004382234, |
|
"loss": 46.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0036434272371347765, |
|
"grad_norm": 2.6043957404908724e-05, |
|
"learning_rate": 0.00019149237128801404, |
|
"loss": 46.0, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.0036635566693841402, |
|
"grad_norm": 1.9306073227198794e-05, |
|
"learning_rate": 0.000191394032415804, |
|
"loss": 46.0, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.0036836861016335035, |
|
"grad_norm": 4.7370471293106675e-05, |
|
"learning_rate": 0.00019129515400772635, |
|
"loss": 46.0, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.003703815533882867, |
|
"grad_norm": 3.607594771892764e-05, |
|
"learning_rate": 0.00019119573664750018, |
|
"loss": 46.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.00372394496613223, |
|
"grad_norm": 4.207424717606045e-05, |
|
"learning_rate": 0.00019109578092202628, |
|
"loss": 46.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.003744074398381594, |
|
"grad_norm": 4.7341436584247276e-05, |
|
"learning_rate": 0.00019099528742138371, |
|
"loss": 46.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.003764203830630957, |
|
"grad_norm": 6.413136725313962e-05, |
|
"learning_rate": 0.00019089425673882615, |
|
"loss": 46.0, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.0037843332628803203, |
|
"grad_norm": 3.3956010156543925e-05, |
|
"learning_rate": 0.0001907926894707785, |
|
"loss": 46.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.003804462695129684, |
|
"grad_norm": 7.443443610100076e-05, |
|
"learning_rate": 0.00019069058621683336, |
|
"loss": 46.0, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.0038245921273790473, |
|
"grad_norm": 9.83256395556964e-05, |
|
"learning_rate": 0.0001905879475797474, |
|
"loss": 46.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0038447215596284106, |
|
"grad_norm": 2.799310823320411e-05, |
|
"learning_rate": 0.00019048477416543801, |
|
"loss": 46.0, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.003864850991877774, |
|
"grad_norm": 2.725904414546676e-05, |
|
"learning_rate": 0.00019038106658297944, |
|
"loss": 46.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.0038849804241271376, |
|
"grad_norm": 1.805232386686839e-05, |
|
"learning_rate": 0.00019027682544459947, |
|
"loss": 46.0, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.003905109856376501, |
|
"grad_norm": 2.9510436434065923e-05, |
|
"learning_rate": 0.00019017205136567556, |
|
"loss": 46.0, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.003925239288625864, |
|
"grad_norm": 3.2932246540440246e-05, |
|
"learning_rate": 0.00019006674496473144, |
|
"loss": 46.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.003945368720875228, |
|
"grad_norm": 3.495354394544847e-05, |
|
"learning_rate": 0.00018996090686343328, |
|
"loss": 46.0, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.003965498153124591, |
|
"grad_norm": 6.263954128371552e-05, |
|
"learning_rate": 0.0001898545376865861, |
|
"loss": 46.0, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.0039856275853739544, |
|
"grad_norm": 2.9388587790890597e-05, |
|
"learning_rate": 0.00018974763806213013, |
|
"loss": 46.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.004005757017623318, |
|
"grad_norm": 2.9143146093701944e-05, |
|
"learning_rate": 0.000189640208621137, |
|
"loss": 46.0, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.004025886449872681, |
|
"grad_norm": 2.8607553758774884e-05, |
|
"learning_rate": 0.00018953224999780605, |
|
"loss": 46.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.004046015882122045, |
|
"grad_norm": 2.6011948648374528e-05, |
|
"learning_rate": 0.00018942376282946066, |
|
"loss": 46.0, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.004066145314371408, |
|
"grad_norm": 5.046524165663868e-05, |
|
"learning_rate": 0.0001893147477565443, |
|
"loss": 46.0, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.004086274746620771, |
|
"grad_norm": 2.9760611141682602e-05, |
|
"learning_rate": 0.000189205205422617, |
|
"loss": 46.0, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.004106404178870135, |
|
"grad_norm": 8.055127545958385e-05, |
|
"learning_rate": 0.0001890951364743514, |
|
"loss": 46.0, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.004126533611119499, |
|
"grad_norm": 3.0201517802197486e-05, |
|
"learning_rate": 0.00018898454156152886, |
|
"loss": 46.0, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.0041466630433688615, |
|
"grad_norm": 3.596295937313698e-05, |
|
"learning_rate": 0.0001888734213370359, |
|
"loss": 46.0, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.004166792475618225, |
|
"grad_norm": 3.9855971408542246e-05, |
|
"learning_rate": 0.00018876177645685998, |
|
"loss": 46.0, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.004186921907867589, |
|
"grad_norm": 2.937594945251476e-05, |
|
"learning_rate": 0.00018864960758008592, |
|
"loss": 46.0, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.004207051340116952, |
|
"grad_norm": 2.6503237677388825e-05, |
|
"learning_rate": 0.00018853691536889188, |
|
"loss": 46.0, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.0042271807723663155, |
|
"grad_norm": 2.7466578103485517e-05, |
|
"learning_rate": 0.0001884237004885455, |
|
"loss": 46.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.004247310204615679, |
|
"grad_norm": 2.5270055630244315e-05, |
|
"learning_rate": 0.0001883099636073999, |
|
"loss": 46.0, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.004267439636865042, |
|
"grad_norm": 4.509964492172003e-05, |
|
"learning_rate": 0.0001881957053968898, |
|
"loss": 46.0, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.004287569069114406, |
|
"grad_norm": 4.1347884689457715e-05, |
|
"learning_rate": 0.00018808092653152753, |
|
"loss": 46.0, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.004307698501363769, |
|
"grad_norm": 2.3344733563135378e-05, |
|
"learning_rate": 0.00018796562768889913, |
|
"loss": 46.0, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.004327827933613132, |
|
"grad_norm": 3.056141213164665e-05, |
|
"learning_rate": 0.0001878498095496601, |
|
"loss": 46.0, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.004347957365862496, |
|
"grad_norm": 1.8424869267619215e-05, |
|
"learning_rate": 0.00018773347279753177, |
|
"loss": 46.0, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.004368086798111859, |
|
"grad_norm": 3.535512223606929e-05, |
|
"learning_rate": 0.00018761661811929686, |
|
"loss": 46.0, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.004388216230361223, |
|
"grad_norm": 2.6731742764241062e-05, |
|
"learning_rate": 0.00018749924620479585, |
|
"loss": 46.0, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.004408345662610586, |
|
"grad_norm": 4.029847332276404e-05, |
|
"learning_rate": 0.0001873813577469224, |
|
"loss": 46.0, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.004428475094859949, |
|
"grad_norm": 4.0732127672526985e-05, |
|
"learning_rate": 0.0001872629534416197, |
|
"loss": 46.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.004448604527109313, |
|
"grad_norm": 2.8962362193851732e-05, |
|
"learning_rate": 0.0001871440339878762, |
|
"loss": 46.0, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.004468733959358677, |
|
"grad_norm": 4.08275009249337e-05, |
|
"learning_rate": 0.0001870246000877214, |
|
"loss": 46.0, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.0044888633916080395, |
|
"grad_norm": 3.2036841730587184e-05, |
|
"learning_rate": 0.00018690465244622183, |
|
"loss": 46.0, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.004508992823857403, |
|
"grad_norm": 5.666902507073246e-05, |
|
"learning_rate": 0.00018678419177147685, |
|
"loss": 46.0, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.004529122256106767, |
|
"grad_norm": 1.926498043758329e-05, |
|
"learning_rate": 0.0001866632187746145, |
|
"loss": 46.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.00454925168835613, |
|
"grad_norm": 5.15770552738104e-05, |
|
"learning_rate": 0.00018654173416978714, |
|
"loss": 46.0, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.0045693811206054934, |
|
"grad_norm": 4.0023831388680264e-05, |
|
"learning_rate": 0.0001864197386741674, |
|
"loss": 46.0, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.004589510552854857, |
|
"grad_norm": 2.732311622821726e-05, |
|
"learning_rate": 0.00018629723300794408, |
|
"loss": 46.0, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.00460963998510422, |
|
"grad_norm": 3.606328391470015e-05, |
|
"learning_rate": 0.00018617421789431747, |
|
"loss": 46.0, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.004629769417353584, |
|
"grad_norm": 4.1729483200469986e-05, |
|
"learning_rate": 0.0001860506940594955, |
|
"loss": 46.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0046498988496029466, |
|
"grad_norm": 4.251101199770346e-05, |
|
"learning_rate": 0.00018592666223268917, |
|
"loss": 46.0, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.00467002828185231, |
|
"grad_norm": 4.2483963625272736e-05, |
|
"learning_rate": 0.00018580212314610846, |
|
"loss": 46.0, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.004690157714101674, |
|
"grad_norm": 3.098902016063221e-05, |
|
"learning_rate": 0.0001856770775349579, |
|
"loss": 46.0, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.004710287146351037, |
|
"grad_norm": 2.9945371352368966e-05, |
|
"learning_rate": 0.00018555152613743215, |
|
"loss": 46.0, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.0047304165786004005, |
|
"grad_norm": 4.764752884511836e-05, |
|
"learning_rate": 0.00018542546969471183, |
|
"loss": 46.0, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.004750546010849764, |
|
"grad_norm": 2.68215353571577e-05, |
|
"learning_rate": 0.00018529890895095902, |
|
"loss": 46.0, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.004770675443099127, |
|
"grad_norm": 5.318366311257705e-05, |
|
"learning_rate": 0.00018517184465331288, |
|
"loss": 46.0, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.004790804875348491, |
|
"grad_norm": 7.759372965665534e-05, |
|
"learning_rate": 0.00018504427755188521, |
|
"loss": 46.0, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.0048109343075978545, |
|
"grad_norm": 2.4518141799489968e-05, |
|
"learning_rate": 0.00018491620839975617, |
|
"loss": 46.0, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.004831063739847217, |
|
"grad_norm": 2.9744596758973785e-05, |
|
"learning_rate": 0.00018478763795296962, |
|
"loss": 46.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.004851193172096581, |
|
"grad_norm": 3.903737888322212e-05, |
|
"learning_rate": 0.0001846585669705288, |
|
"loss": 46.0, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.004871322604345945, |
|
"grad_norm": 3.140496482956223e-05, |
|
"learning_rate": 0.00018452899621439182, |
|
"loss": 46.0, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.004891452036595308, |
|
"grad_norm": 2.7846319426316768e-05, |
|
"learning_rate": 0.00018439892644946722, |
|
"loss": 46.0, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.004911581468844671, |
|
"grad_norm": 2.935269549197983e-05, |
|
"learning_rate": 0.00018426835844360929, |
|
"loss": 46.0, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.004931710901094035, |
|
"grad_norm": 2.9461683880072087e-05, |
|
"learning_rate": 0.00018413729296761364, |
|
"loss": 46.0, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.004951840333343398, |
|
"grad_norm": 3.557924719643779e-05, |
|
"learning_rate": 0.00018400573079521278, |
|
"loss": 46.0, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.004971969765592762, |
|
"grad_norm": 3.282381294411607e-05, |
|
"learning_rate": 0.0001838736727030712, |
|
"loss": 46.0, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.0049920991978421245, |
|
"grad_norm": 4.159653326496482e-05, |
|
"learning_rate": 0.00018374111947078124, |
|
"loss": 46.0, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.005012228630091488, |
|
"grad_norm": 3.4549964766483754e-05, |
|
"learning_rate": 0.00018360807188085807, |
|
"loss": 46.0, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.005032358062340852, |
|
"grad_norm": 4.0204184188041836e-05, |
|
"learning_rate": 0.00018347453071873536, |
|
"loss": 46.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.005052487494590215, |
|
"grad_norm": 8.349636482307687e-05, |
|
"learning_rate": 0.00018334049677276045, |
|
"loss": 46.0, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.0050726169268395785, |
|
"grad_norm": 3.3643322240095586e-05, |
|
"learning_rate": 0.0001832059708341899, |
|
"loss": 46.0, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.005092746359088942, |
|
"grad_norm": 3.255937190260738e-05, |
|
"learning_rate": 0.00018307095369718456, |
|
"loss": 46.0, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.005112875791338305, |
|
"grad_norm": 3.45467560691759e-05, |
|
"learning_rate": 0.00018293544615880517, |
|
"loss": 46.0, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.005133005223587669, |
|
"grad_norm": 6.099267557146959e-05, |
|
"learning_rate": 0.00018279944901900737, |
|
"loss": 46.0, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.0051531346558370324, |
|
"grad_norm": 3.314892455819063e-05, |
|
"learning_rate": 0.00018266296308063718, |
|
"loss": 46.0, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.005173264088086395, |
|
"grad_norm": 2.7799773306469433e-05, |
|
"learning_rate": 0.00018252598914942622, |
|
"loss": 46.0, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.005193393520335759, |
|
"grad_norm": 4.2107418266823515e-05, |
|
"learning_rate": 0.00018238852803398689, |
|
"loss": 46.0, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.005213522952585123, |
|
"grad_norm": 6.404446321539581e-05, |
|
"learning_rate": 0.00018225058054580765, |
|
"loss": 46.0, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.0052336523848344856, |
|
"grad_norm": 5.3031737479614094e-05, |
|
"learning_rate": 0.0001821121474992482, |
|
"loss": 46.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.005253781817083849, |
|
"grad_norm": 4.130045635974966e-05, |
|
"learning_rate": 0.00018197322971153467, |
|
"loss": 46.0, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.005273911249333213, |
|
"grad_norm": 4.748915307573043e-05, |
|
"learning_rate": 0.0001818338280027549, |
|
"loss": 46.0, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.005294040681582576, |
|
"grad_norm": 2.8563030355144292e-05, |
|
"learning_rate": 0.00018169394319585345, |
|
"loss": 46.0, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.0053141701138319395, |
|
"grad_norm": 4.959934449288994e-05, |
|
"learning_rate": 0.00018155357611662672, |
|
"loss": 46.0, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.005334299546081302, |
|
"grad_norm": 4.6712710172869265e-05, |
|
"learning_rate": 0.0001814127275937183, |
|
"loss": 46.0, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.005354428978330666, |
|
"grad_norm": 0.00011124753655167297, |
|
"learning_rate": 0.0001812713984586139, |
|
"loss": 46.0, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.00537455841058003, |
|
"grad_norm": 4.563620314002037e-05, |
|
"learning_rate": 0.00018112958954563646, |
|
"loss": 46.0, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.005394687842829393, |
|
"grad_norm": 5.554988456424326e-05, |
|
"learning_rate": 0.00018098730169194117, |
|
"loss": 46.0, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.005414817275078756, |
|
"grad_norm": 4.447490573511459e-05, |
|
"learning_rate": 0.00018084453573751072, |
|
"loss": 46.0, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.00543494670732812, |
|
"grad_norm": 3.21212355629541e-05, |
|
"learning_rate": 0.00018070129252515014, |
|
"loss": 46.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.005455076139577483, |
|
"grad_norm": 3.499364902381785e-05, |
|
"learning_rate": 0.00018055757290048202, |
|
"loss": 46.0, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.005475205571826847, |
|
"grad_norm": 4.179975076112896e-05, |
|
"learning_rate": 0.00018041337771194121, |
|
"loss": 46.0, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.00549533500407621, |
|
"grad_norm": 5.2844952733721584e-05, |
|
"learning_rate": 0.0001802687078107702, |
|
"loss": 46.0, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.005515464436325573, |
|
"grad_norm": 2.9436003387672827e-05, |
|
"learning_rate": 0.0001801235640510138, |
|
"loss": 46.0, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.005535593868574937, |
|
"grad_norm": 0.00010626760922605172, |
|
"learning_rate": 0.0001799779472895142, |
|
"loss": 46.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.005555723300824301, |
|
"grad_norm": 7.006096711847931e-05, |
|
"learning_rate": 0.00017983185838590587, |
|
"loss": 46.0, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.0055758527330736635, |
|
"grad_norm": 4.731449007522315e-05, |
|
"learning_rate": 0.0001796852982026107, |
|
"loss": 46.0, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.005595982165323027, |
|
"grad_norm": 2.740498530329205e-05, |
|
"learning_rate": 0.00017953826760483255, |
|
"loss": 46.0, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.005616111597572391, |
|
"grad_norm": 2.5784778699744493e-05, |
|
"learning_rate": 0.00017939076746055239, |
|
"loss": 46.0, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.005636241029821754, |
|
"grad_norm": 3.0875242373440415e-05, |
|
"learning_rate": 0.00017924279864052313, |
|
"loss": 46.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.0056563704620711175, |
|
"grad_norm": 2.555253195168916e-05, |
|
"learning_rate": 0.00017909436201826444, |
|
"loss": 46.0, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.00567649989432048, |
|
"grad_norm": 3.1929652323015034e-05, |
|
"learning_rate": 0.00017894545847005764, |
|
"loss": 46.0, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.005696629326569844, |
|
"grad_norm": 5.2126772061455995e-05, |
|
"learning_rate": 0.00017879608887494045, |
|
"loss": 46.0, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.005716758758819208, |
|
"grad_norm": 2.7905460228794254e-05, |
|
"learning_rate": 0.00017864625411470193, |
|
"loss": 46.0, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.005736888191068571, |
|
"grad_norm": 5.273651913739741e-05, |
|
"learning_rate": 0.00017849595507387714, |
|
"loss": 46.0, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.005757017623317934, |
|
"grad_norm": 2.429057531116996e-05, |
|
"learning_rate": 0.00017834519263974197, |
|
"loss": 46.0, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.005777147055567298, |
|
"grad_norm": 3.3973785320995376e-05, |
|
"learning_rate": 0.00017819396770230793, |
|
"loss": 46.0, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.005797276487816661, |
|
"grad_norm": 3.730989556061104e-05, |
|
"learning_rate": 0.0001780422811543169, |
|
"loss": 46.0, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.0058174059200660246, |
|
"grad_norm": 5.928779864916578e-05, |
|
"learning_rate": 0.00017789013389123582, |
|
"loss": 46.0, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.005837535352315388, |
|
"grad_norm": 3.284361446276307e-05, |
|
"learning_rate": 0.00017773752681125133, |
|
"loss": 46.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.005857664784564751, |
|
"grad_norm": 2.5975041353376582e-05, |
|
"learning_rate": 0.00017758446081526472, |
|
"loss": 46.0, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.005877794216814115, |
|
"grad_norm": 4.9675658374326304e-05, |
|
"learning_rate": 0.00017743093680688628, |
|
"loss": 46.0, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.0058979236490634785, |
|
"grad_norm": 3.443100649747066e-05, |
|
"learning_rate": 0.00017727695569243025, |
|
"loss": 46.0, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.005918053081312841, |
|
"grad_norm": 4.2306735849706456e-05, |
|
"learning_rate": 0.00017712251838090929, |
|
"loss": 46.0, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.005938182513562205, |
|
"grad_norm": 5.587004852714017e-05, |
|
"learning_rate": 0.00017696762578402918, |
|
"loss": 46.0, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.005958311945811569, |
|
"grad_norm": 4.021718632429838e-05, |
|
"learning_rate": 0.0001768122788161835, |
|
"loss": 46.0, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.005978441378060932, |
|
"grad_norm": 3.435139296925627e-05, |
|
"learning_rate": 0.00017665647839444808, |
|
"loss": 46.0, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.005998570810310295, |
|
"grad_norm": 4.693563096225262e-05, |
|
"learning_rate": 0.0001765002254385757, |
|
"loss": 46.0, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.006018700242559658, |
|
"grad_norm": 3.511687464197166e-05, |
|
"learning_rate": 0.0001763435208709906, |
|
"loss": 46.0, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.006038829674809022, |
|
"grad_norm": 5.281609992380254e-05, |
|
"learning_rate": 0.00017618636561678316, |
|
"loss": 46.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.006058959107058386, |
|
"grad_norm": 6.96783245075494e-05, |
|
"learning_rate": 0.0001760287606037043, |
|
"loss": 46.0, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.0060790885393077485, |
|
"grad_norm": 3.3282187359873205e-05, |
|
"learning_rate": 0.00017587070676215993, |
|
"loss": 46.0, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.006099217971557112, |
|
"grad_norm": 7.593463669763878e-05, |
|
"learning_rate": 0.0001757122050252058, |
|
"loss": 46.0, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.006119347403806476, |
|
"grad_norm": 6.294970808085054e-05, |
|
"learning_rate": 0.0001755532563285416, |
|
"loss": 46.0, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.006139476836055839, |
|
"grad_norm": 3.691632446134463e-05, |
|
"learning_rate": 0.0001753938616105056, |
|
"loss": 46.0, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.0061596062683052025, |
|
"grad_norm": 4.616468140739016e-05, |
|
"learning_rate": 0.0001752340218120693, |
|
"loss": 46.0, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.006179735700554566, |
|
"grad_norm": 2.737195791269187e-05, |
|
"learning_rate": 0.00017507373787683142, |
|
"loss": 46.0, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.006199865132803929, |
|
"grad_norm": 6.505291094072163e-05, |
|
"learning_rate": 0.00017491301075101278, |
|
"loss": 46.0, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.006219994565053293, |
|
"grad_norm": 5.131972284289077e-05, |
|
"learning_rate": 0.0001747518413834505, |
|
"loss": 46.0, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.0062401239973026565, |
|
"grad_norm": 4.8223384510492906e-05, |
|
"learning_rate": 0.0001745902307255924, |
|
"loss": 46.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.006260253429552019, |
|
"grad_norm": 3.8179550756467506e-05, |
|
"learning_rate": 0.00017442817973149145, |
|
"loss": 46.0, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.006280382861801383, |
|
"grad_norm": 7.28157683624886e-05, |
|
"learning_rate": 0.0001742656893578001, |
|
"loss": 46.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.006300512294050747, |
|
"grad_norm": 4.902153159491718e-05, |
|
"learning_rate": 0.00017410276056376456, |
|
"loss": 46.0, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.00632064172630011, |
|
"grad_norm": 6.659854261670262e-05, |
|
"learning_rate": 0.00017393939431121933, |
|
"loss": 46.0, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.006340771158549473, |
|
"grad_norm": 5.896111542824656e-05, |
|
"learning_rate": 0.00017377559156458132, |
|
"loss": 46.0, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.006360900590798836, |
|
"grad_norm": 3.361068957019597e-05, |
|
"learning_rate": 0.00017361135329084428, |
|
"loss": 46.0, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.0063810300230482, |
|
"grad_norm": 8.01550195319578e-05, |
|
"learning_rate": 0.00017344668045957305, |
|
"loss": 46.0, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.0064011594552975636, |
|
"grad_norm": 7.291202200576663e-05, |
|
"learning_rate": 0.0001732815740428978, |
|
"loss": 46.0, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.006421288887546926, |
|
"grad_norm": 4.988636646885425e-05, |
|
"learning_rate": 0.00017311603501550838, |
|
"loss": 46.0, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.00644141831979629, |
|
"grad_norm": 4.8562131269136444e-05, |
|
"learning_rate": 0.00017295006435464848, |
|
"loss": 46.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.006461547752045654, |
|
"grad_norm": 3.899990770150907e-05, |
|
"learning_rate": 0.00017278366304010993, |
|
"loss": 46.0, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.006481677184295017, |
|
"grad_norm": 8.76895574037917e-05, |
|
"learning_rate": 0.00017261683205422687, |
|
"loss": 46.0, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.00650180661654438, |
|
"grad_norm": 6.916802522027865e-05, |
|
"learning_rate": 0.00017244957238186993, |
|
"loss": 46.0, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.006521936048793744, |
|
"grad_norm": 7.918164919828996e-05, |
|
"learning_rate": 0.00017228188501044043, |
|
"loss": 46.0, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.006542065481043107, |
|
"grad_norm": 0.00010430561087559909, |
|
"learning_rate": 0.00017211377092986476, |
|
"loss": 46.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.006562194913292471, |
|
"grad_norm": 3.571771958377212e-05, |
|
"learning_rate": 0.00017194523113258804, |
|
"loss": 46.0, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.006562194913292471, |
|
"eval_loss": 11.5, |
|
"eval_runtime": 125.9586, |
|
"eval_samples_per_second": 166.07, |
|
"eval_steps_per_second": 83.035, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.006582324345541834, |
|
"grad_norm": 7.556305354228243e-05, |
|
"learning_rate": 0.00017177626661356884, |
|
"loss": 46.0, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.006602453777791197, |
|
"grad_norm": 7.451939745806158e-05, |
|
"learning_rate": 0.0001716068783702729, |
|
"loss": 46.0, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.006622583210040561, |
|
"grad_norm": 9.252296149497852e-05, |
|
"learning_rate": 0.00017143706740266733, |
|
"loss": 46.0, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.006642712642289925, |
|
"grad_norm": 7.886350795160979e-05, |
|
"learning_rate": 0.00017126683471321494, |
|
"loss": 46.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.0066628420745392875, |
|
"grad_norm": 7.060460484353825e-05, |
|
"learning_rate": 0.00017109618130686793, |
|
"loss": 46.0, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.006682971506788651, |
|
"grad_norm": 9.136456355918199e-05, |
|
"learning_rate": 0.00017092510819106228, |
|
"loss": 46.0, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.006703100939038014, |
|
"grad_norm": 0.00014541424752678722, |
|
"learning_rate": 0.00017075361637571164, |
|
"loss": 46.0, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.006723230371287378, |
|
"grad_norm": 5.236260767560452e-05, |
|
"learning_rate": 0.00017058170687320144, |
|
"loss": 46.0, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.0067433598035367415, |
|
"grad_norm": 3.249632572988048e-05, |
|
"learning_rate": 0.00017040938069838284, |
|
"loss": 46.0, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.006763489235786104, |
|
"grad_norm": 0.00012241276272106916, |
|
"learning_rate": 0.00017023663886856681, |
|
"loss": 46.0, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.006783618668035468, |
|
"grad_norm": 7.044512312859297e-05, |
|
"learning_rate": 0.0001700634824035182, |
|
"loss": 46.0, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.006803748100284832, |
|
"grad_norm": 3.4949163818964735e-05, |
|
"learning_rate": 0.00016988991232544943, |
|
"loss": 46.0, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.006823877532534195, |
|
"grad_norm": 5.8312176406616345e-05, |
|
"learning_rate": 0.00016971592965901472, |
|
"loss": 46.0, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.006844006964783558, |
|
"grad_norm": 6.132174894446507e-05, |
|
"learning_rate": 0.00016954153543130405, |
|
"loss": 46.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.006864136397032922, |
|
"grad_norm": 7.451802957803011e-05, |
|
"learning_rate": 0.00016936673067183695, |
|
"loss": 46.0, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.006884265829282285, |
|
"grad_norm": 4.202104901196435e-05, |
|
"learning_rate": 0.00016919151641255642, |
|
"loss": 46.0, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.006904395261531649, |
|
"grad_norm": 5.8602174249244854e-05, |
|
"learning_rate": 0.00016901589368782303, |
|
"loss": 46.0, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.006924524693781012, |
|
"grad_norm": 6.12097283010371e-05, |
|
"learning_rate": 0.00016883986353440856, |
|
"loss": 46.0, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.006944654126030375, |
|
"grad_norm": 7.694535452174023e-05, |
|
"learning_rate": 0.0001686634269914901, |
|
"loss": 46.0, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.006964783558279739, |
|
"grad_norm": 4.796484427060932e-05, |
|
"learning_rate": 0.00016848658510064377, |
|
"loss": 46.0, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.0069849129905291026, |
|
"grad_norm": 3.562847996363416e-05, |
|
"learning_rate": 0.00016830933890583865, |
|
"loss": 46.0, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.007005042422778465, |
|
"grad_norm": 5.831445741932839e-05, |
|
"learning_rate": 0.00016813168945343062, |
|
"loss": 46.0, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.007025171855027829, |
|
"grad_norm": 5.701630288967863e-05, |
|
"learning_rate": 0.000167953637792156, |
|
"loss": 46.0, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.007045301287277192, |
|
"grad_norm": 6.599428888875991e-05, |
|
"learning_rate": 0.00016777518497312576, |
|
"loss": 46.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.007065430719526556, |
|
"grad_norm": 7.63636635383591e-05, |
|
"learning_rate": 0.00016759633204981885, |
|
"loss": 46.0, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.007085560151775919, |
|
"grad_norm": 5.485086148837581e-05, |
|
"learning_rate": 0.00016741708007807625, |
|
"loss": 46.0, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.007105689584025282, |
|
"grad_norm": 8.1733385741245e-05, |
|
"learning_rate": 0.0001672374301160948, |
|
"loss": 46.0, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.007125819016274646, |
|
"grad_norm": 7.260946586029604e-05, |
|
"learning_rate": 0.00016705738322442067, |
|
"loss": 46.0, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.00714594844852401, |
|
"grad_norm": 0.00012448117195162922, |
|
"learning_rate": 0.0001668769404659434, |
|
"loss": 46.0, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.0071660778807733725, |
|
"grad_norm": 0.00018412985082250088, |
|
"learning_rate": 0.00016669610290588938, |
|
"loss": 46.0, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.007186207313022736, |
|
"grad_norm": 0.00010343602480133995, |
|
"learning_rate": 0.00016651487161181575, |
|
"loss": 46.0, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.0072063367452721, |
|
"grad_norm": 7.606980216223747e-05, |
|
"learning_rate": 0.00016633324765360404, |
|
"loss": 46.0, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.007226466177521463, |
|
"grad_norm": 4.61212002846878e-05, |
|
"learning_rate": 0.00016615123210345374, |
|
"loss": 46.0, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.0072465956097708265, |
|
"grad_norm": 3.344099968671799e-05, |
|
"learning_rate": 0.00016596882603587613, |
|
"loss": 46.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.00726672504202019, |
|
"grad_norm": 6.0584614402614534e-05, |
|
"learning_rate": 0.00016578603052768787, |
|
"loss": 46.0, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.007286854474269553, |
|
"grad_norm": 0.00010081299842568114, |
|
"learning_rate": 0.00016560284665800463, |
|
"loss": 46.0, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.007306983906518917, |
|
"grad_norm": 0.00018100201850757003, |
|
"learning_rate": 0.00016541927550823475, |
|
"loss": 46.0, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.0073271133387682805, |
|
"grad_norm": 6.115916767157614e-05, |
|
"learning_rate": 0.00016523531816207285, |
|
"loss": 46.0, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.007347242771017643, |
|
"grad_norm": 0.0001454094162909314, |
|
"learning_rate": 0.00016505097570549334, |
|
"loss": 46.0, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.007367372203267007, |
|
"grad_norm": 7.579627708764747e-05, |
|
"learning_rate": 0.00016486624922674423, |
|
"loss": 46.0, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.00738750163551637, |
|
"grad_norm": 5.6147979194065556e-05, |
|
"learning_rate": 0.0001646811398163405, |
|
"loss": 46.0, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.007407631067765734, |
|
"grad_norm": 2.967609543702565e-05, |
|
"learning_rate": 0.00016449564856705763, |
|
"loss": 46.0, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.007427760500015097, |
|
"grad_norm": 4.6177185140550137e-05, |
|
"learning_rate": 0.00016430977657392543, |
|
"loss": 46.0, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.00744788993226446, |
|
"grad_norm": 6.124021456344053e-05, |
|
"learning_rate": 0.00016412352493422132, |
|
"loss": 46.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.007468019364513824, |
|
"grad_norm": 6.325580761767924e-05, |
|
"learning_rate": 0.00016393689474746383, |
|
"loss": 46.0, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.007488148796763188, |
|
"grad_norm": 4.645885928766802e-05, |
|
"learning_rate": 0.00016374988711540634, |
|
"loss": 46.0, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.00750827822901255, |
|
"grad_norm": 6.079759623389691e-05, |
|
"learning_rate": 0.00016356250314203044, |
|
"loss": 46.0, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.007528407661261914, |
|
"grad_norm": 6.65177867631428e-05, |
|
"learning_rate": 0.00016337474393353932, |
|
"loss": 46.0, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.007548537093511278, |
|
"grad_norm": 6.509361992357299e-05, |
|
"learning_rate": 0.00016318661059835135, |
|
"loss": 46.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.007568666525760641, |
|
"grad_norm": 6.14354939898476e-05, |
|
"learning_rate": 0.0001629981042470936, |
|
"loss": 46.0, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.007588795958010004, |
|
"grad_norm": 0.00011426959827076644, |
|
"learning_rate": 0.00016280922599259517, |
|
"loss": 46.0, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.007608925390259368, |
|
"grad_norm": 5.5513559345854446e-05, |
|
"learning_rate": 0.00016261997694988064, |
|
"loss": 46.0, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.007629054822508731, |
|
"grad_norm": 5.053329368820414e-05, |
|
"learning_rate": 0.00016243035823616347, |
|
"loss": 46.0, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.007649184254758095, |
|
"grad_norm": 4.8892205086303875e-05, |
|
"learning_rate": 0.0001622403709708395, |
|
"loss": 46.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.007669313687007458, |
|
"grad_norm": 6.474481051554903e-05, |
|
"learning_rate": 0.00016205001627548019, |
|
"loss": 46.0, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.007689443119256821, |
|
"grad_norm": 6.014668906573206e-05, |
|
"learning_rate": 0.0001618592952738263, |
|
"loss": 46.0, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.007709572551506185, |
|
"grad_norm": 7.018409087322652e-05, |
|
"learning_rate": 0.00016166820909178074, |
|
"loss": 46.0, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.007729701983755548, |
|
"grad_norm": 0.00011436323984526098, |
|
"learning_rate": 0.00016147675885740242, |
|
"loss": 46.0, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.0077498314160049115, |
|
"grad_norm": 7.127400749595836e-05, |
|
"learning_rate": 0.00016128494570089944, |
|
"loss": 46.0, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.007769960848254275, |
|
"grad_norm": 8.55454636621289e-05, |
|
"learning_rate": 0.0001610927707546222, |
|
"loss": 46.0, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.007790090280503638, |
|
"grad_norm": 6.892836972838268e-05, |
|
"learning_rate": 0.00016090023515305703, |
|
"loss": 46.0, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.007810219712753002, |
|
"grad_norm": 0.0001008848994388245, |
|
"learning_rate": 0.0001607073400328193, |
|
"loss": 46.0, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.007830349145002365, |
|
"grad_norm": 5.973876977805048e-05, |
|
"learning_rate": 0.00016051408653264675, |
|
"loss": 46.0, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.007850478577251728, |
|
"grad_norm": 9.112283441936597e-05, |
|
"learning_rate": 0.00016032047579339287, |
|
"loss": 46.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.007870608009501092, |
|
"grad_norm": 9.579762991052121e-05, |
|
"learning_rate": 0.00016012650895801995, |
|
"loss": 46.0, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.007890737441750456, |
|
"grad_norm": 6.637965998379514e-05, |
|
"learning_rate": 0.00015993218717159254, |
|
"loss": 46.0, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.00791086687399982, |
|
"grad_norm": 3.44514446624089e-05, |
|
"learning_rate": 0.00015973751158127058, |
|
"loss": 46.0, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.007930996306249181, |
|
"grad_norm": 3.686073250719346e-05, |
|
"learning_rate": 0.00015954248333630266, |
|
"loss": 46.0, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.007951125738498545, |
|
"grad_norm": 5.8047575294040143e-05, |
|
"learning_rate": 0.0001593471035880193, |
|
"loss": 46.0, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.007971255170747909, |
|
"grad_norm": 0.0001278682757401839, |
|
"learning_rate": 0.00015915137348982596, |
|
"loss": 46.0, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.007991384602997273, |
|
"grad_norm": 5.004106424166821e-05, |
|
"learning_rate": 0.00015895529419719643, |
|
"loss": 46.0, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.008011514035246636, |
|
"grad_norm": 0.00018534505215939134, |
|
"learning_rate": 0.00015875886686766597, |
|
"loss": 46.0, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.008031643467496, |
|
"grad_norm": 7.75425141910091e-05, |
|
"learning_rate": 0.00015856209266082436, |
|
"loss": 46.0, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.008051772899745362, |
|
"grad_norm": 0.00016450489056296647, |
|
"learning_rate": 0.0001583649727383092, |
|
"loss": 46.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.008071902331994726, |
|
"grad_norm": 0.00011991400242550299, |
|
"learning_rate": 0.00015816750826379896, |
|
"loss": 46.0, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.00809203176424409, |
|
"grad_norm": 7.743191963527352e-05, |
|
"learning_rate": 0.00015796970040300612, |
|
"loss": 46.0, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.008112161196493453, |
|
"grad_norm": 5.860636883880943e-05, |
|
"learning_rate": 0.0001577715503236704, |
|
"loss": 46.0, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.008132290628742817, |
|
"grad_norm": 6.259889778448269e-05, |
|
"learning_rate": 0.00015757305919555164, |
|
"loss": 46.0, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.00815242006099218, |
|
"grad_norm": 0.0001673314836807549, |
|
"learning_rate": 0.00015737422819042313, |
|
"loss": 46.0, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.008172549493241543, |
|
"grad_norm": 0.00013351505913306028, |
|
"learning_rate": 0.00015717505848206455, |
|
"loss": 46.0, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.008192678925490906, |
|
"grad_norm": 5.856342249899171e-05, |
|
"learning_rate": 0.00015697555124625508, |
|
"loss": 46.0, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.00821280835774027, |
|
"grad_norm": 8.595505642006174e-05, |
|
"learning_rate": 0.00015677570766076652, |
|
"loss": 46.0, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.008232937789989634, |
|
"grad_norm": 8.992596121970564e-05, |
|
"learning_rate": 0.0001565755289053562, |
|
"loss": 46.0, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.008253067222238997, |
|
"grad_norm": 0.00011127449397463351, |
|
"learning_rate": 0.00015637501616176005, |
|
"loss": 46.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.00827319665448836, |
|
"grad_norm": 7.977043424034491e-05, |
|
"learning_rate": 0.00015617417061368586, |
|
"loss": 46.0, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.008293326086737723, |
|
"grad_norm": 7.366786303464323e-05, |
|
"learning_rate": 0.0001559729934468059, |
|
"loss": 46.0, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.008313455518987087, |
|
"grad_norm": 6.224372191354632e-05, |
|
"learning_rate": 0.0001557714858487502, |
|
"loss": 46.0, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.00833358495123645, |
|
"grad_norm": 0.00012246175901964307, |
|
"learning_rate": 0.00015556964900909952, |
|
"loss": 46.0, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.008353714383485814, |
|
"grad_norm": 9.144249634118751e-05, |
|
"learning_rate": 0.00015536748411937814, |
|
"loss": 46.0, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.008373843815735178, |
|
"grad_norm": 4.544908370007761e-05, |
|
"learning_rate": 0.00015516499237304703, |
|
"loss": 46.0, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.00839397324798454, |
|
"grad_norm": 7.032406574580818e-05, |
|
"learning_rate": 0.00015496217496549673, |
|
"loss": 46.0, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.008414102680233904, |
|
"grad_norm": 0.00028849008958786726, |
|
"learning_rate": 0.00015475903309404023, |
|
"loss": 46.0, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.008434232112483267, |
|
"grad_norm": 7.347687642322853e-05, |
|
"learning_rate": 0.00015455556795790603, |
|
"loss": 46.0, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.008454361544732631, |
|
"grad_norm": 0.0001396266306983307, |
|
"learning_rate": 0.000154351780758231, |
|
"loss": 46.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.008474490976981995, |
|
"grad_norm": 7.422738417517394e-05, |
|
"learning_rate": 0.00015414767269805317, |
|
"loss": 46.0, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.008494620409231358, |
|
"grad_norm": 0.00010193362686550245, |
|
"learning_rate": 0.00015394324498230487, |
|
"loss": 46.0, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.00851474984148072, |
|
"grad_norm": 3.995158476755023e-05, |
|
"learning_rate": 0.00015373849881780542, |
|
"loss": 46.0, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.008534879273730084, |
|
"grad_norm": 0.00010771408415166661, |
|
"learning_rate": 0.00015353343541325406, |
|
"loss": 46.0, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.008555008705979448, |
|
"grad_norm": 9.801267879083753e-05, |
|
"learning_rate": 0.00015332805597922285, |
|
"loss": 46.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.008575138138228812, |
|
"grad_norm": 5.4972933867247775e-05, |
|
"learning_rate": 0.00015312236172814955, |
|
"loss": 46.0, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.008595267570478175, |
|
"grad_norm": 0.00011087340681115165, |
|
"learning_rate": 0.0001529163538743303, |
|
"loss": 46.0, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.008615397002727537, |
|
"grad_norm": 9.032541129272431e-05, |
|
"learning_rate": 0.00015271003363391268, |
|
"loss": 46.0, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.008635526434976901, |
|
"grad_norm": 9.607595711713657e-05, |
|
"learning_rate": 0.00015250340222488826, |
|
"loss": 46.0, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.008655655867226265, |
|
"grad_norm": 0.00015448669728357345, |
|
"learning_rate": 0.00015229646086708574, |
|
"loss": 46.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.008675785299475628, |
|
"grad_norm": 4.9507445510244e-05, |
|
"learning_rate": 0.0001520892107821635, |
|
"loss": 46.0, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.008695914731724992, |
|
"grad_norm": 0.0001763905311236158, |
|
"learning_rate": 0.0001518816531936024, |
|
"loss": 46.0, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.008716044163974356, |
|
"grad_norm": 0.0001318747381446883, |
|
"learning_rate": 0.0001516737893266987, |
|
"loss": 46.0, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.008736173596223718, |
|
"grad_norm": 0.00010619282693369314, |
|
"learning_rate": 0.00015146562040855676, |
|
"loss": 46.0, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.008756303028473082, |
|
"grad_norm": 8.555618114769459e-05, |
|
"learning_rate": 0.00015125714766808167, |
|
"loss": 46.0, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.008776432460722445, |
|
"grad_norm": 0.00016039168986026198, |
|
"learning_rate": 0.00015104837233597223, |
|
"loss": 46.0, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.008796561892971809, |
|
"grad_norm": 0.00014670997916255146, |
|
"learning_rate": 0.00015083929564471343, |
|
"loss": 46.0, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.008816691325221173, |
|
"grad_norm": 6.258589564822614e-05, |
|
"learning_rate": 0.00015062991882856946, |
|
"loss": 46.0, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.008836820757470536, |
|
"grad_norm": 7.728593482170254e-05, |
|
"learning_rate": 0.00015042024312357616, |
|
"loss": 46.0, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.008856950189719898, |
|
"grad_norm": 9.296549978898838e-05, |
|
"learning_rate": 0.00015021026976753385, |
|
"loss": 46.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.008877079621969262, |
|
"grad_norm": 9.728507575346157e-05, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 46.0, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.008897209054218626, |
|
"grad_norm": 0.00014637406275141984, |
|
"learning_rate": 0.000149789435062282, |
|
"loss": 46.0, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.00891733848646799, |
|
"grad_norm": 5.648445221595466e-05, |
|
"learning_rate": 0.00014957857619742957, |
|
"loss": 46.0, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.008937467918717353, |
|
"grad_norm": 0.00013621490506920964, |
|
"learning_rate": 0.0001493674246502278, |
|
"loss": 46.0, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.008957597350966715, |
|
"grad_norm": 6.250360456760973e-05, |
|
"learning_rate": 0.00014915598166718945, |
|
"loss": 46.0, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.008977726783216079, |
|
"grad_norm": 0.00017833786841947585, |
|
"learning_rate": 0.00014894424849654783, |
|
"loss": 46.0, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.008997856215465443, |
|
"grad_norm": 6.154891161713749e-05, |
|
"learning_rate": 0.00014873222638824937, |
|
"loss": 46.0, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.009017985647714806, |
|
"grad_norm": 0.0001515242620371282, |
|
"learning_rate": 0.0001485199165939461, |
|
"loss": 46.0, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.00903811507996417, |
|
"grad_norm": 9.545722423354164e-05, |
|
"learning_rate": 0.00014830732036698845, |
|
"loss": 46.0, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.009058244512213534, |
|
"grad_norm": 5.560795034398325e-05, |
|
"learning_rate": 0.0001480944389624178, |
|
"loss": 46.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.009078373944462896, |
|
"grad_norm": 0.000126198137877509, |
|
"learning_rate": 0.00014788127363695897, |
|
"loss": 46.0, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.00909850337671226, |
|
"grad_norm": 9.512303222436458e-05, |
|
"learning_rate": 0.00014766782564901298, |
|
"loss": 46.0, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.009118632808961623, |
|
"grad_norm": 0.0001474691671319306, |
|
"learning_rate": 0.00014745409625864942, |
|
"loss": 46.0, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.009138762241210987, |
|
"grad_norm": 7.026526873232797e-05, |
|
"learning_rate": 0.0001472400867275992, |
|
"loss": 46.0, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.00915889167346035, |
|
"grad_norm": 0.00010350546654080972, |
|
"learning_rate": 0.00014702579831924698, |
|
"loss": 46.0, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.009179021105709714, |
|
"grad_norm": 6.921013118699193e-05, |
|
"learning_rate": 0.00014681123229862367, |
|
"loss": 46.0, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.009199150537959076, |
|
"grad_norm": 0.00011726860975613818, |
|
"learning_rate": 0.0001465963899323992, |
|
"loss": 46.0, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.00921927997020844, |
|
"grad_norm": 8.906117000151426e-05, |
|
"learning_rate": 0.00014638127248887473, |
|
"loss": 46.0, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.009239409402457804, |
|
"grad_norm": 0.00013452931307256222, |
|
"learning_rate": 0.00014616588123797535, |
|
"loss": 46.0, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.009259538834707167, |
|
"grad_norm": 9.775407670531422e-05, |
|
"learning_rate": 0.0001459502174512426, |
|
"loss": 46.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.009279668266956531, |
|
"grad_norm": 8.385746332351118e-05, |
|
"learning_rate": 0.0001457342824018269, |
|
"loss": 46.0, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.009299797699205893, |
|
"grad_norm": 5.9415571740828454e-05, |
|
"learning_rate": 0.00014551807736447995, |
|
"loss": 46.0, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.009319927131455257, |
|
"grad_norm": 0.00011705618089763448, |
|
"learning_rate": 0.0001453016036155474, |
|
"loss": 46.0, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.00934005656370462, |
|
"grad_norm": 8.00532943685539e-05, |
|
"learning_rate": 0.00014508486243296122, |
|
"loss": 46.0, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.009360185995953984, |
|
"grad_norm": 0.00010752366506494582, |
|
"learning_rate": 0.00014486785509623202, |
|
"loss": 46.0, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.009380315428203348, |
|
"grad_norm": 0.00011672089749481529, |
|
"learning_rate": 0.00014465058288644174, |
|
"loss": 46.0, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.009400444860452712, |
|
"grad_norm": 0.0001032515792758204, |
|
"learning_rate": 0.00014443304708623597, |
|
"loss": 46.0, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.009420574292702074, |
|
"grad_norm": 7.637974340468645e-05, |
|
"learning_rate": 0.00014421524897981637, |
|
"loss": 46.0, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.009440703724951437, |
|
"grad_norm": 0.00010200442193308845, |
|
"learning_rate": 0.00014399718985293297, |
|
"loss": 46.0, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.009460833157200801, |
|
"grad_norm": 0.00010882026253966615, |
|
"learning_rate": 0.00014377887099287698, |
|
"loss": 46.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.009480962589450165, |
|
"grad_norm": 0.00022409467783290893, |
|
"learning_rate": 0.00014356029368847264, |
|
"loss": 46.0, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.009501092021699529, |
|
"grad_norm": 0.00014206068590283394, |
|
"learning_rate": 0.0001433414592300701, |
|
"loss": 46.0, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.009521221453948892, |
|
"grad_norm": 5.281836274662055e-05, |
|
"learning_rate": 0.00014312236890953744, |
|
"loss": 46.0, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.009541350886198254, |
|
"grad_norm": 0.00011217795690754429, |
|
"learning_rate": 0.00014290302402025334, |
|
"loss": 46.0, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.009561480318447618, |
|
"grad_norm": 0.00011971918138442561, |
|
"learning_rate": 0.00014268342585709913, |
|
"loss": 46.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.009581609750696982, |
|
"grad_norm": 0.0002392362366663292, |
|
"learning_rate": 0.00014246357571645152, |
|
"loss": 46.0, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.009601739182946345, |
|
"grad_norm": 7.42652773624286e-05, |
|
"learning_rate": 0.00014224347489617456, |
|
"loss": 46.0, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.009621868615195709, |
|
"grad_norm": 0.00019455078290775418, |
|
"learning_rate": 0.00014202312469561228, |
|
"loss": 46.0, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.009641998047445071, |
|
"grad_norm": 0.00014721274783369154, |
|
"learning_rate": 0.00014180252641558084, |
|
"loss": 46.0, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.009662127479694435, |
|
"grad_norm": 0.00016650428005959839, |
|
"learning_rate": 0.00014158168135836095, |
|
"loss": 46.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.009682256911943798, |
|
"grad_norm": 0.0002557269181124866, |
|
"learning_rate": 0.00014136059082769017, |
|
"loss": 46.0, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.009702386344193162, |
|
"grad_norm": 0.00013280926214065403, |
|
"learning_rate": 0.00014113925612875512, |
|
"loss": 46.0, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.009722515776442526, |
|
"grad_norm": 0.00014411240408662707, |
|
"learning_rate": 0.00014091767856818388, |
|
"loss": 46.0, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.00974264520869189, |
|
"grad_norm": 0.00010560146620264277, |
|
"learning_rate": 0.00014069585945403822, |
|
"loss": 46.0, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.009762774640941252, |
|
"grad_norm": 0.00022722291760146618, |
|
"learning_rate": 0.00014047380009580594, |
|
"loss": 46.0, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.009782904073190615, |
|
"grad_norm": 0.00011408658610889688, |
|
"learning_rate": 0.00014025150180439308, |
|
"loss": 46.0, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.009803033505439979, |
|
"grad_norm": 5.5666998378001153e-05, |
|
"learning_rate": 0.00014002896589211618, |
|
"loss": 46.0, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.009823162937689343, |
|
"grad_norm": 6.966435466893017e-05, |
|
"learning_rate": 0.00013980619367269455, |
|
"loss": 46.0, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.009843292369938706, |
|
"grad_norm": 0.00026750058168545365, |
|
"learning_rate": 0.00013958318646124259, |
|
"loss": 46.0, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.00986342180218807, |
|
"grad_norm": 7.481938519049436e-05, |
|
"learning_rate": 0.0001393599455742618, |
|
"loss": 46.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.009883551234437432, |
|
"grad_norm": 8.790163701632991e-05, |
|
"learning_rate": 0.00013913647232963332, |
|
"loss": 46.0, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.009903680666686796, |
|
"grad_norm": 0.00020705680071841925, |
|
"learning_rate": 0.00013891276804660991, |
|
"loss": 46.0, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.00992381009893616, |
|
"grad_norm": 8.553229417884722e-05, |
|
"learning_rate": 0.00013868883404580823, |
|
"loss": 46.0, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.009943939531185523, |
|
"grad_norm": 7.284261664608493e-05, |
|
"learning_rate": 0.00013846467164920116, |
|
"loss": 46.0, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.009964068963434887, |
|
"grad_norm": 0.00010190778266405687, |
|
"learning_rate": 0.00013824028218010977, |
|
"loss": 46.0, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.009984198395684249, |
|
"grad_norm": 0.00012867138138972223, |
|
"learning_rate": 0.00013801566696319562, |
|
"loss": 46.0, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.010004327827933613, |
|
"grad_norm": 0.00010176874639000744, |
|
"learning_rate": 0.0001377908273244531, |
|
"loss": 46.0, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.010024457260182976, |
|
"grad_norm": 0.00021709667635150254, |
|
"learning_rate": 0.0001375657645912014, |
|
"loss": 46.0, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.01004458669243234, |
|
"grad_norm": 0.0002606755297165364, |
|
"learning_rate": 0.0001373404800920765, |
|
"loss": 46.0, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.010064716124681704, |
|
"grad_norm": 0.00011558020196389407, |
|
"learning_rate": 0.00013711497515702398, |
|
"loss": 46.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.010084845556931068, |
|
"grad_norm": 0.0002265808725496754, |
|
"learning_rate": 0.0001368892511172903, |
|
"loss": 46.0, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.01010497498918043, |
|
"grad_norm": 0.00011339668708387762, |
|
"learning_rate": 0.0001366633093054157, |
|
"loss": 46.0, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.010125104421429793, |
|
"grad_norm": 0.00012689345749095082, |
|
"learning_rate": 0.00013643715105522589, |
|
"loss": 46.0, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.010145233853679157, |
|
"grad_norm": 0.0001654831285122782, |
|
"learning_rate": 0.0001362107777018243, |
|
"loss": 46.0, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.01016536328592852, |
|
"grad_norm": 0.00022711421479471028, |
|
"learning_rate": 0.0001359841905815842, |
|
"loss": 46.0, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.010185492718177884, |
|
"grad_norm": 0.00016627443255856633, |
|
"learning_rate": 0.00013575739103214088, |
|
"loss": 46.0, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.010205622150427248, |
|
"grad_norm": 0.00012627684918697923, |
|
"learning_rate": 0.0001355303803923836, |
|
"loss": 46.0, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.01022575158267661, |
|
"grad_norm": 9.957009751815349e-05, |
|
"learning_rate": 0.00013530316000244782, |
|
"loss": 46.0, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.010245881014925974, |
|
"grad_norm": 0.00010302881128154695, |
|
"learning_rate": 0.0001350757312037072, |
|
"loss": 46.0, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.010266010447175337, |
|
"grad_norm": 7.863016071496531e-05, |
|
"learning_rate": 0.00013484809533876582, |
|
"loss": 46.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.010286139879424701, |
|
"grad_norm": 0.00017418930656276643, |
|
"learning_rate": 0.00013462025375145, |
|
"loss": 46.0, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.010306269311674065, |
|
"grad_norm": 0.0002265576331410557, |
|
"learning_rate": 0.00013439220778680067, |
|
"loss": 46.0, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.010326398743923427, |
|
"grad_norm": 9.675358887761831e-05, |
|
"learning_rate": 0.00013416395879106515, |
|
"loss": 46.0, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.01034652817617279, |
|
"grad_norm": 0.00034641881939023733, |
|
"learning_rate": 0.00013393550811168948, |
|
"loss": 46.0, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.010366657608422154, |
|
"grad_norm": 0.00022542629449162632, |
|
"learning_rate": 0.00013370685709731015, |
|
"loss": 46.0, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.010386787040671518, |
|
"grad_norm": 8.584894385421649e-05, |
|
"learning_rate": 0.00013347800709774652, |
|
"loss": 46.0, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.010406916472920882, |
|
"grad_norm": 0.0001710738433757797, |
|
"learning_rate": 0.0001332489594639924, |
|
"loss": 46.0, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.010427045905170245, |
|
"grad_norm": 0.00018284583347849548, |
|
"learning_rate": 0.00013301971554820853, |
|
"loss": 46.0, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.010447175337419607, |
|
"grad_norm": 6.281906826188788e-05, |
|
"learning_rate": 0.00013279027670371426, |
|
"loss": 46.0, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.010467304769668971, |
|
"grad_norm": 0.00013591159950010478, |
|
"learning_rate": 0.00013256064428497966, |
|
"loss": 46.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.010487434201918335, |
|
"grad_norm": 0.00013636577932629734, |
|
"learning_rate": 0.00013233081964761766, |
|
"loss": 46.0, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.010507563634167699, |
|
"grad_norm": 0.00017571232456248254, |
|
"learning_rate": 0.0001321008041483758, |
|
"loss": 46.0, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.010527693066417062, |
|
"grad_norm": 6.699233199469745e-05, |
|
"learning_rate": 0.0001318705991451285, |
|
"loss": 46.0, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.010547822498666426, |
|
"grad_norm": 0.00018198716861661524, |
|
"learning_rate": 0.00013164020599686882, |
|
"loss": 46.0, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.010567951930915788, |
|
"grad_norm": 0.000168314523762092, |
|
"learning_rate": 0.00013140962606370048, |
|
"loss": 46.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.010588081363165152, |
|
"grad_norm": 0.00014153076335787773, |
|
"learning_rate": 0.0001311788607068299, |
|
"loss": 46.0, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.010608210795414515, |
|
"grad_norm": 0.00012689942377619445, |
|
"learning_rate": 0.00013094791128855814, |
|
"loss": 46.0, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.010628340227663879, |
|
"grad_norm": 9.956786379916593e-05, |
|
"learning_rate": 0.0001307167791722729, |
|
"loss": 46.0, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.010648469659913243, |
|
"grad_norm": 8.192278619389981e-05, |
|
"learning_rate": 0.00013048546572244036, |
|
"loss": 46.0, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.010668599092162605, |
|
"grad_norm": 0.00021154977730475366, |
|
"learning_rate": 0.0001302539723045971, |
|
"loss": 46.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.010688728524411968, |
|
"grad_norm": 0.00013896448945160955, |
|
"learning_rate": 0.00013002230028534234, |
|
"loss": 46.0, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.010708857956661332, |
|
"grad_norm": 0.0001234956580447033, |
|
"learning_rate": 0.00012979045103232945, |
|
"loss": 46.0, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.010728987388910696, |
|
"grad_norm": 0.0001562229444971308, |
|
"learning_rate": 0.00012955842591425818, |
|
"loss": 46.0, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.01074911682116006, |
|
"grad_norm": 0.0003348892496433109, |
|
"learning_rate": 0.00012932622630086648, |
|
"loss": 46.0, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.010769246253409423, |
|
"grad_norm": 8.87279020389542e-05, |
|
"learning_rate": 0.0001290938535629224, |
|
"loss": 46.0, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.010789375685658785, |
|
"grad_norm": 8.013709157239646e-05, |
|
"learning_rate": 0.00012886130907221603, |
|
"loss": 46.0, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.010809505117908149, |
|
"grad_norm": 0.00012092996621504426, |
|
"learning_rate": 0.00012862859420155134, |
|
"loss": 46.0, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.010829634550157513, |
|
"grad_norm": 9.32335969991982e-05, |
|
"learning_rate": 0.00012839571032473814, |
|
"loss": 46.0, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.010849763982406876, |
|
"grad_norm": 0.00016681026318110526, |
|
"learning_rate": 0.00012816265881658405, |
|
"loss": 46.0, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.01086989341465624, |
|
"grad_norm": 0.00014514310169033706, |
|
"learning_rate": 0.00012792944105288612, |
|
"loss": 46.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.010890022846905604, |
|
"grad_norm": 0.00012650150165427476, |
|
"learning_rate": 0.000127696058410423, |
|
"loss": 46.0, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.010910152279154966, |
|
"grad_norm": 0.00029365395312197506, |
|
"learning_rate": 0.00012746251226694662, |
|
"loss": 46.0, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.01093028171140433, |
|
"grad_norm": 0.00012223645171616226, |
|
"learning_rate": 0.00012722880400117413, |
|
"loss": 46.0, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.010950411143653693, |
|
"grad_norm": 0.0004023347864858806, |
|
"learning_rate": 0.00012699493499277983, |
|
"loss": 46.0, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.010970540575903057, |
|
"grad_norm": 0.00011891735630342737, |
|
"learning_rate": 0.00012676090662238682, |
|
"loss": 46.0, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.01099067000815242, |
|
"grad_norm": 0.00016821158351376653, |
|
"learning_rate": 0.00012652672027155904, |
|
"loss": 46.0, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.011010799440401783, |
|
"grad_norm": 7.64864671509713e-05, |
|
"learning_rate": 0.00012629237732279314, |
|
"loss": 46.0, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.011030928872651146, |
|
"grad_norm": 0.00015364130376838148, |
|
"learning_rate": 0.0001260578791595101, |
|
"loss": 46.0, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.01105105830490051, |
|
"grad_norm": 0.0001618131500435993, |
|
"learning_rate": 0.00012582322716604718, |
|
"loss": 46.0, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.011071187737149874, |
|
"grad_norm": 0.0001929528807522729, |
|
"learning_rate": 0.0001255884227276499, |
|
"loss": 46.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.011091317169399238, |
|
"grad_norm": 0.00011355496098985896, |
|
"learning_rate": 0.0001253534672304636, |
|
"loss": 46.0, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.011111446601648601, |
|
"grad_norm": 0.00011806943803094327, |
|
"learning_rate": 0.00012511836206152545, |
|
"loss": 46.0, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.011131576033897963, |
|
"grad_norm": 0.00016547176346648484, |
|
"learning_rate": 0.00012488310860875622, |
|
"loss": 46.0, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.011151705466147327, |
|
"grad_norm": 0.00012969633098691702, |
|
"learning_rate": 0.0001246477082609519, |
|
"loss": 46.0, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.01117183489839669, |
|
"grad_norm": 9.5839895948302e-05, |
|
"learning_rate": 0.00012441216240777585, |
|
"loss": 46.0, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.011191964330646054, |
|
"grad_norm": 0.00026142006390728056, |
|
"learning_rate": 0.0001241764724397503, |
|
"loss": 46.0, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.011212093762895418, |
|
"grad_norm": 5.916105510550551e-05, |
|
"learning_rate": 0.00012394063974824828, |
|
"loss": 46.0, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.011232223195144782, |
|
"grad_norm": 0.00010269715858157724, |
|
"learning_rate": 0.00012370466572548538, |
|
"loss": 46.0, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.011252352627394144, |
|
"grad_norm": 0.00017692089022602886, |
|
"learning_rate": 0.0001234685517645115, |
|
"loss": 46.0, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.011272482059643507, |
|
"grad_norm": 0.00013197977386880666, |
|
"learning_rate": 0.00012323229925920273, |
|
"loss": 46.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.011292611491892871, |
|
"grad_norm": 0.00019733706722036004, |
|
"learning_rate": 0.00012299590960425288, |
|
"loss": 46.0, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.011312740924142235, |
|
"grad_norm": 0.0001352672406937927, |
|
"learning_rate": 0.00012275938419516552, |
|
"loss": 46.0, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.011332870356391599, |
|
"grad_norm": 0.00016101048095151782, |
|
"learning_rate": 0.0001225227244282457, |
|
"loss": 46.0, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.01135299978864096, |
|
"grad_norm": 0.0002455017529428005, |
|
"learning_rate": 0.00012228593170059151, |
|
"loss": 46.0, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.011373129220890324, |
|
"grad_norm": 0.00036364022525958717, |
|
"learning_rate": 0.000122049007410086, |
|
"loss": 46.0, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.011393258653139688, |
|
"grad_norm": 0.00016836596478242427, |
|
"learning_rate": 0.00012181195295538895, |
|
"loss": 46.0, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.011413388085389052, |
|
"grad_norm": 0.00024714317987672985, |
|
"learning_rate": 0.00012157476973592842, |
|
"loss": 46.0, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.011433517517638415, |
|
"grad_norm": 0.0002926415763795376, |
|
"learning_rate": 0.00012133745915189278, |
|
"loss": 46.0, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.01145364694988778, |
|
"grad_norm": 0.00012063339818269014, |
|
"learning_rate": 0.00012110002260422218, |
|
"loss": 46.0, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.011473776382137141, |
|
"grad_norm": 0.00014082356938160956, |
|
"learning_rate": 0.00012086246149460038, |
|
"loss": 46.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.011493905814386505, |
|
"grad_norm": 0.0002749360864982009, |
|
"learning_rate": 0.00012062477722544656, |
|
"loss": 46.0, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.011514035246635869, |
|
"grad_norm": 0.00013284625310916454, |
|
"learning_rate": 0.00012038697119990687, |
|
"loss": 46.0, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.011534164678885232, |
|
"grad_norm": 8.731486741453409e-05, |
|
"learning_rate": 0.00012014904482184633, |
|
"loss": 46.0, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.011554294111134596, |
|
"grad_norm": 7.719992572674528e-05, |
|
"learning_rate": 0.00011991099949584032, |
|
"loss": 46.0, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.01157442354338396, |
|
"grad_norm": 0.00010548291902523488, |
|
"learning_rate": 0.00011967283662716653, |
|
"loss": 46.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.011594552975633322, |
|
"grad_norm": 0.0002630742092151195, |
|
"learning_rate": 0.00011943455762179654, |
|
"loss": 46.0, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.011614682407882685, |
|
"grad_norm": 0.00016908656107261777, |
|
"learning_rate": 0.00011919616388638748, |
|
"loss": 46.0, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.011634811840132049, |
|
"grad_norm": 0.00010908886906690896, |
|
"learning_rate": 0.0001189576568282738, |
|
"loss": 46.0, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.011654941272381413, |
|
"grad_norm": 8.737723692320287e-05, |
|
"learning_rate": 0.00011871903785545897, |
|
"loss": 46.0, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.011675070704630777, |
|
"grad_norm": 0.00011381749209249392, |
|
"learning_rate": 0.00011848030837660709, |
|
"loss": 46.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.011695200136880139, |
|
"grad_norm": 0.00023508115555159748, |
|
"learning_rate": 0.00011824146980103467, |
|
"loss": 46.0, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.011715329569129502, |
|
"grad_norm": 0.0002138228010153398, |
|
"learning_rate": 0.00011800252353870224, |
|
"loss": 46.0, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.011735459001378866, |
|
"grad_norm": 0.0002100839774357155, |
|
"learning_rate": 0.00011776347100020602, |
|
"loss": 46.0, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.01175558843362823, |
|
"grad_norm": 0.00025784672470763326, |
|
"learning_rate": 0.00011752431359676968, |
|
"loss": 46.0, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.011775717865877593, |
|
"grad_norm": 9.872866212390363e-05, |
|
"learning_rate": 0.00011728505274023584, |
|
"loss": 46.0, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.011795847298126957, |
|
"grad_norm": 8.426361455349252e-05, |
|
"learning_rate": 0.00011704568984305802, |
|
"loss": 46.0, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.011815976730376319, |
|
"grad_norm": 0.0003703351248987019, |
|
"learning_rate": 0.00011680622631829197, |
|
"loss": 46.0, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.011836106162625683, |
|
"grad_norm": 0.00012765347491949797, |
|
"learning_rate": 0.00011656666357958751, |
|
"loss": 46.0, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.011856235594875046, |
|
"grad_norm": 0.00010474120790604502, |
|
"learning_rate": 0.00011632700304118032, |
|
"loss": 46.0, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.01187636502712441, |
|
"grad_norm": 0.00026702586910687387, |
|
"learning_rate": 0.0001160872461178832, |
|
"loss": 46.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.011896494459373774, |
|
"grad_norm": 0.00018060579895973206, |
|
"learning_rate": 0.00011584739422507804, |
|
"loss": 46.0, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.011916623891623138, |
|
"grad_norm": 0.0004400400212034583, |
|
"learning_rate": 0.00011560744877870748, |
|
"loss": 46.0, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.0119367533238725, |
|
"grad_norm": 0.00011154530511703342, |
|
"learning_rate": 0.00011536741119526628, |
|
"loss": 46.0, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.011956882756121863, |
|
"grad_norm": 9.707292338134721e-05, |
|
"learning_rate": 0.00011512728289179323, |
|
"loss": 46.0, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.011977012188371227, |
|
"grad_norm": 0.00012266647536307573, |
|
"learning_rate": 0.00011488706528586261, |
|
"loss": 46.0, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.01199714162062059, |
|
"grad_norm": 6.54510804452002e-05, |
|
"learning_rate": 0.00011464675979557593, |
|
"loss": 46.0, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.012017271052869954, |
|
"grad_norm": 0.00019303473527543247, |
|
"learning_rate": 0.00011440636783955356, |
|
"loss": 46.0, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.012037400485119316, |
|
"grad_norm": 0.00021508029021788388, |
|
"learning_rate": 0.00011416589083692619, |
|
"loss": 46.0, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.01205752991736868, |
|
"grad_norm": 0.00014299601025413722, |
|
"learning_rate": 0.00011392533020732666, |
|
"loss": 46.0, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.012077659349618044, |
|
"grad_norm": 0.00017883341934066266, |
|
"learning_rate": 0.00011368468737088148, |
|
"loss": 46.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.012097788781867408, |
|
"grad_norm": 0.0002254635328426957, |
|
"learning_rate": 0.00011344396374820244, |
|
"loss": 46.0, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.012117918214116771, |
|
"grad_norm": 0.0002371317968936637, |
|
"learning_rate": 0.0001132031607603783, |
|
"loss": 46.0, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.012138047646366135, |
|
"grad_norm": 0.00012665463145822287, |
|
"learning_rate": 0.0001129622798289663, |
|
"loss": 46.0, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.012158177078615497, |
|
"grad_norm": 0.00019802236056420952, |
|
"learning_rate": 0.00011272132237598376, |
|
"loss": 46.0, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.01217830651086486, |
|
"grad_norm": 0.00011026608990505338, |
|
"learning_rate": 0.00011248028982389989, |
|
"loss": 46.0, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.012198435943114224, |
|
"grad_norm": 9.928762301569805e-05, |
|
"learning_rate": 0.00011223918359562708, |
|
"loss": 46.0, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.012218565375363588, |
|
"grad_norm": 0.00023735944705549628, |
|
"learning_rate": 0.00011199800511451273, |
|
"loss": 46.0, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.012238694807612952, |
|
"grad_norm": 0.00011722726048901677, |
|
"learning_rate": 0.0001117567558043308, |
|
"loss": 46.0, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.012258824239862316, |
|
"grad_norm": 0.0003526516375131905, |
|
"learning_rate": 0.00011151543708927335, |
|
"loss": 46.0, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.012278953672111678, |
|
"grad_norm": 0.00022850584355182946, |
|
"learning_rate": 0.00011127405039394216, |
|
"loss": 46.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.012299083104361041, |
|
"grad_norm": 0.0005510961636900902, |
|
"learning_rate": 0.00011103259714334034, |
|
"loss": 46.0, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.012319212536610405, |
|
"grad_norm": 0.00013431145634967834, |
|
"learning_rate": 0.00011079107876286387, |
|
"loss": 46.0, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.012339341968859769, |
|
"grad_norm": 0.0001544607657706365, |
|
"learning_rate": 0.0001105494966782933, |
|
"loss": 46.0, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.012359471401109132, |
|
"grad_norm": 8.791110303718597e-05, |
|
"learning_rate": 0.0001103078523157852, |
|
"loss": 46.0, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.012379600833358494, |
|
"grad_norm": 0.00023833720479160547, |
|
"learning_rate": 0.00011006614710186372, |
|
"loss": 46.0, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.012399730265607858, |
|
"grad_norm": 0.00016108158160932362, |
|
"learning_rate": 0.00010982438246341238, |
|
"loss": 46.0, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.012419859697857222, |
|
"grad_norm": 0.00022613555483985692, |
|
"learning_rate": 0.00010958255982766538, |
|
"loss": 46.0, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.012439989130106585, |
|
"grad_norm": 0.00022462922788690776, |
|
"learning_rate": 0.00010934068062219945, |
|
"loss": 46.0, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.01246011856235595, |
|
"grad_norm": 0.00031467695953324437, |
|
"learning_rate": 0.0001090987462749251, |
|
"loss": 46.0, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.012480247994605313, |
|
"grad_norm": 0.0002814480976667255, |
|
"learning_rate": 0.00010885675821407844, |
|
"loss": 46.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.012500377426854675, |
|
"grad_norm": 0.0001427562237950042, |
|
"learning_rate": 0.00010861471786821275, |
|
"loss": 46.0, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.012520506859104039, |
|
"grad_norm": 0.00010559640941210091, |
|
"learning_rate": 0.00010837262666618983, |
|
"loss": 46.0, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.012540636291353402, |
|
"grad_norm": 0.00022459396859630942, |
|
"learning_rate": 0.00010813048603717182, |
|
"loss": 46.0, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.012560765723602766, |
|
"grad_norm": 0.0003838833072222769, |
|
"learning_rate": 0.0001078882974106126, |
|
"loss": 46.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.01258089515585213, |
|
"grad_norm": 0.00013236599625088274, |
|
"learning_rate": 0.00010764606221624933, |
|
"loss": 46.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.012601024588101493, |
|
"grad_norm": 0.0002815214393194765, |
|
"learning_rate": 0.00010740378188409426, |
|
"loss": 46.0, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.012621154020350855, |
|
"grad_norm": 9.716377826407552e-05, |
|
"learning_rate": 0.00010716145784442593, |
|
"loss": 46.0, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.01264128345260022, |
|
"grad_norm": 0.00029817328322678804, |
|
"learning_rate": 0.00010691909152778094, |
|
"loss": 46.0, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.012661412884849583, |
|
"grad_norm": 0.00011512849596329033, |
|
"learning_rate": 0.00010667668436494558, |
|
"loss": 46.0, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.012681542317098947, |
|
"grad_norm": 0.00021800924150738865, |
|
"learning_rate": 0.00010643423778694712, |
|
"loss": 46.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.01270167174934831, |
|
"grad_norm": 0.00012208014959469438, |
|
"learning_rate": 0.0001061917532250456, |
|
"loss": 46.0, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.012721801181597672, |
|
"grad_norm": 0.00015742589312139899, |
|
"learning_rate": 0.00010594923211072532, |
|
"loss": 46.0, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.012741930613847036, |
|
"grad_norm": 0.00029806559905409813, |
|
"learning_rate": 0.00010570667587568626, |
|
"loss": 46.0, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.0127620600460964, |
|
"grad_norm": 0.00018120172899216413, |
|
"learning_rate": 0.00010546408595183578, |
|
"loss": 46.0, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.012782189478345763, |
|
"grad_norm": 0.00016689879703335464, |
|
"learning_rate": 0.00010522146377128021, |
|
"loss": 46.0, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.012802318910595127, |
|
"grad_norm": 0.000355700176442042, |
|
"learning_rate": 0.00010497881076631615, |
|
"loss": 46.0, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.01282244834284449, |
|
"grad_norm": 0.0001786172651918605, |
|
"learning_rate": 0.00010473612836942226, |
|
"loss": 46.0, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.012842577775093853, |
|
"grad_norm": 0.0003312894550617784, |
|
"learning_rate": 0.00010449341801325073, |
|
"loss": 46.0, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.012862707207343217, |
|
"grad_norm": 0.00023194189998321235, |
|
"learning_rate": 0.00010425068113061873, |
|
"loss": 46.0, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.01288283663959258, |
|
"grad_norm": 0.0004088008718099445, |
|
"learning_rate": 0.00010400791915450009, |
|
"loss": 46.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.012902966071841944, |
|
"grad_norm": 0.0001682123402133584, |
|
"learning_rate": 0.00010376513351801673, |
|
"loss": 46.0, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.012923095504091308, |
|
"grad_norm": 0.00013815666898153722, |
|
"learning_rate": 0.00010352232565443032, |
|
"loss": 46.0, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.012943224936340671, |
|
"grad_norm": 0.0002450251195114106, |
|
"learning_rate": 0.00010327949699713366, |
|
"loss": 46.0, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.012963354368590033, |
|
"grad_norm": 0.00019522267393767834, |
|
"learning_rate": 0.00010303664897964232, |
|
"loss": 46.0, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.012983483800839397, |
|
"grad_norm": 0.00013197500084061176, |
|
"learning_rate": 0.00010279378303558624, |
|
"loss": 46.0, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.01300361323308876, |
|
"grad_norm": 0.00034008765942417085, |
|
"learning_rate": 0.00010255090059870107, |
|
"loss": 46.0, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.013023742665338124, |
|
"grad_norm": 0.00030950226937420666, |
|
"learning_rate": 0.00010230800310281992, |
|
"loss": 46.0, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.013043872097587488, |
|
"grad_norm": 0.0002216809953097254, |
|
"learning_rate": 0.00010206509198186476, |
|
"loss": 46.0, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.01306400152983685, |
|
"grad_norm": 0.0005021773395128548, |
|
"learning_rate": 0.00010182216866983796, |
|
"loss": 46.0, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.013084130962086214, |
|
"grad_norm": 0.0003279381198808551, |
|
"learning_rate": 0.00010157923460081394, |
|
"loss": 46.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.013104260394335578, |
|
"grad_norm": 0.000169211023603566, |
|
"learning_rate": 0.00010133629120893055, |
|
"loss": 46.0, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.013124389826584941, |
|
"grad_norm": 0.0004348910879343748, |
|
"learning_rate": 0.00010109333992838072, |
|
"loss": 46.0, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.013124389826584941, |
|
"eval_loss": 11.5, |
|
"eval_runtime": 126.4432, |
|
"eval_samples_per_second": 165.434, |
|
"eval_steps_per_second": 82.717, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.013144519258834305, |
|
"grad_norm": 0.00034002333995886147, |
|
"learning_rate": 0.00010085038219340393, |
|
"loss": 46.0, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.013164648691083669, |
|
"grad_norm": 0.00015378076932393014, |
|
"learning_rate": 0.00010060741943827776, |
|
"loss": 46.0, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.01318477812333303, |
|
"grad_norm": 0.0002593511308077723, |
|
"learning_rate": 0.00010036445309730944, |
|
"loss": 46.0, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.013204907555582394, |
|
"grad_norm": 0.00023333106946665794, |
|
"learning_rate": 0.00010012148460482738, |
|
"loss": 46.0, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.013225036987831758, |
|
"grad_norm": 0.00023424337268806994, |
|
"learning_rate": 9.987851539517262e-05, |
|
"loss": 46.0, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.013245166420081122, |
|
"grad_norm": 0.00014827096310909837, |
|
"learning_rate": 9.963554690269058e-05, |
|
"loss": 46.0, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.013265295852330486, |
|
"grad_norm": 0.00013894452422391623, |
|
"learning_rate": 9.939258056172225e-05, |
|
"loss": 46.0, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.01328542528457985, |
|
"grad_norm": 0.00020886877609882504, |
|
"learning_rate": 9.914961780659609e-05, |
|
"loss": 46.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.013305554716829211, |
|
"grad_norm": 0.00015650583372917026, |
|
"learning_rate": 9.890666007161929e-05, |
|
"loss": 46.0, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.013325684149078575, |
|
"grad_norm": 0.00018213962903246284, |
|
"learning_rate": 9.866370879106947e-05, |
|
"loss": 46.0, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.013345813581327939, |
|
"grad_norm": 0.0003600477648433298, |
|
"learning_rate": 9.84207653991861e-05, |
|
"loss": 46.0, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.013365943013577302, |
|
"grad_norm": 0.0002795457548927516, |
|
"learning_rate": 9.817783133016206e-05, |
|
"loss": 46.0, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.013386072445826666, |
|
"grad_norm": 0.0002572405501268804, |
|
"learning_rate": 9.793490801813528e-05, |
|
"loss": 46.0, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.013406201878076028, |
|
"grad_norm": 0.0002055472432402894, |
|
"learning_rate": 9.769199689718009e-05, |
|
"loss": 46.0, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.013426331310325392, |
|
"grad_norm": 0.0004195273795630783, |
|
"learning_rate": 9.744909940129895e-05, |
|
"loss": 46.0, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.013446460742574756, |
|
"grad_norm": 0.0004413472779560834, |
|
"learning_rate": 9.720621696441378e-05, |
|
"loss": 46.0, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.01346659017482412, |
|
"grad_norm": 0.00018565787468105555, |
|
"learning_rate": 9.69633510203577e-05, |
|
"loss": 46.0, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.013486719607073483, |
|
"grad_norm": 0.00014439223741646856, |
|
"learning_rate": 9.672050300286636e-05, |
|
"loss": 46.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.013506849039322847, |
|
"grad_norm": 0.0003282301186118275, |
|
"learning_rate": 9.64776743455697e-05, |
|
"loss": 46.0, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.013526978471572209, |
|
"grad_norm": 0.00015751754108350724, |
|
"learning_rate": 9.623486648198326e-05, |
|
"loss": 46.0, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.013547107903821572, |
|
"grad_norm": 0.00012771219189744443, |
|
"learning_rate": 9.599208084549993e-05, |
|
"loss": 46.0, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.013567237336070936, |
|
"grad_norm": 0.00033567333593964577, |
|
"learning_rate": 9.574931886938128e-05, |
|
"loss": 46.0, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.0135873667683203, |
|
"grad_norm": 0.00019254954531788826, |
|
"learning_rate": 9.550658198674931e-05, |
|
"loss": 46.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.013607496200569663, |
|
"grad_norm": 0.0002889351744670421, |
|
"learning_rate": 9.526387163057777e-05, |
|
"loss": 46.0, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.013627625632819027, |
|
"grad_norm": 0.00026451353915035725, |
|
"learning_rate": 9.502118923368388e-05, |
|
"loss": 46.0, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.01364775506506839, |
|
"grad_norm": 0.00015573820564895868, |
|
"learning_rate": 9.477853622871984e-05, |
|
"loss": 46.0, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.013667884497317753, |
|
"grad_norm": 0.00014926907897461206, |
|
"learning_rate": 9.453591404816423e-05, |
|
"loss": 46.0, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.013688013929567117, |
|
"grad_norm": 0.0003569158725440502, |
|
"learning_rate": 9.429332412431377e-05, |
|
"loss": 46.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.01370814336181648, |
|
"grad_norm": 0.00013413053238764405, |
|
"learning_rate": 9.405076788927469e-05, |
|
"loss": 46.0, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.013728272794065844, |
|
"grad_norm": 0.000353492476278916, |
|
"learning_rate": 9.380824677495441e-05, |
|
"loss": 46.0, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.013748402226315206, |
|
"grad_norm": 0.0003761777188628912, |
|
"learning_rate": 9.356576221305289e-05, |
|
"loss": 46.0, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.01376853165856457, |
|
"grad_norm": 0.0002234268467873335, |
|
"learning_rate": 9.332331563505444e-05, |
|
"loss": 46.0, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.013788661090813933, |
|
"grad_norm": 0.00026313794660381973, |
|
"learning_rate": 9.308090847221905e-05, |
|
"loss": 46.0, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.013808790523063297, |
|
"grad_norm": 0.0005154896061867476, |
|
"learning_rate": 9.283854215557409e-05, |
|
"loss": 46.0, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.01382891995531266, |
|
"grad_norm": 0.00024182203924283385, |
|
"learning_rate": 9.259621811590578e-05, |
|
"loss": 46.0, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.013849049387562025, |
|
"grad_norm": 0.00014097105304244906, |
|
"learning_rate": 9.235393778375068e-05, |
|
"loss": 46.0, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.013869178819811387, |
|
"grad_norm": 0.0003665017429739237, |
|
"learning_rate": 9.211170258938747e-05, |
|
"loss": 46.0, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.01388930825206075, |
|
"grad_norm": 0.00021189030667301267, |
|
"learning_rate": 9.18695139628282e-05, |
|
"loss": 46.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.013909437684310114, |
|
"grad_norm": 0.0005137083935551345, |
|
"learning_rate": 9.162737333381019e-05, |
|
"loss": 46.0, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.013929567116559478, |
|
"grad_norm": 0.00035794309224002063, |
|
"learning_rate": 9.138528213178727e-05, |
|
"loss": 46.0, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.013949696548808841, |
|
"grad_norm": 0.00044030786375515163, |
|
"learning_rate": 9.11432417859216e-05, |
|
"loss": 46.0, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.013969825981058205, |
|
"grad_norm": 0.0002957424148917198, |
|
"learning_rate": 9.090125372507492e-05, |
|
"loss": 46.0, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.013989955413307567, |
|
"grad_norm": 0.00022510747658088803, |
|
"learning_rate": 9.065931937780059e-05, |
|
"loss": 46.0, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.01401008484555693, |
|
"grad_norm": 0.00029219602583907545, |
|
"learning_rate": 9.041744017233462e-05, |
|
"loss": 46.0, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.014030214277806295, |
|
"grad_norm": 0.0005821465747430921, |
|
"learning_rate": 9.017561753658764e-05, |
|
"loss": 46.0, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.014050343710055658, |
|
"grad_norm": 0.00017983518773689866, |
|
"learning_rate": 8.993385289813627e-05, |
|
"loss": 46.0, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.014070473142305022, |
|
"grad_norm": 0.0003665021213237196, |
|
"learning_rate": 8.969214768421483e-05, |
|
"loss": 46.0, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.014090602574554384, |
|
"grad_norm": 0.00022963494120631367, |
|
"learning_rate": 8.945050332170672e-05, |
|
"loss": 46.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.014110732006803748, |
|
"grad_norm": 0.00016433373093605042, |
|
"learning_rate": 8.920892123713614e-05, |
|
"loss": 46.0, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.014130861439053111, |
|
"grad_norm": 0.0002634669654071331, |
|
"learning_rate": 8.89674028566597e-05, |
|
"loss": 46.0, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.014150990871302475, |
|
"grad_norm": 0.00027517983107827604, |
|
"learning_rate": 8.872594960605785e-05, |
|
"loss": 46.0, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.014171120303551839, |
|
"grad_norm": 0.0004251411010045558, |
|
"learning_rate": 8.848456291072666e-05, |
|
"loss": 46.0, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.014191249735801202, |
|
"grad_norm": 0.00023084439453668892, |
|
"learning_rate": 8.82432441956692e-05, |
|
"loss": 46.0, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.014211379168050564, |
|
"grad_norm": 0.0002691158442758024, |
|
"learning_rate": 8.80019948854873e-05, |
|
"loss": 46.0, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.014231508600299928, |
|
"grad_norm": 0.000261798471910879, |
|
"learning_rate": 8.776081640437294e-05, |
|
"loss": 46.0, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.014251638032549292, |
|
"grad_norm": 0.0004933233722113073, |
|
"learning_rate": 8.751971017610012e-05, |
|
"loss": 46.0, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.014271767464798656, |
|
"grad_norm": 0.00015099802112672478, |
|
"learning_rate": 8.727867762401623e-05, |
|
"loss": 46.0, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.01429189689704802, |
|
"grad_norm": 0.00032548594754189253, |
|
"learning_rate": 8.703772017103372e-05, |
|
"loss": 46.0, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.014312026329297383, |
|
"grad_norm": 0.00044292572420090437, |
|
"learning_rate": 8.679683923962174e-05, |
|
"loss": 46.0, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.014332155761546745, |
|
"grad_norm": 0.00028910860419273376, |
|
"learning_rate": 8.655603625179759e-05, |
|
"loss": 46.0, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.014352285193796109, |
|
"grad_norm": 0.0002870987809728831, |
|
"learning_rate": 8.631531262911857e-05, |
|
"loss": 46.0, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.014372414626045472, |
|
"grad_norm": 0.00021710267174057662, |
|
"learning_rate": 8.607466979267338e-05, |
|
"loss": 46.0, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.014392544058294836, |
|
"grad_norm": 0.00029545449069701135, |
|
"learning_rate": 8.583410916307386e-05, |
|
"loss": 46.0, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.0144126734905442, |
|
"grad_norm": 0.0002639677841216326, |
|
"learning_rate": 8.559363216044647e-05, |
|
"loss": 46.0, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.014432802922793562, |
|
"grad_norm": 0.00033363461261615157, |
|
"learning_rate": 8.53532402044241e-05, |
|
"loss": 46.0, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.014452932355042926, |
|
"grad_norm": 0.00029945329879410565, |
|
"learning_rate": 8.51129347141374e-05, |
|
"loss": 46.0, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.01447306178729229, |
|
"grad_norm": 0.0004964692052453756, |
|
"learning_rate": 8.487271710820681e-05, |
|
"loss": 46.0, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.014493191219541653, |
|
"grad_norm": 0.00026980109396390617, |
|
"learning_rate": 8.463258880473373e-05, |
|
"loss": 46.0, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.014513320651791017, |
|
"grad_norm": 0.0004024615336675197, |
|
"learning_rate": 8.439255122129254e-05, |
|
"loss": 46.0, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.01453345008404038, |
|
"grad_norm": 0.0006139858742244542, |
|
"learning_rate": 8.415260577492195e-05, |
|
"loss": 46.0, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.014553579516289742, |
|
"grad_norm": 0.000292949698632583, |
|
"learning_rate": 8.391275388211684e-05, |
|
"loss": 46.0, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.014573708948539106, |
|
"grad_norm": 0.00030586167122237384, |
|
"learning_rate": 8.367299695881973e-05, |
|
"loss": 46.0, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.01459383838078847, |
|
"grad_norm": 0.0002910511684603989, |
|
"learning_rate": 8.34333364204125e-05, |
|
"loss": 46.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.014613967813037834, |
|
"grad_norm": 0.0002732513239607215, |
|
"learning_rate": 8.319377368170808e-05, |
|
"loss": 46.0, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.014634097245287197, |
|
"grad_norm": 0.0002025508729275316, |
|
"learning_rate": 8.295431015694202e-05, |
|
"loss": 46.0, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.014654226677536561, |
|
"grad_norm": 0.0005748551338911057, |
|
"learning_rate": 8.271494725976418e-05, |
|
"loss": 46.0, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.014674356109785923, |
|
"grad_norm": 0.0005893989582546055, |
|
"learning_rate": 8.247568640323036e-05, |
|
"loss": 46.0, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.014694485542035287, |
|
"grad_norm": 0.0007098098867572844, |
|
"learning_rate": 8.223652899979402e-05, |
|
"loss": 46.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.01471461497428465, |
|
"grad_norm": 0.00044822742347605526, |
|
"learning_rate": 8.199747646129775e-05, |
|
"loss": 46.0, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.014734744406534014, |
|
"grad_norm": 0.0006507154321298003, |
|
"learning_rate": 8.175853019896534e-05, |
|
"loss": 46.0, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.014754873838783378, |
|
"grad_norm": 0.00015137386799324304, |
|
"learning_rate": 8.15196916233929e-05, |
|
"loss": 46.0, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.01477500327103274, |
|
"grad_norm": 0.0002277817839058116, |
|
"learning_rate": 8.128096214454105e-05, |
|
"loss": 46.0, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.014795132703282103, |
|
"grad_norm": 0.00034286073059774935, |
|
"learning_rate": 8.104234317172621e-05, |
|
"loss": 46.0, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.014815262135531467, |
|
"grad_norm": 0.00032818439649417996, |
|
"learning_rate": 8.080383611361254e-05, |
|
"loss": 46.0, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.014835391567780831, |
|
"grad_norm": 0.0006836484535597265, |
|
"learning_rate": 8.056544237820351e-05, |
|
"loss": 46.0, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.014855521000030195, |
|
"grad_norm": 0.00038759320159442723, |
|
"learning_rate": 8.03271633728335e-05, |
|
"loss": 46.0, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.014875650432279558, |
|
"grad_norm": 0.0003174393787048757, |
|
"learning_rate": 8.008900050415973e-05, |
|
"loss": 46.0, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.01489577986452892, |
|
"grad_norm": 0.0004829028621315956, |
|
"learning_rate": 7.985095517815371e-05, |
|
"loss": 46.0, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.014915909296778284, |
|
"grad_norm": 0.0003439805586822331, |
|
"learning_rate": 7.961302880009314e-05, |
|
"loss": 46.0, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.014936038729027648, |
|
"grad_norm": 0.00036893304786644876, |
|
"learning_rate": 7.937522277455343e-05, |
|
"loss": 46.0, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.014956168161277011, |
|
"grad_norm": 0.00026861962396651506, |
|
"learning_rate": 7.913753850539964e-05, |
|
"loss": 46.0, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.014976297593526375, |
|
"grad_norm": 0.0002473319473210722, |
|
"learning_rate": 7.889997739577783e-05, |
|
"loss": 46.0, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.014996427025775739, |
|
"grad_norm": 0.0005373766180127859, |
|
"learning_rate": 7.866254084810724e-05, |
|
"loss": 46.0, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.0150165564580251, |
|
"grad_norm": 0.0004670672060456127, |
|
"learning_rate": 7.842523026407159e-05, |
|
"loss": 46.0, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.015036685890274465, |
|
"grad_norm": 0.00029645231552422047, |
|
"learning_rate": 7.818804704461108e-05, |
|
"loss": 46.0, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.015056815322523828, |
|
"grad_norm": 0.00033488948247395456, |
|
"learning_rate": 7.795099258991404e-05, |
|
"loss": 46.0, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.015076944754773192, |
|
"grad_norm": 0.00025332943187095225, |
|
"learning_rate": 7.771406829940852e-05, |
|
"loss": 46.0, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.015097074187022556, |
|
"grad_norm": 0.0003477553545963019, |
|
"learning_rate": 7.747727557175434e-05, |
|
"loss": 46.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.015117203619271918, |
|
"grad_norm": 0.0003306921571493149, |
|
"learning_rate": 7.724061580483449e-05, |
|
"loss": 46.0, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.015137333051521281, |
|
"grad_norm": 0.0003417869738768786, |
|
"learning_rate": 7.700409039574717e-05, |
|
"loss": 46.0, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.015157462483770645, |
|
"grad_norm": 0.0004558273358270526, |
|
"learning_rate": 7.676770074079732e-05, |
|
"loss": 46.0, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.015177591916020009, |
|
"grad_norm": 0.00022576648916583508, |
|
"learning_rate": 7.653144823548852e-05, |
|
"loss": 46.0, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.015197721348269373, |
|
"grad_norm": 0.0003926701901946217, |
|
"learning_rate": 7.62953342745146e-05, |
|
"loss": 46.0, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.015217850780518736, |
|
"grad_norm": 0.0004790101374965161, |
|
"learning_rate": 7.605936025175174e-05, |
|
"loss": 46.0, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.015237980212768098, |
|
"grad_norm": 0.00037941025220789015, |
|
"learning_rate": 7.582352756024971e-05, |
|
"loss": 46.0, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.015258109645017462, |
|
"grad_norm": 0.0003313767083454877, |
|
"learning_rate": 7.558783759222417e-05, |
|
"loss": 46.0, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.015278239077266826, |
|
"grad_norm": 0.00023061798128765076, |
|
"learning_rate": 7.535229173904811e-05, |
|
"loss": 46.0, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.01529836850951619, |
|
"grad_norm": 0.00033232785062864423, |
|
"learning_rate": 7.511689139124382e-05, |
|
"loss": 46.0, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.015318497941765553, |
|
"grad_norm": 0.0007354258559644222, |
|
"learning_rate": 7.488163793847458e-05, |
|
"loss": 46.0, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.015338627374014917, |
|
"grad_norm": 0.00019556190818548203, |
|
"learning_rate": 7.464653276953643e-05, |
|
"loss": 46.0, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.015358756806264279, |
|
"grad_norm": 0.0005126126925460994, |
|
"learning_rate": 7.441157727235015e-05, |
|
"loss": 46.0, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.015378886238513642, |
|
"grad_norm": 0.00027637736639007926, |
|
"learning_rate": 7.417677283395284e-05, |
|
"loss": 46.0, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.015399015670763006, |
|
"grad_norm": 0.0008606024202890694, |
|
"learning_rate": 7.394212084048995e-05, |
|
"loss": 46.0, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.01541914510301237, |
|
"grad_norm": 0.0005743610672652721, |
|
"learning_rate": 7.370762267720685e-05, |
|
"loss": 46.0, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.015439274535261734, |
|
"grad_norm": 0.0006563942297361791, |
|
"learning_rate": 7.347327972844096e-05, |
|
"loss": 46.0, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.015459403967511096, |
|
"grad_norm": 0.00022202875697985291, |
|
"learning_rate": 7.323909337761317e-05, |
|
"loss": 46.0, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.01547953339976046, |
|
"grad_norm": 0.0002219324087491259, |
|
"learning_rate": 7.30050650072202e-05, |
|
"loss": 46.0, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.015499662832009823, |
|
"grad_norm": 0.00032914732582867146, |
|
"learning_rate": 7.277119599882586e-05, |
|
"loss": 46.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.015519792264259187, |
|
"grad_norm": 0.0005626956117339432, |
|
"learning_rate": 7.25374877330534e-05, |
|
"loss": 46.0, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.01553992169650855, |
|
"grad_norm": 0.0002807240525726229, |
|
"learning_rate": 7.230394158957705e-05, |
|
"loss": 46.0, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.015560051128757914, |
|
"grad_norm": 0.0005751781282015145, |
|
"learning_rate": 7.20705589471139e-05, |
|
"loss": 46.0, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.015580180561007276, |
|
"grad_norm": 0.0006733541958965361, |
|
"learning_rate": 7.1837341183416e-05, |
|
"loss": 46.0, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.01560030999325664, |
|
"grad_norm": 0.00039969501085579395, |
|
"learning_rate": 7.160428967526187e-05, |
|
"loss": 46.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.015620439425506004, |
|
"grad_norm": 0.0005381114315241575, |
|
"learning_rate": 7.137140579844871e-05, |
|
"loss": 46.0, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.015640568857755367, |
|
"grad_norm": 0.0006002363516017795, |
|
"learning_rate": 7.1138690927784e-05, |
|
"loss": 46.0, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.01566069829000473, |
|
"grad_norm": 0.000290636089630425, |
|
"learning_rate": 7.090614643707762e-05, |
|
"loss": 46.0, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.015680827722254095, |
|
"grad_norm": 0.00021310077863745391, |
|
"learning_rate": 7.067377369913352e-05, |
|
"loss": 46.0, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.015700957154503457, |
|
"grad_norm": 0.00011205086775589734, |
|
"learning_rate": 7.044157408574185e-05, |
|
"loss": 46.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.015721086586752822, |
|
"grad_norm": 0.0011209280928596854, |
|
"learning_rate": 7.020954896767058e-05, |
|
"loss": 46.0, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.015741216019002184, |
|
"grad_norm": 0.0005297974566929042, |
|
"learning_rate": 6.997769971465769e-05, |
|
"loss": 46.0, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.015761345451251546, |
|
"grad_norm": 0.0007235727971419692, |
|
"learning_rate": 6.974602769540289e-05, |
|
"loss": 46.0, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.01578147488350091, |
|
"grad_norm": 0.0008870816673152149, |
|
"learning_rate": 6.951453427755968e-05, |
|
"loss": 46.0, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.015801604315750273, |
|
"grad_norm": 0.00041311554377898574, |
|
"learning_rate": 6.928322082772712e-05, |
|
"loss": 46.0, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.01582173374799964, |
|
"grad_norm": 0.0003360177797731012, |
|
"learning_rate": 6.905208871144187e-05, |
|
"loss": 46.0, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.015841863180249, |
|
"grad_norm": 0.00035951961763203144, |
|
"learning_rate": 6.882113929317015e-05, |
|
"loss": 46.0, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.015861992612498363, |
|
"grad_norm": 0.0004993542679585516, |
|
"learning_rate": 6.859037393629957e-05, |
|
"loss": 46.0, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.01588212204474773, |
|
"grad_norm": 0.0004983010003343225, |
|
"learning_rate": 6.835979400313122e-05, |
|
"loss": 46.0, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.01590225147699709, |
|
"grad_norm": 0.00040434906259179115, |
|
"learning_rate": 6.81294008548715e-05, |
|
"loss": 46.0, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.015922380909246456, |
|
"grad_norm": 0.0005002523539587855, |
|
"learning_rate": 6.789919585162423e-05, |
|
"loss": 46.0, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.015942510341495818, |
|
"grad_norm": 0.0006788838654756546, |
|
"learning_rate": 6.766918035238237e-05, |
|
"loss": 46.0, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.015962639773745183, |
|
"grad_norm": 0.0002748892002273351, |
|
"learning_rate": 6.743935571502038e-05, |
|
"loss": 46.0, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.015982769205994545, |
|
"grad_norm": 0.00030586449429392815, |
|
"learning_rate": 6.720972329628577e-05, |
|
"loss": 46.0, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.016002898638243907, |
|
"grad_norm": 0.0005200284067541361, |
|
"learning_rate": 6.698028445179148e-05, |
|
"loss": 46.0, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.016023028070493273, |
|
"grad_norm": 0.00024440689594484866, |
|
"learning_rate": 6.675104053600763e-05, |
|
"loss": 46.0, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.016043157502742635, |
|
"grad_norm": 0.0006268357392400503, |
|
"learning_rate": 6.65219929022535e-05, |
|
"loss": 46.0, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.016063286934992, |
|
"grad_norm": 0.00042199273593723774, |
|
"learning_rate": 6.629314290268987e-05, |
|
"loss": 46.0, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.016083416367241362, |
|
"grad_norm": 0.00035459554055705667, |
|
"learning_rate": 6.606449188831057e-05, |
|
"loss": 46.0, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.016103545799490724, |
|
"grad_norm": 0.00029721111059188843, |
|
"learning_rate": 6.583604120893488e-05, |
|
"loss": 46.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.01612367523174009, |
|
"grad_norm": 0.0003098206070717424, |
|
"learning_rate": 6.560779221319938e-05, |
|
"loss": 46.0, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.01614380466398945, |
|
"grad_norm": 0.00051769835408777, |
|
"learning_rate": 6.537974624855003e-05, |
|
"loss": 46.0, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.016163934096238817, |
|
"grad_norm": 0.0006196299218572676, |
|
"learning_rate": 6.51519046612342e-05, |
|
"loss": 46.0, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.01618406352848818, |
|
"grad_norm": 0.0002488850150257349, |
|
"learning_rate": 6.492426879629282e-05, |
|
"loss": 46.0, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.01620419296073754, |
|
"grad_norm": 0.0009111189283430576, |
|
"learning_rate": 6.46968399975522e-05, |
|
"loss": 46.0, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.016224322392986906, |
|
"grad_norm": 0.000280485866824165, |
|
"learning_rate": 6.446961960761643e-05, |
|
"loss": 46.0, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.016244451825236268, |
|
"grad_norm": 0.0004848411481361836, |
|
"learning_rate": 6.424260896785914e-05, |
|
"loss": 46.0, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.016264581257485634, |
|
"grad_norm": 0.0007137816864997149, |
|
"learning_rate": 6.40158094184158e-05, |
|
"loss": 46.0, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.016284710689734996, |
|
"grad_norm": 0.0005314049194566905, |
|
"learning_rate": 6.378922229817575e-05, |
|
"loss": 46.0, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.01630484012198436, |
|
"grad_norm": 0.0008834300679154694, |
|
"learning_rate": 6.356284894477412e-05, |
|
"loss": 46.0, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.016324969554233723, |
|
"grad_norm": 0.0008476293878629804, |
|
"learning_rate": 6.333669069458432e-05, |
|
"loss": 46.0, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.016345098986483085, |
|
"grad_norm": 0.0003947268414776772, |
|
"learning_rate": 6.311074888270971e-05, |
|
"loss": 46.0, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.01636522841873245, |
|
"grad_norm": 0.0003291845496278256, |
|
"learning_rate": 6.288502484297607e-05, |
|
"loss": 46.0, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.016385357850981812, |
|
"grad_norm": 0.0005163813475519419, |
|
"learning_rate": 6.265951990792347e-05, |
|
"loss": 46.0, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.016405487283231178, |
|
"grad_norm": 0.00034108126419596374, |
|
"learning_rate": 6.243423540879865e-05, |
|
"loss": 46.0, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.01642561671548054, |
|
"grad_norm": 0.0003455891564954072, |
|
"learning_rate": 6.220917267554686e-05, |
|
"loss": 46.0, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.016445746147729902, |
|
"grad_norm": 0.001084479154087603, |
|
"learning_rate": 6.198433303680439e-05, |
|
"loss": 46.0, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.016465875579979267, |
|
"grad_norm": 0.0003289970045443624, |
|
"learning_rate": 6.175971781989025e-05, |
|
"loss": 46.0, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.01648600501222863, |
|
"grad_norm": 0.0003814552037511021, |
|
"learning_rate": 6.153532835079886e-05, |
|
"loss": 46.0, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.016506134444477995, |
|
"grad_norm": 0.0013550389558076859, |
|
"learning_rate": 6.131116595419178e-05, |
|
"loss": 46.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.016526263876727357, |
|
"grad_norm": 0.0005670760874636471, |
|
"learning_rate": 6.108723195339011e-05, |
|
"loss": 46.0, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.01654639330897672, |
|
"grad_norm": 0.0008530982304364443, |
|
"learning_rate": 6.086352767036673e-05, |
|
"loss": 46.0, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.016566522741226084, |
|
"grad_norm": 0.0002651612740010023, |
|
"learning_rate": 6.064005442573824e-05, |
|
"loss": 46.0, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.016586652173475446, |
|
"grad_norm": 0.0009935569250956178, |
|
"learning_rate": 6.041681353875746e-05, |
|
"loss": 46.0, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.01660678160572481, |
|
"grad_norm": 0.00029908836586400867, |
|
"learning_rate": 6.019380632730546e-05, |
|
"loss": 46.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.016626911037974174, |
|
"grad_norm": 0.00025813686079345644, |
|
"learning_rate": 5.997103410788385e-05, |
|
"loss": 46.0, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.01664704047022354, |
|
"grad_norm": 0.0006937151192687452, |
|
"learning_rate": 5.9748498195606925e-05, |
|
"loss": 46.0, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.0166671699024729, |
|
"grad_norm": 0.0004930637078359723, |
|
"learning_rate": 5.952619990419408e-05, |
|
"loss": 46.0, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.016687299334722263, |
|
"grad_norm": 0.0004578740627039224, |
|
"learning_rate": 5.9304140545961784e-05, |
|
"loss": 46.0, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.01670742876697163, |
|
"grad_norm": 0.00044765419443137944, |
|
"learning_rate": 5.9082321431816156e-05, |
|
"loss": 46.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.01672755819922099, |
|
"grad_norm": 0.00078958785161376, |
|
"learning_rate": 5.88607438712449e-05, |
|
"loss": 46.0, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.016747687631470356, |
|
"grad_norm": 0.0006307225557975471, |
|
"learning_rate": 5.863940917230986e-05, |
|
"loss": 46.0, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.016767817063719718, |
|
"grad_norm": 0.0006907914648763835, |
|
"learning_rate": 5.841831864163909e-05, |
|
"loss": 46.0, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.01678794649596908, |
|
"grad_norm": 0.0006139145698398352, |
|
"learning_rate": 5.8197473584419184e-05, |
|
"loss": 46.0, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.016808075928218445, |
|
"grad_norm": 0.0009209556155838072, |
|
"learning_rate": 5.7976875304387756e-05, |
|
"loss": 46.0, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.016828205360467807, |
|
"grad_norm": 0.0005617816932499409, |
|
"learning_rate": 5.7756525103825474e-05, |
|
"loss": 46.0, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.016848334792717173, |
|
"grad_norm": 0.0006842644652351737, |
|
"learning_rate": 5.753642428354852e-05, |
|
"loss": 46.0, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.016868464224966535, |
|
"grad_norm": 0.0002727070532273501, |
|
"learning_rate": 5.731657414290085e-05, |
|
"loss": 46.0, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.016888593657215897, |
|
"grad_norm": 0.0005894514033570886, |
|
"learning_rate": 5.7096975979746704e-05, |
|
"loss": 46.0, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.016908723089465262, |
|
"grad_norm": 0.00021878795814700425, |
|
"learning_rate": 5.687763109046255e-05, |
|
"loss": 46.0, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.016928852521714624, |
|
"grad_norm": 0.0005657190340571105, |
|
"learning_rate": 5.665854076992991e-05, |
|
"loss": 46.0, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.01694898195396399, |
|
"grad_norm": 0.0005830818554386497, |
|
"learning_rate": 5.643970631152735e-05, |
|
"loss": 46.0, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.01696911138621335, |
|
"grad_norm": 0.0007164289709180593, |
|
"learning_rate": 5.622112900712304e-05, |
|
"loss": 46.0, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.016989240818462717, |
|
"grad_norm": 0.0004906615940853953, |
|
"learning_rate": 5.600281014706703e-05, |
|
"loss": 46.0, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.01700937025071208, |
|
"grad_norm": 0.001047360710799694, |
|
"learning_rate": 5.57847510201837e-05, |
|
"loss": 46.0, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.01702949968296144, |
|
"grad_norm": 0.00040790237835608423, |
|
"learning_rate": 5.556695291376406e-05, |
|
"loss": 46.0, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.017049629115210806, |
|
"grad_norm": 0.0006064849440008402, |
|
"learning_rate": 5.5349417113558254e-05, |
|
"loss": 46.0, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.01706975854746017, |
|
"grad_norm": 0.0001834803551901132, |
|
"learning_rate": 5.5132144903768e-05, |
|
"loss": 46.0, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.017089887979709534, |
|
"grad_norm": 0.00027216278249397874, |
|
"learning_rate": 5.491513756703881e-05, |
|
"loss": 46.0, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.017110017411958896, |
|
"grad_norm": 0.0005655901040881872, |
|
"learning_rate": 5.46983963844526e-05, |
|
"loss": 46.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.017130146844208258, |
|
"grad_norm": 0.0013301552971825004, |
|
"learning_rate": 5.448192263552006e-05, |
|
"loss": 46.0, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.017150276276457623, |
|
"grad_norm": 0.00028807963826693594, |
|
"learning_rate": 5.426571759817314e-05, |
|
"loss": 46.0, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.017170405708706985, |
|
"grad_norm": 0.0008422695682384074, |
|
"learning_rate": 5.4049782548757386e-05, |
|
"loss": 46.0, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.01719053514095635, |
|
"grad_norm": 0.0004891850403510034, |
|
"learning_rate": 5.383411876202464e-05, |
|
"loss": 46.0, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.017210664573205713, |
|
"grad_norm": 0.00028051427216269076, |
|
"learning_rate": 5.36187275111253e-05, |
|
"loss": 46.0, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.017230794005455075, |
|
"grad_norm": 0.0012559060705825686, |
|
"learning_rate": 5.340361006760082e-05, |
|
"loss": 46.0, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.01725092343770444, |
|
"grad_norm": 0.0007434745784848928, |
|
"learning_rate": 5.318876770137634e-05, |
|
"loss": 46.0, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.017271052869953802, |
|
"grad_norm": 0.0007889253320172429, |
|
"learning_rate": 5.297420168075307e-05, |
|
"loss": 46.0, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.017291182302203167, |
|
"grad_norm": 0.0008923442219384015, |
|
"learning_rate": 5.275991327240082e-05, |
|
"loss": 46.0, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.01731131173445253, |
|
"grad_norm": 0.0007664742297492921, |
|
"learning_rate": 5.254590374135058e-05, |
|
"loss": 46.0, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.017331441166701895, |
|
"grad_norm": 0.0003808206529356539, |
|
"learning_rate": 5.233217435098707e-05, |
|
"loss": 46.0, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.017351570598951257, |
|
"grad_norm": 0.0008439875091426075, |
|
"learning_rate": 5.2118726363041036e-05, |
|
"loss": 46.0, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.01737170003120062, |
|
"grad_norm": 0.0005331309512257576, |
|
"learning_rate": 5.190556103758223e-05, |
|
"loss": 46.0, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.017391829463449984, |
|
"grad_norm": 0.0005547069013118744, |
|
"learning_rate": 5.1692679633011564e-05, |
|
"loss": 46.0, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.017411958895699346, |
|
"grad_norm": 0.000526057556271553, |
|
"learning_rate": 5.148008340605392e-05, |
|
"loss": 46.0, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.01743208832794871, |
|
"grad_norm": 0.0012951105600222945, |
|
"learning_rate": 5.1267773611750624e-05, |
|
"loss": 46.0, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.017452217760198074, |
|
"grad_norm": 0.0010913871228694916, |
|
"learning_rate": 5.1055751503452144e-05, |
|
"loss": 46.0, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.017472347192447436, |
|
"grad_norm": 0.0005590246873907745, |
|
"learning_rate": 5.0844018332810594e-05, |
|
"loss": 46.0, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.0174924766246968, |
|
"grad_norm": 0.0006127689266577363, |
|
"learning_rate": 5.0632575349772225e-05, |
|
"loss": 46.0, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.017512606056946163, |
|
"grad_norm": 0.0004413559508975595, |
|
"learning_rate": 5.0421423802570454e-05, |
|
"loss": 46.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.01753273548919553, |
|
"grad_norm": 0.00039344895048998296, |
|
"learning_rate": 5.0210564937718055e-05, |
|
"loss": 46.0, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.01755286492144489, |
|
"grad_norm": 0.0009540827013552189, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 46.0, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.017572994353694252, |
|
"grad_norm": 0.0008320124470628798, |
|
"learning_rate": 4.978973023246616e-05, |
|
"loss": 46.0, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.017593123785943618, |
|
"grad_norm": 0.001188238151371479, |
|
"learning_rate": 4.957975687642389e-05, |
|
"loss": 46.0, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.01761325321819298, |
|
"grad_norm": 0.0008758010808378458, |
|
"learning_rate": 4.937008117143055e-05, |
|
"loss": 46.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.017633382650442345, |
|
"grad_norm": 0.0005536804674193263, |
|
"learning_rate": 4.9160704355286577e-05, |
|
"loss": 46.0, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.017653512082691707, |
|
"grad_norm": 0.00037108969991095364, |
|
"learning_rate": 4.895162766402781e-05, |
|
"loss": 46.0, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.017673641514941073, |
|
"grad_norm": 0.0011138038244098425, |
|
"learning_rate": 4.8742852331918364e-05, |
|
"loss": 46.0, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.017693770947190435, |
|
"grad_norm": 0.0004553370818030089, |
|
"learning_rate": 4.8534379591443246e-05, |
|
"loss": 46.0, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.017713900379439797, |
|
"grad_norm": 0.0010616140207275748, |
|
"learning_rate": 4.8326210673301284e-05, |
|
"loss": 46.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.017734029811689162, |
|
"grad_norm": 0.00036784596159122884, |
|
"learning_rate": 4.811834680639765e-05, |
|
"loss": 46.0, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.017754159243938524, |
|
"grad_norm": 0.00037242978578433394, |
|
"learning_rate": 4.791078921783653e-05, |
|
"loss": 46.0, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.01777428867618789, |
|
"grad_norm": 0.0014223081525415182, |
|
"learning_rate": 4.770353913291428e-05, |
|
"loss": 46.0, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.01779441810843725, |
|
"grad_norm": 0.0004683698061853647, |
|
"learning_rate": 4.749659777511177e-05, |
|
"loss": 46.0, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.017814547540686614, |
|
"grad_norm": 0.0004508081474341452, |
|
"learning_rate": 4.728996636608738e-05, |
|
"loss": 46.0, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.01783467697293598, |
|
"grad_norm": 0.000599596940446645, |
|
"learning_rate": 4.708364612566969e-05, |
|
"loss": 46.0, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.01785480640518534, |
|
"grad_norm": 0.0004682582803070545, |
|
"learning_rate": 4.6877638271850485e-05, |
|
"loss": 46.0, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.017874935837434706, |
|
"grad_norm": 0.00035204822779633105, |
|
"learning_rate": 4.667194402077714e-05, |
|
"loss": 46.0, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.01789506526968407, |
|
"grad_norm": 0.0009401136194355786, |
|
"learning_rate": 4.646656458674595e-05, |
|
"loss": 46.0, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.01791519470193343, |
|
"grad_norm": 0.0006439790595322847, |
|
"learning_rate": 4.62615011821946e-05, |
|
"loss": 46.0, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.017935324134182796, |
|
"grad_norm": 0.0006773846689611673, |
|
"learning_rate": 4.6056755017695155e-05, |
|
"loss": 46.0, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.017955453566432158, |
|
"grad_norm": 0.0009548621601425111, |
|
"learning_rate": 4.585232730194682e-05, |
|
"loss": 46.0, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.017975582998681523, |
|
"grad_norm": 0.0008568911580368876, |
|
"learning_rate": 4.5648219241769054e-05, |
|
"loss": 46.0, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.017995712430930885, |
|
"grad_norm": 0.00021023969748057425, |
|
"learning_rate": 4.5444432042093996e-05, |
|
"loss": 46.0, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.01801584186318025, |
|
"grad_norm": 0.0003755395009648055, |
|
"learning_rate": 4.524096690595978e-05, |
|
"loss": 46.0, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.018035971295429613, |
|
"grad_norm": 0.0010003233328461647, |
|
"learning_rate": 4.5037825034503304e-05, |
|
"loss": 46.0, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.018056100727678975, |
|
"grad_norm": 0.0007916453178040683, |
|
"learning_rate": 4.4835007626953e-05, |
|
"loss": 46.0, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.01807623015992834, |
|
"grad_norm": 0.0002515468222554773, |
|
"learning_rate": 4.4632515880621894e-05, |
|
"loss": 46.0, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.018096359592177702, |
|
"grad_norm": 0.0010216659866273403, |
|
"learning_rate": 4.443035099090048e-05, |
|
"loss": 46.0, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.018116489024427068, |
|
"grad_norm": 0.0006039583240635693, |
|
"learning_rate": 4.4228514151249825e-05, |
|
"loss": 46.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.01813661845667643, |
|
"grad_norm": 0.0011847359128296375, |
|
"learning_rate": 4.4027006553194115e-05, |
|
"loss": 46.0, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.01815674788892579, |
|
"grad_norm": 0.0013753952225670218, |
|
"learning_rate": 4.3825829386314166e-05, |
|
"loss": 46.0, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.018176877321175157, |
|
"grad_norm": 0.0006966555956751108, |
|
"learning_rate": 4.362498383823996e-05, |
|
"loss": 46.0, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.01819700675342452, |
|
"grad_norm": 0.000611517985817045, |
|
"learning_rate": 4.342447109464385e-05, |
|
"loss": 46.0, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.018217136185673884, |
|
"grad_norm": 0.001369222765788436, |
|
"learning_rate": 4.322429233923351e-05, |
|
"loss": 46.0, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.018237265617923246, |
|
"grad_norm": 0.00040184592944569886, |
|
"learning_rate": 4.3024448753744925e-05, |
|
"loss": 46.0, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.01825739505017261, |
|
"grad_norm": 0.0007477894541807473, |
|
"learning_rate": 4.282494151793548e-05, |
|
"loss": 46.0, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.018277524482421974, |
|
"grad_norm": 0.0008548839250579476, |
|
"learning_rate": 4.2625771809576874e-05, |
|
"loss": 46.0, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.018297653914671336, |
|
"grad_norm": 0.0004542908282019198, |
|
"learning_rate": 4.242694080444837e-05, |
|
"loss": 46.0, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.0183177833469207, |
|
"grad_norm": 0.0004831959377042949, |
|
"learning_rate": 4.2228449676329616e-05, |
|
"loss": 46.0, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.018337912779170063, |
|
"grad_norm": 0.0005519616534002125, |
|
"learning_rate": 4.2030299596993883e-05, |
|
"loss": 46.0, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.01835804221141943, |
|
"grad_norm": 0.0007433863938786089, |
|
"learning_rate": 4.1832491736201077e-05, |
|
"loss": 46.0, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.01837817164366879, |
|
"grad_norm": 0.0009301244281232357, |
|
"learning_rate": 4.1635027261690827e-05, |
|
"loss": 46.0, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.018398301075918153, |
|
"grad_norm": 0.0016711597563698888, |
|
"learning_rate": 4.143790733917564e-05, |
|
"loss": 46.0, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.018418430508167518, |
|
"grad_norm": 0.0014043203555047512, |
|
"learning_rate": 4.124113313233404e-05, |
|
"loss": 46.0, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.01843855994041688, |
|
"grad_norm": 0.001079570734873414, |
|
"learning_rate": 4.1044705802803574e-05, |
|
"loss": 46.0, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.018458689372666245, |
|
"grad_norm": 0.0006708208820782602, |
|
"learning_rate": 4.084862651017406e-05, |
|
"loss": 46.0, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.018478818804915607, |
|
"grad_norm": 0.00034618499921634793, |
|
"learning_rate": 4.065289641198073e-05, |
|
"loss": 46.0, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.01849894823716497, |
|
"grad_norm": 0.0003828117623925209, |
|
"learning_rate": 4.045751666369736e-05, |
|
"loss": 46.0, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.018519077669414335, |
|
"grad_norm": 0.00042931470670737326, |
|
"learning_rate": 4.026248841872946e-05, |
|
"loss": 46.0, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.018539207101663697, |
|
"grad_norm": 0.0014193730894476175, |
|
"learning_rate": 4.006781282840748e-05, |
|
"loss": 46.0, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.018559336533913062, |
|
"grad_norm": 0.0003629166749306023, |
|
"learning_rate": 3.987349104198007e-05, |
|
"loss": 46.0, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.018579465966162424, |
|
"grad_norm": 0.0006790644838474691, |
|
"learning_rate": 3.9679524206607156e-05, |
|
"loss": 46.0, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.018599595398411786, |
|
"grad_norm": 0.0003729971940629184, |
|
"learning_rate": 3.948591346735325e-05, |
|
"loss": 46.0, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.01861972483066115, |
|
"grad_norm": 0.0007552574970759451, |
|
"learning_rate": 3.929265996718072e-05, |
|
"loss": 46.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.018639854262910514, |
|
"grad_norm": 0.0008475988288410008, |
|
"learning_rate": 3.9099764846943e-05, |
|
"loss": 46.0, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.01865998369515988, |
|
"grad_norm": 0.0007365219644270837, |
|
"learning_rate": 3.89072292453778e-05, |
|
"loss": 46.0, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.01868011312740924, |
|
"grad_norm": 0.0006838237750343978, |
|
"learning_rate": 3.871505429910057e-05, |
|
"loss": 46.0, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.018700242559658607, |
|
"grad_norm": 0.001090203644707799, |
|
"learning_rate": 3.85232411425976e-05, |
|
"loss": 46.0, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.01872037199190797, |
|
"grad_norm": 0.0004651647468563169, |
|
"learning_rate": 3.833179090821929e-05, |
|
"loss": 46.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.01874050142415733, |
|
"grad_norm": 0.0019039801554754376, |
|
"learning_rate": 3.814070472617375e-05, |
|
"loss": 46.0, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.018760630856406696, |
|
"grad_norm": 0.0003899121074937284, |
|
"learning_rate": 3.794998372451981e-05, |
|
"loss": 46.0, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.018780760288656058, |
|
"grad_norm": 0.0008817263296805322, |
|
"learning_rate": 3.775962902916056e-05, |
|
"loss": 46.0, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.018800889720905423, |
|
"grad_norm": 0.00196042750030756, |
|
"learning_rate": 3.756964176383655e-05, |
|
"loss": 46.0, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.018821019153154785, |
|
"grad_norm": 0.0003646701225079596, |
|
"learning_rate": 3.7380023050119415e-05, |
|
"loss": 46.0, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.018841148585404147, |
|
"grad_norm": 0.0007882235804572701, |
|
"learning_rate": 3.7190774007404835e-05, |
|
"loss": 46.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.018861278017653513, |
|
"grad_norm": 0.0008490802138112485, |
|
"learning_rate": 3.700189575290641e-05, |
|
"loss": 46.0, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.018881407449902875, |
|
"grad_norm": 0.0003721664543263614, |
|
"learning_rate": 3.681338940164868e-05, |
|
"loss": 46.0, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.01890153688215224, |
|
"grad_norm": 0.00035278795985504985, |
|
"learning_rate": 3.6625256066460735e-05, |
|
"loss": 46.0, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.018921666314401602, |
|
"grad_norm": 0.0005430346354842186, |
|
"learning_rate": 3.6437496857969566e-05, |
|
"loss": 46.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.018941795746650964, |
|
"grad_norm": 0.0013423273339867592, |
|
"learning_rate": 3.625011288459365e-05, |
|
"loss": 46.0, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.01896192517890033, |
|
"grad_norm": 0.0003662093076854944, |
|
"learning_rate": 3.606310525253621e-05, |
|
"loss": 46.0, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.01898205461114969, |
|
"grad_norm": 0.0005090291379019618, |
|
"learning_rate": 3.5876475065778715e-05, |
|
"loss": 46.0, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.019002184043399057, |
|
"grad_norm": 0.0002727884566411376, |
|
"learning_rate": 3.5690223426074576e-05, |
|
"loss": 46.0, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.01902231347564842, |
|
"grad_norm": 0.0005081517156213522, |
|
"learning_rate": 3.550435143294238e-05, |
|
"loss": 46.0, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.019042442907897784, |
|
"grad_norm": 0.0008462371188215911, |
|
"learning_rate": 3.531886018365954e-05, |
|
"loss": 46.0, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.019062572340147146, |
|
"grad_norm": 0.0013481457717716694, |
|
"learning_rate": 3.513375077325575e-05, |
|
"loss": 46.0, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.01908270177239651, |
|
"grad_norm": 0.0006557535380125046, |
|
"learning_rate": 3.4949024294506674e-05, |
|
"loss": 46.0, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.019102831204645874, |
|
"grad_norm": 0.0005414308398030698, |
|
"learning_rate": 3.476468183792716e-05, |
|
"loss": 46.0, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.019122960636895236, |
|
"grad_norm": 0.0014492279151454568, |
|
"learning_rate": 3.458072449176525e-05, |
|
"loss": 46.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.0191430900691446, |
|
"grad_norm": 0.0007959827198646963, |
|
"learning_rate": 3.439715334199538e-05, |
|
"loss": 46.0, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.019163219501393963, |
|
"grad_norm": 0.0005054565845057368, |
|
"learning_rate": 3.4213969472312154e-05, |
|
"loss": 46.0, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.019183348933643325, |
|
"grad_norm": 0.0010196175426244736, |
|
"learning_rate": 3.403117396412391e-05, |
|
"loss": 46.0, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.01920347836589269, |
|
"grad_norm": 0.0007254026713781059, |
|
"learning_rate": 3.384876789654631e-05, |
|
"loss": 46.0, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.019223607798142053, |
|
"grad_norm": 0.000904410204384476, |
|
"learning_rate": 3.366675234639601e-05, |
|
"loss": 46.0, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.019243737230391418, |
|
"grad_norm": 0.0011299613397568464, |
|
"learning_rate": 3.348512838818425e-05, |
|
"loss": 46.0, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.01926386666264078, |
|
"grad_norm": 0.0011055005015805364, |
|
"learning_rate": 3.3303897094110636e-05, |
|
"loss": 46.0, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.019283996094890142, |
|
"grad_norm": 0.0005328382831066847, |
|
"learning_rate": 3.3123059534056634e-05, |
|
"loss": 46.0, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.019304125527139507, |
|
"grad_norm": 0.0006177676841616631, |
|
"learning_rate": 3.294261677557935e-05, |
|
"loss": 46.0, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.01932425495938887, |
|
"grad_norm": 0.0007813276024535298, |
|
"learning_rate": 3.2762569883905205e-05, |
|
"loss": 46.0, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.019344384391638235, |
|
"grad_norm": 0.000981914228759706, |
|
"learning_rate": 3.258291992192377e-05, |
|
"loss": 46.0, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.019364513823887597, |
|
"grad_norm": 0.000979884178377688, |
|
"learning_rate": 3.240366795018117e-05, |
|
"loss": 46.0, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.019384643256136962, |
|
"grad_norm": 0.0005548804765567183, |
|
"learning_rate": 3.222481502687425e-05, |
|
"loss": 46.0, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.019404772688386324, |
|
"grad_norm": 0.0007180179818533361, |
|
"learning_rate": 3.2046362207844e-05, |
|
"loss": 46.0, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.019424902120635686, |
|
"grad_norm": 0.0011889664456248283, |
|
"learning_rate": 3.1868310546569424e-05, |
|
"loss": 46.0, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.019445031552885052, |
|
"grad_norm": 0.00041984309791587293, |
|
"learning_rate": 3.1690661094161364e-05, |
|
"loss": 46.0, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.019465160985134414, |
|
"grad_norm": 0.0005576315452344716, |
|
"learning_rate": 3.151341489935627e-05, |
|
"loss": 46.0, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.01948529041738378, |
|
"grad_norm": 0.0008044608402997255, |
|
"learning_rate": 3.133657300850995e-05, |
|
"loss": 46.0, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.01950541984963314, |
|
"grad_norm": 0.0006808895850554109, |
|
"learning_rate": 3.116013646559146e-05, |
|
"loss": 46.0, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.019525549281882503, |
|
"grad_norm": 0.000420604192186147, |
|
"learning_rate": 3.0984106312177e-05, |
|
"loss": 46.0, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.01954567871413187, |
|
"grad_norm": 0.0008205743506550789, |
|
"learning_rate": 3.0808483587443595e-05, |
|
"loss": 46.0, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.01956580814638123, |
|
"grad_norm": 0.0006578009924851358, |
|
"learning_rate": 3.063326932816307e-05, |
|
"loss": 46.0, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.019585937578630596, |
|
"grad_norm": 0.0017218554858118296, |
|
"learning_rate": 3.045846456869592e-05, |
|
"loss": 46.0, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.019606067010879958, |
|
"grad_norm": 0.0008508884930051863, |
|
"learning_rate": 3.0284070340985295e-05, |
|
"loss": 46.0, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.01962619644312932, |
|
"grad_norm": 0.0006469383952207863, |
|
"learning_rate": 3.011008767455059e-05, |
|
"loss": 46.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.019646325875378685, |
|
"grad_norm": 0.00034800561843439937, |
|
"learning_rate": 2.9936517596481818e-05, |
|
"loss": 46.0, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.019666455307628047, |
|
"grad_norm": 0.0005060382536612451, |
|
"learning_rate": 2.9763361131433208e-05, |
|
"loss": 46.0, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.019686584739877413, |
|
"grad_norm": 0.0005251033580861986, |
|
"learning_rate": 2.9590619301617183e-05, |
|
"loss": 46.0, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.019686584739877413, |
|
"eval_loss": 11.5, |
|
"eval_runtime": 126.6494, |
|
"eval_samples_per_second": 165.165, |
|
"eval_steps_per_second": 82.582, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.019706714172126775, |
|
"grad_norm": 0.00031089509138837457, |
|
"learning_rate": 2.9418293126798603e-05, |
|
"loss": 46.0, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.01972684360437614, |
|
"grad_norm": 0.0008214189438149333, |
|
"learning_rate": 2.9246383624288387e-05, |
|
"loss": 46.0, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.019746973036625502, |
|
"grad_norm": 0.0012812769273295999, |
|
"learning_rate": 2.9074891808937753e-05, |
|
"loss": 46.0, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.019767102468874864, |
|
"grad_norm": 0.0005773354787379503, |
|
"learning_rate": 2.8903818693132077e-05, |
|
"loss": 46.0, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.01978723190112423, |
|
"grad_norm": 0.000667093729134649, |
|
"learning_rate": 2.873316528678507e-05, |
|
"loss": 46.0, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.01980736133337359, |
|
"grad_norm": 0.0005955504602752626, |
|
"learning_rate": 2.856293259733266e-05, |
|
"loss": 46.0, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.019827490765622957, |
|
"grad_norm": 0.0009190890123136342, |
|
"learning_rate": 2.8393121629727138e-05, |
|
"loss": 46.0, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.01984762019787232, |
|
"grad_norm": 0.000597997335717082, |
|
"learning_rate": 2.8223733386431185e-05, |
|
"loss": 46.0, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.01986774963012168, |
|
"grad_norm": 0.001080994145013392, |
|
"learning_rate": 2.8054768867411974e-05, |
|
"loss": 46.0, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.019887879062371046, |
|
"grad_norm": 0.0009957716101780534, |
|
"learning_rate": 2.788622907013526e-05, |
|
"loss": 46.0, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.01990800849462041, |
|
"grad_norm": 0.0005448372685350478, |
|
"learning_rate": 2.7718114989559552e-05, |
|
"loss": 46.0, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.019928137926869774, |
|
"grad_norm": 0.0005995734827592969, |
|
"learning_rate": 2.7550427618130127e-05, |
|
"loss": 46.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.019948267359119136, |
|
"grad_norm": 0.0009233965538442135, |
|
"learning_rate": 2.738316794577315e-05, |
|
"loss": 46.0, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.019968396791368498, |
|
"grad_norm": 0.000919658865313977, |
|
"learning_rate": 2.7216336959890076e-05, |
|
"loss": 46.0, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.019988526223617863, |
|
"grad_norm": 0.0008890883764252067, |
|
"learning_rate": 2.704993564535152e-05, |
|
"loss": 46.0, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.020008655655867225, |
|
"grad_norm": 0.0005916538066230714, |
|
"learning_rate": 2.688396498449164e-05, |
|
"loss": 46.0, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.02002878508811659, |
|
"grad_norm": 0.0006696307682432234, |
|
"learning_rate": 2.671842595710219e-05, |
|
"loss": 46.0, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.020048914520365953, |
|
"grad_norm": 0.0006121333572082222, |
|
"learning_rate": 2.655331954042699e-05, |
|
"loss": 46.0, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.020069043952615318, |
|
"grad_norm": 0.0007363299373537302, |
|
"learning_rate": 2.638864670915572e-05, |
|
"loss": 46.0, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.02008917338486468, |
|
"grad_norm": 0.0006978671881370246, |
|
"learning_rate": 2.622440843541869e-05, |
|
"loss": 46.0, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.020109302817114042, |
|
"grad_norm": 0.0010457762982696295, |
|
"learning_rate": 2.6060605688780694e-05, |
|
"loss": 46.0, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.020129432249363408, |
|
"grad_norm": 0.0013878996251150966, |
|
"learning_rate": 2.5897239436235466e-05, |
|
"loss": 46.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02014956168161277, |
|
"grad_norm": 0.0007541460217908025, |
|
"learning_rate": 2.5734310642199943e-05, |
|
"loss": 46.0, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.020169691113862135, |
|
"grad_norm": 0.0015688574640080333, |
|
"learning_rate": 2.557182026850855e-05, |
|
"loss": 46.0, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.020189820546111497, |
|
"grad_norm": 0.0007213862845674157, |
|
"learning_rate": 2.5409769274407637e-05, |
|
"loss": 46.0, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.02020994997836086, |
|
"grad_norm": 0.0005947791505604982, |
|
"learning_rate": 2.524815861654952e-05, |
|
"loss": 46.0, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.020230079410610224, |
|
"grad_norm": 0.0006019077845849097, |
|
"learning_rate": 2.5086989248987248e-05, |
|
"loss": 46.0, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.020250208842859586, |
|
"grad_norm": 0.0012076911516487598, |
|
"learning_rate": 2.492626212316862e-05, |
|
"loss": 46.0, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.020270338275108952, |
|
"grad_norm": 0.0010813012486323714, |
|
"learning_rate": 2.476597818793075e-05, |
|
"loss": 46.0, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.020290467707358314, |
|
"grad_norm": 0.000990718137472868, |
|
"learning_rate": 2.460613838949437e-05, |
|
"loss": 46.0, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.020310597139607676, |
|
"grad_norm": 0.001392314094118774, |
|
"learning_rate": 2.444674367145845e-05, |
|
"loss": 46.0, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.02033072657185704, |
|
"grad_norm": 0.0014354052254930139, |
|
"learning_rate": 2.4287794974794208e-05, |
|
"loss": 46.0, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.020350856004106403, |
|
"grad_norm": 0.00045173740363679826, |
|
"learning_rate": 2.4129293237840066e-05, |
|
"loss": 46.0, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.02037098543635577, |
|
"grad_norm": 0.0011678735027089715, |
|
"learning_rate": 2.397123939629574e-05, |
|
"loss": 46.0, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.02039111486860513, |
|
"grad_norm": 0.0005970303900539875, |
|
"learning_rate": 2.3813634383216853e-05, |
|
"loss": 46.0, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.020411244300854496, |
|
"grad_norm": 0.0007846534135751426, |
|
"learning_rate": 2.3656479129009422e-05, |
|
"loss": 46.0, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.020431373733103858, |
|
"grad_norm": 0.00044201669516041875, |
|
"learning_rate": 2.3499774561424327e-05, |
|
"loss": 46.0, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.02045150316535322, |
|
"grad_norm": 0.001156677259132266, |
|
"learning_rate": 2.3343521605551967e-05, |
|
"loss": 46.0, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.020471632597602585, |
|
"grad_norm": 0.0006299121305346489, |
|
"learning_rate": 2.3187721183816503e-05, |
|
"loss": 46.0, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.020491762029851947, |
|
"grad_norm": 0.0007507737609557807, |
|
"learning_rate": 2.303237421597082e-05, |
|
"loss": 46.0, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.020511891462101313, |
|
"grad_norm": 0.0007438535685651004, |
|
"learning_rate": 2.2877481619090734e-05, |
|
"loss": 46.0, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.020532020894350675, |
|
"grad_norm": 0.0016225421568378806, |
|
"learning_rate": 2.2723044307569775e-05, |
|
"loss": 46.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.020552150326600037, |
|
"grad_norm": 0.00039501202991232276, |
|
"learning_rate": 2.2569063193113716e-05, |
|
"loss": 46.0, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.020572279758849402, |
|
"grad_norm": 0.0004267230106052011, |
|
"learning_rate": 2.2415539184735323e-05, |
|
"loss": 46.0, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.020592409191098764, |
|
"grad_norm": 0.0008113220101222396, |
|
"learning_rate": 2.2262473188748667e-05, |
|
"loss": 46.0, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.02061253862334813, |
|
"grad_norm": 0.0018733137985691428, |
|
"learning_rate": 2.210986610876421e-05, |
|
"loss": 46.0, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.02063266805559749, |
|
"grad_norm": 0.0011937421513721347, |
|
"learning_rate": 2.1957718845683106e-05, |
|
"loss": 46.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.020652797487846854, |
|
"grad_norm": 0.0011691252002492547, |
|
"learning_rate": 2.180603229769208e-05, |
|
"loss": 46.0, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.02067292692009622, |
|
"grad_norm": 0.0005129252676852047, |
|
"learning_rate": 2.1654807360258068e-05, |
|
"loss": 46.0, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.02069305635234558, |
|
"grad_norm": 0.001547716441564262, |
|
"learning_rate": 2.15040449261229e-05, |
|
"loss": 46.0, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.020713185784594947, |
|
"grad_norm": 0.0005555424140766263, |
|
"learning_rate": 2.1353745885298103e-05, |
|
"loss": 46.0, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.02073331521684431, |
|
"grad_norm": 0.0006457091076299548, |
|
"learning_rate": 2.120391112505955e-05, |
|
"loss": 46.0, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.020753444649093674, |
|
"grad_norm": 0.0005488627939485013, |
|
"learning_rate": 2.1054541529942374e-05, |
|
"loss": 46.0, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.020773574081343036, |
|
"grad_norm": 0.001043295138515532, |
|
"learning_rate": 2.090563798173557e-05, |
|
"loss": 46.0, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.020793703513592398, |
|
"grad_norm": 0.0010731170186772943, |
|
"learning_rate": 2.0757201359476884e-05, |
|
"loss": 46.0, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.020813832945841763, |
|
"grad_norm": 0.0010378467850387096, |
|
"learning_rate": 2.060923253944761e-05, |
|
"loss": 46.0, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.020833962378091125, |
|
"grad_norm": 0.0009367475286126137, |
|
"learning_rate": 2.0461732395167475e-05, |
|
"loss": 46.0, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.02085409181034049, |
|
"grad_norm": 0.0012599321780726314, |
|
"learning_rate": 2.03147017973893e-05, |
|
"loss": 46.0, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.020874221242589853, |
|
"grad_norm": 0.0007634823559783399, |
|
"learning_rate": 2.0168141614094126e-05, |
|
"loss": 46.0, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.020894350674839215, |
|
"grad_norm": 0.0008573421509936452, |
|
"learning_rate": 2.0022052710485874e-05, |
|
"loss": 46.0, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.02091448010708858, |
|
"grad_norm": 0.0009650752181187272, |
|
"learning_rate": 1.9876435948986228e-05, |
|
"loss": 46.0, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.020934609539337942, |
|
"grad_norm": 0.0008246242650784552, |
|
"learning_rate": 1.973129218922981e-05, |
|
"loss": 46.0, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.020954738971587308, |
|
"grad_norm": 0.0008325594244524837, |
|
"learning_rate": 1.95866222880588e-05, |
|
"loss": 46.0, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.02097486840383667, |
|
"grad_norm": 0.0007449231925420463, |
|
"learning_rate": 1.9442427099518022e-05, |
|
"loss": 46.0, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.02099499783608603, |
|
"grad_norm": 0.0016277192626148462, |
|
"learning_rate": 1.9298707474849843e-05, |
|
"loss": 46.0, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.021015127268335397, |
|
"grad_norm": 0.0005063241114839911, |
|
"learning_rate": 1.9155464262489298e-05, |
|
"loss": 46.0, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.02103525670058476, |
|
"grad_norm": 0.0008978778496384621, |
|
"learning_rate": 1.9012698308058852e-05, |
|
"loss": 46.0, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.021055386132834124, |
|
"grad_norm": 0.0007100282236933708, |
|
"learning_rate": 1.8870410454363573e-05, |
|
"loss": 46.0, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.021075515565083486, |
|
"grad_norm": 0.0006800219998694956, |
|
"learning_rate": 1.872860154138608e-05, |
|
"loss": 46.0, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.021095644997332852, |
|
"grad_norm": 0.0006419627461582422, |
|
"learning_rate": 1.858727240628171e-05, |
|
"loss": 46.0, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.021115774429582214, |
|
"grad_norm": 0.0004995979252271354, |
|
"learning_rate": 1.8446423883373286e-05, |
|
"loss": 46.0, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.021135903861831576, |
|
"grad_norm": 0.0007924246601760387, |
|
"learning_rate": 1.8306056804146575e-05, |
|
"loss": 46.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.02115603329408094, |
|
"grad_norm": 0.0006777087110094726, |
|
"learning_rate": 1.816617199724512e-05, |
|
"loss": 46.0, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.021176162726330303, |
|
"grad_norm": 0.0013369604712352157, |
|
"learning_rate": 1.8026770288465323e-05, |
|
"loss": 46.0, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.02119629215857967, |
|
"grad_norm": 0.0005232554394751787, |
|
"learning_rate": 1.7887852500751822e-05, |
|
"loss": 46.0, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.02121642159082903, |
|
"grad_norm": 0.0013328958302736282, |
|
"learning_rate": 1.7749419454192373e-05, |
|
"loss": 46.0, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.021236551023078393, |
|
"grad_norm": 0.0014558390248566866, |
|
"learning_rate": 1.7611471966013127e-05, |
|
"loss": 46.0, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.021256680455327758, |
|
"grad_norm": 0.0010977151105180383, |
|
"learning_rate": 1.7474010850573775e-05, |
|
"loss": 46.0, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.02127680988757712, |
|
"grad_norm": 0.000998729607090354, |
|
"learning_rate": 1.7337036919362827e-05, |
|
"loss": 46.0, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.021296939319826486, |
|
"grad_norm": 0.00059292814694345, |
|
"learning_rate": 1.7200550980992647e-05, |
|
"loss": 46.0, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.021317068752075848, |
|
"grad_norm": 0.0010517132468521595, |
|
"learning_rate": 1.706455384119485e-05, |
|
"loss": 46.0, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.02133719818432521, |
|
"grad_norm": 0.0010053809965029359, |
|
"learning_rate": 1.6929046302815443e-05, |
|
"loss": 46.0, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.021357327616574575, |
|
"grad_norm": 0.0006442563608288765, |
|
"learning_rate": 1.6794029165810133e-05, |
|
"loss": 46.0, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.021377457048823937, |
|
"grad_norm": 0.0004650287446565926, |
|
"learning_rate": 1.665950322723957e-05, |
|
"loss": 46.0, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.021397586481073302, |
|
"grad_norm": 0.0011159973219037056, |
|
"learning_rate": 1.652546928126466e-05, |
|
"loss": 46.0, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.021417715913322664, |
|
"grad_norm": 0.0005395881598815322, |
|
"learning_rate": 1.6391928119141965e-05, |
|
"loss": 46.0, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.02143784534557203, |
|
"grad_norm": 0.0005475838552229106, |
|
"learning_rate": 1.625888052921878e-05, |
|
"loss": 46.0, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.021457974777821392, |
|
"grad_norm": 0.0009001667494885623, |
|
"learning_rate": 1.612632729692881e-05, |
|
"loss": 46.0, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.021478104210070754, |
|
"grad_norm": 0.0011171525111421943, |
|
"learning_rate": 1.599426920478726e-05, |
|
"loss": 46.0, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.02149823364232012, |
|
"grad_norm": 0.0007830538670532405, |
|
"learning_rate": 1.586270703238637e-05, |
|
"loss": 46.0, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.02151836307456948, |
|
"grad_norm": 0.0005228519439697266, |
|
"learning_rate": 1.573164155639073e-05, |
|
"loss": 46.0, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.021538492506818847, |
|
"grad_norm": 0.0009138689492829144, |
|
"learning_rate": 1.560107355053282e-05, |
|
"loss": 46.0, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.02155862193906821, |
|
"grad_norm": 0.0011538650142028928, |
|
"learning_rate": 1.5471003785608184e-05, |
|
"loss": 46.0, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.02157875137131757, |
|
"grad_norm": 0.0009152950951829553, |
|
"learning_rate": 1.534143302947123e-05, |
|
"loss": 46.0, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.021598880803566936, |
|
"grad_norm": 0.0008602161542512476, |
|
"learning_rate": 1.5212362047030427e-05, |
|
"loss": 46.0, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.021619010235816298, |
|
"grad_norm": 0.000630200607702136, |
|
"learning_rate": 1.5083791600243857e-05, |
|
"loss": 46.0, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.021639139668065663, |
|
"grad_norm": 0.001679250504821539, |
|
"learning_rate": 1.4955722448114807e-05, |
|
"loss": 46.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.021659269100315025, |
|
"grad_norm": 0.0008510241750627756, |
|
"learning_rate": 1.4828155346687123e-05, |
|
"loss": 46.0, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.021679398532564387, |
|
"grad_norm": 0.0007962991949170828, |
|
"learning_rate": 1.4701091049040994e-05, |
|
"loss": 46.0, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.021699527964813753, |
|
"grad_norm": 0.00033583008917048573, |
|
"learning_rate": 1.4574530305288158e-05, |
|
"loss": 46.0, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.021719657397063115, |
|
"grad_norm": 0.0015587556408718228, |
|
"learning_rate": 1.4448473862567857e-05, |
|
"loss": 46.0, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.02173978682931248, |
|
"grad_norm": 0.0016931117279455066, |
|
"learning_rate": 1.4322922465042132e-05, |
|
"loss": 46.0, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.021759916261561842, |
|
"grad_norm": 0.0011070282198488712, |
|
"learning_rate": 1.4197876853891557e-05, |
|
"loss": 46.0, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.021780045693811208, |
|
"grad_norm": 0.0012120172614231706, |
|
"learning_rate": 1.4073337767310834e-05, |
|
"loss": 46.0, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.02180017512606057, |
|
"grad_norm": 0.00137874367646873, |
|
"learning_rate": 1.3949305940504541e-05, |
|
"loss": 46.0, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.02182030455830993, |
|
"grad_norm": 0.0011860841186717153, |
|
"learning_rate": 1.3825782105682527e-05, |
|
"loss": 46.0, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.021840433990559297, |
|
"grad_norm": 0.0007199230021797121, |
|
"learning_rate": 1.3702766992055927e-05, |
|
"loss": 46.0, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.02186056342280866, |
|
"grad_norm": 0.0006517477449961007, |
|
"learning_rate": 1.3580261325832578e-05, |
|
"loss": 46.0, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.021880692855058025, |
|
"grad_norm": 0.0005604965263046324, |
|
"learning_rate": 1.3458265830212891e-05, |
|
"loss": 46.0, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.021900822287307387, |
|
"grad_norm": 0.0008536526001989841, |
|
"learning_rate": 1.333678122538553e-05, |
|
"loss": 46.0, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.02192095171955675, |
|
"grad_norm": 0.00035072650643996894, |
|
"learning_rate": 1.321580822852313e-05, |
|
"loss": 46.0, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.021941081151806114, |
|
"grad_norm": 0.0014688886003568769, |
|
"learning_rate": 1.3095347553778193e-05, |
|
"loss": 46.0, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.021961210584055476, |
|
"grad_norm": 0.0017621091101318598, |
|
"learning_rate": 1.2975399912278608e-05, |
|
"loss": 46.0, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.02198134001630484, |
|
"grad_norm": 0.0005523571744561195, |
|
"learning_rate": 1.2855966012123822e-05, |
|
"loss": 46.0, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.022001469448554203, |
|
"grad_norm": 0.0010343191679567099, |
|
"learning_rate": 1.2737046558380305e-05, |
|
"loss": 46.0, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.022021598880803565, |
|
"grad_norm": 0.0010563414543867111, |
|
"learning_rate": 1.2618642253077628e-05, |
|
"loss": 46.0, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.02204172831305293, |
|
"grad_norm": 0.0008712798589840531, |
|
"learning_rate": 1.2500753795204157e-05, |
|
"loss": 46.0, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.022061857745302293, |
|
"grad_norm": 0.0011983743170276284, |
|
"learning_rate": 1.2383381880703138e-05, |
|
"loss": 46.0, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.022081987177551658, |
|
"grad_norm": 0.001204630360007286, |
|
"learning_rate": 1.2266527202468248e-05, |
|
"loss": 46.0, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.02210211660980102, |
|
"grad_norm": 0.0008704798528924584, |
|
"learning_rate": 1.2150190450339915e-05, |
|
"loss": 46.0, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.022122246042050386, |
|
"grad_norm": 0.0009507142240181565, |
|
"learning_rate": 1.2034372311100905e-05, |
|
"loss": 46.0, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.022142375474299748, |
|
"grad_norm": 0.0018596797017380595, |
|
"learning_rate": 1.1919073468472475e-05, |
|
"loss": 46.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.02216250490654911, |
|
"grad_norm": 0.0010968039277940989, |
|
"learning_rate": 1.1804294603110222e-05, |
|
"loss": 46.0, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.022182634338798475, |
|
"grad_norm": 0.0012583578936755657, |
|
"learning_rate": 1.1690036392600112e-05, |
|
"loss": 46.0, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.022202763771047837, |
|
"grad_norm": 0.0011010583257302642, |
|
"learning_rate": 1.1576299511454513e-05, |
|
"loss": 46.0, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.022222893203297202, |
|
"grad_norm": 0.000499493908137083, |
|
"learning_rate": 1.1463084631108101e-05, |
|
"loss": 46.0, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.022243022635546564, |
|
"grad_norm": 0.0009412445360794663, |
|
"learning_rate": 1.135039241991408e-05, |
|
"loss": 46.0, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.022263152067795926, |
|
"grad_norm": 0.0010741801233962178, |
|
"learning_rate": 1.1238223543140024e-05, |
|
"loss": 46.0, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.022283281500045292, |
|
"grad_norm": 0.0016603496624156833, |
|
"learning_rate": 1.1126578662964115e-05, |
|
"loss": 46.0, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.022303410932294654, |
|
"grad_norm": 0.0007736904663033783, |
|
"learning_rate": 1.1015458438471116e-05, |
|
"loss": 46.0, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.02232354036454402, |
|
"grad_norm": 0.0007681693532504141, |
|
"learning_rate": 1.0904863525648633e-05, |
|
"loss": 46.0, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.02234366979679338, |
|
"grad_norm": 0.0009165824740193784, |
|
"learning_rate": 1.0794794577383016e-05, |
|
"loss": 46.0, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.022363799229042743, |
|
"grad_norm": 0.0012590873520821333, |
|
"learning_rate": 1.0685252243455712e-05, |
|
"loss": 46.0, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.02238392866129211, |
|
"grad_norm": 0.0006520768511109054, |
|
"learning_rate": 1.0576237170539383e-05, |
|
"loss": 46.0, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.02240405809354147, |
|
"grad_norm": 0.0008068094030022621, |
|
"learning_rate": 1.0467750002193944e-05, |
|
"loss": 46.0, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.022424187525790836, |
|
"grad_norm": 0.0007286273175850511, |
|
"learning_rate": 1.0359791378863005e-05, |
|
"loss": 46.0, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.022444316958040198, |
|
"grad_norm": 0.00037491964758373797, |
|
"learning_rate": 1.025236193786987e-05, |
|
"loss": 46.0, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.022464446390289564, |
|
"grad_norm": 0.0006626130198128521, |
|
"learning_rate": 1.014546231341391e-05, |
|
"loss": 46.0, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.022484575822538926, |
|
"grad_norm": 0.0007106042467057705, |
|
"learning_rate": 1.0039093136566735e-05, |
|
"loss": 46.0, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.022504705254788288, |
|
"grad_norm": 0.0007863907376304269, |
|
"learning_rate": 9.933255035268574e-06, |
|
"loss": 46.0, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.022524834687037653, |
|
"grad_norm": 0.0013863188214600086, |
|
"learning_rate": 9.827948634324447e-06, |
|
"loss": 46.0, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.022544964119287015, |
|
"grad_norm": 0.0006982397171668708, |
|
"learning_rate": 9.72317455540055e-06, |
|
"loss": 46.0, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.02256509355153638, |
|
"grad_norm": 0.0007036144379526377, |
|
"learning_rate": 9.61893341702056e-06, |
|
"loss": 46.0, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.022585222983785742, |
|
"grad_norm": 0.0005324577214196324, |
|
"learning_rate": 9.515225834562003e-06, |
|
"loss": 46.0, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.022605352416035104, |
|
"grad_norm": 0.00036683998769149184, |
|
"learning_rate": 9.412052420252605e-06, |
|
"loss": 46.0, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.02262548184828447, |
|
"grad_norm": 0.0008661012398079038, |
|
"learning_rate": 9.309413783166654e-06, |
|
"loss": 46.0, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.022645611280533832, |
|
"grad_norm": 0.0007851457339711487, |
|
"learning_rate": 9.207310529221525e-06, |
|
"loss": 46.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.022665740712783197, |
|
"grad_norm": 0.0005590534419752657, |
|
"learning_rate": 9.10574326117386e-06, |
|
"loss": 46.0, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.02268587014503256, |
|
"grad_norm": 0.001089409808628261, |
|
"learning_rate": 9.004712578616304e-06, |
|
"loss": 46.0, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.02270599957728192, |
|
"grad_norm": 0.0006295640487223864, |
|
"learning_rate": 8.90421907797374e-06, |
|
"loss": 46.0, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.022726129009531287, |
|
"grad_norm": 0.0009275046759285033, |
|
"learning_rate": 8.804263352499864e-06, |
|
"loss": 46.0, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.02274625844178065, |
|
"grad_norm": 0.0012473628157749772, |
|
"learning_rate": 8.70484599227367e-06, |
|
"loss": 46.0, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.022766387874030014, |
|
"grad_norm": 0.0009947452927008271, |
|
"learning_rate": 8.605967584195995e-06, |
|
"loss": 46.0, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.022786517306279376, |
|
"grad_norm": 0.0007231653435155749, |
|
"learning_rate": 8.507628711985983e-06, |
|
"loss": 46.0, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.02280664673852874, |
|
"grad_norm": 0.0004182531265541911, |
|
"learning_rate": 8.409829956177684e-06, |
|
"loss": 46.0, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.022826776170778103, |
|
"grad_norm": 0.0012393246870487928, |
|
"learning_rate": 8.312571894116649e-06, |
|
"loss": 46.0, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.022846905603027465, |
|
"grad_norm": 0.0009794539073482156, |
|
"learning_rate": 8.215855099956472e-06, |
|
"loss": 46.0, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.02286703503527683, |
|
"grad_norm": 0.000817556690890342, |
|
"learning_rate": 8.119680144655428e-06, |
|
"loss": 46.0, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.022887164467526193, |
|
"grad_norm": 0.001215306343510747, |
|
"learning_rate": 8.024047595973095e-06, |
|
"loss": 46.0, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.02290729389977556, |
|
"grad_norm": 0.0013395050773397088, |
|
"learning_rate": 7.92895801846707e-06, |
|
"loss": 46.0, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.02292742333202492, |
|
"grad_norm": 0.0012201687786728144, |
|
"learning_rate": 7.834411973489419e-06, |
|
"loss": 46.0, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.022947552764274282, |
|
"grad_norm": 0.0008355136960744858, |
|
"learning_rate": 7.740410019183697e-06, |
|
"loss": 46.0, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.022967682196523648, |
|
"grad_norm": 0.0009616176830604672, |
|
"learning_rate": 7.646952710481336e-06, |
|
"loss": 46.0, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.02298781162877301, |
|
"grad_norm": 0.000676050316542387, |
|
"learning_rate": 7.554040599098588e-06, |
|
"loss": 46.0, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.023007941061022375, |
|
"grad_norm": 0.0018640294438228011, |
|
"learning_rate": 7.461674233533123e-06, |
|
"loss": 46.0, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.023028070493271737, |
|
"grad_norm": 0.0013213737402111292, |
|
"learning_rate": 7.369854159060929e-06, |
|
"loss": 46.0, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.0230481999255211, |
|
"grad_norm": 0.001049902755767107, |
|
"learning_rate": 7.278580917732913e-06, |
|
"loss": 46.0, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.023068329357770465, |
|
"grad_norm": 0.0006768841994926333, |
|
"learning_rate": 7.187855048371917e-06, |
|
"loss": 46.0, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.023088458790019827, |
|
"grad_norm": 0.0010121595114469528, |
|
"learning_rate": 7.097677086569343e-06, |
|
"loss": 46.0, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.023108588222269192, |
|
"grad_norm": 0.001295650377869606, |
|
"learning_rate": 7.008047564682119e-06, |
|
"loss": 46.0, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.023128717654518554, |
|
"grad_norm": 0.0004917937330901623, |
|
"learning_rate": 6.91896701182948e-06, |
|
"loss": 46.0, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.02314884708676792, |
|
"grad_norm": 0.0013024702202528715, |
|
"learning_rate": 6.83043595388988e-06, |
|
"loss": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.02316897651901728, |
|
"grad_norm": 0.0009011050569824874, |
|
"learning_rate": 6.742454913497942e-06, |
|
"loss": 46.0, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.023189105951266643, |
|
"grad_norm": 0.0012644693488255143, |
|
"learning_rate": 6.6550244100412e-06, |
|
"loss": 46.0, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.02320923538351601, |
|
"grad_norm": 0.0013667414896190166, |
|
"learning_rate": 6.568144959657263e-06, |
|
"loss": 46.0, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.02322936481576537, |
|
"grad_norm": 0.0011150363134220243, |
|
"learning_rate": 6.481817075230567e-06, |
|
"loss": 46.0, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.023249494248014736, |
|
"grad_norm": 0.0010298212291672826, |
|
"learning_rate": 6.396041266389474e-06, |
|
"loss": 46.0, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.023269623680264098, |
|
"grad_norm": 0.001109607401303947, |
|
"learning_rate": 6.3108180395031965e-06, |
|
"loss": 46.0, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.02328975311251346, |
|
"grad_norm": 0.0005489352042786777, |
|
"learning_rate": 6.22614789767888e-06, |
|
"loss": 46.0, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.023309882544762826, |
|
"grad_norm": 0.0007645548903383315, |
|
"learning_rate": 6.142031340758525e-06, |
|
"loss": 46.0, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.023330011977012188, |
|
"grad_norm": 0.0007147680153138936, |
|
"learning_rate": 6.058468865316102e-06, |
|
"loss": 46.0, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.023350141409261553, |
|
"grad_norm": 0.0006330362521111965, |
|
"learning_rate": 5.975460964654689e-06, |
|
"loss": 46.0, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.023370270841510915, |
|
"grad_norm": 0.0008396542398259044, |
|
"learning_rate": 5.8930081288034014e-06, |
|
"loss": 46.0, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.023390400273760277, |
|
"grad_norm": 0.001648742356337607, |
|
"learning_rate": 5.8111108445146116e-06, |
|
"loss": 46.0, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.023410529706009642, |
|
"grad_norm": 0.0008351008291356266, |
|
"learning_rate": 5.72976959526107e-06, |
|
"loss": 46.0, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.023430659138259004, |
|
"grad_norm": 0.0012098524020984769, |
|
"learning_rate": 5.648984861232986e-06, |
|
"loss": 46.0, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.02345078857050837, |
|
"grad_norm": 0.0010954445460811257, |
|
"learning_rate": 5.568757119335244e-06, |
|
"loss": 46.0, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.023470918002757732, |
|
"grad_norm": 0.0008584211464039981, |
|
"learning_rate": 5.489086843184632e-06, |
|
"loss": 46.0, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.023491047435007097, |
|
"grad_norm": 0.000767943391110748, |
|
"learning_rate": 5.40997450310693e-06, |
|
"loss": 46.0, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.02351117686725646, |
|
"grad_norm": 0.001566538936458528, |
|
"learning_rate": 5.3314205661342155e-06, |
|
"loss": 46.0, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.02353130629950582, |
|
"grad_norm": 0.0008029242744669318, |
|
"learning_rate": 5.253425496002084e-06, |
|
"loss": 46.0, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.023551435731755187, |
|
"grad_norm": 0.0011748820543289185, |
|
"learning_rate": 5.175989753146948e-06, |
|
"loss": 46.0, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.02357156516400455, |
|
"grad_norm": 0.000719308911357075, |
|
"learning_rate": 5.099113794703225e-06, |
|
"loss": 46.0, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.023591694596253914, |
|
"grad_norm": 0.0005550024216063321, |
|
"learning_rate": 5.0227980745007345e-06, |
|
"loss": 46.0, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.023611824028503276, |
|
"grad_norm": 0.0007492146105505526, |
|
"learning_rate": 4.947043043062016e-06, |
|
"loss": 46.0, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.023631953460752638, |
|
"grad_norm": 0.0011210053926333785, |
|
"learning_rate": 4.87184914759955e-06, |
|
"loss": 46.0, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.023652082893002004, |
|
"grad_norm": 0.000586840498726815, |
|
"learning_rate": 4.7972168320132845e-06, |
|
"loss": 46.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.023672212325251366, |
|
"grad_norm": 0.0013588924193754792, |
|
"learning_rate": 4.7231465368879124e-06, |
|
"loss": 46.0, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.02369234175750073, |
|
"grad_norm": 0.00061926303897053, |
|
"learning_rate": 4.649638699490266e-06, |
|
"loss": 46.0, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.023712471189750093, |
|
"grad_norm": 0.0006894692778587341, |
|
"learning_rate": 4.576693753766792e-06, |
|
"loss": 46.0, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.023732600621999455, |
|
"grad_norm": 0.0008896641083993018, |
|
"learning_rate": 4.5043121303409595e-06, |
|
"loss": 46.0, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.02375273005424882, |
|
"grad_norm": 0.0010777831776067615, |
|
"learning_rate": 4.432494256510711e-06, |
|
"loss": 46.0, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.023772859486498182, |
|
"grad_norm": 0.0012661231448873878, |
|
"learning_rate": 4.361240556245938e-06, |
|
"loss": 46.0, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.023792988918747548, |
|
"grad_norm": 0.0015156505396589637, |
|
"learning_rate": 4.290551450185986e-06, |
|
"loss": 46.0, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.02381311835099691, |
|
"grad_norm": 0.000532010046299547, |
|
"learning_rate": 4.220427355637224e-06, |
|
"loss": 46.0, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.023833247783246275, |
|
"grad_norm": 0.0007818634621798992, |
|
"learning_rate": 4.150868686570464e-06, |
|
"loss": 46.0, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.023853377215495637, |
|
"grad_norm": 0.0010528319980949163, |
|
"learning_rate": 4.081875853618588e-06, |
|
"loss": 46.0, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.023873506647745, |
|
"grad_norm": 0.001450626994483173, |
|
"learning_rate": 4.013449264074187e-06, |
|
"loss": 46.0, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.023893636079994365, |
|
"grad_norm": 0.0018947275821119547, |
|
"learning_rate": 3.945589321886989e-06, |
|
"loss": 46.0, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.023913765512243727, |
|
"grad_norm": 0.0009489529766142368, |
|
"learning_rate": 3.878296427661676e-06, |
|
"loss": 46.0, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.023933894944493092, |
|
"grad_norm": 0.0008835981134325266, |
|
"learning_rate": 3.8115709786553435e-06, |
|
"loss": 46.0, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.023954024376742454, |
|
"grad_norm": 0.0008584621245972812, |
|
"learning_rate": 3.7454133687752524e-06, |
|
"loss": 46.0, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.023974153808991816, |
|
"grad_norm": 0.001248899381607771, |
|
"learning_rate": 3.6798239885764806e-06, |
|
"loss": 46.0, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.02399428324124118, |
|
"grad_norm": 0.0006818815018050373, |
|
"learning_rate": 3.614803225259622e-06, |
|
"loss": 46.0, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.024014412673490543, |
|
"grad_norm": 0.0011521173873916268, |
|
"learning_rate": 3.550351462668489e-06, |
|
"loss": 46.0, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.02403454210573991, |
|
"grad_norm": 0.0009178062318824232, |
|
"learning_rate": 3.4864690812878688e-06, |
|
"loss": 46.0, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.02405467153798927, |
|
"grad_norm": 0.0005374921602196991, |
|
"learning_rate": 3.4231564582412167e-06, |
|
"loss": 46.0, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.024074800970238633, |
|
"grad_norm": 0.0010567499557510018, |
|
"learning_rate": 3.3604139672885227e-06, |
|
"loss": 46.0, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.024094930402488, |
|
"grad_norm": 0.00042650941759347916, |
|
"learning_rate": 3.298241978824046e-06, |
|
"loss": 46.0, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.02411505983473736, |
|
"grad_norm": 0.001136740087531507, |
|
"learning_rate": 3.2366408598741072e-06, |
|
"loss": 46.0, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.024135189266986726, |
|
"grad_norm": 0.0011373634915798903, |
|
"learning_rate": 3.175610974095011e-06, |
|
"loss": 46.0, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.024155318699236088, |
|
"grad_norm": 0.00101361027918756, |
|
"learning_rate": 3.115152681770783e-06, |
|
"loss": 46.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.024175448131485453, |
|
"grad_norm": 0.000945060164667666, |
|
"learning_rate": 3.055266339811147e-06, |
|
"loss": 46.0, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.024195577563734815, |
|
"grad_norm": 0.0009245016844943166, |
|
"learning_rate": 2.9959523017493386e-06, |
|
"loss": 46.0, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.024215706995984177, |
|
"grad_norm": 0.00034907733788713813, |
|
"learning_rate": 2.9372109177400854e-06, |
|
"loss": 46.0, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.024235836428233543, |
|
"grad_norm": 0.0015949602238833904, |
|
"learning_rate": 2.8790425345574745e-06, |
|
"loss": 46.0, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.024255965860482905, |
|
"grad_norm": 0.0008049356401897967, |
|
"learning_rate": 2.821447495592977e-06, |
|
"loss": 46.0, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.02427609529273227, |
|
"grad_norm": 0.0010959411738440394, |
|
"learning_rate": 2.7644261408533155e-06, |
|
"loss": 46.0, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.024296224724981632, |
|
"grad_norm": 0.001113040023483336, |
|
"learning_rate": 2.707978806958611e-06, |
|
"loss": 46.0, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.024316354157230994, |
|
"grad_norm": 0.0006179322372190654, |
|
"learning_rate": 2.6521058271402386e-06, |
|
"loss": 46.0, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.02433648358948036, |
|
"grad_norm": 0.0004471320426091552, |
|
"learning_rate": 2.596807531238965e-06, |
|
"loss": 46.0, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.02435661302172972, |
|
"grad_norm": 0.002076149685308337, |
|
"learning_rate": 2.542084245702947e-06, |
|
"loss": 46.0, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.024376742453979087, |
|
"grad_norm": 0.0005335964378900826, |
|
"learning_rate": 2.487936293585813e-06, |
|
"loss": 46.0, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.02439687188622845, |
|
"grad_norm": 0.001850920612923801, |
|
"learning_rate": 2.4343639945448306e-06, |
|
"loss": 46.0, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.02441700131847781, |
|
"grad_norm": 0.0012666831025853753, |
|
"learning_rate": 2.3813676648388517e-06, |
|
"loss": 46.0, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.024437130750727176, |
|
"grad_norm": 0.00046365856542252004, |
|
"learning_rate": 2.3289476173266376e-06, |
|
"loss": 46.0, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.024457260182976538, |
|
"grad_norm": 0.0010178579250350595, |
|
"learning_rate": 2.2771041614648825e-06, |
|
"loss": 46.0, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.024477389615225904, |
|
"grad_norm": 0.001140277599915862, |
|
"learning_rate": 2.2258376033064354e-06, |
|
"loss": 46.0, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.024497519047475266, |
|
"grad_norm": 0.0006400442798621953, |
|
"learning_rate": 2.1751482454984706e-06, |
|
"loss": 46.0, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.02451764847972463, |
|
"grad_norm": 0.0014165055472403765, |
|
"learning_rate": 2.1250363872807655e-06, |
|
"loss": 46.0, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.024537777911973993, |
|
"grad_norm": 0.0007371717365458608, |
|
"learning_rate": 2.0755023244838136e-06, |
|
"loss": 46.0, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.024557907344223355, |
|
"grad_norm": 0.0013694085646420717, |
|
"learning_rate": 2.026546349527181e-06, |
|
"loss": 46.0, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.02457803677647272, |
|
"grad_norm": 0.0012485695770010352, |
|
"learning_rate": 1.978168751417786e-06, |
|
"loss": 46.0, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.024598166208722082, |
|
"grad_norm": 0.001493003685027361, |
|
"learning_rate": 1.93036981574809e-06, |
|
"loss": 46.0, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.024618295640971448, |
|
"grad_norm": 0.0006664522225037217, |
|
"learning_rate": 1.8831498246945189e-06, |
|
"loss": 46.0, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.02463842507322081, |
|
"grad_norm": 0.0011530322954058647, |
|
"learning_rate": 1.836509057015734e-06, |
|
"loss": 46.0, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.024658554505470172, |
|
"grad_norm": 0.0006030354998074472, |
|
"learning_rate": 1.7904477880510307e-06, |
|
"loss": 46.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.024678683937719537, |
|
"grad_norm": 0.001384797622449696, |
|
"learning_rate": 1.7449662897186414e-06, |
|
"loss": 46.0, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.0246988133699689, |
|
"grad_norm": 0.0006306437426246703, |
|
"learning_rate": 1.7000648305142364e-06, |
|
"loss": 46.0, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.024718942802218265, |
|
"grad_norm": 0.0005167327472008765, |
|
"learning_rate": 1.655743675509258e-06, |
|
"loss": 46.0, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.024739072234467627, |
|
"grad_norm": 0.00042707190732471645, |
|
"learning_rate": 1.6120030863493674e-06, |
|
"loss": 46.0, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.02475920166671699, |
|
"grad_norm": 0.0003029134531971067, |
|
"learning_rate": 1.5688433212529107e-06, |
|
"loss": 46.0, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.024779331098966354, |
|
"grad_norm": 0.0007286612526513636, |
|
"learning_rate": 1.5262646350094334e-06, |
|
"loss": 46.0, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.024799460531215716, |
|
"grad_norm": 0.0004967825370840728, |
|
"learning_rate": 1.4842672789780798e-06, |
|
"loss": 46.0, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.02481958996346508, |
|
"grad_norm": 0.0014918498927727342, |
|
"learning_rate": 1.4428515010861955e-06, |
|
"loss": 46.0, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.024839719395714444, |
|
"grad_norm": 0.0008161486475728452, |
|
"learning_rate": 1.4020175458278607e-06, |
|
"loss": 46.0, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.02485984882796381, |
|
"grad_norm": 0.0008656067657284439, |
|
"learning_rate": 1.3617656542623813e-06, |
|
"loss": 46.0, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.02487997826021317, |
|
"grad_norm": 0.0005301354103721678, |
|
"learning_rate": 1.322096064012912e-06, |
|
"loss": 46.0, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.024900107692462533, |
|
"grad_norm": 0.0011264794738963246, |
|
"learning_rate": 1.2830090092650904e-06, |
|
"loss": 46.0, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.0249202371247119, |
|
"grad_norm": 0.000983362435363233, |
|
"learning_rate": 1.244504720765549e-06, |
|
"loss": 46.0, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.02494036655696126, |
|
"grad_norm": 0.0011257297592237592, |
|
"learning_rate": 1.20658342582064e-06, |
|
"loss": 46.0, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.024960495989210626, |
|
"grad_norm": 0.0009586882661096752, |
|
"learning_rate": 1.1692453482951115e-06, |
|
"loss": 46.0, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.024980625421459988, |
|
"grad_norm": 0.0005041824770160019, |
|
"learning_rate": 1.1324907086106895e-06, |
|
"loss": 46.0, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.02500075485370935, |
|
"grad_norm": 0.0012379593681544065, |
|
"learning_rate": 1.096319723744843e-06, |
|
"loss": 46.0, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.025020884285958715, |
|
"grad_norm": 0.0007608251180499792, |
|
"learning_rate": 1.0607326072295087e-06, |
|
"loss": 46.0, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.025041013718208077, |
|
"grad_norm": 0.001463228720240295, |
|
"learning_rate": 1.0257295691497914e-06, |
|
"loss": 46.0, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.025061143150457443, |
|
"grad_norm": 0.0005632633110508323, |
|
"learning_rate": 9.913108161427543e-07, |
|
"loss": 46.0, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.025081272582706805, |
|
"grad_norm": 0.0013915124582126737, |
|
"learning_rate": 9.57476551396197e-07, |
|
"loss": 46.0, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.025101402014956167, |
|
"grad_norm": 0.0010674886871129274, |
|
"learning_rate": 9.242269746474575e-07, |
|
"loss": 46.0, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.025121531447205532, |
|
"grad_norm": 0.0008928573224693537, |
|
"learning_rate": 8.915622821821789e-07, |
|
"loss": 46.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.025141660879454894, |
|
"grad_norm": 0.0014497325755655766, |
|
"learning_rate": 8.594826668332445e-07, |
|
"loss": 46.0, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.02516179031170426, |
|
"grad_norm": 0.0011807921109721065, |
|
"learning_rate": 8.279883179795666e-07, |
|
"loss": 46.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.02518191974395362, |
|
"grad_norm": 0.001125653157941997, |
|
"learning_rate": 7.970794215450106e-07, |
|
"loss": 46.0, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.025202049176202987, |
|
"grad_norm": 0.0013557058991864324, |
|
"learning_rate": 7.667561599972505e-07, |
|
"loss": 46.0, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.02522217860845235, |
|
"grad_norm": 0.0003978684253524989, |
|
"learning_rate": 7.370187123467708e-07, |
|
"loss": 46.0, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.02524230804070171, |
|
"grad_norm": 0.0008096559904515743, |
|
"learning_rate": 7.078672541456999e-07, |
|
"loss": 46.0, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.025262437472951076, |
|
"grad_norm": 0.0015750976745039225, |
|
"learning_rate": 6.793019574868775e-07, |
|
"loss": 46.0, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.02528256690520044, |
|
"grad_norm": 0.000689225154928863, |
|
"learning_rate": 6.513229910027896e-07, |
|
"loss": 46.0, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.025302696337449804, |
|
"grad_norm": 0.0008678320446051657, |
|
"learning_rate": 6.239305198645462e-07, |
|
"loss": 46.0, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.025322825769699166, |
|
"grad_norm": 0.001171753858216107, |
|
"learning_rate": 5.971247057809826e-07, |
|
"loss": 46.0, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.025342955201948528, |
|
"grad_norm": 0.000669551664032042, |
|
"learning_rate": 5.709057069976265e-07, |
|
"loss": 46.0, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.025363084634197893, |
|
"grad_norm": 0.00028675812063738704, |
|
"learning_rate": 5.452736782958323e-07, |
|
"loss": 46.0, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.025383214066447255, |
|
"grad_norm": 0.0010722498409450054, |
|
"learning_rate": 5.20228770991793e-07, |
|
"loss": 46.0, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.02540334349869662, |
|
"grad_norm": 0.0011695049470290542, |
|
"learning_rate": 4.957711329357073e-07, |
|
"loss": 46.0, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.025423472930945983, |
|
"grad_norm": 0.0005382261006161571, |
|
"learning_rate": 4.7190090851090274e-07, |
|
"loss": 46.0, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.025443602363195345, |
|
"grad_norm": 0.0011571204522624612, |
|
"learning_rate": 4.4861823863292516e-07, |
|
"loss": 46.0, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.02546373179544471, |
|
"grad_norm": 0.0008085128501988947, |
|
"learning_rate": 4.259232607487951e-07, |
|
"loss": 46.0, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.025483861227694072, |
|
"grad_norm": 0.0012183074140921235, |
|
"learning_rate": 4.038161088361192e-07, |
|
"loss": 46.0, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.025503990659943437, |
|
"grad_norm": 0.0012540913885459304, |
|
"learning_rate": 3.8229691340234684e-07, |
|
"loss": 46.0, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.0255241200921928, |
|
"grad_norm": 0.0005392631865106523, |
|
"learning_rate": 3.613658014839594e-07, |
|
"loss": 46.0, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.025544249524442165, |
|
"grad_norm": 0.0005177839775569737, |
|
"learning_rate": 3.4102289664578177e-07, |
|
"loss": 46.0, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.025564378956691527, |
|
"grad_norm": 0.0007628992316313088, |
|
"learning_rate": 3.212683189801724e-07, |
|
"loss": 46.0, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.02558450838894089, |
|
"grad_norm": 0.0019209292950108647, |
|
"learning_rate": 3.021021851063899e-07, |
|
"loss": 46.0, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.025604637821190254, |
|
"grad_norm": 0.0006337051163427532, |
|
"learning_rate": 2.8352460816986057e-07, |
|
"loss": 46.0, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.025624767253439616, |
|
"grad_norm": 0.0004375329881440848, |
|
"learning_rate": 2.6553569784152357e-07, |
|
"loss": 46.0, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.02564489668568898, |
|
"grad_norm": 0.0008538602269254625, |
|
"learning_rate": 2.481355603171531e-07, |
|
"loss": 46.0, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.025665026117938344, |
|
"grad_norm": 0.0008380180224776268, |
|
"learning_rate": 2.3132429831682622e-07, |
|
"loss": 46.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.025685155550187706, |
|
"grad_norm": 0.0007942443480715156, |
|
"learning_rate": 2.1510201108416728e-07, |
|
"loss": 46.0, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.02570528498243707, |
|
"grad_norm": 0.0008215562556870282, |
|
"learning_rate": 1.9946879438592636e-07, |
|
"loss": 46.0, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.025725414414686433, |
|
"grad_norm": 0.0011535886442288756, |
|
"learning_rate": 1.8442474051125757e-07, |
|
"loss": 46.0, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.0257455438469358, |
|
"grad_norm": 0.00102553132455796, |
|
"learning_rate": 1.6996993827129715e-07, |
|
"loss": 46.0, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.02576567327918516, |
|
"grad_norm": 0.0006207975093275309, |
|
"learning_rate": 1.561044729985861e-07, |
|
"loss": 46.0, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.025785802711434522, |
|
"grad_norm": 0.0006774533540010452, |
|
"learning_rate": 1.428284265465596e-07, |
|
"loss": 46.0, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.025805932143683888, |
|
"grad_norm": 0.0017024496337398887, |
|
"learning_rate": 1.3014187728906945e-07, |
|
"loss": 46.0, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.02582606157593325, |
|
"grad_norm": 0.0017212866805493832, |
|
"learning_rate": 1.1804490011995129e-07, |
|
"loss": 46.0, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.025846191008182615, |
|
"grad_norm": 0.0024159506428986788, |
|
"learning_rate": 1.0653756645252477e-07, |
|
"loss": 46.0, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.025866320440431977, |
|
"grad_norm": 0.0017721692565828562, |
|
"learning_rate": 9.561994421924958e-08, |
|
"loss": 46.0, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.025886449872681343, |
|
"grad_norm": 0.0007376950816251338, |
|
"learning_rate": 8.529209787123682e-08, |
|
"loss": 46.0, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.025906579304930705, |
|
"grad_norm": 0.0007463957881554961, |
|
"learning_rate": 7.555408837794931e-08, |
|
"loss": 46.0, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.025926708737180067, |
|
"grad_norm": 0.0006077784928493202, |
|
"learning_rate": 6.640597322677967e-08, |
|
"loss": 46.0, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.025946838169429432, |
|
"grad_norm": 0.002363705076277256, |
|
"learning_rate": 5.784780642275056e-08, |
|
"loss": 46.0, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.025966967601678794, |
|
"grad_norm": 0.0013910114066675305, |
|
"learning_rate": 4.9879638488159465e-08, |
|
"loss": 46.0, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.02598709703392816, |
|
"grad_norm": 0.001159190433099866, |
|
"learning_rate": 4.2501516462334356e-08, |
|
"loss": 46.0, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.02600722646617752, |
|
"grad_norm": 0.0009958260925486684, |
|
"learning_rate": 3.5713483901300696e-08, |
|
"loss": 46.0, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.026027355898426884, |
|
"grad_norm": 0.0004471206630114466, |
|
"learning_rate": 2.9515580877559346e-08, |
|
"loss": 46.0, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.02604748533067625, |
|
"grad_norm": 0.0009266930283047259, |
|
"learning_rate": 2.3907843979831257e-08, |
|
"loss": 46.0, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.02606761476292561, |
|
"grad_norm": 0.0013211799087002873, |
|
"learning_rate": 1.8890306312846495e-08, |
|
"loss": 46.0, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.026087744195174976, |
|
"grad_norm": 0.001065763528458774, |
|
"learning_rate": 1.446299749716662e-08, |
|
"loss": 46.0, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.02610787362742434, |
|
"grad_norm": 0.001209865789860487, |
|
"learning_rate": 1.0625943668973736e-08, |
|
"loss": 46.0, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.0261280030596737, |
|
"grad_norm": 0.0011283751809969544, |
|
"learning_rate": 7.379167479948379e-09, |
|
"loss": 46.0, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.026148132491923066, |
|
"grad_norm": 0.0004522628150880337, |
|
"learning_rate": 4.722688097125172e-09, |
|
"loss": 46.0, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.026168261924172428, |
|
"grad_norm": 0.0005747976247221231, |
|
"learning_rate": 2.656521202770712e-09, |
|
"loss": 46.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.026188391356421793, |
|
"grad_norm": 0.0009918762370944023, |
|
"learning_rate": 1.1806789942947484e-09, |
|
"loss": 46.0, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.026208520788671155, |
|
"grad_norm": 0.0008219339651986957, |
|
"learning_rate": 2.9517018420577305e-10, |
|
"loss": 46.0, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.02622865022092052, |
|
"grad_norm": 0.0007112828898243606, |
|
"learning_rate": 0.0, |
|
"loss": 46.0, |
|
"step": 1303 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1303, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 326, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 30018797125632.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|