Training in progress, step 372, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +663 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:710f323d2d148d19a77874e7041b6682ca6fdf00fb8ff65d993905368b01d329
 size 80013120

 version https://git-lfs.github.com/spec/v1
+oid sha256:48a6f0994f83dc96cc6f751735e6fb97678ad4bc393d97a9dc093fd599ec6bc4
 size 80013120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d28277b8c5696e9609288685953a64988fc3bd9fe09e1d48c95ba342438e1db6
 size 41120084

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9a822bab0e997a745b26087d054e11d6c01f3452497bdb451ee15d4a3a6c0d7
 size 41120084

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d290d0f18d2c63d334eda98204765110cec7c5f5c7d088e8f0e88675b235ebea
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f79e2f3a046f199a98a51df125904aa982942d6fc81655bc2badc3d61093187d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bed55d74992475f85034de9808b502db25c265c1fbe10ac1ead4e6ef3743a36b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:07e62fa0f3a9a79aa024e3df97b6370a7c2c27576fa400f2d11682e04348f4b3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7505043712172159,
   "eval_steps": 93,
-  "global_step": 279,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1984,6 +1984,665 @@
       "eval_samples_per_second": 14.63,
       "eval_steps_per_second": 7.362,
       "step": 279
     }
   ],
   "logging_steps": 1,
@@ -1998,12 +2657,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.088420770584986e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0013449899125757,
   "eval_steps": 93,
+  "global_step": 372,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.63,
       "eval_steps_per_second": 7.362,
       "step": 279
+    },
+    {
+      "epoch": 0.7531943510423672,
+      "grad_norm": 3.4270105361938477,
+      "learning_rate": 3.0215773883115706e-05,
+      "loss": 0.5658,
+      "step": 280
+    },
+    {
+      "epoch": 0.7558843308675185,
+      "grad_norm": 3.4167211055755615,
+      "learning_rate": 2.9596815548187908e-05,
+      "loss": 0.1781,
+      "step": 281
+    },
+    {
+      "epoch": 0.7585743106926698,
+      "grad_norm": 3.9443657398223877,
+      "learning_rate": 2.8983159609539635e-05,
+      "loss": 0.5545,
+      "step": 282
+    },
+    {
+      "epoch": 0.7612642905178211,
+      "grad_norm": 3.164463758468628,
+      "learning_rate": 2.8374852284497446e-05,
+      "loss": 0.334,
+      "step": 283
+    },
+    {
+      "epoch": 0.7639542703429725,
+      "grad_norm": 3.6277055740356445,
+      "learning_rate": 2.7771939387558554e-05,
+      "loss": 0.411,
+      "step": 284
+    },
+    {
+      "epoch": 0.7666442501681238,
+      "grad_norm": 4.296345233917236,
+      "learning_rate": 2.717446632694025e-05,
+      "loss": 0.3111,
+      "step": 285
+    },
+    {
+      "epoch": 0.769334229993275,
+      "grad_norm": 4.06040096282959,
+      "learning_rate": 2.6582478101160167e-05,
+      "loss": 0.4634,
+      "step": 286
+    },
+    {
+      "epoch": 0.7720242098184263,
+      "grad_norm": 4.600436687469482,
+      "learning_rate": 2.599601929564709e-05,
+      "loss": 0.6998,
+      "step": 287
+    },
+    {
+      "epoch": 0.7747141896435776,
+      "grad_norm": 3.8486735820770264,
+      "learning_rate": 2.5415134079383006e-05,
+      "loss": 0.3987,
+      "step": 288
+    },
+    {
+      "epoch": 0.777404169468729,
+      "grad_norm": 5.362851142883301,
+      "learning_rate": 2.4839866201576646e-05,
+      "loss": 0.3466,
+      "step": 289
+    },
+    {
+      "epoch": 0.7800941492938803,
+      "grad_norm": 3.8688018321990967,
+      "learning_rate": 2.4270258988368376e-05,
+      "loss": 0.2902,
+      "step": 290
+    },
+    {
+      "epoch": 0.7827841291190316,
+      "grad_norm": 4.354773044586182,
+      "learning_rate": 2.3706355339567286e-05,
+      "loss": 0.4149,
+      "step": 291
+    },
+    {
+      "epoch": 0.7854741089441829,
+      "grad_norm": 7.11607027053833,
+      "learning_rate": 2.3148197725419983e-05,
+      "loss": 0.7291,
+      "step": 292
+    },
+    {
+      "epoch": 0.7881640887693342,
+      "grad_norm": 5.43526029586792,
+      "learning_rate": 2.2595828183412172e-05,
+      "loss": 0.2716,
+      "step": 293
+    },
+    {
+      "epoch": 0.7908540685944856,
+      "grad_norm": 3.004659414291382,
+      "learning_rate": 2.2049288315102412e-05,
+      "loss": 0.3067,
+      "step": 294
+    },
+    {
+      "epoch": 0.7935440484196369,
+      "grad_norm": 4.5855560302734375,
+      "learning_rate": 2.1508619282989084e-05,
+      "loss": 0.2618,
+      "step": 295
+    },
+    {
+      "epoch": 0.7962340282447882,
+      "grad_norm": 4.773977756500244,
+      "learning_rate": 2.097386180741019e-05,
+      "loss": 0.5023,
+      "step": 296
+    },
+    {
+      "epoch": 0.7989240080699395,
+      "grad_norm": 9.166229248046875,
+      "learning_rate": 2.0445056163476374e-05,
+      "loss": 0.4224,
+      "step": 297
+    },
+    {
+      "epoch": 0.8016139878950908,
+      "grad_norm": 6.276297092437744,
+      "learning_rate": 1.9922242178037864e-05,
+      "loss": 0.8068,
+      "step": 298
+    },
+    {
+      "epoch": 0.8043039677202422,
+      "grad_norm": 5.523612976074219,
+      "learning_rate": 1.940545922668472e-05,
+      "loss": 0.4406,
+      "step": 299
+    },
+    {
+      "epoch": 0.8069939475453934,
+      "grad_norm": 1.5128313302993774,
+      "learning_rate": 1.88947462307814e-05,
+      "loss": 0.0216,
+      "step": 300
+    },
+    {
+      "epoch": 0.8096839273705447,
+      "grad_norm": 2.8309073448181152,
+      "learning_rate": 1.8390141654535265e-05,
+      "loss": 1.299,
+      "step": 301
+    },
+    {
+      "epoch": 0.812373907195696,
+      "grad_norm": 3.6739649772644043,
+      "learning_rate": 1.789168350209983e-05,
+      "loss": 1.5798,
+      "step": 302
+    },
+    {
+      "epoch": 0.8150638870208473,
+      "grad_norm": 3.935307741165161,
+      "learning_rate": 1.739940931471239e-05,
+      "loss": 1.295,
+      "step": 303
+    },
+    {
+      "epoch": 0.8177538668459986,
+      "grad_norm": 4.4844865798950195,
+      "learning_rate": 1.6913356167866578e-05,
+      "loss": 1.225,
+      "step": 304
+    },
+    {
+      "epoch": 0.82044384667115,
+      "grad_norm": 4.518765449523926,
+      "learning_rate": 1.6433560668520176e-05,
+      "loss": 1.4111,
+      "step": 305
+    },
+    {
+      "epoch": 0.8231338264963013,
+      "grad_norm": 4.362013339996338,
+      "learning_rate": 1.5960058952337887e-05,
+      "loss": 1.1839,
+      "step": 306
+    },
+    {
+      "epoch": 0.8258238063214526,
+      "grad_norm": 4.76102352142334,
+      "learning_rate": 1.5492886680969963e-05,
+      "loss": 1.2118,
+      "step": 307
+    },
+    {
+      "epoch": 0.8285137861466039,
+      "grad_norm": 5.4755539894104,
+      "learning_rate": 1.5032079039366209e-05,
+      "loss": 1.4798,
+      "step": 308
+    },
+    {
+      "epoch": 0.8312037659717552,
+      "grad_norm": 3.792975902557373,
+      "learning_rate": 1.4577670733126203e-05,
+      "loss": 0.7013,
+      "step": 309
+    },
+    {
+      "epoch": 0.8338937457969066,
+      "grad_norm": 5.135954856872559,
+      "learning_rate": 1.4129695985885228e-05,
+      "loss": 1.5141,
+      "step": 310
+    },
+    {
+      "epoch": 0.8365837256220578,
+      "grad_norm": 3.417525291442871,
+      "learning_rate": 1.3688188536736968e-05,
+      "loss": 0.8687,
+      "step": 311
+    },
+    {
+      "epoch": 0.8392737054472091,
+      "grad_norm": 4.7601728439331055,
+      "learning_rate": 1.3253181637692324e-05,
+      "loss": 0.9127,
+      "step": 312
+    },
+    {
+      "epoch": 0.8419636852723604,
+      "grad_norm": 4.601919174194336,
+      "learning_rate": 1.2824708051175016e-05,
+      "loss": 1.0878,
+      "step": 313
+    },
+    {
+      "epoch": 0.8446536650975117,
+      "grad_norm": 3.320221185684204,
+      "learning_rate": 1.2402800047554208e-05,
+      "loss": 0.6061,
+      "step": 314
+    },
+    {
+      "epoch": 0.8473436449226631,
+      "grad_norm": 4.236156463623047,
+      "learning_rate": 1.1987489402713981e-05,
+      "loss": 0.7456,
+      "step": 315
+    },
+    {
+      "epoch": 0.8500336247478144,
+      "grad_norm": 6.007240295410156,
+      "learning_rate": 1.1578807395660207e-05,
+      "loss": 1.5298,
+      "step": 316
+    },
+    {
+      "epoch": 0.8527236045729657,
+      "grad_norm": 5.775532245635986,
+      "learning_rate": 1.1176784806164676e-05,
+      "loss": 0.7343,
+      "step": 317
+    },
+    {
+      "epoch": 0.855413584398117,
+      "grad_norm": 5.709627628326416,
+      "learning_rate": 1.078145191244706e-05,
+      "loss": 1.2876,
+      "step": 318
+    },
+    {
+      "epoch": 0.8581035642232683,
+      "grad_norm": 5.935501575469971,
+      "learning_rate": 1.0392838488894463e-05,
+      "loss": 0.9374,
+      "step": 319
+    },
+    {
+      "epoch": 0.8607935440484197,
+      "grad_norm": 4.249516010284424,
+      "learning_rate": 1.0010973803818857e-05,
+      "loss": 0.5061,
+      "step": 320
+    },
+    {
+      "epoch": 0.863483523873571,
+      "grad_norm": 4.154758453369141,
+      "learning_rate": 9.635886617252975e-06,
+      "loss": 0.1188,
+      "step": 321
+    },
+    {
+      "epoch": 0.8661735036987223,
+      "grad_norm": 3.874020576477051,
+      "learning_rate": 9.267605178784033e-06,
+      "loss": 0.4923,
+      "step": 322
+    },
+    {
+      "epoch": 0.8688634835238735,
+      "grad_norm": 3.575878143310547,
+      "learning_rate": 8.906157225426315e-06,
+      "loss": 0.3217,
+      "step": 323
+    },
+    {
+      "epoch": 0.8715534633490248,
+      "grad_norm": 4.050719261169434,
+      "learning_rate": 8.55156997953197e-06,
+      "loss": 0.4612,
+      "step": 324
+    },
+    {
+      "epoch": 0.8742434431741762,
+      "grad_norm": 3.588498830795288,
+      "learning_rate": 8.203870146740932e-06,
+      "loss": 0.2259,
+      "step": 325
+    },
+    {
+      "epoch": 0.8769334229993275,
+      "grad_norm": 5.262954235076904,
+      "learning_rate": 7.86308391396956e-06,
+      "loss": 0.7654,
+      "step": 326
+    },
+    {
+      "epoch": 0.8796234028244788,
+      "grad_norm": 5.5735087394714355,
+      "learning_rate": 7.529236947438256e-06,
+      "loss": 0.5849,
+      "step": 327
+    },
+    {
+      "epoch": 0.8823133826496301,
+      "grad_norm": 4.838580131530762,
+      "learning_rate": 7.202354390738608e-06,
+      "loss": 0.3913,
+      "step": 328
+    },
+    {
+      "epoch": 0.8850033624747814,
+      "grad_norm": 5.6935038566589355,
+      "learning_rate": 6.882460862939522e-06,
+      "loss": 0.7206,
+      "step": 329
+    },
+    {
+      "epoch": 0.8876933422999328,
+      "grad_norm": 2.3508174419403076,
+      "learning_rate": 6.5695804567332044e-06,
+      "loss": 0.1703,
+      "step": 330
+    },
+    {
+      "epoch": 0.8903833221250841,
+      "grad_norm": 5.699828624725342,
+      "learning_rate": 6.263736736620551e-06,
+      "loss": 0.4676,
+      "step": 331
+    },
+    {
+      "epoch": 0.8930733019502354,
+      "grad_norm": 4.048695087432861,
+      "learning_rate": 5.964952737136353e-06,
+      "loss": 0.5628,
+      "step": 332
+    },
+    {
+      "epoch": 0.8957632817753867,
+      "grad_norm": 4.811221599578857,
+      "learning_rate": 5.673250961114529e-06,
+      "loss": 0.7418,
+      "step": 333
+    },
+    {
+      "epoch": 0.898453261600538,
+      "grad_norm": 3.3414437770843506,
+      "learning_rate": 5.388653377993324e-06,
+      "loss": 0.3143,
+      "step": 334
+    },
+    {
+      "epoch": 0.9011432414256894,
+      "grad_norm": 5.924250602722168,
+      "learning_rate": 5.111181422160671e-06,
+      "loss": 0.5284,
+      "step": 335
+    },
+    {
+      "epoch": 0.9038332212508406,
+      "grad_norm": 6.767046928405762,
+      "learning_rate": 4.840855991339799e-06,
+      "loss": 0.6351,
+      "step": 336
+    },
+    {
+      "epoch": 0.9065232010759919,
+      "grad_norm": 4.555798053741455,
+      "learning_rate": 4.577697445015472e-06,
+      "loss": 0.5253,
+      "step": 337
+    },
+    {
+      "epoch": 0.9092131809011432,
+      "grad_norm": 5.7803730964660645,
+      "learning_rate": 4.321725602900473e-06,
+      "loss": 0.7582,
+      "step": 338
+    },
+    {
+      "epoch": 0.9119031607262945,
+      "grad_norm": 4.016640663146973,
+      "learning_rate": 4.072959743443017e-06,
+      "loss": 0.2845,
+      "step": 339
+    },
+    {
+      "epoch": 0.9145931405514459,
+      "grad_norm": 5.46890926361084,
+      "learning_rate": 3.83141860237467e-06,
+      "loss": 0.6128,
+      "step": 340
+    },
+    {
+      "epoch": 0.9172831203765972,
+      "grad_norm": 4.543710708618164,
+      "learning_rate": 3.5971203712993894e-06,
+      "loss": 0.5227,
+      "step": 341
+    },
+    {
+      "epoch": 0.9199731002017485,
+      "grad_norm": 4.0189008712768555,
+      "learning_rate": 3.3700826963233735e-06,
+      "loss": 0.4072,
+      "step": 342
+    },
+    {
+      "epoch": 0.9226630800268998,
+      "grad_norm": 5.0270490646362305,
+      "learning_rate": 3.1503226767260252e-06,
+      "loss": 0.5361,
+      "step": 343
+    },
+    {
+      "epoch": 0.9253530598520511,
+      "grad_norm": 7.237580299377441,
+      "learning_rate": 2.9378568636721835e-06,
+      "loss": 0.9466,
+      "step": 344
+    },
+    {
+      "epoch": 0.9280430396772025,
+      "grad_norm": 8.795455932617188,
+      "learning_rate": 2.732701258965531e-06,
+      "loss": 0.6604,
+      "step": 345
+    },
+    {
+      "epoch": 0.9307330195023538,
+      "grad_norm": 11.6528959274292,
+      "learning_rate": 2.5348713138434564e-06,
+      "loss": 0.5807,
+      "step": 346
+    },
+    {
+      "epoch": 0.933422999327505,
+      "grad_norm": 8.07696533203125,
+      "learning_rate": 2.3443819278132996e-06,
+      "loss": 0.7975,
+      "step": 347
+    },
+    {
+      "epoch": 0.9361129791526563,
+      "grad_norm": 4.788589954376221,
+      "learning_rate": 2.161247447530268e-06,
+      "loss": 0.6227,
+      "step": 348
+    },
+    {
+      "epoch": 0.9388029589778076,
+      "grad_norm": 7.453376293182373,
+      "learning_rate": 1.985481665716882e-06,
+      "loss": 0.4651,
+      "step": 349
+    },
+    {
+      "epoch": 0.9414929388029589,
+      "grad_norm": 4.3519392013549805,
+      "learning_rate": 1.8170978201241474e-06,
+      "loss": 0.1668,
+      "step": 350
+    },
+    {
+      "epoch": 0.9441829186281103,
+      "grad_norm": 3.087855577468872,
+      "learning_rate": 1.6561085925346332e-06,
+      "loss": 1.2559,
+      "step": 351
+    },
+    {
+      "epoch": 0.9468728984532616,
+      "grad_norm": 3.9484481811523438,
+      "learning_rate": 1.5025261078073005e-06,
+      "loss": 1.0505,
+      "step": 352
+    },
+    {
+      "epoch": 0.9495628782784129,
+      "grad_norm": 4.509681701660156,
+      "learning_rate": 1.3563619329643119e-06,
+      "loss": 1.316,
+      "step": 353
+    },
+    {
+      "epoch": 0.9522528581035642,
+      "grad_norm": 4.409306049346924,
+      "learning_rate": 1.2176270763198828e-06,
+      "loss": 0.9114,
+      "step": 354
+    },
+    {
+      "epoch": 0.9549428379287155,
+      "grad_norm": 5.652538299560547,
+      "learning_rate": 1.0863319866512346e-06,
+      "loss": 1.1458,
+      "step": 355
+    },
+    {
+      "epoch": 0.9576328177538669,
+      "grad_norm": 6.170865535736084,
+      "learning_rate": 9.624865524115346e-07,
+      "loss": 1.1232,
+      "step": 356
+    },
+    {
+      "epoch": 0.9603227975790182,
+      "grad_norm": 5.357152938842773,
+      "learning_rate": 8.461001009852809e-07,
+      "loss": 0.9592,
+      "step": 357
+    },
+    {
+      "epoch": 0.9630127774041695,
+      "grad_norm": 4.322149753570557,
+      "learning_rate": 7.371813979857312e-07,
+      "loss": 0.7773,
+      "step": 358
+    },
+    {
+      "epoch": 0.9657027572293208,
+      "grad_norm": 3.6123275756835938,
+      "learning_rate": 6.357386465947301e-07,
+      "loss": 0.5652,
+      "step": 359
+    },
+    {
+      "epoch": 0.968392737054472,
+      "grad_norm": 3.7311031818389893,
+      "learning_rate": 5.417794869449377e-07,
+      "loss": 0.6096,
+      "step": 360
+    },
+    {
+      "epoch": 0.9710827168796234,
+      "grad_norm": 5.762843608856201,
+      "learning_rate": 4.5531099554435576e-07,
+      "loss": 0.9279,
+      "step": 361
+    },
+    {
+      "epoch": 0.9737726967047747,
+      "grad_norm": 4.97388219833374,
+      "learning_rate": 3.763396847433875e-07,
+      "loss": 0.5789,
+      "step": 362
+    },
+    {
+      "epoch": 0.976462676529926,
+      "grad_norm": 4.815624713897705,
+      "learning_rate": 3.048715022443749e-07,
+      "loss": 0.5138,
+      "step": 363
+    },
+    {
+      "epoch": 0.9791526563550773,
+      "grad_norm": 3.541781425476074,
+      "learning_rate": 2.409118306536229e-07,
+      "loss": 0.259,
+      "step": 364
+    },
+    {
+      "epoch": 0.9818426361802286,
+      "grad_norm": 2.7444493770599365,
+      "learning_rate": 1.8446548707604648e-07,
+      "loss": 0.2707,
+      "step": 365
+    },
+    {
+      "epoch": 0.98453261600538,
+      "grad_norm": 5.796267986297607,
+      "learning_rate": 1.3553672275230523e-07,
+      "loss": 0.5347,
+      "step": 366
+    },
+    {
+      "epoch": 0.9872225958305313,
+      "grad_norm": 5.090404987335205,
+      "learning_rate": 9.412922273871471e-08,
+      "loss": 0.3201,
+      "step": 367
+    },
+    {
+      "epoch": 0.9899125756556826,
+      "grad_norm": 4.630456924438477,
+      "learning_rate": 6.024610562962441e-08,
+      "loss": 0.4391,
+      "step": 368
+    },
+    {
+      "epoch": 0.9926025554808339,
+      "grad_norm": 4.325840473175049,
+      "learning_rate": 3.388992332259422e-08,
+      "loss": 0.3675,
+      "step": 369
+    },
+    {
+      "epoch": 0.9952925353059852,
+      "grad_norm": 9.686969757080078,
+      "learning_rate": 1.506266082615948e-08,
+      "loss": 0.6909,
+      "step": 370
+    },
+    {
+      "epoch": 0.9979825151311366,
+      "grad_norm": 4.668429851531982,
+      "learning_rate": 3.7657361103837776e-09,
+      "loss": 0.285,
+      "step": 371
+    },
+    {
+      "epoch": 1.0013449899125757,
+      "grad_norm": 4.755204200744629,
+      "learning_rate": 0.0,
+      "loss": 0.9972,
+      "step": 372
+    },
+    {
+      "epoch": 1.0013449899125757,
+      "eval_loss": 0.7320420145988464,
+      "eval_runtime": 10.7106,
+      "eval_samples_per_second": 14.658,
+      "eval_steps_per_second": 7.376,
+      "step": 372
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.211789436077998e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null