Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2ce0f4c8d8b1cd7da464a8106e14a9c0cd1c5a03b3d2f3eab5b30d57c23f477
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:d7019242e6a8942afeff2a19415f548e5a5147256b90599a02e01759120fe68e
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14195dfaaa53d220f79a747985a754d5896d6173f98f9f17629c457385ad3348
 size 341314644

 version https://git-lfs.github.com/spec/v1
+oid sha256:556fd0b1f19ab282ce7851b7552a779990fcc7c5a544779582a365335a4c5e78
 size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12ab33565d17f20a3b506e1fdeacc2a2d7e780bd23b3e41998045e828437aef0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:751a3765ff678ffa254180c2fe471eb1a7b3d2147cb53dab521a6e72c94e6672
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9096f15f02bac6b0fc27aa7aa4986f85d87d53fca310a75657e0015357af5c5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.07774519175291061,
-  "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 0.3651538862806468,
   "eval_steps": 50,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
       "eval_samples_per_second": 7.083,
       "eval_steps_per_second": 1.773,
       "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2544,12 +2902,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.140510070197453e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.07711078971624374,
+  "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.4173187271778821,
   "eval_steps": 50,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.083,
       "eval_steps_per_second": 1.773,
       "step": 350
+    },
+    {
+      "epoch": 0.36619718309859156,
+      "grad_norm": 0.5994611978530884,
+      "learning_rate": 3.844650207332562e-06,
+      "loss": 0.6173,
+      "step": 351
+    },
+    {
+      "epoch": 0.36724047991653624,
+      "grad_norm": 0.5942060947418213,
+      "learning_rate": 3.691267552111183e-06,
+      "loss": 0.6104,
+      "step": 352
+    },
+    {
+      "epoch": 0.368283776734481,
+      "grad_norm": 0.5567615032196045,
+      "learning_rate": 3.54088980417534e-06,
+      "loss": 0.6416,
+      "step": 353
+    },
+    {
+      "epoch": 0.36932707355242567,
+      "grad_norm": 0.5868862271308899,
+      "learning_rate": 3.393526721321616e-06,
+      "loss": 0.678,
+      "step": 354
+    },
+    {
+      "epoch": 0.37037037037037035,
+      "grad_norm": 0.5797458291053772,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 0.6504,
+      "step": 355
+    },
+    {
+      "epoch": 0.3714136671883151,
+      "grad_norm": 0.5654410123825073,
+      "learning_rate": 3.1078826033397843e-06,
+      "loss": 0.6043,
+      "step": 356
+    },
+    {
+      "epoch": 0.37245696400625977,
+      "grad_norm": 0.5999300479888916,
+      "learning_rate": 2.9696201032491434e-06,
+      "loss": 0.6457,
+      "step": 357
+    },
+    {
+      "epoch": 0.3735002608242045,
+      "grad_norm": 0.5953872799873352,
+      "learning_rate": 2.8344093371128424e-06,
+      "loss": 0.6465,
+      "step": 358
+    },
+    {
+      "epoch": 0.3745435576421492,
+      "grad_norm": 0.613161027431488,
+      "learning_rate": 2.70225907856374e-06,
+      "loss": 0.5718,
+      "step": 359
+    },
+    {
+      "epoch": 0.3755868544600939,
+      "grad_norm": 0.5564877986907959,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 0.6005,
+      "step": 360
+    },
+    {
+      "epoch": 0.3766301512780386,
+      "grad_norm": 0.6765579581260681,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.7112,
+      "step": 361
+    },
+    {
+      "epoch": 0.3776734480959833,
+      "grad_norm": 0.5490257143974304,
+      "learning_rate": 2.324256102563188e-06,
+      "loss": 0.6579,
+      "step": 362
+    },
+    {
+      "epoch": 0.37871674491392804,
+      "grad_norm": 0.649297833442688,
+      "learning_rate": 2.204431630583548e-06,
+      "loss": 0.6601,
+      "step": 363
+    },
+    {
+      "epoch": 0.3797600417318727,
+      "grad_norm": 0.5968593955039978,
+      "learning_rate": 2.087708544541689e-06,
+      "loss": 0.651,
+      "step": 364
+    },
+    {
+      "epoch": 0.3808033385498174,
+      "grad_norm": 0.5954743027687073,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 0.6675,
+      "step": 365
+    },
+    {
+      "epoch": 0.38184663536776214,
+      "grad_norm": 0.5616008639335632,
+      "learning_rate": 1.8635966245104664e-06,
+      "loss": 0.6781,
+      "step": 366
+    },
+    {
+      "epoch": 0.3828899321857068,
+      "grad_norm": 0.549443244934082,
+      "learning_rate": 1.7562223328224325e-06,
+      "loss": 0.6671,
+      "step": 367
+    },
+    {
+      "epoch": 0.38393322900365157,
+      "grad_norm": 0.6333698630332947,
+      "learning_rate": 1.6519785107311891e-06,
+      "loss": 0.6408,
+      "step": 368
+    },
+    {
+      "epoch": 0.38497652582159625,
+      "grad_norm": 0.6950687766075134,
+      "learning_rate": 1.5508719224689717e-06,
+      "loss": 0.5624,
+      "step": 369
+    },
+    {
+      "epoch": 0.38601982263954093,
+      "grad_norm": 0.5817149877548218,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 0.655,
+      "step": 370
+    },
+    {
+      "epoch": 0.38706311945748567,
+      "grad_norm": 0.5664610266685486,
+      "learning_rate": 1.358096486081778e-06,
+      "loss": 0.6021,
+      "step": 371
+    },
+    {
+      "epoch": 0.38810641627543035,
+      "grad_norm": 0.7143059372901917,
+      "learning_rate": 1.2664401468786114e-06,
+      "loss": 0.6826,
+      "step": 372
+    },
+    {
+      "epoch": 0.38914971309337504,
+      "grad_norm": 0.5702341794967651,
+      "learning_rate": 1.1779460585363944e-06,
+      "loss": 0.593,
+      "step": 373
+    },
+    {
+      "epoch": 0.3901930099113198,
+      "grad_norm": 0.6763701438903809,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.6419,
+      "step": 374
+    },
+    {
+      "epoch": 0.39123630672926446,
+      "grad_norm": 0.7248067855834961,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 0.402,
+      "step": 375
+    },
+    {
+      "epoch": 0.3922796035472092,
+      "grad_norm": 0.4371187388896942,
+      "learning_rate": 9.314936930293283e-07,
+      "loss": 0.0874,
+      "step": 376
+    },
+    {
+      "epoch": 0.3933229003651539,
+      "grad_norm": 0.10949412733316422,
+      "learning_rate": 8.557039732283944e-07,
+      "loss": 0.0009,
+      "step": 377
+    },
+    {
+      "epoch": 0.39436619718309857,
+      "grad_norm": 0.0785563513636589,
+      "learning_rate": 7.83103156370113e-07,
+      "loss": 0.0006,
+      "step": 378
+    },
+    {
+      "epoch": 0.3954094940010433,
+      "grad_norm": 0.019972898066043854,
+      "learning_rate": 7.136959534174592e-07,
+      "loss": 0.0002,
+      "step": 379
+    },
+    {
+      "epoch": 0.396452790818988,
+      "grad_norm": 0.0424511581659317,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 0.0002,
+      "step": 380
+    },
+    {
+      "epoch": 0.3974960876369327,
+      "grad_norm": 0.1595275104045868,
+      "learning_rate": 5.844801966434832e-07,
+      "loss": 0.0004,
+      "step": 381
+    },
+    {
+      "epoch": 0.3985393844548774,
+      "grad_norm": 0.16627691686153412,
+      "learning_rate": 5.246800274474439e-07,
+      "loss": 0.0004,
+      "step": 382
+    },
+    {
+      "epoch": 0.3995826812728221,
+      "grad_norm": 0.010276157408952713,
+      "learning_rate": 4.680902408635335e-07,
+      "loss": 0.0002,
+      "step": 383
+    },
+    {
+      "epoch": 0.40062597809076683,
+      "grad_norm": 0.16451296210289001,
+      "learning_rate": 4.1471450892189846e-07,
+      "loss": 0.0009,
+      "step": 384
+    },
+    {
+      "epoch": 0.4016692749087115,
+      "grad_norm": 0.02851867489516735,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 0.0003,
+      "step": 385
+    },
+    {
+      "epoch": 0.40271257172665625,
+      "grad_norm": 0.07447977364063263,
+      "learning_rate": 3.1761885408435054e-07,
+      "loss": 0.0006,
+      "step": 386
+    },
+    {
+      "epoch": 0.40375586854460094,
+      "grad_norm": 0.0858677551150322,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.0005,
+      "step": 387
+    },
+    {
+      "epoch": 0.4047991653625456,
+      "grad_norm": 0.4173906147480011,
+      "learning_rate": 2.334182641175686e-07,
+      "loss": 0.0032,
+      "step": 388
+    },
+    {
+      "epoch": 0.40584246218049036,
+      "grad_norm": 0.16964755952358246,
+      "learning_rate": 1.9616057881935436e-07,
+      "loss": 0.0008,
+      "step": 389
+    },
+    {
+      "epoch": 0.40688575899843504,
+      "grad_norm": 0.0739935114979744,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 0.0004,
+      "step": 390
+    },
+    {
+      "epoch": 0.4079290558163798,
+      "grad_norm": 0.00929514691233635,
+      "learning_rate": 1.3134251542544774e-07,
+      "loss": 0.0002,
+      "step": 391
+    },
+    {
+      "epoch": 0.40897235263432447,
+      "grad_norm": 0.01341878343373537,
+      "learning_rate": 1.0378634328099269e-07,
+      "loss": 0.0002,
+      "step": 392
+    },
+    {
+      "epoch": 0.41001564945226915,
+      "grad_norm": 0.03436583653092384,
+      "learning_rate": 7.946786493666647e-08,
+      "loss": 0.0003,
+      "step": 393
+    },
+    {
+      "epoch": 0.4110589462702139,
+      "grad_norm": 0.053878240287303925,
+      "learning_rate": 5.838865838366792e-08,
+      "loss": 0.0005,
+      "step": 394
+    },
+    {
+      "epoch": 0.41210224308815857,
+      "grad_norm": 0.0045427302829921246,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 0.0001,
+      "step": 395
+    },
+    {
+      "epoch": 0.4131455399061033,
+      "grad_norm": 0.18612140417099,
+      "learning_rate": 2.595332156925534e-08,
+      "loss": 0.0017,
+      "step": 396
+    },
+    {
+      "epoch": 0.414188836724048,
+      "grad_norm": 0.5833466649055481,
+      "learning_rate": 1.4599295990352924e-08,
+      "loss": 0.01,
+      "step": 397
+    },
+    {
+      "epoch": 0.4152321335419927,
+      "grad_norm": 0.05140767619013786,
+      "learning_rate": 6.488751431266149e-09,
+      "loss": 0.0004,
+      "step": 398
+    },
+    {
+      "epoch": 0.4162754303599374,
+      "grad_norm": 1.317895770072937,
+      "learning_rate": 1.622214173602199e-09,
+      "loss": 0.0146,
+      "step": 399
+    },
+    {
+      "epoch": 0.4173187271778821,
+      "grad_norm": 2.8698835372924805,
+      "learning_rate": 0.0,
+      "loss": 0.0348,
+      "step": 400
+    },
+    {
+      "epoch": 0.4173187271778821,
+      "eval_loss": 0.07711078971624374,
+      "eval_runtime": 227.9288,
+      "eval_samples_per_second": 7.081,
+      "eval_steps_per_second": 1.772,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0449874722131804e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null