Training in progress, step 650, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14b032a73b98015c425161a08cf382d4195c6e7edf76e4e437a4431736faa8f7
 size 578859568

 version https://git-lfs.github.com/spec/v1
+oid sha256:91ea2e833e395bd896b7338e0b159889cb0a9805a20a0ba81249634cf8be6acb
 size 578859568

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fed801e682e5e68307b58f34bd4cd335b4789e2ce208c4c30cc8ae0d7c94bd66
 size 295198386

 version https://git-lfs.github.com/spec/v1
+oid sha256:8408296ac89ee50ae11d1a24615517bae811a4a08e26902b3f91c3361afa9523
 size 295198386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbf275d60f9d8a7677b056dc7c047a17ef88e4423f27115e72b43306be7f392b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d04b06ab68f7f17dc4df2206cd558b4bd98d8e29313159b73014f73bdd405dcc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5b6a73723cf527ee8aafd1afa2781dcc9cc28ce480c143c5bb0a790cbe7887b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:18275b6327bd0d7d1ad9ae6ef36f205b2f0d81f6499994f6ecb9553362d17a42
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.6910951733589172,
   "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch": 0.016678132883023746,
   "eval_steps": 50,
-  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4311,6 +4311,364 @@
       "eval_samples_per_second": 11.449,
       "eval_steps_per_second": 5.734,
       "step": 600
     }
   ],
   "logging_steps": 1,
@@ -4325,7 +4683,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -4334,12 +4692,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.830931258133709e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.6910951733589172,
   "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.018067977289942392,
   "eval_steps": 50,
+  "global_step": 650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.449,
       "eval_steps_per_second": 5.734,
       "step": 600
+    },
+    {
+      "epoch": 0.016705929771162118,
+      "grad_norm": 0.11464305222034454,
+      "learning_rate": 0.0007000509087229895,
+      "loss": 0.9103,
+      "step": 601
+    },
+    {
+      "epoch": 0.01673372665930049,
+      "grad_norm": 0.09989949315786362,
+      "learning_rate": 0.0006970252132582728,
+      "loss": 0.8176,
+      "step": 602
+    },
+    {
+      "epoch": 0.016761523547438865,
+      "grad_norm": 0.09268354624509811,
+      "learning_rate": 0.0006940025687462952,
+      "loss": 0.7983,
+      "step": 603
+    },
+    {
+      "epoch": 0.016789320435577237,
+      "grad_norm": 0.09152427315711975,
+      "learning_rate": 0.0006909830056250527,
+      "loss": 0.7975,
+      "step": 604
+    },
+    {
+      "epoch": 0.01681711732371561,
+      "grad_norm": 0.09386585652828217,
+      "learning_rate": 0.000687966554301513,
+      "loss": 0.8584,
+      "step": 605
+    },
+    {
+      "epoch": 0.01684491421185398,
+      "grad_norm": 0.08477571606636047,
+      "learning_rate": 0.0006849532451513074,
+      "loss": 0.7387,
+      "step": 606
+    },
+    {
+      "epoch": 0.016872711099992357,
+      "grad_norm": 0.08988666534423828,
+      "learning_rate": 0.0006819431085184251,
+      "loss": 0.8264,
+      "step": 607
+    },
+    {
+      "epoch": 0.01690050798813073,
+      "grad_norm": 0.09714596718549728,
+      "learning_rate": 0.0006789361747149092,
+      "loss": 0.9452,
+      "step": 608
+    },
+    {
+      "epoch": 0.0169283048762691,
+      "grad_norm": 0.10461269319057465,
+      "learning_rate": 0.0006759324740205494,
+      "loss": 0.7174,
+      "step": 609
+    },
+    {
+      "epoch": 0.016956101764407476,
+      "grad_norm": 0.09161835163831711,
+      "learning_rate": 0.0006729320366825784,
+      "loss": 0.796,
+      "step": 610
+    },
+    {
+      "epoch": 0.016983898652545848,
+      "grad_norm": 0.0949753150343895,
+      "learning_rate": 0.0006699348929153668,
+      "loss": 0.975,
+      "step": 611
+    },
+    {
+      "epoch": 0.01701169554068422,
+      "grad_norm": 0.09028909355401993,
+      "learning_rate": 0.0006669410729001193,
+      "loss": 0.7738,
+      "step": 612
+    },
+    {
+      "epoch": 0.017039492428822592,
+      "grad_norm": 0.08454867452383041,
+      "learning_rate": 0.0006639506067845697,
+      "loss": 0.7062,
+      "step": 613
+    },
+    {
+      "epoch": 0.017067289316960967,
+      "grad_norm": 0.10592840611934662,
+      "learning_rate": 0.0006609635246826793,
+      "loss": 0.7745,
+      "step": 614
+    },
+    {
+      "epoch": 0.01709508620509934,
+      "grad_norm": 0.09267466515302658,
+      "learning_rate": 0.0006579798566743314,
+      "loss": 0.8491,
+      "step": 615
+    },
+    {
+      "epoch": 0.01712288309323771,
+      "grad_norm": 0.10221099853515625,
+      "learning_rate": 0.0006549996328050296,
+      "loss": 0.9564,
+      "step": 616
+    },
+    {
+      "epoch": 0.017150679981376087,
+      "grad_norm": 0.09640829265117645,
+      "learning_rate": 0.000652022883085595,
+      "loss": 0.6694,
+      "step": 617
+    },
+    {
+      "epoch": 0.01717847686951446,
+      "grad_norm": 0.09555254131555557,
+      "learning_rate": 0.0006490496374918646,
+      "loss": 0.7825,
+      "step": 618
+    },
+    {
+      "epoch": 0.01720627375765283,
+      "grad_norm": 0.1080060750246048,
+      "learning_rate": 0.0006460799259643883,
+      "loss": 0.8122,
+      "step": 619
+    },
+    {
+      "epoch": 0.017234070645791202,
+      "grad_norm": 0.09308885037899017,
+      "learning_rate": 0.0006431137784081283,
+      "loss": 0.7393,
+      "step": 620
+    },
+    {
+      "epoch": 0.017261867533929578,
+      "grad_norm": 0.10485529899597168,
+      "learning_rate": 0.0006401512246921576,
+      "loss": 0.7577,
+      "step": 621
+    },
+    {
+      "epoch": 0.01728966442206795,
+      "grad_norm": 0.10300412029027939,
+      "learning_rate": 0.0006371922946493591,
+      "loss": 0.7016,
+      "step": 622
+    },
+    {
+      "epoch": 0.01731746131020632,
+      "grad_norm": 0.09915035963058472,
+      "learning_rate": 0.0006342370180761255,
+      "loss": 0.7562,
+      "step": 623
+    },
+    {
+      "epoch": 0.017345258198344694,
+      "grad_norm": 0.11094118654727936,
+      "learning_rate": 0.0006312854247320594,
+      "loss": 0.7113,
+      "step": 624
+    },
+    {
+      "epoch": 0.01737305508648307,
+      "grad_norm": 0.09752795100212097,
+      "learning_rate": 0.0006283375443396726,
+      "loss": 0.7649,
+      "step": 625
+    },
+    {
+      "epoch": 0.01740085197462144,
+      "grad_norm": 0.10030993074178696,
+      "learning_rate": 0.0006253934065840879,
+      "loss": 0.7446,
+      "step": 626
+    },
+    {
+      "epoch": 0.017428648862759813,
+      "grad_norm": 0.1134578287601471,
+      "learning_rate": 0.0006224530411127403,
+      "loss": 0.8147,
+      "step": 627
+    },
+    {
+      "epoch": 0.01745644575089819,
+      "grad_norm": 0.09963490813970566,
+      "learning_rate": 0.000619516477535077,
+      "loss": 0.6904,
+      "step": 628
+    },
+    {
+      "epoch": 0.01748424263903656,
+      "grad_norm": 0.10086818039417267,
+      "learning_rate": 0.0006165837454222607,
+      "loss": 0.5791,
+      "step": 629
+    },
+    {
+      "epoch": 0.017512039527174932,
+      "grad_norm": 0.11571143567562103,
+      "learning_rate": 0.0006136548743068713,
+      "loss": 0.7572,
+      "step": 630
+    },
+    {
+      "epoch": 0.017539836415313304,
+      "grad_norm": 0.10508367419242859,
+      "learning_rate": 0.0006107298936826086,
+      "loss": 0.5869,
+      "step": 631
+    },
+    {
+      "epoch": 0.01756763330345168,
+      "grad_norm": 0.1044749990105629,
+      "learning_rate": 0.0006078088330039945,
+      "loss": 0.595,
+      "step": 632
+    },
+    {
+      "epoch": 0.01759543019159005,
+      "grad_norm": 0.1138482466340065,
+      "learning_rate": 0.0006048917216860781,
+      "loss": 0.668,
+      "step": 633
+    },
+    {
+      "epoch": 0.017623227079728424,
+      "grad_norm": 0.10499613732099533,
+      "learning_rate": 0.0006019785891041381,
+      "loss": 0.6028,
+      "step": 634
+    },
+    {
+      "epoch": 0.0176510239678668,
+      "grad_norm": 0.10078407824039459,
+      "learning_rate": 0.0005990694645933865,
+      "loss": 0.5796,
+      "step": 635
+    },
+    {
+      "epoch": 0.01767882085600517,
+      "grad_norm": 0.09239528328180313,
+      "learning_rate": 0.0005961643774486753,
+      "loss": 0.5735,
+      "step": 636
+    },
+    {
+      "epoch": 0.017706617744143543,
+      "grad_norm": 0.09768297523260117,
+      "learning_rate": 0.0005932633569242,
+      "loss": 0.5082,
+      "step": 637
+    },
+    {
+      "epoch": 0.017734414632281915,
+      "grad_norm": 0.10613156110048294,
+      "learning_rate": 0.0005903664322332048,
+      "loss": 0.5554,
+      "step": 638
+    },
+    {
+      "epoch": 0.01776221152042029,
+      "grad_norm": 0.10876414179801941,
+      "learning_rate": 0.000587473632547689,
+      "loss": 0.6091,
+      "step": 639
+    },
+    {
+      "epoch": 0.017790008408558662,
+      "grad_norm": 0.10759898275136948,
+      "learning_rate": 0.0005845849869981136,
+      "loss": 0.5748,
+      "step": 640
+    },
+    {
+      "epoch": 0.017817805296697034,
+      "grad_norm": 0.12154053151607513,
+      "learning_rate": 0.0005817005246731073,
+      "loss": 0.6063,
+      "step": 641
+    },
+    {
+      "epoch": 0.017845602184835406,
+      "grad_norm": 0.11394521594047546,
+      "learning_rate": 0.0005788202746191734,
+      "loss": 0.6124,
+      "step": 642
+    },
+    {
+      "epoch": 0.01787339907297378,
+      "grad_norm": 0.09602084010839462,
+      "learning_rate": 0.0005759442658403985,
+      "loss": 0.4391,
+      "step": 643
+    },
+    {
+      "epoch": 0.017901195961112153,
+      "grad_norm": 0.12600000202655792,
+      "learning_rate": 0.0005730725272981583,
+      "loss": 0.6201,
+      "step": 644
+    },
+    {
+      "epoch": 0.017928992849250525,
+      "grad_norm": 0.1129770576953888,
+      "learning_rate": 0.0005702050879108284,
+      "loss": 0.4814,
+      "step": 645
+    },
+    {
+      "epoch": 0.0179567897373889,
+      "grad_norm": 0.121727854013443,
+      "learning_rate": 0.0005673419765534915,
+      "loss": 0.5071,
+      "step": 646
+    },
+    {
+      "epoch": 0.017984586625527273,
+      "grad_norm": 0.11814267188310623,
+      "learning_rate": 0.0005644832220576479,
+      "loss": 0.5387,
+      "step": 647
+    },
+    {
+      "epoch": 0.018012383513665645,
+      "grad_norm": 0.14177252352237701,
+      "learning_rate": 0.0005616288532109225,
+      "loss": 0.6006,
+      "step": 648
+    },
+    {
+      "epoch": 0.018040180401804017,
+      "grad_norm": 0.17021676898002625,
+      "learning_rate": 0.0005587788987567784,
+      "loss": 0.5445,
+      "step": 649
+    },
+    {
+      "epoch": 0.018067977289942392,
+      "grad_norm": 0.17510192096233368,
+      "learning_rate": 0.0005559333873942258,
+      "loss": 0.5694,
+      "step": 650
+    },
+    {
+      "epoch": 0.018067977289942392,
+      "eval_loss": 0.7097320556640625,
+      "eval_runtime": 50.365,
+      "eval_samples_per_second": 11.456,
+      "eval_steps_per_second": 5.738,
+      "step": 650
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.150175529644851e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null