Training in progress, step 116, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02984974277b9aedef870915ae775cc190c8845abfc0da11356cbec68c834621
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:43a91690adb9887476aae3560db1ce524c846bfd036d867e50ec94af9bc1b4ee
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db2ab0c1625b0cace34243f6b1a2b89c4bd5229c9e932a670aafc98e937b8c3f
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:69a99a1a2721396f9f01686619afdcec0203331c5e8971f4e9b3cf719566b905
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:691bffbcf7f3cc880ac044e9193c3daf47e32a323a9fd73572ab7275b19b8169
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:df97fc4cd15fa85ea2a9d2ac2106f6a796d77aef212047d98e6ee87d3e75bb9e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04da0ae6b4988b3c6efad572e9418c79c5dec4e1be8a07bb5648f45d02dba97c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5db974519fd77fbcd1d1516436fa53f7a6999ec0d08fdab2b48306286e57ccd6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.0009016587864607573,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.8639308855291576,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,118 @@
       "eval_samples_per_second": 9.586,
       "eval_steps_per_second": 2.409,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -754,12 +866,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.0402646152537702e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.0009016587864607573,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 1.0021598272138228,
   "eval_steps": 50,
+  "global_step": 116,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.586,
       "eval_steps_per_second": 2.409,
       "step": 100
+    },
+    {
+      "epoch": 0.8725701943844493,
+      "grad_norm": 0.012060822919011116,
+      "learning_rate": 4.860108501712824e-06,
+      "loss": 0.0003,
+      "step": 101
+    },
+    {
+      "epoch": 0.8812095032397408,
+      "grad_norm": 0.060871824622154236,
+      "learning_rate": 4.242719137849077e-06,
+      "loss": 0.0007,
+      "step": 102
+    },
+    {
+      "epoch": 0.8898488120950324,
+      "grad_norm": 0.04358503967523575,
+      "learning_rate": 3.6655196284083317e-06,
+      "loss": 0.0006,
+      "step": 103
+    },
+    {
+      "epoch": 0.8984881209503239,
+      "grad_norm": 0.015056795440614223,
+      "learning_rate": 3.1290169432939553e-06,
+      "loss": 0.0003,
+      "step": 104
+    },
+    {
+      "epoch": 0.9071274298056156,
+      "grad_norm": 0.06826309114694595,
+      "learning_rate": 2.6336823072904304e-06,
+      "loss": 0.0019,
+      "step": 105
+    },
+    {
+      "epoch": 0.9157667386609071,
+      "grad_norm": 0.004253576509654522,
+      "learning_rate": 2.179950786173879e-06,
+      "loss": 0.0002,
+      "step": 106
+    },
+    {
+      "epoch": 0.9244060475161987,
+      "grad_norm": 0.027383577078580856,
+      "learning_rate": 1.7682209045820686e-06,
+      "loss": 0.0004,
+      "step": 107
+    },
+    {
+      "epoch": 0.9330453563714903,
+      "grad_norm": 0.01728072762489319,
+      "learning_rate": 1.3988542959794627e-06,
+      "loss": 0.0003,
+      "step": 108
+    },
+    {
+      "epoch": 0.9416846652267818,
+      "grad_norm": 0.11028740555047989,
+      "learning_rate": 1.0721753850247984e-06,
+      "loss": 0.0021,
+      "step": 109
+    },
+    {
+      "epoch": 0.9503239740820735,
+      "grad_norm": 0.007546401582658291,
+      "learning_rate": 7.884711026201585e-07,
+      "loss": 0.0002,
+      "step": 110
+    },
+    {
+      "epoch": 0.958963282937365,
+      "grad_norm": 0.011820383369922638,
+      "learning_rate": 5.479906338917984e-07,
+      "loss": 0.0002,
+      "step": 111
+    },
+    {
+      "epoch": 0.9676025917926566,
+      "grad_norm": 0.027329521253705025,
+      "learning_rate": 3.5094519932415417e-07,
+      "loss": 0.0002,
+      "step": 112
+    },
+    {
+      "epoch": 0.9762419006479481,
+      "grad_norm": 0.027211442589759827,
+      "learning_rate": 1.975078692391552e-07,
+      "loss": 0.0012,
+      "step": 113
+    },
+    {
+      "epoch": 0.9848812095032398,
+      "grad_norm": 0.013164684176445007,
+      "learning_rate": 8.781341178393244e-08,
+      "loss": 0.0004,
+      "step": 114
+    },
+    {
+      "epoch": 0.9935205183585313,
+      "grad_norm": 0.0681779533624649,
+      "learning_rate": 2.1958174560282595e-08,
+      "loss": 0.0021,
+      "step": 115
+    },
+    {
+      "epoch": 1.0021598272138228,
+      "grad_norm": 0.34717699885368347,
+      "learning_rate": 0.0,
+      "loss": 0.0036,
+      "step": 116
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.3075543649878016e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null