Training in progress, step 60, checkpoint

Files changed (7) hide show

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,9 +20,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "layer",
     "Wqkv",
-    "out_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "Wqkv",
+    "out_proj",
+    "layer"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:868a3f917a5878e2b6944b4ac3f3554c8fa88ed3406f72ba3442e357efae7293
 size 5752

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbec2fd4afd82e2174d8e90843450aa9cfec17114609e0daaec109e20feb58b9
 size 5752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e951c6db3d322f827b37e2420f3724bf14039e3709cbf9e3ba4c8f422f38c721
 size 15814

 version https://git-lfs.github.com/spec/v1
+oid sha256:bdef43d2b373c43f94b1dfcbd9016d90a7c196769b7519c0951291dc44b8ca91
 size 15814

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aeab1516692d8735d1cf9eb8242b9bfc315e61f76d8cb7444790c4e87ea94971
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ae59499d6fa89d93656a32994352236f76ecdb3c0d5d7d01bbf5e497aa8ee6c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72b1decdef7148b48fd5a7101075992e3183d005c57731608543a60017a8211b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:066ba65f4953fa1b75b645f57a930b28fbba82ba12586f9907c767ebd5948500
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-20",
-  "epoch": 0.7843137254901961,
   "eval_steps": 20,
-  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -31,17 +31,25 @@
       "eval_samples_per_second": 390.671,
       "eval_steps_per_second": 6.246,
       "step": 40
     }
   ],
   "logging_steps": 100,
-  "max_steps": 51,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
   "save_steps": 20,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
       "args": {
-        "early_stopping_patience": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
@@ -54,12 +62,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 25501284433920.0,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-20",
+  "epoch": 0.5687203791469194,
   "eval_steps": 20,
+  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 390.671,
       "eval_steps_per_second": 6.246,
       "step": 40
+    },
+    {
+      "epoch": 0.5687203791469194,
+      "eval_loss": 11.5,
+      "eval_runtime": 1.643,
+      "eval_samples_per_second": 167.381,
+      "eval_steps_per_second": 5.478,
+      "step": 60
     }
   ],
   "logging_steps": 100,
+  "max_steps": 5250,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 50,
   "save_steps": 20,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
       "args": {
+        "early_stopping_patience": 3,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 31876605542400.0,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6ad55a4e2150c662533f989b30551151cd3acdbedc1c1aeda6af73b49cffe40
 size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d59480f4cc4c2b69c07a440b6b3314d38f79b7adeca28982abe6546fc7cb33d
 size 6776