Romain-XV committed (verified)
Commit 9ab5426 · 1 Parent(s): 05e115c

Training in progress, step 124, checkpoint

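This is the kind of commit that transformers' Trainer pushes automatically during training; the last-checkpoint/ folder below matches the layout produced by hub_strategy="checkpoint". A minimal sketch of that setup, offered as an assumption since the actual training script is not part of this commit; only the values echoed in trainer_state.json are taken from the repo itself:

from transformers import TrainingArguments

# Hypothetical arguments: batch size, logging cadence and output dir are read off
# trainer_state.json below; everything else is an assumption.
args = TrainingArguments(
    output_dir="miner_id_24",       # "best_model_checkpoint" points into miner_id_24/
    per_device_train_batch_size=4,  # "train_batch_size": 4
    logging_steps=1,                # "logging_steps": 1
    push_to_hub=True,
    hub_strategy="checkpoint",      # push only the latest checkpoint, under last-checkpoint/
)

# A Trainer built on these arguments can later resume from the pushed folder with
# trainer.train(resume_from_checkpoint="last-checkpoint").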
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ecea0eb506966c3adaa92776fafcbf9f0c95cbf73cbbbc19e27e94a9bf4d83b
+oid sha256:f675eca4718ef678b42c4e45fdc39331d4412cd7383b7fb96f23877618a4cf72
 size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c40476b383d1af7e2da47ec2d51547f78a12f9b02b551128264a8681d94eb307
+oid sha256:8460ee5ac1391d7cc29200316cb512ebbc8c558db6ad943e68e9e9670355f54b
 size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d797c1c1d05d29db78de514e4d3a8fc182d878f2bee91c4600913b90a3dbad17
+oid sha256:ea8fad6c960ead350c17031a8995ec22d65156da68444c9500a59e6a4123354e
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0f69efa683b8ebdc926b4b9298d921afcb3d3666fff913ace452d35225fdfd2
+oid sha256:dbd42b891ad014f5057f3a5965f615fead617014a669ce0c44e21eb37af35b63
 size 1064
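All four files above are Git LFS objects, so the diff only rewrites each pointer (a sha256 oid plus a byte size) rather than the binary payload itself. A small sketch, assuming the objects have been pulled locally, that re-checks a downloaded file against the oid and size recorded in its new pointer:

import hashlib

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    # Stream in 1 MiB chunks so even the 84 MB adapter never sits fully in memory.
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# oid and size copied from the new adapter_model.safetensors pointer above.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "f675eca4718ef678b42c4e45fdc39331d4412cd7383b7fb96f23877618a4cf72",
    83945296,
))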
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.07799232006072998,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.07573247503194964,
+  "epoch": 0.09390826903961755,
   "eval_steps": 100,
-  "global_step": 100,
+  "global_step": 124,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -723,6 +723,174 @@
       "eval_samples_per_second": 6.682,
       "eval_steps_per_second": 1.67,
       "step": 100
+    },
+    {
+      "epoch": 0.07648979978226914,
+      "grad_norm": 0.2453308403491974,
+      "learning_rate": 1.9423489433902186e-05,
+      "loss": 0.0635,
+      "step": 101
+    },
+    {
+      "epoch": 0.07724712453258863,
+      "grad_norm": 0.286064088344574,
+      "learning_rate": 1.7822218477475494e-05,
+      "loss": 0.0783,
+      "step": 102
+    },
+    {
+      "epoch": 0.07800444928290813,
+      "grad_norm": 0.29971766471862793,
+      "learning_rate": 1.6283352173747145e-05,
+      "loss": 0.0698,
+      "step": 103
+    },
+    {
+      "epoch": 0.07876177403322762,
+      "grad_norm": 0.3119301497936249,
+      "learning_rate": 1.4808059116167305e-05,
+      "loss": 0.0773,
+      "step": 104
+    },
+    {
+      "epoch": 0.07951909878354713,
+      "grad_norm": 0.20733344554901123,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 0.0604,
+      "step": 105
+    },
+    {
+      "epoch": 0.08027642353386662,
+      "grad_norm": 0.25688570737838745,
+      "learning_rate": 1.2052624879351104e-05,
+      "loss": 0.0638,
+      "step": 106
+    },
+    {
+      "epoch": 0.08103374828418611,
+      "grad_norm": 0.28935614228248596,
+      "learning_rate": 1.0774576138160597e-05,
+      "loss": 0.0713,
+      "step": 107
+    },
+    {
+      "epoch": 0.08179107303450561,
+      "grad_norm": 0.24278295040130615,
+      "learning_rate": 9.564283930242257e-06,
+      "loss": 0.1038,
+      "step": 108
+    },
+    {
+      "epoch": 0.0825483977848251,
+      "grad_norm": 0.30280330777168274,
+      "learning_rate": 8.422667334494249e-06,
+      "loss": 0.0981,
+      "step": 109
+    },
+    {
+      "epoch": 0.08330572253514461,
+      "grad_norm": 0.2627258598804474,
+      "learning_rate": 7.350593278519824e-06,
+      "loss": 0.0663,
+      "step": 110
+    },
+    {
+      "epoch": 0.0840630472854641,
+      "grad_norm": 0.2400035709142685,
+      "learning_rate": 6.3488758802945354e-06,
+      "loss": 0.071,
+      "step": 111
+    },
+    {
+      "epoch": 0.08482037203578359,
+      "grad_norm": 0.31241780519485474,
+      "learning_rate": 5.418275829936537e-06,
+      "loss": 0.0658,
+      "step": 112
+    },
+    {
+      "epoch": 0.0855776967861031,
+      "grad_norm": 0.3294007480144501,
+      "learning_rate": 4.559499812049251e-06,
+      "loss": 0.0978,
+      "step": 113
+    },
+    {
+      "epoch": 0.08633502153642258,
+      "grad_norm": 0.22428545355796814,
+      "learning_rate": 3.7731999690749585e-06,
+      "loss": 0.0613,
+      "step": 114
+    },
+    {
+      "epoch": 0.08709234628674209,
+      "grad_norm": 0.28080588579177856,
+      "learning_rate": 3.059973406066963e-06,
+      "loss": 0.0716,
+      "step": 115
+    },
+    {
+      "epoch": 0.08784967103706158,
+      "grad_norm": 0.26730701327323914,
+      "learning_rate": 2.420361737256438e-06,
+      "loss": 0.0673,
+      "step": 116
+    },
+    {
+      "epoch": 0.08860699578738107,
+      "grad_norm": 0.34970346093177795,
+      "learning_rate": 1.8548506747582129e-06,
+      "loss": 0.0773,
+      "step": 117
+    },
+    {
+      "epoch": 0.08936432053770058,
+      "grad_norm": 0.2649919390678406,
+      "learning_rate": 1.3638696597277679e-06,
+      "loss": 0.0659,
+      "step": 118
+    },
+    {
+      "epoch": 0.09012164528802007,
+      "grad_norm": 0.2567705512046814,
+      "learning_rate": 9.477915362496758e-07,
+      "loss": 0.0663,
+      "step": 119
+    },
+    {
+      "epoch": 0.09087897003833957,
+      "grad_norm": 0.2665155827999115,
+      "learning_rate": 6.069322682050516e-07,
+      "loss": 0.0572,
+      "step": 120
+    },
+    {
+      "epoch": 0.09163629478865906,
+      "grad_norm": 0.25261205434799194,
+      "learning_rate": 3.415506993330153e-07,
+      "loss": 0.0683,
+      "step": 121
+    },
+    {
+      "epoch": 0.09239361953897855,
+      "grad_norm": 0.26622474193573,
+      "learning_rate": 1.518483566683826e-07,
+      "loss": 0.0637,
+      "step": 122
+    },
+    {
+      "epoch": 0.09315094428929806,
+      "grad_norm": 0.2766965925693512,
+      "learning_rate": 3.796929750485845e-08,
+      "loss": 0.0843,
+      "step": 123
+    },
+    {
+      "epoch": 0.09390826903961755,
+      "grad_norm": 0.2631254494190216,
+      "learning_rate": 0.0,
+      "loss": 0.08,
+      "step": 124
     }
   ],
   "logging_steps": 1,
@@ -746,12 +914,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.025880320459407e+17,
+  "total_flos": 7.4796378195021e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null