Training in progress, step 1088, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +270 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b5640fad40281fd5d639511356095a8552ab13034cf39c186ed59e06ab055a1
 size 54285928

 version https://git-lfs.github.com/spec/v1
+oid sha256:384f36007b97fbb92ffad76cabf9847376c7a4cb47cbde2ce83ad9d3c2e5b138
 size 54285928

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1e01ad8da81a858e8e938a7bdca8254bfa5438d6b1c7251bdd13f12c56d5e1f
 size 27753786

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f190adcd340a44a909ebf73f7c0c5cc971db6f330a46183ade6020952f14add
 size 27753786

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:508337fd7bc9ca1cce78c0c53b3e5fba6c6a4bdf1bdeb4293058ee7e7b6238a1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8451f5ea486a190b6e371450fc18194c8ef19d966279c816f43e84a399cdb84
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a5cc0d0ebf737ac8a43a5138b0ded560b01a769ad496c84f0d332e37eb84e28
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f518c75cfe5a3d34bf6d4285b5324ac723c5d3cb3e7dcf0617a8e3578699ca5d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.3224910497665405,
   "best_model_checkpoint": "miner_id_24/checkpoint-1050",
-  "epoch": 0.9657392504023914,
   "eval_steps": 150,
-  "global_step": 1050,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7421,6 +7421,272 @@
       "eval_samples_per_second": 165.065,
       "eval_steps_per_second": 20.633,
       "step": 1050
     }
   ],
   "logging_steps": 1,
@@ -7444,12 +7710,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.9092013631668224e+17,
   "train_batch_size": 12,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.3224910497665405,
   "best_model_checkpoint": "miner_id_24/checkpoint-1050",
+  "epoch": 1.0006898137502873,
   "eval_steps": 150,
+  "global_step": 1088,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 165.065,
       "eval_steps_per_second": 20.633,
       "step": 1050
+    },
+    {
+      "epoch": 0.9666590020694412,
+      "grad_norm": 0.7578225135803223,
+      "learning_rate": 2.958502525492457e-07,
+      "loss": 1.4715,
+      "step": 1051
+    },
+    {
+      "epoch": 0.9675787537364912,
+      "grad_norm": 0.5387850403785706,
+      "learning_rate": 2.800891996009025e-07,
+      "loss": 1.5156,
+      "step": 1052
+    },
+    {
+      "epoch": 0.9684985054035411,
+      "grad_norm": 0.5452224016189575,
+      "learning_rate": 2.6475836335553836e-07,
+      "loss": 1.4171,
+      "step": 1053
+    },
+    {
+      "epoch": 0.9694182570705909,
+      "grad_norm": 0.5857959389686584,
+      "learning_rate": 2.498578764678849e-07,
+      "loss": 1.454,
+      "step": 1054
+    },
+    {
+      "epoch": 0.9703380087376409,
+      "grad_norm": 0.5615691542625427,
+      "learning_rate": 2.3538786786896915e-07,
+      "loss": 1.3929,
+      "step": 1055
+    },
+    {
+      "epoch": 0.9712577604046907,
+      "grad_norm": 0.5565760731697083,
+      "learning_rate": 2.2134846276494202e-07,
+      "loss": 1.4978,
+      "step": 1056
+    },
+    {
+      "epoch": 0.9721775120717406,
+      "grad_norm": 0.6373360753059387,
+      "learning_rate": 2.0773978263605166e-07,
+      "loss": 1.4766,
+      "step": 1057
+    },
+    {
+      "epoch": 0.9730972637387906,
+      "grad_norm": 0.6223818063735962,
+      "learning_rate": 1.9456194523554406e-07,
+      "loss": 1.4494,
+      "step": 1058
+    },
+    {
+      "epoch": 0.9740170154058404,
+      "grad_norm": 0.5713940858840942,
+      "learning_rate": 1.8181506458869736e-07,
+      "loss": 1.4204,
+      "step": 1059
+    },
+    {
+      "epoch": 0.9749367670728903,
+      "grad_norm": 0.6217029690742493,
+      "learning_rate": 1.69499250991767e-07,
+      "loss": 1.3679,
+      "step": 1060
+    },
+    {
+      "epoch": 0.9758565187399402,
+      "grad_norm": 0.5862613320350647,
+      "learning_rate": 1.576146110111032e-07,
+      "loss": 1.428,
+      "step": 1061
+    },
+    {
+      "epoch": 0.9767762704069901,
+      "grad_norm": 0.6091289520263672,
+      "learning_rate": 1.4616124748217385e-07,
+      "loss": 1.4198,
+      "step": 1062
+    },
+    {
+      "epoch": 0.97769602207404,
+      "grad_norm": 0.515957236289978,
+      "learning_rate": 1.351392595087042e-07,
+      "loss": 1.3173,
+      "step": 1063
+    },
+    {
+      "epoch": 0.9786157737410899,
+      "grad_norm": 0.5550262331962585,
+      "learning_rate": 1.245487424618108e-07,
+      "loss": 1.3209,
+      "step": 1064
+    },
+    {
+      "epoch": 0.9795355254081398,
+      "grad_norm": 0.6746039986610413,
+      "learning_rate": 1.1438978797916888e-07,
+      "loss": 1.3918,
+      "step": 1065
+    },
+    {
+      "epoch": 0.9804552770751896,
+      "grad_norm": 0.5552029013633728,
+      "learning_rate": 1.0466248396424073e-07,
+      "loss": 1.3515,
+      "step": 1066
+    },
+    {
+      "epoch": 0.9813750287422396,
+      "grad_norm": 0.6203471422195435,
+      "learning_rate": 9.536691458548741e-08,
+      "loss": 1.4412,
+      "step": 1067
+    },
+    {
+      "epoch": 0.9822947804092895,
+      "grad_norm": 0.5462220311164856,
+      "learning_rate": 8.650316027566386e-08,
+      "loss": 1.3411,
+      "step": 1068
+    },
+    {
+      "epoch": 0.9832145320763394,
+      "grad_norm": 0.5441474318504333,
+      "learning_rate": 7.807129773110822e-08,
+      "loss": 1.3372,
+      "step": 1069
+    },
+    {
+      "epoch": 0.9841342837433893,
+      "grad_norm": 0.6476730704307556,
+      "learning_rate": 7.007139991108135e-08,
+      "loss": 1.328,
+      "step": 1070
+    },
+    {
+      "epoch": 0.9850540354104392,
+      "grad_norm": 0.5515516400337219,
+      "learning_rate": 6.25035360371451e-08,
+      "loss": 1.372,
+      "step": 1071
+    },
+    {
+      "epoch": 0.9859737870774891,
+      "grad_norm": 0.6750530004501343,
+      "learning_rate": 5.536777159254603e-08,
+      "loss": 1.3563,
+      "step": 1072
+    },
+    {
+      "epoch": 0.986893538744539,
+      "grad_norm": 0.5728088021278381,
+      "learning_rate": 4.8664168321671534e-08,
+      "loss": 1.318,
+      "step": 1073
+    },
+    {
+      "epoch": 0.9878132904115888,
+      "grad_norm": 0.6397655606269836,
+      "learning_rate": 4.239278422948911e-08,
+      "loss": 1.286,
+      "step": 1074
+    },
+    {
+      "epoch": 0.9887330420786388,
+      "grad_norm": 0.5922835469245911,
+      "learning_rate": 3.655367358106343e-08,
+      "loss": 1.2589,
+      "step": 1075
+    },
+    {
+      "epoch": 0.9896527937456887,
+      "grad_norm": 0.6236873865127563,
+      "learning_rate": 3.1146886901090025e-08,
+      "loss": 1.2131,
+      "step": 1076
+    },
+    {
+      "epoch": 0.9905725454127385,
+      "grad_norm": 0.5909478664398193,
+      "learning_rate": 2.617247097342901e-08,
+      "loss": 1.2015,
+      "step": 1077
+    },
+    {
+      "epoch": 0.9914922970797885,
+      "grad_norm": 0.6112456321716309,
+      "learning_rate": 2.1630468840738714e-08,
+      "loss": 1.3083,
+      "step": 1078
+    },
+    {
+      "epoch": 0.9924120487468383,
+      "grad_norm": 0.6521921157836914,
+      "learning_rate": 1.7520919804075998e-08,
+      "loss": 1.2579,
+      "step": 1079
+    },
+    {
+      "epoch": 0.9933318004138882,
+      "grad_norm": 0.5833930373191833,
+      "learning_rate": 1.3843859422574268e-08,
+      "loss": 1.196,
+      "step": 1080
+    },
+    {
+      "epoch": 0.9942515520809382,
+      "grad_norm": 0.6652984619140625,
+      "learning_rate": 1.0599319513115991e-08,
+      "loss": 1.197,
+      "step": 1081
+    },
+    {
+      "epoch": 0.995171303747988,
+      "grad_norm": 0.6548473834991455,
+      "learning_rate": 7.787328150071771e-09,
+      "loss": 1.2061,
+      "step": 1082
+    },
+    {
+      "epoch": 0.996091055415038,
+      "grad_norm": 0.6595301628112793,
+      "learning_rate": 5.40790966505611e-09,
+      "loss": 1.1764,
+      "step": 1083
+    },
+    {
+      "epoch": 0.9970108070820879,
+      "grad_norm": 0.753955602645874,
+      "learning_rate": 3.4610846467109103e-09,
+      "loss": 1.1202,
+      "step": 1084
+    },
+    {
+      "epoch": 0.9979305587491377,
+      "grad_norm": 0.7465300559997559,
+      "learning_rate": 1.9468699405444934e-09,
+      "loss": 1.1407,
+      "step": 1085
+    },
+    {
+      "epoch": 0.9988503104161877,
+      "grad_norm": 0.8871564865112305,
+      "learning_rate": 8.652786487484132e-10,
+      "loss": 1.0533,
+      "step": 1086
+    },
+    {
+      "epoch": 0.9997700620832375,
+      "grad_norm": 1.0818983316421509,
+      "learning_rate": 2.1632013013084262e-10,
+      "loss": 0.9817,
+      "step": 1087
+    },
+    {
+      "epoch": 1.0006898137502873,
+      "grad_norm": 2.3892836570739746,
+      "learning_rate": 0.0,
+      "loss": 2.0309,
+      "step": 1088
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.9792675536371712e+17,
   "train_batch_size": 12,
   "trial_name": null,
   "trial_params": null