Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +362 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8895600a81c9aa3d0d5c497a7f4c267843a1aa6c53aaacebeb0ce264dbe4484b
 size 34456

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b744a1a38fc655a81047bef52add24cea1eed792fdba9a959977ebcbdeb001c
 size 34456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27d1e3be788bd7e51b0e486a9baa2de4820f7d8277f4f1f37ad0bcec5fedf456
 size 73222

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd3cc3aa882b8644815d67056d23706e7b0f78354c2dd96df6d6b06840a7cbbf
 size 73222

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46a12a5e8afd30c780590eb2445717bbb87066db78b3c939bc2c7f33f2825041
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f92b3eb1a6a8e1aca605a46fb67164cc3f3b8706561cae8d8417b91aea7fbc0d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ecea9fcf3691efde7f6f95771ff564391b1328126afca676a14949850bebad
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:22c9dfa784729c93db12d225bfc25c64e7ae0e1e9f4be7b45dc255fae6ea42c4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.003637818763869184,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 76.599,
       "eval_steps_per_second": 19.151,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -745,7 +1103,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4039906885632.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.005456728145803776,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 76.599,
       "eval_steps_per_second": 19.151,
       "step": 100
+    },
+    {
+      "epoch": 0.0036741969515078758,
+      "grad_norm": 7.461366476491094e-05,
+      "learning_rate": 5.226157894736842e-05,
+      "loss": 23.0,
+      "step": 101
+    },
+    {
+      "epoch": 0.0037105751391465678,
+      "grad_norm": 0.00017520117398817092,
+      "learning_rate": 5.173368421052632e-05,
+      "loss": 23.0,
+      "step": 102
+    },
+    {
+      "epoch": 0.0037469533267852594,
+      "grad_norm": 0.00017740805924404413,
+      "learning_rate": 5.1205789473684216e-05,
+      "loss": 23.0,
+      "step": 103
+    },
+    {
+      "epoch": 0.0037833315144239514,
+      "grad_norm": 0.00010646147711668164,
+      "learning_rate": 5.067789473684211e-05,
+      "loss": 23.0,
+      "step": 104
+    },
+    {
+      "epoch": 0.0038197097020626434,
+      "grad_norm": 0.0001494868629379198,
+      "learning_rate": 5.015e-05,
+      "loss": 23.0,
+      "step": 105
+    },
+    {
+      "epoch": 0.003856087889701335,
+      "grad_norm": 0.00012090586824342608,
+      "learning_rate": 4.962210526315789e-05,
+      "loss": 23.0,
+      "step": 106
+    },
+    {
+      "epoch": 0.003892466077340027,
+      "grad_norm": 0.00021911323710810393,
+      "learning_rate": 4.909421052631579e-05,
+      "loss": 23.0,
+      "step": 107
+    },
+    {
+      "epoch": 0.003928844264978719,
+      "grad_norm": 0.0003465862537268549,
+      "learning_rate": 4.856631578947368e-05,
+      "loss": 23.0,
+      "step": 108
+    },
+    {
+      "epoch": 0.003965222452617411,
+      "grad_norm": 0.0003293559711892158,
+      "learning_rate": 4.803842105263158e-05,
+      "loss": 23.0,
+      "step": 109
+    },
+    {
+      "epoch": 0.004001600640256103,
+      "grad_norm": 0.0002986920881085098,
+      "learning_rate": 4.751052631578947e-05,
+      "loss": 23.0,
+      "step": 110
+    },
+    {
+      "epoch": 0.004037978827894795,
+      "grad_norm": 0.00022487634851131588,
+      "learning_rate": 4.698263157894737e-05,
+      "loss": 23.0,
+      "step": 111
+    },
+    {
+      "epoch": 0.004074357015533486,
+      "grad_norm": 0.00037544663064181805,
+      "learning_rate": 4.645473684210526e-05,
+      "loss": 23.0,
+      "step": 112
+    },
+    {
+      "epoch": 0.004110735203172178,
+      "grad_norm": 0.0001820535253500566,
+      "learning_rate": 4.592684210526315e-05,
+      "loss": 23.0,
+      "step": 113
+    },
+    {
+      "epoch": 0.00414711339081087,
+      "grad_norm": 0.00031552044674754143,
+      "learning_rate": 4.539894736842105e-05,
+      "loss": 23.0,
+      "step": 114
+    },
+    {
+      "epoch": 0.004183491578449562,
+      "grad_norm": 8.148902998073027e-05,
+      "learning_rate": 4.487105263157895e-05,
+      "loss": 23.0,
+      "step": 115
+    },
+    {
+      "epoch": 0.004219869766088254,
+      "grad_norm": 0.00017857461352832615,
+      "learning_rate": 4.434315789473684e-05,
+      "loss": 23.0,
+      "step": 116
+    },
+    {
+      "epoch": 0.004256247953726945,
+      "grad_norm": 0.00020005616534035653,
+      "learning_rate": 4.381526315789474e-05,
+      "loss": 23.0,
+      "step": 117
+    },
+    {
+      "epoch": 0.004292626141365637,
+      "grad_norm": 0.00036300913779996336,
+      "learning_rate": 4.328736842105263e-05,
+      "loss": 23.0,
+      "step": 118
+    },
+    {
+      "epoch": 0.004329004329004329,
+      "grad_norm": 0.0003054387343581766,
+      "learning_rate": 4.2759473684210523e-05,
+      "loss": 23.0,
+      "step": 119
+    },
+    {
+      "epoch": 0.004365382516643021,
+      "grad_norm": 0.0002077149401884526,
+      "learning_rate": 4.2231578947368415e-05,
+      "loss": 23.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.004401760704281713,
+      "grad_norm": 0.0002281182969454676,
+      "learning_rate": 4.1703684210526314e-05,
+      "loss": 23.0,
+      "step": 121
+    },
+    {
+      "epoch": 0.004438138891920404,
+      "grad_norm": 0.0005008115549571812,
+      "learning_rate": 4.117578947368421e-05,
+      "loss": 23.0,
+      "step": 122
+    },
+    {
+      "epoch": 0.004474517079559096,
+      "grad_norm": 0.00035556236980482936,
+      "learning_rate": 4.0647894736842104e-05,
+      "loss": 23.0,
+      "step": 123
+    },
+    {
+      "epoch": 0.004510895267197788,
+      "grad_norm": 0.0005603492027148604,
+      "learning_rate": 4.012e-05,
+      "loss": 23.0,
+      "step": 124
+    },
+    {
+      "epoch": 0.00454727345483648,
+      "grad_norm": 0.0004926612018607557,
+      "learning_rate": 3.9592105263157894e-05,
+      "loss": 23.0,
+      "step": 125
+    },
+    {
+      "epoch": 0.004583651642475172,
+      "grad_norm": 0.0004209111793898046,
+      "learning_rate": 3.9064210526315785e-05,
+      "loss": 23.0,
+      "step": 126
+    },
+    {
+      "epoch": 0.004620029830113863,
+      "grad_norm": 0.00046484937774948776,
+      "learning_rate": 3.8536315789473684e-05,
+      "loss": 23.0,
+      "step": 127
+    },
+    {
+      "epoch": 0.004656408017752555,
+      "grad_norm": 0.0005833171890117228,
+      "learning_rate": 3.800842105263158e-05,
+      "loss": 23.0,
+      "step": 128
+    },
+    {
+      "epoch": 0.004692786205391247,
+      "grad_norm": 0.0005073467036709189,
+      "learning_rate": 3.7480526315789474e-05,
+      "loss": 23.0,
+      "step": 129
+    },
+    {
+      "epoch": 0.0047291643930299394,
+      "grad_norm": 0.0002727651153691113,
+      "learning_rate": 3.6952631578947366e-05,
+      "loss": 23.0,
+      "step": 130
+    },
+    {
+      "epoch": 0.0047655425806686314,
+      "grad_norm": 0.000585235480684787,
+      "learning_rate": 3.6424736842105264e-05,
+      "loss": 23.0,
+      "step": 131
+    },
+    {
+      "epoch": 0.004801920768307323,
+      "grad_norm": 0.0002120441640727222,
+      "learning_rate": 3.5896842105263156e-05,
+      "loss": 23.0,
+      "step": 132
+    },
+    {
+      "epoch": 0.004838298955946015,
+      "grad_norm": 0.0004325111221987754,
+      "learning_rate": 3.536894736842105e-05,
+      "loss": 23.0,
+      "step": 133
+    },
+    {
+      "epoch": 0.004874677143584707,
+      "grad_norm": 0.00032356969313696027,
+      "learning_rate": 3.4841052631578946e-05,
+      "loss": 23.0,
+      "step": 134
+    },
+    {
+      "epoch": 0.004911055331223399,
+      "grad_norm": 0.000456592213595286,
+      "learning_rate": 3.4313157894736844e-05,
+      "loss": 23.0,
+      "step": 135
+    },
+    {
+      "epoch": 0.004947433518862091,
+      "grad_norm": 0.0007706891628913581,
+      "learning_rate": 3.3785263157894736e-05,
+      "loss": 23.0,
+      "step": 136
+    },
+    {
+      "epoch": 0.004983811706500782,
+      "grad_norm": 0.000517175649292767,
+      "learning_rate": 3.325736842105263e-05,
+      "loss": 23.0,
+      "step": 137
+    },
+    {
+      "epoch": 0.005020189894139474,
+      "grad_norm": 0.000617575307842344,
+      "learning_rate": 3.2729473684210526e-05,
+      "loss": 23.0,
+      "step": 138
+    },
+    {
+      "epoch": 0.005056568081778166,
+      "grad_norm": 0.0002448149025440216,
+      "learning_rate": 3.220157894736842e-05,
+      "loss": 23.0,
+      "step": 139
+    },
+    {
+      "epoch": 0.005092946269416858,
+      "grad_norm": 0.0004209695034660399,
+      "learning_rate": 3.1673684210526316e-05,
+      "loss": 23.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.00512932445705555,
+      "grad_norm": 0.0003657886991277337,
+      "learning_rate": 3.1145789473684215e-05,
+      "loss": 23.0,
+      "step": 141
+    },
+    {
+      "epoch": 0.005165702644694241,
+      "grad_norm": 0.0008267536177299917,
+      "learning_rate": 3.0617894736842107e-05,
+      "loss": 23.0,
+      "step": 142
+    },
+    {
+      "epoch": 0.005202080832332933,
+      "grad_norm": 0.0008407800341956317,
+      "learning_rate": 3.0089999999999998e-05,
+      "loss": 23.0,
+      "step": 143
+    },
+    {
+      "epoch": 0.005238459019971625,
+      "grad_norm": 0.0005053975037299097,
+      "learning_rate": 2.956210526315789e-05,
+      "loss": 23.0,
+      "step": 144
+    },
+    {
+      "epoch": 0.005274837207610317,
+      "grad_norm": 0.00048378558130934834,
+      "learning_rate": 2.9034210526315792e-05,
+      "loss": 23.0,
+      "step": 145
+    },
+    {
+      "epoch": 0.005311215395249009,
+      "grad_norm": 0.0007292490336112678,
+      "learning_rate": 2.8506315789473683e-05,
+      "loss": 23.0,
+      "step": 146
+    },
+    {
+      "epoch": 0.0053475935828877,
+      "grad_norm": 0.000520595523994416,
+      "learning_rate": 2.7978421052631575e-05,
+      "loss": 23.0,
+      "step": 147
+    },
+    {
+      "epoch": 0.005383971770526392,
+      "grad_norm": 0.0005500400438904762,
+      "learning_rate": 2.7450526315789474e-05,
+      "loss": 23.0,
+      "step": 148
+    },
+    {
+      "epoch": 0.005420349958165084,
+      "grad_norm": 0.00027113532996736467,
+      "learning_rate": 2.692263157894737e-05,
+      "loss": 23.0,
+      "step": 149
+    },
+    {
+      "epoch": 0.005456728145803776,
+      "grad_norm": 0.00021608092356473207,
+      "learning_rate": 2.639473684210526e-05,
+      "loss": 23.0,
+      "step": 150
+    },
+    {
+      "epoch": 0.005456728145803776,
+      "eval_loss": 11.5,
+      "eval_runtime": 151.1109,
+      "eval_samples_per_second": 76.599,
+      "eval_steps_per_second": 19.152,
+      "step": 150
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 6049810808832.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null