Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5f12846cc6c9806b6a5481dff7a086314c5c48195b30839cfd52430fb6a78ef
 size 319876032

 version https://git-lfs.github.com/spec/v1
+oid sha256:a007ef3819d0a180b2cc640168c0b397f14a7b2be24311a5b244e8f015424f6d
 size 319876032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6017f12c836c8d47c87de7b291d454102596f57b0ae4c5f99d75934c92015ed
 size 162933396

 version https://git-lfs.github.com/spec/v1
+oid sha256:206d31fd0e94ad4b5ee8616fb918d46b4119f29e50d8445063f977fbd1c44d08
 size 162933396

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae6c0974bdf9bd154da1755e1d0da5d2fc0f74569b6570d0efebb8a5b861e31d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:68fb56de6c6277dcb224030074007cc7a4f8ca144925b66a98fada92c578b58a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a89ffc445067fef9d6d02bb3ff9e61d5e3209e6fa67c7259b3b364b90dbaa2cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.7446192502975464,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.018563207722294413,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 8.005,
       "eval_steps_per_second": 2.003,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.954473927101645e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.5987274646759033,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.03712641544458883,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.005,
       "eval_steps_per_second": 2.003,
       "step": 50
+    },
+    {
+      "epoch": 0.0189344718767403,
+      "grad_norm": 5.353964328765869,
+      "learning_rate": 5.695865504800327e-05,
+      "loss": 8.9486,
+      "step": 51
+    },
+    {
+      "epoch": 0.01930573603118619,
+      "grad_norm": 6.079212188720703,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 9.9081,
+      "step": 52
+    },
+    {
+      "epoch": 0.019677000185632076,
+      "grad_norm": 7.1975932121276855,
+      "learning_rate": 5.348782368720626e-05,
+      "loss": 8.2391,
+      "step": 53
+    },
+    {
+      "epoch": 0.020048264340077965,
+      "grad_norm": 6.638981342315674,
+      "learning_rate": 5.174497483512506e-05,
+      "loss": 8.5821,
+      "step": 54
+    },
+    {
+      "epoch": 0.020419528494523855,
+      "grad_norm": 9.038630485534668,
+      "learning_rate": 5e-05,
+      "loss": 9.6055,
+      "step": 55
+    },
+    {
+      "epoch": 0.02079079264896974,
+      "grad_norm": 9.090941429138184,
+      "learning_rate": 4.825502516487497e-05,
+      "loss": 9.724,
+      "step": 56
+    },
+    {
+      "epoch": 0.02116205680341563,
+      "grad_norm": 7.1579108238220215,
+      "learning_rate": 4.6512176312793736e-05,
+      "loss": 7.7207,
+      "step": 57
+    },
+    {
+      "epoch": 0.021533320957861517,
+      "grad_norm": 8.75041389465332,
+      "learning_rate": 4.477357683661734e-05,
+      "loss": 8.7275,
+      "step": 58
+    },
+    {
+      "epoch": 0.021904585112307407,
+      "grad_norm": 6.073078155517578,
+      "learning_rate": 4.3041344951996746e-05,
+      "loss": 7.8166,
+      "step": 59
+    },
+    {
+      "epoch": 0.022275849266753293,
+      "grad_norm": 6.027640342712402,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 6.9793,
+      "step": 60
+    },
+    {
+      "epoch": 0.022647113421199183,
+      "grad_norm": 6.702777862548828,
+      "learning_rate": 3.960441545911204e-05,
+      "loss": 7.5993,
+      "step": 61
+    },
+    {
+      "epoch": 0.023018377575645073,
+      "grad_norm": 6.559276103973389,
+      "learning_rate": 3.790390522001662e-05,
+      "loss": 6.5231,
+      "step": 62
+    },
+    {
+      "epoch": 0.02338964173009096,
+      "grad_norm": 6.18765926361084,
+      "learning_rate": 3.6218132209150045e-05,
+      "loss": 6.9596,
+      "step": 63
+    },
+    {
+      "epoch": 0.02376090588453685,
+      "grad_norm": 5.145318508148193,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 6.4935,
+      "step": 64
+    },
+    {
+      "epoch": 0.024132170038982735,
+      "grad_norm": 4.60317850112915,
+      "learning_rate": 3.289899283371657e-05,
+      "loss": 6.1899,
+      "step": 65
+    },
+    {
+      "epoch": 0.024503434193428625,
+      "grad_norm": 4.280679225921631,
+      "learning_rate": 3.12696703292044e-05,
+      "loss": 6.1858,
+      "step": 66
+    },
+    {
+      "epoch": 0.02487469834787451,
+      "grad_norm": 4.138360023498535,
+      "learning_rate": 2.9663167846209998e-05,
+      "loss": 6.1128,
+      "step": 67
+    },
+    {
+      "epoch": 0.0252459625023204,
+      "grad_norm": 4.611503601074219,
+      "learning_rate": 2.8081442660546125e-05,
+      "loss": 6.3028,
+      "step": 68
+    },
+    {
+      "epoch": 0.02561722665676629,
+      "grad_norm": 4.154057502746582,
+      "learning_rate": 2.6526421860705473e-05,
+      "loss": 5.5962,
+      "step": 69
+    },
+    {
+      "epoch": 0.025988490811212177,
+      "grad_norm": 3.594250202178955,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 5.6007,
+      "step": 70
+    },
+    {
+      "epoch": 0.026359754965658067,
+      "grad_norm": 4.3070454597473145,
+      "learning_rate": 2.350403678833976e-05,
+      "loss": 6.1225,
+      "step": 71
+    },
+    {
+      "epoch": 0.026731019120103953,
+      "grad_norm": 4.306888580322266,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 6.8154,
+      "step": 72
+    },
+    {
+      "epoch": 0.027102283274549843,
+      "grad_norm": 3.854240655899048,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 5.8671,
+      "step": 73
+    },
+    {
+      "epoch": 0.02747354742899573,
+      "grad_norm": 3.8052146434783936,
+      "learning_rate": 1.9216926233717085e-05,
+      "loss": 6.1667,
+      "step": 74
+    },
+    {
+      "epoch": 0.02784481158344162,
+      "grad_norm": 3.809180974960327,
+      "learning_rate": 1.7860619515673033e-05,
+      "loss": 5.8298,
+      "step": 75
+    },
+    {
+      "epoch": 0.028216075737887508,
+      "grad_norm": 3.352525234222412,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 5.3726,
+      "step": 76
+    },
+    {
+      "epoch": 0.028587339892333394,
+      "grad_norm": 3.5558321475982666,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 6.2531,
+      "step": 77
+    },
+    {
+      "epoch": 0.028958604046779284,
+      "grad_norm": 3.92938494682312,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 5.5628,
+      "step": 78
+    },
+    {
+      "epoch": 0.02932986820122517,
+      "grad_norm": 3.702071189880371,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 5.8864,
+      "step": 79
+    },
+    {
+      "epoch": 0.02970113235567106,
+      "grad_norm": 4.398648262023926,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 6.06,
+      "step": 80
+    },
+    {
+      "epoch": 0.03007239651011695,
+      "grad_norm": 3.4324252605438232,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 5.1271,
+      "step": 81
+    },
+    {
+      "epoch": 0.030443660664562836,
+      "grad_norm": 3.377044439315796,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 5.4419,
+      "step": 82
+    },
+    {
+      "epoch": 0.030814924819008726,
+      "grad_norm": 3.836601734161377,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 5.5232,
+      "step": 83
+    },
+    {
+      "epoch": 0.031186188973454612,
+      "grad_norm": 4.520117282867432,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 6.1833,
+      "step": 84
+    },
+    {
+      "epoch": 0.0315574531279005,
+      "grad_norm": 3.46451473236084,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 5.3309,
+      "step": 85
+    },
+    {
+      "epoch": 0.03192871728234639,
+      "grad_norm": 4.2184157371521,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 5.6528,
+      "step": 86
+    },
+    {
+      "epoch": 0.032299981436792274,
+      "grad_norm": 4.261045455932617,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 6.3858,
+      "step": 87
+    },
+    {
+      "epoch": 0.032671245591238164,
+      "grad_norm": 3.678689479827881,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 5.5889,
+      "step": 88
+    },
+    {
+      "epoch": 0.033042509745684054,
+      "grad_norm": 4.6800665855407715,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 6.5155,
+      "step": 89
+    },
+    {
+      "epoch": 0.033413773900129944,
+      "grad_norm": 4.469486713409424,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 6.2689,
+      "step": 90
+    },
+    {
+      "epoch": 0.03378503805457583,
+      "grad_norm": 3.791959762573242,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 6.4441,
+      "step": 91
+    },
+    {
+      "epoch": 0.034156302209021716,
+      "grad_norm": 3.8623321056365967,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 6.4281,
+      "step": 92
+    },
+    {
+      "epoch": 0.034527566363467606,
+      "grad_norm": 6.107036113739014,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 6.3856,
+      "step": 93
+    },
+    {
+      "epoch": 0.034898830517913496,
+      "grad_norm": 3.526797294616699,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 5.6764,
+      "step": 94
+    },
+    {
+      "epoch": 0.035270094672359385,
+      "grad_norm": 4.093867301940918,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 5.6925,
+      "step": 95
+    },
+    {
+      "epoch": 0.035641358826805275,
+      "grad_norm": 3.6119606494903564,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 5.3956,
+      "step": 96
+    },
+    {
+      "epoch": 0.03601262298125116,
+      "grad_norm": 4.349273681640625,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 6.1551,
+      "step": 97
+    },
+    {
+      "epoch": 0.03638388713569705,
+      "grad_norm": 3.9628746509552,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 5.6607,
+      "step": 98
+    },
+    {
+      "epoch": 0.03675515129014294,
+      "grad_norm": 4.282620906829834,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 6.0429,
+      "step": 99
+    },
+    {
+      "epoch": 0.03712641544458883,
+      "grad_norm": 4.604611873626709,
+      "learning_rate": 0.0,
+      "loss": 6.4466,
+      "step": 100
+    },
+    {
+      "epoch": 0.03712641544458883,
+      "eval_loss": 1.5987274646759033,
+      "eval_runtime": 565.1895,
+      "eval_samples_per_second": 8.027,
+      "eval_steps_per_second": 2.008,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5777468946317312e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null