Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +362 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84da01682d68fc6b885d230458b56dcfb3f88928699dc88780600fe256b536c3
 size 34456

 version https://git-lfs.github.com/spec/v1
+oid sha256:8895600a81c9aa3d0d5c497a7f4c267843a1aa6c53aaacebeb0ce264dbe4484b
 size 34456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f12395374f893550dfb4231921f3a78786efdddefb0da25f6b7b0178027a62ba
 size 73222

 version https://git-lfs.github.com/spec/v1
+oid sha256:27d1e3be788bd7e51b0e486a9baa2de4820f7d8277f4f1f37ad0bcec5fedf456
 size 73222

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:881608cbb07448770a5e76289b30723e66073325ba435fcdc41b961bd5ae0ba0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:46a12a5e8afd30c780590eb2445717bbb87066db78b3c939bc2c7f33f2825041
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b10e9e3e5958a34724fb544f11514b9a1b966537b22b0051516d96de8ed7dbc2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8ecea9fcf3691efde7f6f95771ff564391b1328126afca676a14949850bebad
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.001818909381934592,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 76.573,
       "eval_steps_per_second": 19.145,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -387,7 +745,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2030002962432.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.003637818763869184,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 76.573,
       "eval_steps_per_second": 19.145,
       "step": 50
+    },
+    {
+      "epoch": 0.0018552875695732839,
+      "grad_norm": 7.084694516379386e-05,
+      "learning_rate": 7.865631578947369e-05,
+      "loss": 23.0,
+      "step": 51
+    },
+    {
+      "epoch": 0.0018916657572119757,
+      "grad_norm": 5.6916571338661015e-05,
+      "learning_rate": 7.812842105263157e-05,
+      "loss": 23.0,
+      "step": 52
+    },
+    {
+      "epoch": 0.0019280439448506675,
+      "grad_norm": 0.00013821788888890296,
+      "learning_rate": 7.760052631578947e-05,
+      "loss": 23.0,
+      "step": 53
+    },
+    {
+      "epoch": 0.0019644221324893593,
+      "grad_norm": 0.00010073825251311064,
+      "learning_rate": 7.707263157894737e-05,
+      "loss": 23.0,
+      "step": 54
+    },
+    {
+      "epoch": 0.0020008003201280513,
+      "grad_norm": 3.989057950093411e-05,
+      "learning_rate": 7.654473684210527e-05,
+      "loss": 23.0,
+      "step": 55
+    },
+    {
+      "epoch": 0.002037178507766743,
+      "grad_norm": 0.00011672514665406197,
+      "learning_rate": 7.601684210526316e-05,
+      "loss": 23.0,
+      "step": 56
+    },
+    {
+      "epoch": 0.002073556695405435,
+      "grad_norm": 0.00011147064651595429,
+      "learning_rate": 7.548894736842105e-05,
+      "loss": 23.0,
+      "step": 57
+    },
+    {
+      "epoch": 0.002109934883044127,
+      "grad_norm": 0.00014741995255462825,
+      "learning_rate": 7.496105263157895e-05,
+      "loss": 23.0,
+      "step": 58
+    },
+    {
+      "epoch": 0.0021463130706828185,
+      "grad_norm": 0.00019834299746435136,
+      "learning_rate": 7.443315789473683e-05,
+      "loss": 23.0,
+      "step": 59
+    },
+    {
+      "epoch": 0.0021826912583215105,
+      "grad_norm": 7.386082143057138e-05,
+      "learning_rate": 7.390526315789473e-05,
+      "loss": 23.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.002219069445960202,
+      "grad_norm": 0.00012008604971924797,
+      "learning_rate": 7.337736842105263e-05,
+      "loss": 23.0,
+      "step": 61
+    },
+    {
+      "epoch": 0.002255447633598894,
+      "grad_norm": 0.00010102576925419271,
+      "learning_rate": 7.284947368421053e-05,
+      "loss": 23.0,
+      "step": 62
+    },
+    {
+      "epoch": 0.002291825821237586,
+      "grad_norm": 8.834318578010425e-05,
+      "learning_rate": 7.232157894736843e-05,
+      "loss": 23.0,
+      "step": 63
+    },
+    {
+      "epoch": 0.0023282040088762777,
+      "grad_norm": 0.00017303289496339858,
+      "learning_rate": 7.179368421052631e-05,
+      "loss": 23.0,
+      "step": 64
+    },
+    {
+      "epoch": 0.0023645821965149697,
+      "grad_norm": 0.00021577121515292674,
+      "learning_rate": 7.126578947368421e-05,
+      "loss": 23.0,
+      "step": 65
+    },
+    {
+      "epoch": 0.0024009603841536613,
+      "grad_norm": 0.00012508737563621253,
+      "learning_rate": 7.07378947368421e-05,
+      "loss": 23.0,
+      "step": 66
+    },
+    {
+      "epoch": 0.0024373385717923533,
+      "grad_norm": 0.00021092577662784606,
+      "learning_rate": 7.021e-05,
+      "loss": 23.0,
+      "step": 67
+    },
+    {
+      "epoch": 0.0024737167594310453,
+      "grad_norm": 9.400040289619938e-05,
+      "learning_rate": 6.968210526315789e-05,
+      "loss": 23.0,
+      "step": 68
+    },
+    {
+      "epoch": 0.002510094947069737,
+      "grad_norm": 0.00022102220100350678,
+      "learning_rate": 6.915421052631579e-05,
+      "loss": 23.0,
+      "step": 69
+    },
+    {
+      "epoch": 0.002546473134708429,
+      "grad_norm": 0.00022005224309396,
+      "learning_rate": 6.862631578947369e-05,
+      "loss": 23.0,
+      "step": 70
+    },
+    {
+      "epoch": 0.0025828513223471205,
+      "grad_norm": 0.0001787764485925436,
+      "learning_rate": 6.809842105263157e-05,
+      "loss": 23.0,
+      "step": 71
+    },
+    {
+      "epoch": 0.0026192295099858125,
+      "grad_norm": 0.00012445311585906893,
+      "learning_rate": 6.757052631578947e-05,
+      "loss": 23.0,
+      "step": 72
+    },
+    {
+      "epoch": 0.0026556076976245045,
+      "grad_norm": 0.00024370021128561348,
+      "learning_rate": 6.704263157894737e-05,
+      "loss": 23.0,
+      "step": 73
+    },
+    {
+      "epoch": 0.002691985885263196,
+      "grad_norm": 0.00029588048346340656,
+      "learning_rate": 6.651473684210526e-05,
+      "loss": 23.0,
+      "step": 74
+    },
+    {
+      "epoch": 0.002728364072901888,
+      "grad_norm": 0.00026139113469980657,
+      "learning_rate": 6.598684210526317e-05,
+      "loss": 23.0,
+      "step": 75
+    },
+    {
+      "epoch": 0.0027647422605405797,
+      "grad_norm": 9.695452899904922e-05,
+      "learning_rate": 6.545894736842105e-05,
+      "loss": 23.0,
+      "step": 76
+    },
+    {
+      "epoch": 0.0028011204481792717,
+      "grad_norm": 0.00018712144810706377,
+      "learning_rate": 6.493105263157895e-05,
+      "loss": 23.0,
+      "step": 77
+    },
+    {
+      "epoch": 0.0028374986358179637,
+      "grad_norm": 0.00016666650481056422,
+      "learning_rate": 6.440315789473684e-05,
+      "loss": 23.0,
+      "step": 78
+    },
+    {
+      "epoch": 0.0028738768234566553,
+      "grad_norm": 0.00029639736749231815,
+      "learning_rate": 6.387526315789473e-05,
+      "loss": 23.0,
+      "step": 79
+    },
+    {
+      "epoch": 0.0029102550110953473,
+      "grad_norm": 0.00020341298659332097,
+      "learning_rate": 6.334736842105263e-05,
+      "loss": 23.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.002946633198734039,
+      "grad_norm": 0.0003866904880851507,
+      "learning_rate": 6.281947368421052e-05,
+      "loss": 23.0,
+      "step": 81
+    },
+    {
+      "epoch": 0.002983011386372731,
+      "grad_norm": 0.0003410225035622716,
+      "learning_rate": 6.229157894736843e-05,
+      "loss": 23.0,
+      "step": 82
+    },
+    {
+      "epoch": 0.003019389574011423,
+      "grad_norm": 0.0003634164750110358,
+      "learning_rate": 6.176368421052631e-05,
+      "loss": 23.0,
+      "step": 83
+    },
+    {
+      "epoch": 0.0030557677616501145,
+      "grad_norm": 0.00019395571143832058,
+      "learning_rate": 6.123578947368421e-05,
+      "loss": 23.0,
+      "step": 84
+    },
+    {
+      "epoch": 0.0030921459492888066,
+      "grad_norm": 0.00014450523303821683,
+      "learning_rate": 6.0707894736842105e-05,
+      "loss": 23.0,
+      "step": 85
+    },
+    {
+      "epoch": 0.003128524136927498,
+      "grad_norm": 0.0003423929156269878,
+      "learning_rate": 6.0179999999999996e-05,
+      "loss": 23.0,
+      "step": 86
+    },
+    {
+      "epoch": 0.00316490232456619,
+      "grad_norm": 0.00019756775873247534,
+      "learning_rate": 5.965210526315789e-05,
+      "loss": 23.0,
+      "step": 87
+    },
+    {
+      "epoch": 0.003201280512204882,
+      "grad_norm": 0.0003216055629309267,
+      "learning_rate": 5.912421052631578e-05,
+      "loss": 23.0,
+      "step": 88
+    },
+    {
+      "epoch": 0.0032376586998435737,
+      "grad_norm": 0.0005142286536283791,
+      "learning_rate": 5.8596315789473685e-05,
+      "loss": 23.0,
+      "step": 89
+    },
+    {
+      "epoch": 0.0032740368874822658,
+      "grad_norm": 0.00019845775386784226,
+      "learning_rate": 5.8068421052631583e-05,
+      "loss": 23.0,
+      "step": 90
+    },
+    {
+      "epoch": 0.0033104150751209573,
+      "grad_norm": 0.0002886882866732776,
+      "learning_rate": 5.7540526315789475e-05,
+      "loss": 23.0,
+      "step": 91
+    },
+    {
+      "epoch": 0.0033467932627596494,
+      "grad_norm": 0.0003776240337174386,
+      "learning_rate": 5.701263157894737e-05,
+      "loss": 23.0,
+      "step": 92
+    },
+    {
+      "epoch": 0.003383171450398341,
+      "grad_norm": 0.00038095013587735593,
+      "learning_rate": 5.648473684210526e-05,
+      "loss": 23.0,
+      "step": 93
+    },
+    {
+      "epoch": 0.003419549638037033,
+      "grad_norm": 0.0003583188517950475,
+      "learning_rate": 5.595684210526315e-05,
+      "loss": 23.0,
+      "step": 94
+    },
+    {
+      "epoch": 0.003455927825675725,
+      "grad_norm": 0.0004853124264627695,
+      "learning_rate": 5.5428947368421055e-05,
+      "loss": 23.0,
+      "step": 95
+    },
+    {
+      "epoch": 0.0034923060133144166,
+      "grad_norm": 0.00038572377525269985,
+      "learning_rate": 5.490105263157895e-05,
+      "loss": 23.0,
+      "step": 96
+    },
+    {
+      "epoch": 0.0035286842009531086,
+      "grad_norm": 0.0003467136702965945,
+      "learning_rate": 5.4373157894736846e-05,
+      "loss": 23.0,
+      "step": 97
+    },
+    {
+      "epoch": 0.0035650623885918,
+      "grad_norm": 0.0002495538501534611,
+      "learning_rate": 5.384526315789474e-05,
+      "loss": 23.0,
+      "step": 98
+    },
+    {
+      "epoch": 0.003601440576230492,
+      "grad_norm": 0.0007139640511013567,
+      "learning_rate": 5.331736842105263e-05,
+      "loss": 23.0,
+      "step": 99
+    },
+    {
+      "epoch": 0.003637818763869184,
+      "grad_norm": 0.0005742310895584524,
+      "learning_rate": 5.278947368421052e-05,
+      "loss": 23.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.003637818763869184,
+      "eval_loss": 11.5,
+      "eval_runtime": 151.1117,
+      "eval_samples_per_second": 76.599,
+      "eval_steps_per_second": 19.151,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4039906885632.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null