Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +362 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23e9883c260f5b0516657dfbdc3636f9366bdc515216a0253d9dcaffb4868440
 size 578859568

 version https://git-lfs.github.com/spec/v1
+oid sha256:330ebf2fc78d7f6f8cb0c815f67bcae506da819a7aff7e37748efa931b16d910
 size 578859568

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08d754e71c35b2b1437c7ec1e9f70bfb649c9a1d5cd1d0f876ccb4de39364f9c
 size 294324372

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b00ebe9b91258a190a07ebaabacfcadcdf9d0e664ff19ed32400b152a5e4f7c
 size 294324372

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5cbdb418bfe177481a57dee10d69bc3dfe62adc04e736d6907f01d4081f733a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:632841951b590cd1045c6936a248754bca8a52877d07e548317e1fb181970885
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:864f14d0e8e5d09ac48cddd1c1049b37207b552f2c3cff0a6a1d5654b8bd81f6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9f081c44b322e29d9c3ab667f1f86bff894c62055bb3540dff4b22ee74b02c5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.9230208396911621,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.0013898444069186454,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 11.549,
       "eval_steps_per_second": 5.784,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -387,7 +745,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3300295509540864.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.9230208396911621,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.0027796888138372907,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.549,
       "eval_steps_per_second": 5.784,
       "step": 50
+    },
+    {
+      "epoch": 0.0014176412950570184,
+      "grad_norm": 0.1536542773246765,
+      "learning_rate": 0.001965648708885559,
+      "loss": 1.0933,
+      "step": 51
+    },
+    {
+      "epoch": 0.0014454381831953912,
+      "grad_norm": 0.1058596670627594,
+      "learning_rate": 0.001963962860695853,
+      "loss": 1.0704,
+      "step": 52
+    },
+    {
+      "epoch": 0.0014732350713337642,
+      "grad_norm": 0.08224259316921234,
+      "learning_rate": 0.001962237387768529,
+      "loss": 1.1229,
+      "step": 53
+    },
+    {
+      "epoch": 0.001501031959472137,
+      "grad_norm": 0.08697132021188736,
+      "learning_rate": 0.0019604723610310193,
+      "loss": 0.9118,
+      "step": 54
+    },
+    {
+      "epoch": 0.00152882884761051,
+      "grad_norm": 0.08103405684232712,
+      "learning_rate": 0.0019586678530366607,
+      "loss": 0.946,
+      "step": 55
+    },
+    {
+      "epoch": 0.0015566257357488829,
+      "grad_norm": 0.07975370436906815,
+      "learning_rate": 0.0019568239379617086,
+      "loss": 0.9038,
+      "step": 56
+    },
+    {
+      "epoch": 0.001584422623887256,
+      "grad_norm": 0.07595320045948029,
+      "learning_rate": 0.0019549406916022907,
+      "loss": 0.8122,
+      "step": 57
+    },
+    {
+      "epoch": 0.0016122195120256287,
+      "grad_norm": 0.08768190443515778,
+      "learning_rate": 0.0019530181913712872,
+      "loss": 0.8606,
+      "step": 58
+    },
+    {
+      "epoch": 0.0016400164001640015,
+      "grad_norm": 0.09886912256479263,
+      "learning_rate": 0.0019510565162951536,
+      "loss": 0.8902,
+      "step": 59
+    },
+    {
+      "epoch": 0.0016678132883023746,
+      "grad_norm": 0.08801861107349396,
+      "learning_rate": 0.0019490557470106687,
+      "loss": 0.9151,
+      "step": 60
+    },
+    {
+      "epoch": 0.0016956101764407474,
+      "grad_norm": 0.08995132893323898,
+      "learning_rate": 0.0019470159657616214,
+      "loss": 0.7793,
+      "step": 61
+    },
+    {
+      "epoch": 0.0017234070645791204,
+      "grad_norm": 0.08425740152597427,
+      "learning_rate": 0.0019449372563954293,
+      "loss": 0.7646,
+      "step": 62
+    },
+    {
+      "epoch": 0.0017512039527174932,
+      "grad_norm": 0.11669428646564484,
+      "learning_rate": 0.001942819704359693,
+      "loss": 0.9056,
+      "step": 63
+    },
+    {
+      "epoch": 0.0017790008408558663,
+      "grad_norm": 0.08303668349981308,
+      "learning_rate": 0.0019406633966986826,
+      "loss": 0.7583,
+      "step": 64
+    },
+    {
+      "epoch": 0.001806797728994239,
+      "grad_norm": 0.08263064175844193,
+      "learning_rate": 0.0019384684220497604,
+      "loss": 0.9233,
+      "step": 65
+    },
+    {
+      "epoch": 0.001834594617132612,
+      "grad_norm": 0.08262008428573608,
+      "learning_rate": 0.0019362348706397372,
+      "loss": 0.8359,
+      "step": 66
+    },
+    {
+      "epoch": 0.001862391505270985,
+      "grad_norm": 0.10420376062393188,
+      "learning_rate": 0.0019339628342811633,
+      "loss": 0.9978,
+      "step": 67
+    },
+    {
+      "epoch": 0.0018901883934093577,
+      "grad_norm": 0.0830477699637413,
+      "learning_rate": 0.001931652406368554,
+      "loss": 0.8834,
+      "step": 68
+    },
+    {
+      "epoch": 0.0019179852815477307,
+      "grad_norm": 0.08504804968833923,
+      "learning_rate": 0.0019293036818745519,
+      "loss": 0.9164,
+      "step": 69
+    },
+    {
+      "epoch": 0.0019457821696861036,
+      "grad_norm": 0.08910652250051498,
+      "learning_rate": 0.0019269167573460217,
+      "loss": 0.9095,
+      "step": 70
+    },
+    {
+      "epoch": 0.0019735790578244766,
+      "grad_norm": 0.09257230162620544,
+      "learning_rate": 0.0019244917309000815,
+      "loss": 0.7138,
+      "step": 71
+    },
+    {
+      "epoch": 0.0020013759459628494,
+      "grad_norm": 0.09553885459899902,
+      "learning_rate": 0.0019220287022200706,
+      "loss": 0.9544,
+      "step": 72
+    },
+    {
+      "epoch": 0.002029172834101222,
+      "grad_norm": 0.08890817314386368,
+      "learning_rate": 0.0019195277725514508,
+      "loss": 0.7013,
+      "step": 73
+    },
+    {
+      "epoch": 0.0020569697222395955,
+      "grad_norm": 0.10616549849510193,
+      "learning_rate": 0.0019169890446976451,
+      "loss": 0.7119,
+      "step": 74
+    },
+    {
+      "epoch": 0.0020847666103779683,
+      "grad_norm": 0.09758912026882172,
+      "learning_rate": 0.0019144126230158124,
+      "loss": 0.811,
+      "step": 75
+    },
+    {
+      "epoch": 0.002112563498516341,
+      "grad_norm": 0.09248580783605576,
+      "learning_rate": 0.001911798613412557,
+      "loss": 0.8025,
+      "step": 76
+    },
+    {
+      "epoch": 0.002140360386654714,
+      "grad_norm": 0.09431200474500656,
+      "learning_rate": 0.001909147123339575,
+      "loss": 0.7038,
+      "step": 77
+    },
+    {
+      "epoch": 0.0021681572747930867,
+      "grad_norm": 0.09258091449737549,
+      "learning_rate": 0.001906458261789238,
+      "loss": 0.7752,
+      "step": 78
+    },
+    {
+      "epoch": 0.00219595416293146,
+      "grad_norm": 0.08860747516155243,
+      "learning_rate": 0.0019037321392901135,
+      "loss": 0.6832,
+      "step": 79
+    },
+    {
+      "epoch": 0.0022237510510698328,
+      "grad_norm": 0.10791260004043579,
+      "learning_rate": 0.001900968867902419,
+      "loss": 0.7183,
+      "step": 80
+    },
+    {
+      "epoch": 0.0022515479392082056,
+      "grad_norm": 0.0878261998295784,
+      "learning_rate": 0.001898168561213419,
+      "loss": 0.6677,
+      "step": 81
+    },
+    {
+      "epoch": 0.0022793448273465784,
+      "grad_norm": 0.10915020108222961,
+      "learning_rate": 0.0018953313343327532,
+      "loss": 0.8602,
+      "step": 82
+    },
+    {
+      "epoch": 0.0023071417154849516,
+      "grad_norm": 0.10625939816236496,
+      "learning_rate": 0.001892457303887706,
+      "loss": 0.8385,
+      "step": 83
+    },
+    {
+      "epoch": 0.0023349386036233244,
+      "grad_norm": 0.10215223580598831,
+      "learning_rate": 0.001889546588018412,
+      "loss": 0.7723,
+      "step": 84
+    },
+    {
+      "epoch": 0.0023627354917616973,
+      "grad_norm": 0.08778225630521774,
+      "learning_rate": 0.0018865993063730002,
+      "loss": 0.6503,
+      "step": 85
+    },
+    {
+      "epoch": 0.00239053237990007,
+      "grad_norm": 0.10662350058555603,
+      "learning_rate": 0.0018836155801026753,
+      "loss": 0.6592,
+      "step": 86
+    },
+    {
+      "epoch": 0.002418329268038443,
+      "grad_norm": 0.10347293317317963,
+      "learning_rate": 0.001880595531856738,
+      "loss": 0.602,
+      "step": 87
+    },
+    {
+      "epoch": 0.002446126156176816,
+      "grad_norm": 0.11098446696996689,
+      "learning_rate": 0.001877539285777543,
+      "loss": 0.7291,
+      "step": 88
+    },
+    {
+      "epoch": 0.002473923044315189,
+      "grad_norm": 0.10774262994527817,
+      "learning_rate": 0.0018744469674953957,
+      "loss": 0.6501,
+      "step": 89
+    },
+    {
+      "epoch": 0.0025017199324535618,
+      "grad_norm": 0.10596223175525665,
+      "learning_rate": 0.0018713187041233894,
+      "loss": 0.7274,
+      "step": 90
+    },
+    {
+      "epoch": 0.0025295168205919346,
+      "grad_norm": 0.11689383536577225,
+      "learning_rate": 0.0018681546242521785,
+      "loss": 0.6693,
+      "step": 91
+    },
+    {
+      "epoch": 0.002557313708730308,
+      "grad_norm": 0.11212435364723206,
+      "learning_rate": 0.0018649548579446936,
+      "loss": 0.6218,
+      "step": 92
+    },
+    {
+      "epoch": 0.0025851105968686806,
+      "grad_norm": 0.13619789481163025,
+      "learning_rate": 0.0018617195367307952,
+      "loss": 0.5839,
+      "step": 93
+    },
+    {
+      "epoch": 0.0026129074850070534,
+      "grad_norm": 0.18084552884101868,
+      "learning_rate": 0.001858448793601866,
+      "loss": 0.6083,
+      "step": 94
+    },
+    {
+      "epoch": 0.0026407043731454262,
+      "grad_norm": 0.14780890941619873,
+      "learning_rate": 0.0018551427630053464,
+      "loss": 0.6095,
+      "step": 95
+    },
+    {
+      "epoch": 0.0026685012612837995,
+      "grad_norm": 0.12189039587974548,
+      "learning_rate": 0.0018518015808392043,
+      "loss": 0.6313,
+      "step": 96
+    },
+    {
+      "epoch": 0.0026962981494221723,
+      "grad_norm": 0.2006332129240036,
+      "learning_rate": 0.0018484253844463525,
+      "loss": 0.6528,
+      "step": 97
+    },
+    {
+      "epoch": 0.002724095037560545,
+      "grad_norm": 0.19439570605754852,
+      "learning_rate": 0.0018450143126090013,
+      "loss": 0.655,
+      "step": 98
+    },
+    {
+      "epoch": 0.002751891925698918,
+      "grad_norm": 0.23627929389476776,
+      "learning_rate": 0.0018415685055429532,
+      "loss": 0.6663,
+      "step": 99
+    },
+    {
+      "epoch": 0.0027796888138372907,
+      "grad_norm": 0.22060082852840424,
+      "learning_rate": 0.0018380881048918405,
+      "loss": 0.6549,
+      "step": 100
+    },
+    {
+      "epoch": 0.0027796888138372907,
+      "eval_loss": 1.0665974617004395,
+      "eval_runtime": 49.9362,
+      "eval_samples_per_second": 11.555,
+      "eval_steps_per_second": 5.787,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 6589805739638784.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null