Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +362 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90a121a4f2d67b4ad0944c96ec5c3337195dedba172c97a4b3930339c9313f4a
 size 1195453784

 version https://git-lfs.github.com/spec/v1
+oid sha256:928b1f3c8bd86bd69967832a3c3a2a42ca06ff294c0dadd0413c00e1fae40884
 size 1195453784

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f487936cda6e8ad027044b69a09a3e188b9f1faa30daae49980a7532ca07bdbf
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:05479b65b01f8b2cb0f19f4f770bcbc9a016f6b3d355180a0e5fe8e45d1b6690
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e9a9867d8567908158e06e70755ea976a0bece17bbd778d906322210823a201b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0345a29dc0e84e4a6a696b2df81e39b2be07131bac6070bca096949c23c0bd3a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d359eb5d29e75fb2bbe5b7026981da69b95b8ad1fea469302d13cde104f7e8a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ddb9588ea654e56e83effcf81a2bc03480954babcf6415cb44d41d3bfb8039f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.000480437942314893,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.11092623405435385,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 12.161,
       "eval_steps_per_second": 3.044,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -387,7 +745,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.655843656472986e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.000480437942314893,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.2218524681087077,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.161,
       "eval_steps_per_second": 3.044,
       "step": 50
+    },
+    {
+      "epoch": 0.11314475873544093,
+      "grad_norm": 0.12439312040805817,
+      "learning_rate": 8.894386393810563e-05,
+      "loss": 0.0013,
+      "step": 51
+    },
+    {
+      "epoch": 0.115363283416528,
+      "grad_norm": 0.01804165169596672,
+      "learning_rate": 8.842005554284296e-05,
+      "loss": 0.0001,
+      "step": 52
+    },
+    {
+      "epoch": 0.11758180809761509,
+      "grad_norm": 0.11951281130313873,
+      "learning_rate": 8.788574348801675e-05,
+      "loss": 0.0005,
+      "step": 53
+    },
+    {
+      "epoch": 0.11980033277870217,
+      "grad_norm": 0.07723747193813324,
+      "learning_rate": 8.73410738492077e-05,
+      "loss": 0.0004,
+      "step": 54
+    },
+    {
+      "epoch": 0.12201885745978924,
+      "grad_norm": 0.009235666133463383,
+      "learning_rate": 8.678619553365659e-05,
+      "loss": 0.0001,
+      "step": 55
+    },
+    {
+      "epoch": 0.12423738214087632,
+      "grad_norm": 0.002556801540777087,
+      "learning_rate": 8.622126023955446e-05,
+      "loss": 0.0,
+      "step": 56
+    },
+    {
+      "epoch": 0.1264559068219634,
+      "grad_norm": 0.1781330555677414,
+      "learning_rate": 8.564642241456986e-05,
+      "loss": 0.0002,
+      "step": 57
+    },
+    {
+      "epoch": 0.12867443150305047,
+      "grad_norm": 0.005277496296912432,
+      "learning_rate": 8.506183921362443e-05,
+      "loss": 0.0,
+      "step": 58
+    },
+    {
+      "epoch": 0.13089295618413754,
+      "grad_norm": 0.07657326757907867,
+      "learning_rate": 8.44676704559283e-05,
+      "loss": 0.0002,
+      "step": 59
+    },
+    {
+      "epoch": 0.13311148086522462,
+      "grad_norm": 0.012319284491240978,
+      "learning_rate": 8.386407858128706e-05,
+      "loss": 0.0001,
+      "step": 60
+    },
+    {
+      "epoch": 0.1353300055463117,
+      "grad_norm": 0.13288556039333344,
+      "learning_rate": 8.32512286056924e-05,
+      "loss": 0.0007,
+      "step": 61
+    },
+    {
+      "epoch": 0.13754853022739877,
+      "grad_norm": 0.053151004016399384,
+      "learning_rate": 8.262928807620843e-05,
+      "loss": 0.0004,
+      "step": 62
+    },
+    {
+      "epoch": 0.13976705490848584,
+      "grad_norm": 0.1312064528465271,
+      "learning_rate": 8.199842702516583e-05,
+      "loss": 0.0008,
+      "step": 63
+    },
+    {
+      "epoch": 0.14198557958957295,
+      "grad_norm": 0.1711154282093048,
+      "learning_rate": 8.135881792367686e-05,
+      "loss": 0.0012,
+      "step": 64
+    },
+    {
+      "epoch": 0.14420410427066002,
+      "grad_norm": 0.028727808967232704,
+      "learning_rate": 8.07106356344834e-05,
+      "loss": 0.0002,
+      "step": 65
+    },
+    {
+      "epoch": 0.1464226289517471,
+      "grad_norm": 0.007667217403650284,
+      "learning_rate": 8.005405736415126e-05,
+      "loss": 0.0001,
+      "step": 66
+    },
+    {
+      "epoch": 0.14864115363283417,
+      "grad_norm": 0.0016746758483350277,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 0.0,
+      "step": 67
+    },
+    {
+      "epoch": 0.15085967831392125,
+      "grad_norm": 0.0075210388749837875,
+      "learning_rate": 7.871643313414718e-05,
+      "loss": 0.0,
+      "step": 68
+    },
+    {
+      "epoch": 0.15307820299500832,
+      "grad_norm": 0.019149813801050186,
+      "learning_rate": 7.803575286758364e-05,
+      "loss": 0.0001,
+      "step": 69
+    },
+    {
+      "epoch": 0.1552967276760954,
+      "grad_norm": 0.008106847293674946,
+      "learning_rate": 7.734740790612136e-05,
+      "loss": 0.0,
+      "step": 70
+    },
+    {
+      "epoch": 0.15751525235718247,
+      "grad_norm": 0.04205169901251793,
+      "learning_rate": 7.66515864363997e-05,
+      "loss": 0.0002,
+      "step": 71
+    },
+    {
+      "epoch": 0.15973377703826955,
+      "grad_norm": 0.03955560550093651,
+      "learning_rate": 7.594847868906076e-05,
+      "loss": 0.0,
+      "step": 72
+    },
+    {
+      "epoch": 0.16195230171935662,
+      "grad_norm": 0.002128523774445057,
+      "learning_rate": 7.52382768867422e-05,
+      "loss": 0.0,
+      "step": 73
+    },
+    {
+      "epoch": 0.1641708264004437,
+      "grad_norm": 0.07566439360380173,
+      "learning_rate": 7.452117519152542e-05,
+      "loss": 0.0003,
+      "step": 74
+    },
+    {
+      "epoch": 0.16638935108153077,
+      "grad_norm": 0.011166649870574474,
+      "learning_rate": 7.379736965185368e-05,
+      "loss": 0.0,
+      "step": 75
+    },
+    {
+      "epoch": 0.16860787576261785,
+      "grad_norm": 0.0581900030374527,
+      "learning_rate": 7.30670581489344e-05,
+      "loss": 0.0002,
+      "step": 76
+    },
+    {
+      "epoch": 0.17082640044370492,
+      "grad_norm": 0.21121130883693695,
+      "learning_rate": 7.233044034264034e-05,
+      "loss": 0.0014,
+      "step": 77
+    },
+    {
+      "epoch": 0.17304492512479203,
+      "grad_norm": 0.0013943826779723167,
+      "learning_rate": 7.158771761692464e-05,
+      "loss": 0.0,
+      "step": 78
+    },
+    {
+      "epoch": 0.1752634498058791,
+      "grad_norm": 0.000764008320402354,
+      "learning_rate": 7.083909302476453e-05,
+      "loss": 0.0,
+      "step": 79
+    },
+    {
+      "epoch": 0.17748197448696618,
+      "grad_norm": 0.00407798308879137,
+      "learning_rate": 7.008477123264848e-05,
+      "loss": 0.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.17970049916805325,
+      "grad_norm": 0.04198778420686722,
+      "learning_rate": 6.932495846462261e-05,
+      "loss": 0.0004,
+      "step": 81
+    },
+    {
+      "epoch": 0.18191902384914033,
+      "grad_norm": 0.13586126267910004,
+      "learning_rate": 6.855986244591104e-05,
+      "loss": 0.0009,
+      "step": 82
+    },
+    {
+      "epoch": 0.1841375485302274,
+      "grad_norm": 0.0022439719177782536,
+      "learning_rate": 6.778969234612584e-05,
+      "loss": 0.0,
+      "step": 83
+    },
+    {
+      "epoch": 0.18635607321131448,
+      "grad_norm": 0.002078837947919965,
+      "learning_rate": 6.701465872208216e-05,
+      "loss": 0.0,
+      "step": 84
+    },
+    {
+      "epoch": 0.18857459789240155,
+      "grad_norm": 0.0007661273120902479,
+      "learning_rate": 6.623497346023418e-05,
+      "loss": 0.0,
+      "step": 85
+    },
+    {
+      "epoch": 0.19079312257348863,
+      "grad_norm": 0.9077136516571045,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 0.0009,
+      "step": 86
+    },
+    {
+      "epoch": 0.1930116472545757,
+      "grad_norm": 0.4077296555042267,
+      "learning_rate": 6.466250186922325e-05,
+      "loss": 0.0002,
+      "step": 87
+    },
+    {
+      "epoch": 0.19523017193566278,
+      "grad_norm": 0.5970966815948486,
+      "learning_rate": 6.387014543809223e-05,
+      "loss": 0.0012,
+      "step": 88
+    },
+    {
+      "epoch": 0.19744869661674985,
+      "grad_norm": 0.015372872352600098,
+      "learning_rate": 6.307399704769099e-05,
+      "loss": 0.0,
+      "step": 89
+    },
+    {
+      "epoch": 0.19966722129783693,
+      "grad_norm": 0.003503235289826989,
+      "learning_rate": 6.227427435703997e-05,
+      "loss": 0.0,
+      "step": 90
+    },
+    {
+      "epoch": 0.201885745978924,
+      "grad_norm": 1.7559353113174438,
+      "learning_rate": 6.147119600233758e-05,
+      "loss": 0.0009,
+      "step": 91
+    },
+    {
+      "epoch": 0.2041042706600111,
+      "grad_norm": 0.0038733009714633226,
+      "learning_rate": 6.066498153718735e-05,
+      "loss": 0.0001,
+      "step": 92
+    },
+    {
+      "epoch": 0.20632279534109818,
+      "grad_norm": 0.006768415216356516,
+      "learning_rate": 5.985585137257401e-05,
+      "loss": 0.0001,
+      "step": 93
+    },
+    {
+      "epoch": 0.20854132002218526,
+      "grad_norm": 1.5329684019088745,
+      "learning_rate": 5.90440267166055e-05,
+      "loss": 0.0012,
+      "step": 94
+    },
+    {
+      "epoch": 0.21075984470327233,
+      "grad_norm": 0.6506447792053223,
+      "learning_rate": 5.8229729514036705e-05,
+      "loss": 0.0029,
+      "step": 95
+    },
+    {
+      "epoch": 0.2129783693843594,
+      "grad_norm": 0.03461511433124542,
+      "learning_rate": 5.74131823855921e-05,
+      "loss": 0.001,
+      "step": 96
+    },
+    {
+      "epoch": 0.21519689406544648,
+      "grad_norm": 0.12881682813167572,
+      "learning_rate": 5.6594608567103456e-05,
+      "loss": 0.0007,
+      "step": 97
+    },
+    {
+      "epoch": 0.21741541874653356,
+      "grad_norm": 0.4704674482345581,
+      "learning_rate": 5.577423184847932e-05,
+      "loss": 0.0027,
+      "step": 98
+    },
+    {
+      "epoch": 0.21963394342762063,
+      "grad_norm": 0.3575167953968048,
+      "learning_rate": 5.495227651252315e-05,
+      "loss": 0.0019,
+      "step": 99
+    },
+    {
+      "epoch": 0.2218524681087077,
+      "grad_norm": 0.09095858782529831,
+      "learning_rate": 5.4128967273616625e-05,
+      "loss": 0.0003,
+      "step": 100
+    },
+    {
+      "epoch": 0.2218524681087077,
+      "eval_loss": 0.0006046874914318323,
+      "eval_runtime": 62.4244,
+      "eval_samples_per_second": 12.159,
+      "eval_steps_per_second": 3.044,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.531168731294597e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null