Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9ed99147661c68bc8edbbcd3c332c0287089aacfb1487d11488fae3f1e73cec
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:85b254c61639c15a27279fbcd7d02fe27df265e7bf123be81cd803e59ab17fb9
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dea3a9f7283b938a5b4558172cc037fc59f7522fc8a71a95c98aa9b043e0a03b
 size 150486964

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd0f24caa0f011b726275fba39ca3ff24b0a51d222bb1ee53c66272668e5e82f
 size 150486964

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7ebeeaba95262d3b05bd068f7d83260849045fd65952d31a7ab6881052d0a33
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2cd95e3e69ab027a17941b689751bbaaf8ac8ce15be486ff02ba929972e3a5a9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21bca0277a807518b292d79cbef83cbf0fd2afc472e011178f03e012f40013f9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c35f928f7dd9edebe7c94dc66def3d207bbcfc6fb428fd758a6bfcafaf8bae8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.4161189794540405,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.8733624454148472,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 33.263,
       "eval_steps_per_second": 8.573,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.381134508228608e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.16238145530223846,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 1.7467248908296944,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 33.263,
       "eval_steps_per_second": 8.573,
       "step": 50
+    },
+    {
+      "epoch": 0.8908296943231441,
+      "grad_norm": 1.1243302822113037,
+      "learning_rate": 8.501086738835843e-05,
+      "loss": 0.3851,
+      "step": 51
+    },
+    {
+      "epoch": 0.9082969432314411,
+      "grad_norm": 1.348684549331665,
+      "learning_rate": 8.43120818934367e-05,
+      "loss": 0.4908,
+      "step": 52
+    },
+    {
+      "epoch": 0.925764192139738,
+      "grad_norm": 1.6293962001800537,
+      "learning_rate": 8.360039302777612e-05,
+      "loss": 0.4444,
+      "step": 53
+    },
+    {
+      "epoch": 0.9432314410480349,
+      "grad_norm": 2.103517770767212,
+      "learning_rate": 8.28760684284532e-05,
+      "loss": 0.574,
+      "step": 54
+    },
+    {
+      "epoch": 0.9606986899563319,
+      "grad_norm": 1.4056304693222046,
+      "learning_rate": 8.213938048432697e-05,
+      "loss": 0.3752,
+      "step": 55
+    },
+    {
+      "epoch": 0.9781659388646288,
+      "grad_norm": 1.6228885650634766,
+      "learning_rate": 8.139060623360493e-05,
+      "loss": 0.3766,
+      "step": 56
+    },
+    {
+      "epoch": 0.9956331877729258,
+      "grad_norm": 1.6813204288482666,
+      "learning_rate": 8.063002725966015e-05,
+      "loss": 0.3613,
+      "step": 57
+    },
+    {
+      "epoch": 1.0131004366812226,
+      "grad_norm": 5.2064619064331055,
+      "learning_rate": 7.985792958513931e-05,
+      "loss": 0.788,
+      "step": 58
+    },
+    {
+      "epoch": 1.0305676855895196,
+      "grad_norm": 0.968858003616333,
+      "learning_rate": 7.907460356440133e-05,
+      "loss": 0.2553,
+      "step": 59
+    },
+    {
+      "epoch": 1.0480349344978166,
+      "grad_norm": 0.8971614837646484,
+      "learning_rate": 7.828034377432693e-05,
+      "loss": 0.2998,
+      "step": 60
+    },
+    {
+      "epoch": 1.0655021834061136,
+      "grad_norm": 1.2629083395004272,
+      "learning_rate": 7.74754489035403e-05,
+      "loss": 0.3353,
+      "step": 61
+    },
+    {
+      "epoch": 1.0829694323144106,
+      "grad_norm": 1.0545297861099243,
+      "learning_rate": 7.666022164008457e-05,
+      "loss": 0.2211,
+      "step": 62
+    },
+    {
+      "epoch": 1.1004366812227073,
+      "grad_norm": 1.3016352653503418,
+      "learning_rate": 7.583496855759316e-05,
+      "loss": 0.2633,
+      "step": 63
+    },
+    {
+      "epoch": 1.1179039301310043,
+      "grad_norm": 1.4473152160644531,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 0.246,
+      "step": 64
+    },
+    {
+      "epoch": 1.1353711790393013,
+      "grad_norm": 1.0543831586837769,
+      "learning_rate": 7.415562996483192e-05,
+      "loss": 0.2277,
+      "step": 65
+    },
+    {
+      "epoch": 1.1528384279475983,
+      "grad_norm": 1.0669910907745361,
+      "learning_rate": 7.330217598512695e-05,
+      "loss": 0.2476,
+      "step": 66
+    },
+    {
+      "epoch": 1.1703056768558953,
+      "grad_norm": 1.5889818668365479,
+      "learning_rate": 7.243995901002312e-05,
+      "loss": 0.2648,
+      "step": 67
+    },
+    {
+      "epoch": 1.1877729257641922,
+      "grad_norm": 1.4110018014907837,
+      "learning_rate": 7.156930328406268e-05,
+      "loss": 0.2575,
+      "step": 68
+    },
+    {
+      "epoch": 1.205240174672489,
+      "grad_norm": 1.1937973499298096,
+      "learning_rate": 7.069053622525696e-05,
+      "loss": 0.2309,
+      "step": 69
+    },
+    {
+      "epoch": 1.222707423580786,
+      "grad_norm": 1.118398666381836,
+      "learning_rate": 6.980398830195785e-05,
+      "loss": 0.1686,
+      "step": 70
+    },
+    {
+      "epoch": 1.240174672489083,
+      "grad_norm": 5.387632846832275,
+      "learning_rate": 6.890999290858214e-05,
+      "loss": 0.3197,
+      "step": 71
+    },
+    {
+      "epoch": 1.25764192139738,
+      "grad_norm": 1.4671061038970947,
+      "learning_rate": 6.800888624023553e-05,
+      "loss": 0.3027,
+      "step": 72
+    },
+    {
+      "epoch": 1.2751091703056767,
+      "grad_norm": 1.4483202695846558,
+      "learning_rate": 6.710100716628344e-05,
+      "loss": 0.2432,
+      "step": 73
+    },
+    {
+      "epoch": 1.2925764192139737,
+      "grad_norm": 1.154068946838379,
+      "learning_rate": 6.618669710291606e-05,
+      "loss": 0.2641,
+      "step": 74
+    },
+    {
+      "epoch": 1.3100436681222707,
+      "grad_norm": 0.9692803621292114,
+      "learning_rate": 6.526629988475567e-05,
+      "loss": 0.2105,
+      "step": 75
+    },
+    {
+      "epoch": 1.3275109170305677,
+      "grad_norm": 1.334222674369812,
+      "learning_rate": 6.434016163555452e-05,
+      "loss": 0.1716,
+      "step": 76
+    },
+    {
+      "epoch": 1.3449781659388647,
+      "grad_norm": 1.1230826377868652,
+      "learning_rate": 6.340863063803188e-05,
+      "loss": 0.1533,
+      "step": 77
+    },
+    {
+      "epoch": 1.3624454148471616,
+      "grad_norm": 1.4789968729019165,
+      "learning_rate": 6.247205720289907e-05,
+      "loss": 0.2085,
+      "step": 78
+    },
+    {
+      "epoch": 1.3799126637554586,
+      "grad_norm": 1.0231616497039795,
+      "learning_rate": 6.153079353712201e-05,
+      "loss": 0.1325,
+      "step": 79
+    },
+    {
+      "epoch": 1.3973799126637554,
+      "grad_norm": 1.1890296936035156,
+      "learning_rate": 6.058519361147055e-05,
+      "loss": 0.1489,
+      "step": 80
+    },
+    {
+      "epoch": 1.4148471615720524,
+      "grad_norm": 1.5673424005508423,
+      "learning_rate": 5.963561302740449e-05,
+      "loss": 0.1368,
+      "step": 81
+    },
+    {
+      "epoch": 1.4323144104803494,
+      "grad_norm": 1.3184294700622559,
+      "learning_rate": 5.868240888334653e-05,
+      "loss": 0.2192,
+      "step": 82
+    },
+    {
+      "epoch": 1.4497816593886463,
+      "grad_norm": 1.7077476978302002,
+      "learning_rate": 5.772593964039203e-05,
+      "loss": 0.2395,
+      "step": 83
+    },
+    {
+      "epoch": 1.467248908296943,
+      "grad_norm": 1.303192138671875,
+      "learning_rate": 5.6766564987506566e-05,
+      "loss": 0.1591,
+      "step": 84
+    },
+    {
+      "epoch": 1.48471615720524,
+      "grad_norm": 1.891233205795288,
+      "learning_rate": 5.5804645706261514e-05,
+      "loss": 0.181,
+      "step": 85
+    },
+    {
+      "epoch": 1.502183406113537,
+      "grad_norm": 1.065505862236023,
+      "learning_rate": 5.484054353515896e-05,
+      "loss": 0.2988,
+      "step": 86
+    },
+    {
+      "epoch": 1.519650655021834,
+      "grad_norm": 0.8333722352981567,
+      "learning_rate": 5.387462103359655e-05,
+      "loss": 0.1231,
+      "step": 87
+    },
+    {
+      "epoch": 1.537117903930131,
+      "grad_norm": 0.8929618000984192,
+      "learning_rate": 5.290724144552379e-05,
+      "loss": 0.1581,
+      "step": 88
+    },
+    {
+      "epoch": 1.554585152838428,
+      "grad_norm": 0.6446655988693237,
+      "learning_rate": 5.193876856284085e-05,
+      "loss": 0.103,
+      "step": 89
+    },
+    {
+      "epoch": 1.572052401746725,
+      "grad_norm": 1.5530974864959717,
+      "learning_rate": 5.096956658859122e-05,
+      "loss": 0.2085,
+      "step": 90
+    },
+    {
+      "epoch": 1.589519650655022,
+      "grad_norm": 1.2184090614318848,
+      "learning_rate": 5e-05,
+      "loss": 0.1611,
+      "step": 91
+    },
+    {
+      "epoch": 1.6069868995633187,
+      "grad_norm": 1.355815052986145,
+      "learning_rate": 4.903043341140879e-05,
+      "loss": 0.1425,
+      "step": 92
+    },
+    {
+      "epoch": 1.6244541484716157,
+      "grad_norm": 2.162626266479492,
+      "learning_rate": 4.806123143715916e-05,
+      "loss": 0.228,
+      "step": 93
+    },
+    {
+      "epoch": 1.6419213973799127,
+      "grad_norm": 1.2516330480575562,
+      "learning_rate": 4.709275855447621e-05,
+      "loss": 0.0985,
+      "step": 94
+    },
+    {
+      "epoch": 1.6593886462882095,
+      "grad_norm": 1.455117106437683,
+      "learning_rate": 4.612537896640346e-05,
+      "loss": 0.169,
+      "step": 95
+    },
+    {
+      "epoch": 1.6768558951965065,
+      "grad_norm": 1.3163059949874878,
+      "learning_rate": 4.515945646484105e-05,
+      "loss": 0.1206,
+      "step": 96
+    },
+    {
+      "epoch": 1.6943231441048034,
+      "grad_norm": 1.735390067100525,
+      "learning_rate": 4.4195354293738484e-05,
+      "loss": 0.1459,
+      "step": 97
+    },
+    {
+      "epoch": 1.7117903930131004,
+      "grad_norm": 1.2925852537155151,
+      "learning_rate": 4.323343501249346e-05,
+      "loss": 0.0877,
+      "step": 98
+    },
+    {
+      "epoch": 1.7292576419213974,
+      "grad_norm": 1.971182942390442,
+      "learning_rate": 4.227406035960798e-05,
+      "loss": 0.1316,
+      "step": 99
+    },
+    {
+      "epoch": 1.7467248908296944,
+      "grad_norm": 0.9830079078674316,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 0.2094,
+      "step": 100
+    },
+    {
+      "epoch": 1.7467248908296944,
+      "eval_loss": 0.16238145530223846,
+      "eval_runtime": 2.9208,
+      "eval_samples_per_second": 33.21,
+      "eval_steps_per_second": 8.559,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.758016755040256e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null