Training in progress, step 102, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +130 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:376b17e9e9631961076e3bbeb3993d19795f20b76c2f2005817821e13465e6e1
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:919cf08a7d7abf9880c22fb4dfe4b831d65927f7ae57ae5667378e28e06cd849
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3727032b69e6649af9f27beaabc3fe9ad69040d535a04f1eb340b53219c80bb
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:4138bcc19e199dc967bcd7384f55e319c96ad62e01d12d688ab9eccaa9ae5991
 size 671466706

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:427ad7b17c78789ef97429efbe9d6211a7d09f2d08a147aa825d611691ab1bf8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:df7161f83e2c1b5421911bfc287c46c4380aa9d1a9390b7f6bdd147d920abb38
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b383ad1d61ff4e9bbd86bd276c043e414782d2bb7de68ada3e289a786eb79681
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:70670c442607259270e13afbef3aac28e38a58ddad6998414f76ed43ab7f41d4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.12514377731769036,
   "eval_steps": 50,
-  "global_step": 85,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -618,6 +618,133 @@
       "learning_rate": 0.00010669636963055245,
       "loss": 0.1116,
       "step": 85
     }
   ],
   "logging_steps": 1,
@@ -637,7 +764,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.995021026974761e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.15017253278122844,
   "eval_steps": 50,
+  "global_step": 102,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00010669636963055245,
       "loss": 0.1116,
       "step": 85
+    },
+    {
+      "epoch": 0.12661605705083967,
+      "grad_norm": 0.1744721382856369,
+      "learning_rate": 0.00010550524823068502,
+      "loss": 0.0638,
+      "step": 86
+    },
+    {
+      "epoch": 0.12808833678398895,
+      "grad_norm": 0.13608935475349426,
+      "learning_rate": 0.00010430483463669551,
+      "loss": 0.0788,
+      "step": 87
+    },
+    {
+      "epoch": 0.12956061651713827,
+      "grad_norm": 0.12516197562217712,
+      "learning_rate": 0.0001030954945061934,
+      "loss": 0.0565,
+      "step": 88
+    },
+    {
+      "epoch": 0.13103289625028755,
+      "grad_norm": 0.09498213976621628,
+      "learning_rate": 0.0001018775962158975,
+      "loss": 0.0372,
+      "step": 89
+    },
+    {
+      "epoch": 0.13250517598343686,
+      "grad_norm": 0.09999972581863403,
+      "learning_rate": 0.00010065151074942516,
+      "loss": 0.0393,
+      "step": 90
+    },
+    {
+      "epoch": 0.13397745571658615,
+      "grad_norm": 0.09636418521404266,
+      "learning_rate": 9.941761158428674e-05,
+      "loss": 0.0314,
+      "step": 91
+    },
+    {
+      "epoch": 0.13544973544973546,
+      "grad_norm": 0.0872374102473259,
+      "learning_rate": 9.817627457812105e-05,
+      "loss": 0.0322,
+      "step": 92
+    },
+    {
+      "epoch": 0.13692201518288474,
+      "grad_norm": 0.11897428333759308,
+      "learning_rate": 9.692787785420525e-05,
+      "loss": 0.0726,
+      "step": 93
+    },
+    {
+      "epoch": 0.13839429491603406,
+      "grad_norm": 0.1489570438861847,
+      "learning_rate": 9.567280168627493e-05,
+      "loss": 0.0753,
+      "step": 94
+    },
+    {
+      "epoch": 0.13986657464918334,
+      "grad_norm": 0.1237846091389656,
+      "learning_rate": 9.441142838268905e-05,
+      "loss": 0.0304,
+      "step": 95
+    },
+    {
+      "epoch": 0.14133885438233265,
+      "grad_norm": 0.11417360603809357,
+      "learning_rate": 9.314414216997507e-05,
+      "loss": 0.0466,
+      "step": 96
+    },
+    {
+      "epoch": 0.14281113411548194,
+      "grad_norm": 0.13448922336101532,
+      "learning_rate": 9.187132907578987e-05,
+      "loss": 0.0676,
+      "step": 97
+    },
+    {
+      "epoch": 0.14428341384863125,
+      "grad_norm": 0.12376630306243896,
+      "learning_rate": 9.059337681133192e-05,
+      "loss": 0.0653,
+      "step": 98
+    },
+    {
+      "epoch": 0.14575569358178053,
+      "grad_norm": 0.13168717920780182,
+      "learning_rate": 8.931067465324085e-05,
+      "loss": 0.0414,
+      "step": 99
+    },
+    {
+      "epoch": 0.14722797331492984,
+      "grad_norm": 0.12176292389631271,
+      "learning_rate": 8.802361332501978e-05,
+      "loss": 0.0545,
+      "step": 100
+    },
+    {
+      "epoch": 0.14722797331492984,
+      "eval_loss": 0.05667497217655182,
+      "eval_runtime": 573.8343,
+      "eval_samples_per_second": 3.987,
+      "eval_steps_per_second": 1.994,
+      "step": 100
+    },
+    {
+      "epoch": 0.14870025304807913,
+      "grad_norm": 0.11033546179533005,
+      "learning_rate": 8.673258487801731e-05,
+      "loss": 0.0368,
+      "step": 101
+    },
+    {
+      "epoch": 0.15017253278122844,
+      "grad_norm": 0.14303916692733765,
+      "learning_rate": 8.54379825720049e-05,
+      "loss": 0.0527,
+      "step": 102
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.0779851033778586e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null