dixedus committed on
Commit
ec366d0
·
verified ·
1 Parent(s): 7bf46e4

Training in progress, step 102, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9e8cfba5a77cad29d727a3166f26c38a84816c41c1f2515caa8a7adb4dcc8c4
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de549ffe6cbda026938a42d1e2441221d0dd36aef338a55a6099f624dd35f95
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:689217aaa41feaaaeee9288107dd691977948d6c3a1d2a1df9bb651a1352b6aa
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4057eda53a53bed4869baaddd9a821cfa6574c2069db78e3e3ae3b090fa54119
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52126d43fca697f9a6daabc677f2c11c3e68cba9c6ff06167ebb9376aa2ac9b1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d06b0e421df269e758dbad384e6a198d09f9dc2f79e06ddf32bd52cb423765ff
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24510621ac929102038dbb9557bac0e17f0ed52c36608ebab8e700d1b1464f1d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcbdf5cce354397b1cc7dbc75ae72cd1ce74fbf84991f656a8ae8c5ec4cf6c4c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.18338727076591155,
5
  "eval_steps": 17,
6
- "global_step": 85,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -251,6 +251,56 @@
251
  "eval_samples_per_second": 50.889,
252
  "eval_steps_per_second": 6.386,
253
  "step": 85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  }
255
  ],
256
  "logging_steps": 3,
@@ -270,7 +320,7 @@
270
  "attributes": {}
271
  }
272
  },
273
- "total_flos": 1.6596442068025344e+16,
274
  "train_batch_size": 8,
275
  "trial_name": null,
276
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.22006472491909385,
5
  "eval_steps": 17,
6
+ "global_step": 102,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
251
  "eval_samples_per_second": 50.889,
252
  "eval_steps_per_second": 6.386,
253
  "step": 85
254
+ },
255
+ {
256
+ "epoch": 0.18770226537216828,
257
+ "grad_norm": 0.005217652302235365,
258
+ "learning_rate": 6.466250186922325e-05,
259
+ "loss": 0.0002,
260
+ "step": 87
261
+ },
262
+ {
263
+ "epoch": 0.1941747572815534,
264
+ "grad_norm": 0.2677434980869293,
265
+ "learning_rate": 6.227427435703997e-05,
266
+ "loss": 0.0004,
267
+ "step": 90
268
+ },
269
+ {
270
+ "epoch": 0.20064724919093851,
271
+ "grad_norm": 0.0085700498893857,
272
+ "learning_rate": 5.985585137257401e-05,
273
+ "loss": 0.0003,
274
+ "step": 93
275
+ },
276
+ {
277
+ "epoch": 0.20711974110032363,
278
+ "grad_norm": 0.0022312181536108255,
279
+ "learning_rate": 5.74131823855921e-05,
280
+ "loss": 0.0004,
281
+ "step": 96
282
+ },
283
+ {
284
+ "epoch": 0.21359223300970873,
285
+ "grad_norm": 0.008560556918382645,
286
+ "learning_rate": 5.495227651252315e-05,
287
+ "loss": 0.0026,
288
+ "step": 99
289
+ },
290
+ {
291
+ "epoch": 0.22006472491909385,
292
+ "grad_norm": 0.004689795430749655,
293
+ "learning_rate": 5.247918773366112e-05,
294
+ "loss": 0.0002,
295
+ "step": 102
296
+ },
297
+ {
298
+ "epoch": 0.22006472491909385,
299
+ "eval_loss": 0.0012459141435101628,
300
+ "eval_runtime": 15.286,
301
+ "eval_samples_per_second": 51.093,
302
+ "eval_steps_per_second": 6.411,
303
+ "step": 102
304
  }
305
  ],
306
  "logging_steps": 3,
 
320
  "attributes": {}
321
  }
322
  },
323
+ "total_flos": 1.988669880454349e+16,
324
  "train_batch_size": 8,
325
  "trial_name": null,
326
  "trial_params": null