dixedus commited on
Commit
77ccc02
·
verified ·
1 Parent(s): a377986

Training in progress, step 136, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aed73575afce5f7943132f9a95e33a9ba21279338a18b8dd26168871731b211
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0814f8e20ccede08fa8187f3ae9945e018206f1d367b4459817ab9e8b5571161
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fff603b0a0e378a2145f0ad44206e29ae2e549008620b5a7f213c5033d79522e
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fe5644c35fb5877b90a0d6c6a8f75b466f1ab8e16fad16fadb3575d7f2895eb
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f3e8e8d4a0b35f921a3f49ba17d591da0b4505cd7fd8f39e19601ae8cf7ede3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1085dfbbda04ba524f0c68d6477f2459b2374ec1fdc942d609556cffc4d079fb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c91934808157be4b4581cbac88c1dcb8ab73e7092f7b8aa05c4fbac8ab77615f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd0ef2a827b219b75915f5a88a30c53ebe86f536eec93a6252baab983329eb7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.25674217907227614,
5
  "eval_steps": 17,
6
- "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -344,6 +344,56 @@
344
  "eval_samples_per_second": 50.998,
345
  "eval_steps_per_second": 6.399,
346
  "step": 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  }
348
  ],
349
  "logging_steps": 3,
@@ -363,7 +413,7 @@
363
  "attributes": {}
364
  }
365
  },
366
- "total_flos": 2.322534166953984e+16,
367
  "train_batch_size": 8,
368
  "trial_name": null,
369
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.29341963322545844,
5
  "eval_steps": 17,
6
+ "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
344
  "eval_samples_per_second": 50.998,
345
  "eval_steps_per_second": 6.399,
346
  "step": 119
347
+ },
348
+ {
349
+ "epoch": 0.2588996763754045,
350
+ "grad_norm": 0.004851281642913818,
351
+ "learning_rate": 3.772572564296005e-05,
352
+ "loss": 0.0008,
353
+ "step": 120
354
+ },
355
+ {
356
+ "epoch": 0.26537216828478966,
357
+ "grad_norm": 0.0022969194687902927,
358
+ "learning_rate": 3.533749813077677e-05,
359
+ "loss": 0.0003,
360
+ "step": 123
361
+ },
362
+ {
363
+ "epoch": 0.27184466019417475,
364
+ "grad_norm": 0.002009553834795952,
365
+ "learning_rate": 3.298534127791785e-05,
366
+ "loss": 0.0008,
367
+ "step": 126
368
+ },
369
+ {
370
+ "epoch": 0.2783171521035599,
371
+ "grad_norm": 0.019238989800214767,
372
+ "learning_rate": 3.0675041535377405e-05,
373
+ "loss": 0.001,
374
+ "step": 129
375
+ },
376
+ {
377
+ "epoch": 0.284789644012945,
378
+ "grad_norm": 0.0018951981328427792,
379
+ "learning_rate": 2.8412282383075363e-05,
380
+ "loss": 0.0001,
381
+ "step": 132
382
+ },
383
+ {
384
+ "epoch": 0.2912621359223301,
385
+ "grad_norm": 0.0017350999405607581,
386
+ "learning_rate": 2.6202630348146324e-05,
387
+ "loss": 0.0001,
388
+ "step": 135
389
+ },
390
+ {
391
+ "epoch": 0.29341963322545844,
392
+ "eval_loss": 0.00118519167881459,
393
+ "eval_runtime": 15.2967,
394
+ "eval_samples_per_second": 51.057,
395
+ "eval_steps_per_second": 6.407,
396
+ "step": 136
397
  }
398
  ],
399
  "logging_steps": 3,
 
413
  "attributes": {}
414
  }
415
  },
416
+ "total_flos": 2.6515598406057984e+16,
417
  "train_batch_size": 8,
418
  "trial_name": null,
419
  "trial_params": null