leixa committed on
Commit 68c92c2 · verified · 1 Parent(s): 82f43ee

Training in progress, step 143, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3bc0f4fd031baa461a08a73c9d0b5c6557be7235ea59fbd23db3fc7dcaeaf52
+oid sha256:ba8f92fff3d0704cfd2878c9aa27bb09f39f0ef1e36537ab31b00f6a2c8a455e
 size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bffac84eaac97cbaa77d1adc85cb7ccd4ffae53d401e54121aef1789235dda6a
+oid sha256:c32d27d70ffbf1baca00df2feb354808d5cf048c65ef6a942fa8d997b9f702a8
 size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8f50bcb282a535c8d60e0cd286b37ee0fb35f76262156b1484dc636b0a4dd7d
+oid sha256:9acb4bb3f272325115fb1e1ca543e5418b0ce7c8f1090e6957b7e3fd0ec598ee
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:507bfe4270aab5b2aa070e93d9c3404af996914dd61586771e0cc504e5b0252c
+oid sha256:431c6c53bdba57f808cd9c2e6f738bda5a26247416d68b6f96cb4f3eef6f54ca
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.04044489383215369,
+  "epoch": 0.044489383215369056,
   "eval_steps": 13,
-  "global_step": 130,
+  "global_step": 143,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -396,6 +396,42 @@
       "eval_samples_per_second": 13.88,
       "eval_steps_per_second": 1.736,
       "step": 130
+    },
+    {
+      "epoch": 0.04106712296803298,
+      "grad_norm": 1.2261285781860352,
+      "learning_rate": 2.0118056862137357e-06,
+      "loss": 1.5202,
+      "step": 132
+    },
+    {
+      "epoch": 0.04200046667185191,
+      "grad_norm": 1.3027156591415405,
+      "learning_rate": 1.4029167422908107e-06,
+      "loss": 1.5151,
+      "step": 135
+    },
+    {
+      "epoch": 0.04293381037567084,
+      "grad_norm": 1.1372510194778442,
+      "learning_rate": 9.009284826036691e-07,
+      "loss": 1.5352,
+      "step": 138
+    },
+    {
+      "epoch": 0.043867154079489774,
+      "grad_norm": 1.248124122619629,
+      "learning_rate": 5.08115039419113e-07,
+      "loss": 1.5216,
+      "step": 141
+    },
+    {
+      "epoch": 0.044489383215369056,
+      "eval_loss": 1.4776010513305664,
+      "eval_runtime": 389.8727,
+      "eval_samples_per_second": 13.887,
+      "eval_steps_per_second": 1.736,
+      "step": 143
     }
   ],
   "logging_steps": 3,
@@ -415,7 +451,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.9289069372571648e+17,
+  "total_flos": 2.1217976309828813e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null