eddysang committed · Commit f795f26 · verified · 1 Parent(s): 8fbc50f

Training in progress, step 72, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f652211dac610e80872598da1df98e590795842f441ad646c668780471aa84c1
+oid sha256:f9e3f00b92269c5bdae63492c182a839f8e5e904876efb0a0a617caf4cd2237d
 size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b61ce972ea96e2306fbf26a1d40d3335709da2c916fff3ac1116fa3c2f0f5737
+oid sha256:5b617c63d1a2a5b2653a62ea756af8d2d2118bd4e37e6ba99d0c7b3fab15a255
 size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1098628841f4bcb07d5546445f4de27d56441cc7dc92917c1ddcac8b9e18a99
+oid sha256:a39a051c4c7e5dc853790ab7eaf069e615c99cbb9b4d6bc98eaf5292d23ad463
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d85eac2daddfe8e3f78a5d6ef1e9ba13c04651694635a9ed76369a20726389db
+oid sha256:8d43bd90ad476e419738deb9472ad85fd5991005a147e1627aa99867bdfc5655
 size 1064
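
Each of the checkpoint files above is tracked with Git LFS, so the diff only touches the pointer file (`version` / `oid sha256:...` / `size` in bytes); the binary blob itself lives in LFS storage. A minimal Python sketch for checking a downloaded blob against such a pointer — the function name and file paths here are illustrative, not part of this repository:

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the oid/size recorded in a Git LFS pointer file."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value

    expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:f9e3..." -> "f9e3..."
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False

    digest = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid
```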
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.3557498318762609,
+  "epoch": 1.5494283792871553,
   "eval_steps": 50,
-  "global_step": 63,
+  "global_step": 72,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -464,6 +464,69 @@
       "learning_rate": 6.618469519066217e-05,
       "loss": 25.726,
       "step": 63
+    },
+    {
+      "epoch": 1.3772696704774714,
+      "grad_norm": 17.17249870300293,
+      "learning_rate": 6.326741512198266e-05,
+      "loss": 26.7663,
+      "step": 64
+    },
+    {
+      "epoch": 1.398789509078682,
+      "grad_norm": 21.07649803161621,
+      "learning_rate": 6.036822584879038e-05,
+      "loss": 17.8787,
+      "step": 65
+    },
+    {
+      "epoch": 1.4203093476798925,
+      "grad_norm": 18.028573989868164,
+      "learning_rate": 5.7491597710807114e-05,
+      "loss": 16.4428,
+      "step": 66
+    },
+    {
+      "epoch": 1.4418291862811028,
+      "grad_norm": 20.420089721679688,
+      "learning_rate": 5.464196626011943e-05,
+      "loss": 21.7565,
+      "step": 67
+    },
+    {
+      "epoch": 1.4633490248823133,
+      "grad_norm": 16.814950942993164,
+      "learning_rate": 5.182372542187895e-05,
+      "loss": 21.8306,
+      "step": 68
+    },
+    {
+      "epoch": 1.4848688634835239,
+      "grad_norm": 18.8593692779541,
+      "learning_rate": 4.904122071918801e-05,
+      "loss": 21.4311,
+      "step": 69
+    },
+    {
+      "epoch": 1.5063887020847344,
+      "grad_norm": 21.228858947753906,
+      "learning_rate": 4.6298742572618266e-05,
+      "loss": 24.4591,
+      "step": 70
+    },
+    {
+      "epoch": 1.527908540685945,
+      "grad_norm": 22.029273986816406,
+      "learning_rate": 4.360051968469291e-05,
+      "loss": 27.6417,
+      "step": 71
+    },
+    {
+      "epoch": 1.5494283792871553,
+      "grad_norm": 22.491724014282227,
+      "learning_rate": 4.095071251953399e-05,
+      "loss": 28.1538,
+      "step": 72
     }
   ],
   "logging_steps": 1,
@@ -483,7 +546,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.3132684787266355e+17,
+  "total_flos": 3.786592547116155e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null