eddysang commited on
Commit
f7c5592
·
verified ·
1 Parent(s): d22f5ec

Training in progress, step 65, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1a1f7edba342dcae21bc1bcc931a8d18e8fcd5b7452808965c7181e3f8fb2f1
3
  size 500770656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e874375ad72eebe8ef41dcd2cc77942456f640cbe3bb03a574424588cd8a3a
3
  size 500770656
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c31134d8c644da3fe4e7fe5335cea6c99d373c2c3f4bfc42b0a9f8a83ee3881e
3
  size 1001863522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0e4bf9d77e80c4122da30cc787f051493de20046abbcd72f3dbf4f61742208d
3
  size 1001863522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e4f84759c0d0345e3d0ec8c8f48cc65de394faca774211b25c2f484b7ca4dbc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8fe965a183dbb4db01355f9eea55b73f3c2653c1ab23e6c3218c43988c19f31
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0083f46b3c06809c7ba2e6389d4ff484bc53254e300d739b3ff127153f94c8d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d73a9cffc5823f9b954d9d65f5e7cf29f10d6f2c164580cdcb5194694db8d74
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7282275711159737,
5
  "eval_steps": 50,
6
- "global_step": 52,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -387,6 +387,97 @@
387
  "learning_rate": 0.00012866992205580908,
388
  "loss": 2.6251,
389
  "step": 52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  }
391
  ],
392
  "logging_steps": 1,
@@ -406,7 +497,7 @@
406
  "attributes": {}
407
  }
408
  },
409
- "total_flos": 5.3069890977792e+17,
410
  "train_batch_size": 2,
411
  "trial_name": null,
412
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9102844638949672,
5
  "eval_steps": 50,
6
+ "global_step": 65,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
387
  "learning_rate": 0.00012866992205580908,
388
  "loss": 2.6251,
389
  "step": 52
390
+ },
391
+ {
392
+ "epoch": 0.7422319474835887,
393
+ "grad_norm": 8.420281410217285,
394
+ "learning_rate": 0.00012738835239367027,
395
+ "loss": 1.8761,
396
+ "step": 53
397
+ },
398
+ {
399
+ "epoch": 0.7562363238512035,
400
+ "grad_norm": 5.919778347015381,
401
+ "learning_rate": 0.00012607618940900977,
402
+ "loss": 1.8202,
403
+ "step": 54
404
+ },
405
+ {
406
+ "epoch": 0.7702407002188184,
407
+ "grad_norm": 7.590326309204102,
408
+ "learning_rate": 0.00012473419936805962,
409
+ "loss": 2.0368,
410
+ "step": 55
411
+ },
412
+ {
413
+ "epoch": 0.7842450765864333,
414
+ "grad_norm": 5.551327228546143,
415
+ "learning_rate": 0.0001233631659552128,
416
+ "loss": 1.5056,
417
+ "step": 56
418
+ },
419
+ {
420
+ "epoch": 0.7982494529540481,
421
+ "grad_norm": 4.756547451019287,
422
+ "learning_rate": 0.00012196388981537352,
423
+ "loss": 1.9188,
424
+ "step": 57
425
+ },
426
+ {
427
+ "epoch": 0.812253829321663,
428
+ "grad_norm": 5.172512054443359,
429
+ "learning_rate": 0.00012053718808640333,
430
+ "loss": 1.4091,
431
+ "step": 58
432
+ },
433
+ {
434
+ "epoch": 0.8262582056892779,
435
+ "grad_norm": 8.058588981628418,
436
+ "learning_rate": 0.00011908389392193547,
437
+ "loss": 2.8933,
438
+ "step": 59
439
+ },
440
+ {
441
+ "epoch": 0.8402625820568927,
442
+ "grad_norm": 5.907708168029785,
443
+ "learning_rate": 0.00011760485600483667,
444
+ "loss": 1.4843,
445
+ "step": 60
446
+ },
447
+ {
448
+ "epoch": 0.8542669584245076,
449
+ "grad_norm": 6.655636310577393,
450
+ "learning_rate": 0.00011610093805160025,
451
+ "loss": 2.0464,
452
+ "step": 61
453
+ },
454
+ {
455
+ "epoch": 0.8682713347921225,
456
+ "grad_norm": 7.76405668258667,
457
+ "learning_rate": 0.00011457301830795994,
458
+ "loss": 2.042,
459
+ "step": 62
460
+ },
461
+ {
462
+ "epoch": 0.8822757111597375,
463
+ "grad_norm": 8.136941909790039,
464
+ "learning_rate": 0.00011302198903601928,
465
+ "loss": 1.8616,
466
+ "step": 63
467
+ },
468
+ {
469
+ "epoch": 0.8962800875273523,
470
+ "grad_norm": 4.971902370452881,
471
+ "learning_rate": 0.00011144875599319543,
472
+ "loss": 1.8062,
473
+ "step": 64
474
+ },
475
+ {
476
+ "epoch": 0.9102844638949672,
477
+ "grad_norm": 4.203851222991943,
478
+ "learning_rate": 0.00010985423790328263,
479
+ "loss": 1.1645,
480
+ "step": 65
481
  }
482
  ],
483
  "logging_steps": 1,
 
497
  "attributes": {}
498
  }
499
  },
500
+ "total_flos": 6.633736372224e+17,
501
  "train_batch_size": 2,
502
  "trial_name": null,
503
  "trial_params": null