oldiday committed on
Commit
2c97578
·
verified ·
1 Parent(s): 1a51fc6

Training in progress, step 550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cbc7a49709e9542b20fff8a8f59756c28115101e94fb4b8bd2ec2fde6b8e564
3
  size 217931936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d794f53e490aeabc8e2b197bf939edb0e879efc274e2b013ba6eec75e34c3be
3
  size 217931936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ed7b26d7e96385d1af34bcc7e0bf79e5a76779452b1836915858d6897ebdf39
3
  size 111412820
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e2f496fc635dd73d2e05c66edaa3313f1ab7f7e0a4da39f79671d18a22706de
3
  size 111412820
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb4a2f681cf5e977b9bfcb6e4dd4cbb7858b14001ef3a7b3c46244d4d03bf8cb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feaac8ff001d8f64c9a27eb373ea7ff9c698577a00b8ba8e83d6dff9874d7c1f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b782a524e5b57eb023365370accae538ac5e68454bafa53a6dd8b2c51cead56
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bb9c0d62d6b3cf0976c16f73e9bd814b298ebffa1786831bc2a68d8e48809b9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.0833667516708374,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.32663726931242854,
5
  "eval_steps": 50,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -445,6 +445,49 @@
445
  "eval_samples_per_second": 18.847,
446
  "eval_steps_per_second": 4.715,
447
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  }
449
  ],
450
  "logging_steps": 10,
@@ -473,7 +516,7 @@
473
  "attributes": {}
474
  }
475
  },
476
- "total_flos": 4.356406596473979e+17,
477
  "train_batch_size": 8,
478
  "trial_name": null,
479
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.081974983215332,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-550",
4
+ "epoch": 0.3593009962436714,
5
  "eval_steps": 50,
6
+ "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
445
  "eval_samples_per_second": 18.847,
446
  "eval_steps_per_second": 4.715,
447
  "step": 500
448
+ },
449
+ {
450
+ "epoch": 0.33317001469867713,
451
+ "grad_norm": 0.19704855978488922,
452
+ "learning_rate": 1.1264792494342857e-05,
453
+ "loss": 0.8724,
454
+ "step": 510
455
+ },
456
+ {
457
+ "epoch": 0.33970276008492567,
458
+ "grad_norm": 0.2699441909790039,
459
+ "learning_rate": 8.936522714508678e-06,
460
+ "loss": 0.9602,
461
+ "step": 520
462
+ },
463
+ {
464
+ "epoch": 0.34623550547117427,
465
+ "grad_norm": 0.40045005083084106,
466
+ "learning_rate": 6.866382254766157e-06,
467
+ "loss": 0.9765,
468
+ "step": 530
469
+ },
470
+ {
471
+ "epoch": 0.3527682508574228,
472
+ "grad_norm": 0.8045767545700073,
473
+ "learning_rate": 5.060239153161872e-06,
474
+ "loss": 1.0677,
475
+ "step": 540
476
+ },
477
+ {
478
+ "epoch": 0.3593009962436714,
479
+ "grad_norm": 1.6138585805892944,
480
+ "learning_rate": 3.5232131185484076e-06,
481
+ "loss": 1.4285,
482
+ "step": 550
483
+ },
484
+ {
485
+ "epoch": 0.3593009962436714,
486
+ "eval_loss": 1.081974983215332,
487
+ "eval_runtime": 136.8083,
488
+ "eval_samples_per_second": 18.844,
489
+ "eval_steps_per_second": 4.715,
490
+ "step": 550
491
  }
492
  ],
493
  "logging_steps": 10,
 
516
  "attributes": {}
517
  }
518
  },
519
+ "total_flos": 4.79289480604287e+17,
520
  "train_batch_size": 8,
521
  "trial_name": null,
522
  "trial_params": null