oldiday committed on
Commit
a405259
·
verified ·
1 Parent(s): f6dc376

Training in progress, step 550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14fad88eaff3f9e0c2b07260912cdc45c644bf5e4d95a34cb03df39e87f900f4
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3fd992e113586f812afc77a0702f2871849c3b36106c0c2de720d263ffaa124
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6210a6ec10ebc2458aa9ffa49429f2af3dd7b19351ec74512b739af0e8579f01
3
  size 51418452
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3105b22c5fc7869e6ed63a58ea962520392f460fbb1c31c1abc4d139211b21cf
3
  size 51418452
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f28831b96af77035c2ded7be2a068154d942395539af092e0feeb6dcde701bc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6207d4c3c45167cc370ab64a1372acf1cee42bfee65685d0672373dc45c12efd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b782a524e5b57eb023365370accae538ac5e68454bafa53a6dd8b2c51cead56
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bb9c0d62d6b3cf0976c16f73e9bd814b298ebffa1786831bc2a68d8e48809b9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7421861290931702,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.5884083553986467,
5
  "eval_steps": 50,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -445,6 +445,49 @@
445
  "eval_samples_per_second": 22.047,
446
  "eval_steps_per_second": 5.512,
447
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  }
449
  ],
450
  "logging_steps": 10,
@@ -473,7 +516,7 @@
473
  "attributes": {}
474
  }
475
  },
476
- "total_flos": 3.3691853688039014e+17,
477
  "train_batch_size": 8,
478
  "trial_name": null,
479
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7403023838996887,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-550",
4
+ "epoch": 0.6472491909385113,
5
  "eval_steps": 50,
6
+ "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
445
  "eval_samples_per_second": 22.047,
446
  "eval_steps_per_second": 5.512,
447
  "step": 500
448
+ },
449
+ {
450
+ "epoch": 0.6001765225066196,
451
+ "grad_norm": 0.7555685043334961,
452
+ "learning_rate": 1.1264792494342857e-05,
453
+ "loss": 5.5477,
454
+ "step": 510
455
+ },
456
+ {
457
+ "epoch": 0.6119446896145925,
458
+ "grad_norm": 0.7698599696159363,
459
+ "learning_rate": 8.936522714508678e-06,
460
+ "loss": 4.6595,
461
+ "step": 520
462
+ },
463
+ {
464
+ "epoch": 0.6237128567225655,
465
+ "grad_norm": 0.8012061715126038,
466
+ "learning_rate": 6.866382254766157e-06,
467
+ "loss": 2.9295,
468
+ "step": 530
469
+ },
470
+ {
471
+ "epoch": 0.6354810238305384,
472
+ "grad_norm": 0.8370329737663269,
473
+ "learning_rate": 5.060239153161872e-06,
474
+ "loss": 1.2254,
475
+ "step": 540
476
+ },
477
+ {
478
+ "epoch": 0.6472491909385113,
479
+ "grad_norm": 1.4551666975021362,
480
+ "learning_rate": 3.5232131185484076e-06,
481
+ "loss": 1.2469,
482
+ "step": 550
483
+ },
484
+ {
485
+ "epoch": 0.6472491909385113,
486
+ "eval_loss": 0.7403023838996887,
487
+ "eval_runtime": 65.1388,
488
+ "eval_samples_per_second": 21.984,
489
+ "eval_steps_per_second": 5.496,
490
+ "step": 550
491
  }
492
  ],
493
  "logging_steps": 10,
 
516
  "attributes": {}
517
  }
518
  },
519
+ "total_flos": 3.706288114835128e+17,
520
  "train_batch_size": 8,
521
  "trial_name": null,
522
  "trial_params": null