broodmother41 committed on
Commit
2c4f6c4
·
verified ·
1 Parent(s): b1a0024

Training in progress, step 1088, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b5640fad40281fd5d639511356095a8552ab13034cf39c186ed59e06ab055a1
3
  size 54285928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:384f36007b97fbb92ffad76cabf9847376c7a4cb47cbde2ce83ad9d3c2e5b138
3
  size 54285928
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1e01ad8da81a858e8e938a7bdca8254bfa5438d6b1c7251bdd13f12c56d5e1f
3
  size 27753786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f190adcd340a44a909ebf73f7c0c5cc971db6f330a46183ade6020952f14add
3
  size 27753786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:508337fd7bc9ca1cce78c0c53b3e5fba6c6a4bdf1bdeb4293058ee7e7b6238a1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8451f5ea486a190b6e371450fc18194c8ef19d966279c816f43e84a399cdb84
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a5cc0d0ebf737ac8a43a5138b0ded560b01a769ad496c84f0d332e37eb84e28
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f518c75cfe5a3d34bf6d4285b5324ac723c5d3cb3e7dcf0617a8e3578699ca5d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.3224910497665405,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1050",
4
- "epoch": 0.9657392504023914,
5
  "eval_steps": 150,
6
- "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7421,6 +7421,272 @@
7421
  "eval_samples_per_second": 165.065,
7422
  "eval_steps_per_second": 20.633,
7423
  "step": 1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7424
  }
7425
  ],
7426
  "logging_steps": 1,
@@ -7444,12 +7710,12 @@
7444
  "should_evaluate": false,
7445
  "should_log": false,
7446
  "should_save": true,
7447
- "should_training_stop": false
7448
  },
7449
  "attributes": {}
7450
  }
7451
  },
7452
- "total_flos": 1.9092013631668224e+17,
7453
  "train_batch_size": 12,
7454
  "trial_name": null,
7455
  "trial_params": null
 
1
  {
2
  "best_metric": 1.3224910497665405,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1050",
4
+ "epoch": 1.0006898137502873,
5
  "eval_steps": 150,
6
+ "global_step": 1088,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7421
  "eval_samples_per_second": 165.065,
7422
  "eval_steps_per_second": 20.633,
7423
  "step": 1050
7424
+ },
7425
+ {
7426
+ "epoch": 0.9666590020694412,
7427
+ "grad_norm": 0.7578225135803223,
7428
+ "learning_rate": 2.958502525492457e-07,
7429
+ "loss": 1.4715,
7430
+ "step": 1051
7431
+ },
7432
+ {
7433
+ "epoch": 0.9675787537364912,
7434
+ "grad_norm": 0.5387850403785706,
7435
+ "learning_rate": 2.800891996009025e-07,
7436
+ "loss": 1.5156,
7437
+ "step": 1052
7438
+ },
7439
+ {
7440
+ "epoch": 0.9684985054035411,
7441
+ "grad_norm": 0.5452224016189575,
7442
+ "learning_rate": 2.6475836335553836e-07,
7443
+ "loss": 1.4171,
7444
+ "step": 1053
7445
+ },
7446
+ {
7447
+ "epoch": 0.9694182570705909,
7448
+ "grad_norm": 0.5857959389686584,
7449
+ "learning_rate": 2.498578764678849e-07,
7450
+ "loss": 1.454,
7451
+ "step": 1054
7452
+ },
7453
+ {
7454
+ "epoch": 0.9703380087376409,
7455
+ "grad_norm": 0.5615691542625427,
7456
+ "learning_rate": 2.3538786786896915e-07,
7457
+ "loss": 1.3929,
7458
+ "step": 1055
7459
+ },
7460
+ {
7461
+ "epoch": 0.9712577604046907,
7462
+ "grad_norm": 0.5565760731697083,
7463
+ "learning_rate": 2.2134846276494202e-07,
7464
+ "loss": 1.4978,
7465
+ "step": 1056
7466
+ },
7467
+ {
7468
+ "epoch": 0.9721775120717406,
7469
+ "grad_norm": 0.6373360753059387,
7470
+ "learning_rate": 2.0773978263605166e-07,
7471
+ "loss": 1.4766,
7472
+ "step": 1057
7473
+ },
7474
+ {
7475
+ "epoch": 0.9730972637387906,
7476
+ "grad_norm": 0.6223818063735962,
7477
+ "learning_rate": 1.9456194523554406e-07,
7478
+ "loss": 1.4494,
7479
+ "step": 1058
7480
+ },
7481
+ {
7482
+ "epoch": 0.9740170154058404,
7483
+ "grad_norm": 0.5713940858840942,
7484
+ "learning_rate": 1.8181506458869736e-07,
7485
+ "loss": 1.4204,
7486
+ "step": 1059
7487
+ },
7488
+ {
7489
+ "epoch": 0.9749367670728903,
7490
+ "grad_norm": 0.6217029690742493,
7491
+ "learning_rate": 1.69499250991767e-07,
7492
+ "loss": 1.3679,
7493
+ "step": 1060
7494
+ },
7495
+ {
7496
+ "epoch": 0.9758565187399402,
7497
+ "grad_norm": 0.5862613320350647,
7498
+ "learning_rate": 1.576146110111032e-07,
7499
+ "loss": 1.428,
7500
+ "step": 1061
7501
+ },
7502
+ {
7503
+ "epoch": 0.9767762704069901,
7504
+ "grad_norm": 0.6091289520263672,
7505
+ "learning_rate": 1.4616124748217385e-07,
7506
+ "loss": 1.4198,
7507
+ "step": 1062
7508
+ },
7509
+ {
7510
+ "epoch": 0.97769602207404,
7511
+ "grad_norm": 0.515957236289978,
7512
+ "learning_rate": 1.351392595087042e-07,
7513
+ "loss": 1.3173,
7514
+ "step": 1063
7515
+ },
7516
+ {
7517
+ "epoch": 0.9786157737410899,
7518
+ "grad_norm": 0.5550262331962585,
7519
+ "learning_rate": 1.245487424618108e-07,
7520
+ "loss": 1.3209,
7521
+ "step": 1064
7522
+ },
7523
+ {
7524
+ "epoch": 0.9795355254081398,
7525
+ "grad_norm": 0.6746039986610413,
7526
+ "learning_rate": 1.1438978797916888e-07,
7527
+ "loss": 1.3918,
7528
+ "step": 1065
7529
+ },
7530
+ {
7531
+ "epoch": 0.9804552770751896,
7532
+ "grad_norm": 0.5552029013633728,
7533
+ "learning_rate": 1.0466248396424073e-07,
7534
+ "loss": 1.3515,
7535
+ "step": 1066
7536
+ },
7537
+ {
7538
+ "epoch": 0.9813750287422396,
7539
+ "grad_norm": 0.6203471422195435,
7540
+ "learning_rate": 9.536691458548741e-08,
7541
+ "loss": 1.4412,
7542
+ "step": 1067
7543
+ },
7544
+ {
7545
+ "epoch": 0.9822947804092895,
7546
+ "grad_norm": 0.5462220311164856,
7547
+ "learning_rate": 8.650316027566386e-08,
7548
+ "loss": 1.3411,
7549
+ "step": 1068
7550
+ },
7551
+ {
7552
+ "epoch": 0.9832145320763394,
7553
+ "grad_norm": 0.5441474318504333,
7554
+ "learning_rate": 7.807129773110822e-08,
7555
+ "loss": 1.3372,
7556
+ "step": 1069
7557
+ },
7558
+ {
7559
+ "epoch": 0.9841342837433893,
7560
+ "grad_norm": 0.6476730704307556,
7561
+ "learning_rate": 7.007139991108135e-08,
7562
+ "loss": 1.328,
7563
+ "step": 1070
7564
+ },
7565
+ {
7566
+ "epoch": 0.9850540354104392,
7567
+ "grad_norm": 0.5515516400337219,
7568
+ "learning_rate": 6.25035360371451e-08,
7569
+ "loss": 1.372,
7570
+ "step": 1071
7571
+ },
7572
+ {
7573
+ "epoch": 0.9859737870774891,
7574
+ "grad_norm": 0.6750530004501343,
7575
+ "learning_rate": 5.536777159254603e-08,
7576
+ "loss": 1.3563,
7577
+ "step": 1072
7578
+ },
7579
+ {
7580
+ "epoch": 0.986893538744539,
7581
+ "grad_norm": 0.5728088021278381,
7582
+ "learning_rate": 4.8664168321671534e-08,
7583
+ "loss": 1.318,
7584
+ "step": 1073
7585
+ },
7586
+ {
7587
+ "epoch": 0.9878132904115888,
7588
+ "grad_norm": 0.6397655606269836,
7589
+ "learning_rate": 4.239278422948911e-08,
7590
+ "loss": 1.286,
7591
+ "step": 1074
7592
+ },
7593
+ {
7594
+ "epoch": 0.9887330420786388,
7595
+ "grad_norm": 0.5922835469245911,
7596
+ "learning_rate": 3.655367358106343e-08,
7597
+ "loss": 1.2589,
7598
+ "step": 1075
7599
+ },
7600
+ {
7601
+ "epoch": 0.9896527937456887,
7602
+ "grad_norm": 0.6236873865127563,
7603
+ "learning_rate": 3.1146886901090025e-08,
7604
+ "loss": 1.2131,
7605
+ "step": 1076
7606
+ },
7607
+ {
7608
+ "epoch": 0.9905725454127385,
7609
+ "grad_norm": 0.5909478664398193,
7610
+ "learning_rate": 2.617247097342901e-08,
7611
+ "loss": 1.2015,
7612
+ "step": 1077
7613
+ },
7614
+ {
7615
+ "epoch": 0.9914922970797885,
7616
+ "grad_norm": 0.6112456321716309,
7617
+ "learning_rate": 2.1630468840738714e-08,
7618
+ "loss": 1.3083,
7619
+ "step": 1078
7620
+ },
7621
+ {
7622
+ "epoch": 0.9924120487468383,
7623
+ "grad_norm": 0.6521921157836914,
7624
+ "learning_rate": 1.7520919804075998e-08,
7625
+ "loss": 1.2579,
7626
+ "step": 1079
7627
+ },
7628
+ {
7629
+ "epoch": 0.9933318004138882,
7630
+ "grad_norm": 0.5833930373191833,
7631
+ "learning_rate": 1.3843859422574268e-08,
7632
+ "loss": 1.196,
7633
+ "step": 1080
7634
+ },
7635
+ {
7636
+ "epoch": 0.9942515520809382,
7637
+ "grad_norm": 0.6652984619140625,
7638
+ "learning_rate": 1.0599319513115991e-08,
7639
+ "loss": 1.197,
7640
+ "step": 1081
7641
+ },
7642
+ {
7643
+ "epoch": 0.995171303747988,
7644
+ "grad_norm": 0.6548473834991455,
7645
+ "learning_rate": 7.787328150071771e-09,
7646
+ "loss": 1.2061,
7647
+ "step": 1082
7648
+ },
7649
+ {
7650
+ "epoch": 0.996091055415038,
7651
+ "grad_norm": 0.6595301628112793,
7652
+ "learning_rate": 5.40790966505611e-09,
7653
+ "loss": 1.1764,
7654
+ "step": 1083
7655
+ },
7656
+ {
7657
+ "epoch": 0.9970108070820879,
7658
+ "grad_norm": 0.753955602645874,
7659
+ "learning_rate": 3.4610846467109103e-09,
7660
+ "loss": 1.1202,
7661
+ "step": 1084
7662
+ },
7663
+ {
7664
+ "epoch": 0.9979305587491377,
7665
+ "grad_norm": 0.7465300559997559,
7666
+ "learning_rate": 1.9468699405444934e-09,
7667
+ "loss": 1.1407,
7668
+ "step": 1085
7669
+ },
7670
+ {
7671
+ "epoch": 0.9988503104161877,
7672
+ "grad_norm": 0.8871564865112305,
7673
+ "learning_rate": 8.652786487484132e-10,
7674
+ "loss": 1.0533,
7675
+ "step": 1086
7676
+ },
7677
+ {
7678
+ "epoch": 0.9997700620832375,
7679
+ "grad_norm": 1.0818983316421509,
7680
+ "learning_rate": 2.1632013013084262e-10,
7681
+ "loss": 0.9817,
7682
+ "step": 1087
7683
+ },
7684
+ {
7685
+ "epoch": 1.0006898137502873,
7686
+ "grad_norm": 2.3892836570739746,
7687
+ "learning_rate": 0.0,
7688
+ "loss": 2.0309,
7689
+ "step": 1088
7690
  }
7691
  ],
7692
  "logging_steps": 1,
 
7710
  "should_evaluate": false,
7711
  "should_log": false,
7712
  "should_save": true,
7713
+ "should_training_stop": true
7714
  },
7715
  "attributes": {}
7716
  }
7717
  },
7718
+ "total_flos": 1.9792675536371712e+17,
7719
  "train_batch_size": 12,
7720
  "trial_name": null,
7721
  "trial_params": null