error577 commited on
Commit
d469a0e
·
verified ·
1 Parent(s): b5e3a12

Training in progress, step 650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14b032a73b98015c425161a08cf382d4195c6e7edf76e4e437a4431736faa8f7
3
  size 578859568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91ea2e833e395bd896b7338e0b159889cb0a9805a20a0ba81249634cf8be6acb
3
  size 578859568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fed801e682e5e68307b58f34bd4cd335b4789e2ce208c4c30cc8ae0d7c94bd66
3
  size 295198386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8408296ac89ee50ae11d1a24615517bae811a4a08e26902b3f91c3361afa9523
3
  size 295198386
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbf275d60f9d8a7677b056dc7c047a17ef88e4423f27115e72b43306be7f392b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d04b06ab68f7f17dc4df2206cd558b4bd98d8e29313159b73014f73bdd405dcc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5b6a73723cf527ee8aafd1afa2781dcc9cc28ce480c143c5bb0a790cbe7887b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18275b6327bd0d7d1ad9ae6ef36f205b2f0d81f6499994f6ecb9553362d17a42
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6910951733589172,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.016678132883023746,
5
  "eval_steps": 50,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4311,6 +4311,364 @@
4311
  "eval_samples_per_second": 11.449,
4312
  "eval_steps_per_second": 5.734,
4313
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4314
  }
4315
  ],
4316
  "logging_steps": 1,
@@ -4325,7 +4683,7 @@
4325
  "early_stopping_threshold": 0.0
4326
  },
4327
  "attributes": {
4328
- "early_stopping_patience_counter": 2
4329
  }
4330
  },
4331
  "TrainerControl": {
@@ -4334,12 +4692,12 @@
4334
  "should_evaluate": false,
4335
  "should_log": false,
4336
  "should_save": true,
4337
- "should_training_stop": false
4338
  },
4339
  "attributes": {}
4340
  }
4341
  },
4342
- "total_flos": 3.830931258133709e+16,
4343
  "train_batch_size": 2,
4344
  "trial_name": null,
4345
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6910951733589172,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.018067977289942392,
5
  "eval_steps": 50,
6
+ "global_step": 650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4311
  "eval_samples_per_second": 11.449,
4312
  "eval_steps_per_second": 5.734,
4313
  "step": 600
4314
+ },
4315
+ {
4316
+ "epoch": 0.016705929771162118,
4317
+ "grad_norm": 0.11464305222034454,
4318
+ "learning_rate": 0.0007000509087229895,
4319
+ "loss": 0.9103,
4320
+ "step": 601
4321
+ },
4322
+ {
4323
+ "epoch": 0.01673372665930049,
4324
+ "grad_norm": 0.09989949315786362,
4325
+ "learning_rate": 0.0006970252132582728,
4326
+ "loss": 0.8176,
4327
+ "step": 602
4328
+ },
4329
+ {
4330
+ "epoch": 0.016761523547438865,
4331
+ "grad_norm": 0.09268354624509811,
4332
+ "learning_rate": 0.0006940025687462952,
4333
+ "loss": 0.7983,
4334
+ "step": 603
4335
+ },
4336
+ {
4337
+ "epoch": 0.016789320435577237,
4338
+ "grad_norm": 0.09152427315711975,
4339
+ "learning_rate": 0.0006909830056250527,
4340
+ "loss": 0.7975,
4341
+ "step": 604
4342
+ },
4343
+ {
4344
+ "epoch": 0.01681711732371561,
4345
+ "grad_norm": 0.09386585652828217,
4346
+ "learning_rate": 0.000687966554301513,
4347
+ "loss": 0.8584,
4348
+ "step": 605
4349
+ },
4350
+ {
4351
+ "epoch": 0.01684491421185398,
4352
+ "grad_norm": 0.08477571606636047,
4353
+ "learning_rate": 0.0006849532451513074,
4354
+ "loss": 0.7387,
4355
+ "step": 606
4356
+ },
4357
+ {
4358
+ "epoch": 0.016872711099992357,
4359
+ "grad_norm": 0.08988666534423828,
4360
+ "learning_rate": 0.0006819431085184251,
4361
+ "loss": 0.8264,
4362
+ "step": 607
4363
+ },
4364
+ {
4365
+ "epoch": 0.01690050798813073,
4366
+ "grad_norm": 0.09714596718549728,
4367
+ "learning_rate": 0.0006789361747149092,
4368
+ "loss": 0.9452,
4369
+ "step": 608
4370
+ },
4371
+ {
4372
+ "epoch": 0.0169283048762691,
4373
+ "grad_norm": 0.10461269319057465,
4374
+ "learning_rate": 0.0006759324740205494,
4375
+ "loss": 0.7174,
4376
+ "step": 609
4377
+ },
4378
+ {
4379
+ "epoch": 0.016956101764407476,
4380
+ "grad_norm": 0.09161835163831711,
4381
+ "learning_rate": 0.0006729320366825784,
4382
+ "loss": 0.796,
4383
+ "step": 610
4384
+ },
4385
+ {
4386
+ "epoch": 0.016983898652545848,
4387
+ "grad_norm": 0.0949753150343895,
4388
+ "learning_rate": 0.0006699348929153668,
4389
+ "loss": 0.975,
4390
+ "step": 611
4391
+ },
4392
+ {
4393
+ "epoch": 0.01701169554068422,
4394
+ "grad_norm": 0.09028909355401993,
4395
+ "learning_rate": 0.0006669410729001193,
4396
+ "loss": 0.7738,
4397
+ "step": 612
4398
+ },
4399
+ {
4400
+ "epoch": 0.017039492428822592,
4401
+ "grad_norm": 0.08454867452383041,
4402
+ "learning_rate": 0.0006639506067845697,
4403
+ "loss": 0.7062,
4404
+ "step": 613
4405
+ },
4406
+ {
4407
+ "epoch": 0.017067289316960967,
4408
+ "grad_norm": 0.10592840611934662,
4409
+ "learning_rate": 0.0006609635246826793,
4410
+ "loss": 0.7745,
4411
+ "step": 614
4412
+ },
4413
+ {
4414
+ "epoch": 0.01709508620509934,
4415
+ "grad_norm": 0.09267466515302658,
4416
+ "learning_rate": 0.0006579798566743314,
4417
+ "loss": 0.8491,
4418
+ "step": 615
4419
+ },
4420
+ {
4421
+ "epoch": 0.01712288309323771,
4422
+ "grad_norm": 0.10221099853515625,
4423
+ "learning_rate": 0.0006549996328050296,
4424
+ "loss": 0.9564,
4425
+ "step": 616
4426
+ },
4427
+ {
4428
+ "epoch": 0.017150679981376087,
4429
+ "grad_norm": 0.09640829265117645,
4430
+ "learning_rate": 0.000652022883085595,
4431
+ "loss": 0.6694,
4432
+ "step": 617
4433
+ },
4434
+ {
4435
+ "epoch": 0.01717847686951446,
4436
+ "grad_norm": 0.09555254131555557,
4437
+ "learning_rate": 0.0006490496374918646,
4438
+ "loss": 0.7825,
4439
+ "step": 618
4440
+ },
4441
+ {
4442
+ "epoch": 0.01720627375765283,
4443
+ "grad_norm": 0.1080060750246048,
4444
+ "learning_rate": 0.0006460799259643883,
4445
+ "loss": 0.8122,
4446
+ "step": 619
4447
+ },
4448
+ {
4449
+ "epoch": 0.017234070645791202,
4450
+ "grad_norm": 0.09308885037899017,
4451
+ "learning_rate": 0.0006431137784081283,
4452
+ "loss": 0.7393,
4453
+ "step": 620
4454
+ },
4455
+ {
4456
+ "epoch": 0.017261867533929578,
4457
+ "grad_norm": 0.10485529899597168,
4458
+ "learning_rate": 0.0006401512246921576,
4459
+ "loss": 0.7577,
4460
+ "step": 621
4461
+ },
4462
+ {
4463
+ "epoch": 0.01728966442206795,
4464
+ "grad_norm": 0.10300412029027939,
4465
+ "learning_rate": 0.0006371922946493591,
4466
+ "loss": 0.7016,
4467
+ "step": 622
4468
+ },
4469
+ {
4470
+ "epoch": 0.01731746131020632,
4471
+ "grad_norm": 0.09915035963058472,
4472
+ "learning_rate": 0.0006342370180761255,
4473
+ "loss": 0.7562,
4474
+ "step": 623
4475
+ },
4476
+ {
4477
+ "epoch": 0.017345258198344694,
4478
+ "grad_norm": 0.11094118654727936,
4479
+ "learning_rate": 0.0006312854247320594,
4480
+ "loss": 0.7113,
4481
+ "step": 624
4482
+ },
4483
+ {
4484
+ "epoch": 0.01737305508648307,
4485
+ "grad_norm": 0.09752795100212097,
4486
+ "learning_rate": 0.0006283375443396726,
4487
+ "loss": 0.7649,
4488
+ "step": 625
4489
+ },
4490
+ {
4491
+ "epoch": 0.01740085197462144,
4492
+ "grad_norm": 0.10030993074178696,
4493
+ "learning_rate": 0.0006253934065840879,
4494
+ "loss": 0.7446,
4495
+ "step": 626
4496
+ },
4497
+ {
4498
+ "epoch": 0.017428648862759813,
4499
+ "grad_norm": 0.1134578287601471,
4500
+ "learning_rate": 0.0006224530411127403,
4501
+ "loss": 0.8147,
4502
+ "step": 627
4503
+ },
4504
+ {
4505
+ "epoch": 0.01745644575089819,
4506
+ "grad_norm": 0.09963490813970566,
4507
+ "learning_rate": 0.000619516477535077,
4508
+ "loss": 0.6904,
4509
+ "step": 628
4510
+ },
4511
+ {
4512
+ "epoch": 0.01748424263903656,
4513
+ "grad_norm": 0.10086818039417267,
4514
+ "learning_rate": 0.0006165837454222607,
4515
+ "loss": 0.5791,
4516
+ "step": 629
4517
+ },
4518
+ {
4519
+ "epoch": 0.017512039527174932,
4520
+ "grad_norm": 0.11571143567562103,
4521
+ "learning_rate": 0.0006136548743068713,
4522
+ "loss": 0.7572,
4523
+ "step": 630
4524
+ },
4525
+ {
4526
+ "epoch": 0.017539836415313304,
4527
+ "grad_norm": 0.10508367419242859,
4528
+ "learning_rate": 0.0006107298936826086,
4529
+ "loss": 0.5869,
4530
+ "step": 631
4531
+ },
4532
+ {
4533
+ "epoch": 0.01756763330345168,
4534
+ "grad_norm": 0.1044749990105629,
4535
+ "learning_rate": 0.0006078088330039945,
4536
+ "loss": 0.595,
4537
+ "step": 632
4538
+ },
4539
+ {
4540
+ "epoch": 0.01759543019159005,
4541
+ "grad_norm": 0.1138482466340065,
4542
+ "learning_rate": 0.0006048917216860781,
4543
+ "loss": 0.668,
4544
+ "step": 633
4545
+ },
4546
+ {
4547
+ "epoch": 0.017623227079728424,
4548
+ "grad_norm": 0.10499613732099533,
4549
+ "learning_rate": 0.0006019785891041381,
4550
+ "loss": 0.6028,
4551
+ "step": 634
4552
+ },
4553
+ {
4554
+ "epoch": 0.0176510239678668,
4555
+ "grad_norm": 0.10078407824039459,
4556
+ "learning_rate": 0.0005990694645933865,
4557
+ "loss": 0.5796,
4558
+ "step": 635
4559
+ },
4560
+ {
4561
+ "epoch": 0.01767882085600517,
4562
+ "grad_norm": 0.09239528328180313,
4563
+ "learning_rate": 0.0005961643774486753,
4564
+ "loss": 0.5735,
4565
+ "step": 636
4566
+ },
4567
+ {
4568
+ "epoch": 0.017706617744143543,
4569
+ "grad_norm": 0.09768297523260117,
4570
+ "learning_rate": 0.0005932633569242,
4571
+ "loss": 0.5082,
4572
+ "step": 637
4573
+ },
4574
+ {
4575
+ "epoch": 0.017734414632281915,
4576
+ "grad_norm": 0.10613156110048294,
4577
+ "learning_rate": 0.0005903664322332048,
4578
+ "loss": 0.5554,
4579
+ "step": 638
4580
+ },
4581
+ {
4582
+ "epoch": 0.01776221152042029,
4583
+ "grad_norm": 0.10876414179801941,
4584
+ "learning_rate": 0.000587473632547689,
4585
+ "loss": 0.6091,
4586
+ "step": 639
4587
+ },
4588
+ {
4589
+ "epoch": 0.017790008408558662,
4590
+ "grad_norm": 0.10759898275136948,
4591
+ "learning_rate": 0.0005845849869981136,
4592
+ "loss": 0.5748,
4593
+ "step": 640
4594
+ },
4595
+ {
4596
+ "epoch": 0.017817805296697034,
4597
+ "grad_norm": 0.12154053151607513,
4598
+ "learning_rate": 0.0005817005246731073,
4599
+ "loss": 0.6063,
4600
+ "step": 641
4601
+ },
4602
+ {
4603
+ "epoch": 0.017845602184835406,
4604
+ "grad_norm": 0.11394521594047546,
4605
+ "learning_rate": 0.0005788202746191734,
4606
+ "loss": 0.6124,
4607
+ "step": 642
4608
+ },
4609
+ {
4610
+ "epoch": 0.01787339907297378,
4611
+ "grad_norm": 0.09602084010839462,
4612
+ "learning_rate": 0.0005759442658403985,
4613
+ "loss": 0.4391,
4614
+ "step": 643
4615
+ },
4616
+ {
4617
+ "epoch": 0.017901195961112153,
4618
+ "grad_norm": 0.12600000202655792,
4619
+ "learning_rate": 0.0005730725272981583,
4620
+ "loss": 0.6201,
4621
+ "step": 644
4622
+ },
4623
+ {
4624
+ "epoch": 0.017928992849250525,
4625
+ "grad_norm": 0.1129770576953888,
4626
+ "learning_rate": 0.0005702050879108284,
4627
+ "loss": 0.4814,
4628
+ "step": 645
4629
+ },
4630
+ {
4631
+ "epoch": 0.0179567897373889,
4632
+ "grad_norm": 0.121727854013443,
4633
+ "learning_rate": 0.0005673419765534915,
4634
+ "loss": 0.5071,
4635
+ "step": 646
4636
+ },
4637
+ {
4638
+ "epoch": 0.017984586625527273,
4639
+ "grad_norm": 0.11814267188310623,
4640
+ "learning_rate": 0.0005644832220576479,
4641
+ "loss": 0.5387,
4642
+ "step": 647
4643
+ },
4644
+ {
4645
+ "epoch": 0.018012383513665645,
4646
+ "grad_norm": 0.14177252352237701,
4647
+ "learning_rate": 0.0005616288532109225,
4648
+ "loss": 0.6006,
4649
+ "step": 648
4650
+ },
4651
+ {
4652
+ "epoch": 0.018040180401804017,
4653
+ "grad_norm": 0.17021676898002625,
4654
+ "learning_rate": 0.0005587788987567784,
4655
+ "loss": 0.5445,
4656
+ "step": 649
4657
+ },
4658
+ {
4659
+ "epoch": 0.018067977289942392,
4660
+ "grad_norm": 0.17510192096233368,
4661
+ "learning_rate": 0.0005559333873942258,
4662
+ "loss": 0.5694,
4663
+ "step": 650
4664
+ },
4665
+ {
4666
+ "epoch": 0.018067977289942392,
4667
+ "eval_loss": 0.7097320556640625,
4668
+ "eval_runtime": 50.365,
4669
+ "eval_samples_per_second": 11.456,
4670
+ "eval_steps_per_second": 5.738,
4671
+ "step": 650
4672
  }
4673
  ],
4674
  "logging_steps": 1,
 
4683
  "early_stopping_threshold": 0.0
4684
  },
4685
  "attributes": {
4686
+ "early_stopping_patience_counter": 3
4687
  }
4688
  },
4689
  "TrainerControl": {
 
4692
  "should_evaluate": false,
4693
  "should_log": false,
4694
  "should_save": true,
4695
+ "should_training_stop": true
4696
  },
4697
  "attributes": {}
4698
  }
4699
  },
4700
+ "total_flos": 4.150175529644851e+16,
4701
  "train_batch_size": 2,
4702
  "trial_name": null,
4703
  "trial_params": null