error577 committed on
Commit 379f8b1 · verified · 1 Parent(s): 565e1d5

Training in progress, step 500, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5b0c195b03a78891addfd541c6f5a6e05e308cb677dddc2b5d1e1bc7a4317910
+ oid sha256:c6519a65749b3640a198541c50a2d98674828ec6c9d6a4383408f62deb368efa
  size 578859568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0ea03775d6f122ef22f9a34fa6d7c975927ce0c5091946eb8c4d70964cfe011e
+ oid sha256:366e0623982882f9616a1c85db3efb50f3266236a2f25d9632bdc591d388e3eb
  size 294324692
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a000bcd0fcfbd6dc706ee094bc40e59bcb50a28e8797f55a0743ef881fecdf71
+ oid sha256:5ceff29ed030645c389d768390ebf4a4817b18da01053a3a2c553da3856524de
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9443e18e9eff1c8055981c18d9a28ff4f85044c4c7fdc07a0fbff8845c622c60
+ oid sha256:90b3c6a1abf04b992261ec3713fab3774b78fe3ab6f52bec78378c6c0a4a1110
  size 1064
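
Note: the four files above are stored with Git LFS, so the commit only rewrites each pointer file's `oid` (the SHA-256 of the object's contents) and `size`. As a minimal sketch, not part of this repository, assuming the checkpoint has been pulled locally, a downloaded object can be checked against its pointer like this:

```python
import hashlib
from pathlib import Path

def verify_lfs_object(file_path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the local file matches the oid/size recorded in its LFS pointer."""
    path = Path(file_path)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        # Hash in 1 MiB chunks so large checkpoints don't have to fit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the new adapter_model.safetensors pointer above; the local path is an assumption.
print(verify_lfs_object(
    "last-checkpoint/adapter_model.safetensors",
    "c6519a65749b3640a198541c50a2d98674828ec6c9d6a4383408f62deb368efa",
    578859568,
))
```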
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.6939424276351929,
- "best_model_checkpoint": "miner_id_24/checkpoint-450",
- "epoch": 0.012508599662267809,
+ "best_metric": 0.6910951733589172,
+ "best_model_checkpoint": "miner_id_24/checkpoint-500",
+ "epoch": 0.013898444069186455,
  "eval_steps": 50,
- "global_step": 450,
+ "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3237,6 +3237,364 @@
  "eval_samples_per_second": 11.548,
  "eval_steps_per_second": 5.784,
  "step": 450
+ },
+ {
+ "epoch": 0.012536396550406182,
+ "grad_norm": 0.08057525753974915,
+ "learning_rate": 4.894348370484647e-05,
+ "loss": 0.8512,
+ "step": 451
+ },
+ {
+ "epoch": 0.012564193438544554,
+ "grad_norm": 0.08833472430706024,
+ "learning_rate": 4.698180862871282e-05,
+ "loss": 0.995,
+ "step": 452
+ },
+ {
+ "epoch": 0.012591990326682928,
+ "grad_norm": 0.08962654322385788,
+ "learning_rate": 4.505930839770966e-05,
+ "loss": 0.9639,
+ "step": 453
+ },
+ {
+ "epoch": 0.0126197872148213,
+ "grad_norm": 0.08324563503265381,
+ "learning_rate": 4.3176062038291274e-05,
+ "loss": 0.7867,
+ "step": 454
+ },
+ {
+ "epoch": 0.012647584102959674,
+ "grad_norm": 0.0913391187787056,
+ "learning_rate": 4.1332146963339423e-05,
+ "loss": 0.7687,
+ "step": 455
+ },
+ {
+ "epoch": 0.012675380991098047,
+ "grad_norm": 0.08055524528026581,
+ "learning_rate": 3.952763896898071e-05,
+ "loss": 0.7876,
+ "step": 456
+ },
+ {
+ "epoch": 0.01270317787923642,
+ "grad_norm": 0.08405828475952148,
+ "learning_rate": 3.776261223147126e-05,
+ "loss": 0.7312,
+ "step": 457
+ },
+ {
+ "epoch": 0.012730974767374793,
+ "grad_norm": 0.08189340680837631,
+ "learning_rate": 3.603713930414676e-05,
+ "loss": 0.7638,
+ "step": 458
+ },
+ {
+ "epoch": 0.012758771655513165,
+ "grad_norm": 0.08689261227846146,
+ "learning_rate": 3.435129111444113e-05,
+ "loss": 0.7503,
+ "step": 459
+ },
+ {
+ "epoch": 0.012786568543651539,
+ "grad_norm": 0.08499288558959961,
+ "learning_rate": 3.270513696097055e-05,
+ "loss": 0.7546,
+ "step": 460
+ },
+ {
+ "epoch": 0.01281436543178991,
+ "grad_norm": 0.08908785879611969,
+ "learning_rate": 3.109874451068473e-05,
+ "loss": 0.7841,
+ "step": 461
+ },
+ {
+ "epoch": 0.012842162319928284,
+ "grad_norm": 0.08109744638204575,
+ "learning_rate": 2.9532179796085356e-05,
+ "loss": 0.6244,
+ "step": 462
+ },
+ {
+ "epoch": 0.012869959208066656,
+ "grad_norm": 0.08815158903598785,
+ "learning_rate": 2.800550721251216e-05,
+ "loss": 0.697,
+ "step": 463
+ },
+ {
+ "epoch": 0.01289775609620503,
+ "grad_norm": 0.09392455220222473,
+ "learning_rate": 2.6518789515495355e-05,
+ "loss": 0.6835,
+ "step": 464
+ },
+ {
+ "epoch": 0.012925552984343404,
+ "grad_norm": 0.1170358955860138,
+ "learning_rate": 2.5072087818176382e-05,
+ "loss": 0.938,
+ "step": 465
+ },
+ {
+ "epoch": 0.012953349872481775,
+ "grad_norm": 0.09133084863424301,
+ "learning_rate": 2.36654615887959e-05,
+ "loss": 0.7392,
+ "step": 466
+ },
+ {
+ "epoch": 0.01298114676062015,
+ "grad_norm": 0.09215465933084488,
+ "learning_rate": 2.2298968648248653e-05,
+ "loss": 0.8161,
+ "step": 467
+ },
+ {
+ "epoch": 0.013008943648758521,
+ "grad_norm": 0.08623038232326508,
+ "learning_rate": 2.0972665167707127e-05,
+ "loss": 0.7643,
+ "step": 468
+ },
+ {
+ "epoch": 0.013036740536896895,
+ "grad_norm": 0.08695585280656815,
+ "learning_rate": 1.968660566631275e-05,
+ "loss": 0.6102,
+ "step": 469
+ },
+ {
+ "epoch": 0.013064537425035267,
+ "grad_norm": 0.08945546299219131,
+ "learning_rate": 1.844084300893456e-05,
+ "loss": 0.7888,
+ "step": 470
+ },
+ {
+ "epoch": 0.01309233431317364,
+ "grad_norm": 0.09600325673818588,
+ "learning_rate": 1.7235428403996167e-05,
+ "loss": 0.8451,
+ "step": 471
+ },
+ {
+ "epoch": 0.013120131201312012,
+ "grad_norm": 0.08915964514017105,
+ "learning_rate": 1.6070411401370334e-05,
+ "loss": 0.6192,
+ "step": 472
+ },
+ {
+ "epoch": 0.013147928089450386,
+ "grad_norm": 0.1044343113899231,
+ "learning_rate": 1.494583989034326e-05,
+ "loss": 0.6746,
+ "step": 473
+ },
+ {
+ "epoch": 0.01317572497758876,
+ "grad_norm": 0.0989852100610733,
+ "learning_rate": 1.386176009764506e-05,
+ "loss": 0.7388,
+ "step": 474
+ },
+ {
+ "epoch": 0.013203521865727132,
+ "grad_norm": 0.11182911694049835,
+ "learning_rate": 1.2818216585549825e-05,
+ "loss": 0.9093,
+ "step": 475
+ },
+ {
+ "epoch": 0.013231318753865505,
+ "grad_norm": 0.10432388633489609,
+ "learning_rate": 1.1815252250044316e-05,
+ "loss": 0.6842,
+ "step": 476
+ },
+ {
+ "epoch": 0.013259115642003877,
+ "grad_norm": 0.11141140758991241,
+ "learning_rate": 1.0852908319063826e-05,
+ "loss": 0.6488,
+ "step": 477
+ },
+ {
+ "epoch": 0.013286912530142251,
+ "grad_norm": 0.10428661853075027,
+ "learning_rate": 9.931224350798185e-06,
+ "loss": 0.7708,
+ "step": 478
+ },
+ {
+ "epoch": 0.013314709418280623,
+ "grad_norm": 0.10303416848182678,
+ "learning_rate": 9.0502382320653e-06,
+ "loss": 0.6728,
+ "step": 479
+ },
+ {
+ "epoch": 0.013342506306418997,
+ "grad_norm": 0.10627992451190948,
+ "learning_rate": 8.209986176753947e-06,
+ "loss": 0.6125,
+ "step": 480
+ },
+ {
+ "epoch": 0.013370303194557369,
+ "grad_norm": 0.09629444032907486,
+ "learning_rate": 7.4105027243349665e-06,
+ "loss": 0.6386,
+ "step": 481
+ },
+ {
+ "epoch": 0.013398100082695742,
+ "grad_norm": 0.09442020207643509,
+ "learning_rate": 6.65182073844195e-06,
+ "loss": 0.5644,
+ "step": 482
+ },
+ {
+ "epoch": 0.013425896970834116,
+ "grad_norm": 0.0994250699877739,
+ "learning_rate": 5.933971405519656e-06,
+ "loss": 0.6243,
+ "step": 483
+ },
+ {
+ "epoch": 0.013453693858972488,
+ "grad_norm": 0.1111208125948906,
+ "learning_rate": 5.256984233542595e-06,
+ "loss": 0.7222,
+ "step": 484
+ },
+ {
+ "epoch": 0.013481490747110862,
+ "grad_norm": 0.11001411825418472,
+ "learning_rate": 4.6208870508017695e-06,
+ "loss": 0.693,
+ "step": 485
+ },
+ {
+ "epoch": 0.013509287635249233,
+ "grad_norm": 0.0949028953909874,
+ "learning_rate": 4.025706004760932e-06,
+ "loss": 0.6378,
+ "step": 486
+ },
+ {
+ "epoch": 0.013537084523387607,
+ "grad_norm": 0.10430373251438141,
+ "learning_rate": 3.471465560981768e-06,
+ "loss": 0.5924,
+ "step": 487
+ },
+ {
+ "epoch": 0.013564881411525979,
+ "grad_norm": 0.09569145739078522,
+ "learning_rate": 2.958188502118153e-06,
+ "loss": 0.5534,
+ "step": 488
+ },
+ {
+ "epoch": 0.013592678299664353,
+ "grad_norm": 0.09768345206975937,
+ "learning_rate": 2.4858959269794535e-06,
+ "loss": 0.4815,
+ "step": 489
+ },
+ {
+ "epoch": 0.013620475187802725,
+ "grad_norm": 0.09561553597450256,
+ "learning_rate": 2.054607249663665e-06,
+ "loss": 0.5637,
+ "step": 490
+ },
+ {
+ "epoch": 0.013648272075941098,
+ "grad_norm": 0.11304374039173126,
+ "learning_rate": 1.6643401987591622e-06,
+ "loss": 0.5943,
+ "step": 491
+ },
+ {
+ "epoch": 0.013676068964079472,
+ "grad_norm": 0.11450091749429703,
+ "learning_rate": 1.3151108166156167e-06,
+ "loss": 0.583,
+ "step": 492
+ },
+ {
+ "epoch": 0.013703865852217844,
+ "grad_norm": 0.11334867030382156,
+ "learning_rate": 1.0069334586854107e-06,
+ "loss": 0.6076,
+ "step": 493
+ },
+ {
+ "epoch": 0.013731662740356218,
+ "grad_norm": 0.1081780344247818,
+ "learning_rate": 7.398207929323331e-07,
+ "loss": 0.5119,
+ "step": 494
+ },
+ {
+ "epoch": 0.01375945962849459,
+ "grad_norm": 0.11384209990501404,
+ "learning_rate": 5.137837993121064e-07,
+ "loss": 0.48,
+ "step": 495
+ },
+ {
+ "epoch": 0.013787256516632963,
+ "grad_norm": 0.11100795120000839,
+ "learning_rate": 3.2883176932019256e-07,
+ "loss": 0.53,
+ "step": 496
+ },
+ {
+ "epoch": 0.013815053404771335,
+ "grad_norm": 0.14853325486183167,
+ "learning_rate": 1.8497230560998722e-07,
+ "loss": 0.5982,
+ "step": 497
+ },
+ {
+ "epoch": 0.013842850292909709,
+ "grad_norm": 0.1417522132396698,
+ "learning_rate": 8.221132168073631e-08,
+ "loss": 0.5259,
+ "step": 498
+ },
+ {
+ "epoch": 0.013870647181048081,
+ "grad_norm": 0.15358008444309235,
+ "learning_rate": 2.0553041633952775e-08,
+ "loss": 0.5328,
+ "step": 499
+ },
+ {
+ "epoch": 0.013898444069186455,
+ "grad_norm": 0.21747428178787231,
+ "learning_rate": 0.0,
+ "loss": 0.5217,
+ "step": 500
+ },
+ {
+ "epoch": 0.013898444069186455,
+ "eval_loss": 0.6910951733589172,
+ "eval_runtime": 50.0791,
+ "eval_samples_per_second": 11.522,
+ "eval_steps_per_second": 5.771,
+ "step": 500
  }
  ],
  "logging_steps": 1,
@@ -3260,12 +3618,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 2.879669611266048e+16,
+ "total_flos": 3.1913641871671296e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null