ErrorAI commited on
Commit
7b39060
·
verified ·
1 Parent(s): 03af7ba

Training in progress, step 689, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca1b35c14ed33812fa5a1148e0b088fe02867203f825a4bf1cefa851a4e6feaf
3
  size 125248064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:041f64e371597393e019be4e6ff4350d5d401ed932c90bcf32d228f5048b36cb
3
  size 125248064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72c0382adba942aabd19895436a65b38840ff6c022abfbe83a7ea862d7b65421
3
  size 64220436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82b2b3c85d6c13772f251cf444d87ea08f3e13e439374182d515b33b6bfa924e
3
  size 64220436
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e768d8866a4711cc5d93bbd547c0328a9008c16a56345b441128bfb2705240c1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:733a8078a9a3de796ea1643fe169ffb5ef7502a8fc626cde056eafcb9b3baf9c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28d186c40b946a6a34744d7423e8d26313d6804efbd0c6e291f515000778d46c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14bfcc404c13c41041f67f6bce66a78c5779da7ad0881e01dc99ce86c536e721
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7532656023222061,
5
  "eval_steps": 173,
6
- "global_step": 519,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3672,6 +3672,1196 @@
3672
  "eval_samples_per_second": 8.964,
3673
  "eval_steps_per_second": 4.498,
3674
  "step": 519
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3675
  }
3676
  ],
3677
  "logging_steps": 1,
@@ -3686,12 +4876,12 @@
3686
  "should_evaluate": false,
3687
  "should_log": false,
3688
  "should_save": true,
3689
- "should_training_stop": false
3690
  },
3691
  "attributes": {}
3692
  }
3693
  },
3694
- "total_flos": 3.281791492502323e+17,
3695
  "train_batch_size": 2,
3696
  "trial_name": null,
3697
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 173,
6
+ "global_step": 689,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3672
  "eval_samples_per_second": 8.964,
3673
  "eval_steps_per_second": 4.498,
3674
  "step": 519
3675
+ },
3676
+ {
3677
+ "epoch": 0.7547169811320755,
3678
+ "grad_norm": 0.24123501777648926,
3679
+ "learning_rate": 2.9044375337725593e-05,
3680
+ "loss": 0.4715,
3681
+ "step": 520
3682
+ },
3683
+ {
3684
+ "epoch": 0.7561683599419449,
3685
+ "grad_norm": 0.4682220220565796,
3686
+ "learning_rate": 2.8719109545317103e-05,
3687
+ "loss": 0.8464,
3688
+ "step": 521
3689
+ },
3690
+ {
3691
+ "epoch": 0.7576197387518142,
3692
+ "grad_norm": 0.3238670527935028,
3693
+ "learning_rate": 2.839536967562504e-05,
3694
+ "loss": 0.34,
3695
+ "step": 522
3696
+ },
3697
+ {
3698
+ "epoch": 0.7590711175616836,
3699
+ "grad_norm": 0.46153318881988525,
3700
+ "learning_rate": 2.807316265900649e-05,
3701
+ "loss": 1.183,
3702
+ "step": 523
3703
+ },
3704
+ {
3705
+ "epoch": 0.760522496371553,
3706
+ "grad_norm": 0.42253023386001587,
3707
+ "learning_rate": 2.7752495393004475e-05,
3708
+ "loss": 0.5708,
3709
+ "step": 524
3710
+ },
3711
+ {
3712
+ "epoch": 0.7619738751814223,
3713
+ "grad_norm": 0.4283324182033539,
3714
+ "learning_rate": 2.7433374742200192e-05,
3715
+ "loss": 1.2854,
3716
+ "step": 525
3717
+ },
3718
+ {
3719
+ "epoch": 0.7634252539912917,
3720
+ "grad_norm": 0.33337557315826416,
3721
+ "learning_rate": 2.7115807538066297e-05,
3722
+ "loss": 0.6406,
3723
+ "step": 526
3724
+ },
3725
+ {
3726
+ "epoch": 0.7648766328011611,
3727
+ "grad_norm": 0.33540481328964233,
3728
+ "learning_rate": 2.679980057882049e-05,
3729
+ "loss": 0.5988,
3730
+ "step": 527
3731
+ },
3732
+ {
3733
+ "epoch": 0.7663280116110305,
3734
+ "grad_norm": 0.5348614454269409,
3735
+ "learning_rate": 2.6485360629279987e-05,
3736
+ "loss": 1.6389,
3737
+ "step": 528
3738
+ },
3739
+ {
3740
+ "epoch": 0.7677793904208998,
3741
+ "grad_norm": 0.36704495549201965,
3742
+ "learning_rate": 2.6172494420716797e-05,
3743
+ "loss": 1.0625,
3744
+ "step": 529
3745
+ },
3746
+ {
3747
+ "epoch": 0.7692307692307693,
3748
+ "grad_norm": 0.3153030276298523,
3749
+ "learning_rate": 2.5861208650713588e-05,
3750
+ "loss": 0.3844,
3751
+ "step": 530
3752
+ },
3753
+ {
3754
+ "epoch": 0.7706821480406386,
3755
+ "grad_norm": 0.29406848549842834,
3756
+ "learning_rate": 2.555150998302026e-05,
3757
+ "loss": 0.575,
3758
+ "step": 531
3759
+ },
3760
+ {
3761
+ "epoch": 0.772133526850508,
3762
+ "grad_norm": 0.35533541440963745,
3763
+ "learning_rate": 2.524340504741135e-05,
3764
+ "loss": 0.9053,
3765
+ "step": 532
3766
+ },
3767
+ {
3768
+ "epoch": 0.7735849056603774,
3769
+ "grad_norm": 0.35642877221107483,
3770
+ "learning_rate": 2.4936900439544107e-05,
3771
+ "loss": 1.6089,
3772
+ "step": 533
3773
+ },
3774
+ {
3775
+ "epoch": 0.7750362844702468,
3776
+ "grad_norm": 0.5626897811889648,
3777
+ "learning_rate": 2.4632002720817228e-05,
3778
+ "loss": 1.136,
3779
+ "step": 534
3780
+ },
3781
+ {
3782
+ "epoch": 0.7764876632801161,
3783
+ "grad_norm": 0.3613468408584595,
3784
+ "learning_rate": 2.432871841823047e-05,
3785
+ "loss": 1.0527,
3786
+ "step": 535
3787
+ },
3788
+ {
3789
+ "epoch": 0.7779390420899854,
3790
+ "grad_norm": 0.5131534934043884,
3791
+ "learning_rate": 2.4027054024244956e-05,
3792
+ "loss": 1.0593,
3793
+ "step": 536
3794
+ },
3795
+ {
3796
+ "epoch": 0.7793904208998549,
3797
+ "grad_norm": 0.2713516652584076,
3798
+ "learning_rate": 2.372701599664404e-05,
3799
+ "loss": 0.3204,
3800
+ "step": 537
3801
+ },
3802
+ {
3803
+ "epoch": 0.7808417997097242,
3804
+ "grad_norm": 0.3534812033176422,
3805
+ "learning_rate": 2.342861075839523e-05,
3806
+ "loss": 0.6906,
3807
+ "step": 538
3808
+ },
3809
+ {
3810
+ "epoch": 0.7822931785195936,
3811
+ "grad_norm": 0.2669237554073334,
3812
+ "learning_rate": 2.313184469751265e-05,
3813
+ "loss": 0.4097,
3814
+ "step": 539
3815
+ },
3816
+ {
3817
+ "epoch": 0.783744557329463,
3818
+ "grad_norm": 0.26740333437919617,
3819
+ "learning_rate": 2.283672416692013e-05,
3820
+ "loss": 0.2641,
3821
+ "step": 540
3822
+ },
3823
+ {
3824
+ "epoch": 0.7851959361393324,
3825
+ "grad_norm": 0.4208941161632538,
3826
+ "learning_rate": 2.2543255484315484e-05,
3827
+ "loss": 0.9359,
3828
+ "step": 541
3829
+ },
3830
+ {
3831
+ "epoch": 0.7866473149492017,
3832
+ "grad_norm": 0.32194021344184875,
3833
+ "learning_rate": 2.2251444932035094e-05,
3834
+ "loss": 0.6239,
3835
+ "step": 542
3836
+ },
3837
+ {
3838
+ "epoch": 0.7880986937590712,
3839
+ "grad_norm": 0.4907798767089844,
3840
+ "learning_rate": 2.1961298756919335e-05,
3841
+ "loss": 1.76,
3842
+ "step": 543
3843
+ },
3844
+ {
3845
+ "epoch": 0.7895500725689405,
3846
+ "grad_norm": 0.45274341106414795,
3847
+ "learning_rate": 2.16728231701791e-05,
3848
+ "loss": 1.3965,
3849
+ "step": 544
3850
+ },
3851
+ {
3852
+ "epoch": 0.7910014513788098,
3853
+ "grad_norm": 0.3342875838279724,
3854
+ "learning_rate": 2.138602434726269e-05,
3855
+ "loss": 0.2511,
3856
+ "step": 545
3857
+ },
3858
+ {
3859
+ "epoch": 0.7924528301886793,
3860
+ "grad_norm": 0.37094488739967346,
3861
+ "learning_rate": 2.11009084277235e-05,
3862
+ "loss": 1.1932,
3863
+ "step": 546
3864
+ },
3865
+ {
3866
+ "epoch": 0.7939042089985486,
3867
+ "grad_norm": 0.36341187357902527,
3868
+ "learning_rate": 2.0817481515088833e-05,
3869
+ "loss": 0.7164,
3870
+ "step": 547
3871
+ },
3872
+ {
3873
+ "epoch": 0.795355587808418,
3874
+ "grad_norm": 0.4300606846809387,
3875
+ "learning_rate": 2.053574967672912e-05,
3876
+ "loss": 0.9558,
3877
+ "step": 548
3878
+ },
3879
+ {
3880
+ "epoch": 0.7968069666182874,
3881
+ "grad_norm": 0.4252452850341797,
3882
+ "learning_rate": 2.025571894372794e-05,
3883
+ "loss": 1.1288,
3884
+ "step": 549
3885
+ },
3886
+ {
3887
+ "epoch": 0.7982583454281568,
3888
+ "grad_norm": 0.5417127013206482,
3889
+ "learning_rate": 1.9977395310753087e-05,
3890
+ "loss": 2.1247,
3891
+ "step": 550
3892
+ },
3893
+ {
3894
+ "epoch": 0.7997097242380261,
3895
+ "grad_norm": 0.3363449275493622,
3896
+ "learning_rate": 1.970078473592817e-05,
3897
+ "loss": 0.5331,
3898
+ "step": 551
3899
+ },
3900
+ {
3901
+ "epoch": 0.8011611030478955,
3902
+ "grad_norm": 0.2750878632068634,
3903
+ "learning_rate": 1.9425893140704943e-05,
3904
+ "loss": 0.3197,
3905
+ "step": 552
3906
+ },
3907
+ {
3908
+ "epoch": 0.8026124818577649,
3909
+ "grad_norm": 0.2823106050491333,
3910
+ "learning_rate": 1.915272640973683e-05,
3911
+ "loss": 0.6639,
3912
+ "step": 553
3913
+ },
3914
+ {
3915
+ "epoch": 0.8040638606676342,
3916
+ "grad_norm": 0.41894766688346863,
3917
+ "learning_rate": 1.888129039075268e-05,
3918
+ "loss": 1.0442,
3919
+ "step": 554
3920
+ },
3921
+ {
3922
+ "epoch": 0.8055152394775036,
3923
+ "grad_norm": 0.3718733489513397,
3924
+ "learning_rate": 1.8611590894431653e-05,
3925
+ "loss": 0.7986,
3926
+ "step": 555
3927
+ },
3928
+ {
3929
+ "epoch": 0.806966618287373,
3930
+ "grad_norm": 0.3276025950908661,
3931
+ "learning_rate": 1.8343633694278895e-05,
3932
+ "loss": 0.7076,
3933
+ "step": 556
3934
+ },
3935
+ {
3936
+ "epoch": 0.8084179970972424,
3937
+ "grad_norm": 0.3458663523197174,
3938
+ "learning_rate": 1.807742452650196e-05,
3939
+ "loss": 0.8522,
3940
+ "step": 557
3941
+ },
3942
+ {
3943
+ "epoch": 0.8098693759071117,
3944
+ "grad_norm": 0.41551870107650757,
3945
+ "learning_rate": 1.7812969089887864e-05,
3946
+ "loss": 0.7282,
3947
+ "step": 558
3948
+ },
3949
+ {
3950
+ "epoch": 0.8113207547169812,
3951
+ "grad_norm": 0.36481913924217224,
3952
+ "learning_rate": 1.7550273045681242e-05,
3953
+ "loss": 0.5842,
3954
+ "step": 559
3955
+ },
3956
+ {
3957
+ "epoch": 0.8127721335268505,
3958
+ "grad_norm": 0.31234338879585266,
3959
+ "learning_rate": 1.7289342017463138e-05,
3960
+ "loss": 1.0055,
3961
+ "step": 560
3962
+ },
3963
+ {
3964
+ "epoch": 0.8142235123367199,
3965
+ "grad_norm": 0.29913321137428284,
3966
+ "learning_rate": 1.7030181591030493e-05,
3967
+ "loss": 0.4555,
3968
+ "step": 561
3969
+ },
3970
+ {
3971
+ "epoch": 0.8156748911465893,
3972
+ "grad_norm": 0.2602443993091583,
3973
+ "learning_rate": 1.6772797314276713e-05,
3974
+ "loss": 0.5486,
3975
+ "step": 562
3976
+ },
3977
+ {
3978
+ "epoch": 0.8171262699564587,
3979
+ "grad_norm": 0.43661990761756897,
3980
+ "learning_rate": 1.65171946970729e-05,
3981
+ "loss": 1.7129,
3982
+ "step": 563
3983
+ },
3984
+ {
3985
+ "epoch": 0.818577648766328,
3986
+ "grad_norm": 0.658896267414093,
3987
+ "learning_rate": 1.6263379211149732e-05,
3988
+ "loss": 2.026,
3989
+ "step": 564
3990
+ },
3991
+ {
3992
+ "epoch": 0.8200290275761973,
3993
+ "grad_norm": 0.48423880338668823,
3994
+ "learning_rate": 1.601135628998056e-05,
3995
+ "loss": 1.1105,
3996
+ "step": 565
3997
+ },
3998
+ {
3999
+ "epoch": 0.8214804063860668,
4000
+ "grad_norm": 0.3823278844356537,
4001
+ "learning_rate": 1.5761131328664947e-05,
4002
+ "loss": 0.9354,
4003
+ "step": 566
4004
+ },
4005
+ {
4006
+ "epoch": 0.8229317851959361,
4007
+ "grad_norm": 0.38709479570388794,
4008
+ "learning_rate": 1.5512709683813164e-05,
4009
+ "loss": 1.7732,
4010
+ "step": 567
4011
+ },
4012
+ {
4013
+ "epoch": 0.8243831640058055,
4014
+ "grad_norm": 0.4736160933971405,
4015
+ "learning_rate": 1.526609667343163e-05,
4016
+ "loss": 1.0733,
4017
+ "step": 568
4018
+ },
4019
+ {
4020
+ "epoch": 0.8258345428156749,
4021
+ "grad_norm": 0.4745509922504425,
4022
+ "learning_rate": 1.5021297576809001e-05,
4023
+ "loss": 1.4294,
4024
+ "step": 569
4025
+ },
4026
+ {
4027
+ "epoch": 0.8272859216255443,
4028
+ "grad_norm": 0.28139880299568176,
4029
+ "learning_rate": 1.4778317634403083e-05,
4030
+ "loss": 0.2901,
4031
+ "step": 570
4032
+ },
4033
+ {
4034
+ "epoch": 0.8287373004354136,
4035
+ "grad_norm": 0.39859816431999207,
4036
+ "learning_rate": 1.4537162047728804e-05,
4037
+ "loss": 0.7626,
4038
+ "step": 571
4039
+ },
4040
+ {
4041
+ "epoch": 0.8301886792452831,
4042
+ "grad_norm": 0.8511147499084473,
4043
+ "learning_rate": 1.4297835979246777e-05,
4044
+ "loss": 1.482,
4045
+ "step": 572
4046
+ },
4047
+ {
4048
+ "epoch": 0.8316400580551524,
4049
+ "grad_norm": 0.4506050646305084,
4050
+ "learning_rate": 1.4060344552252702e-05,
4051
+ "loss": 1.2382,
4052
+ "step": 573
4053
+ },
4054
+ {
4055
+ "epoch": 0.8330914368650217,
4056
+ "grad_norm": 0.22067701816558838,
4057
+ "learning_rate": 1.3824692850767929e-05,
4058
+ "loss": 0.571,
4059
+ "step": 574
4060
+ },
4061
+ {
4062
+ "epoch": 0.8345428156748912,
4063
+ "grad_norm": 0.3021042048931122,
4064
+ "learning_rate": 1.3590885919430384e-05,
4065
+ "loss": 0.522,
4066
+ "step": 575
4067
+ },
4068
+ {
4069
+ "epoch": 0.8359941944847605,
4070
+ "grad_norm": 0.49198654294013977,
4071
+ "learning_rate": 1.3358928763386646e-05,
4072
+ "loss": 0.9773,
4073
+ "step": 576
4074
+ },
4075
+ {
4076
+ "epoch": 0.8374455732946299,
4077
+ "grad_norm": 0.35230767726898193,
4078
+ "learning_rate": 1.3128826348184887e-05,
4079
+ "loss": 0.6312,
4080
+ "step": 577
4081
+ },
4082
+ {
4083
+ "epoch": 0.8388969521044993,
4084
+ "grad_norm": 0.3485874831676483,
4085
+ "learning_rate": 1.2900583599668515e-05,
4086
+ "loss": 0.6788,
4087
+ "step": 578
4088
+ },
4089
+ {
4090
+ "epoch": 0.8403483309143687,
4091
+ "grad_norm": 0.3603363335132599,
4092
+ "learning_rate": 1.2674205403870642e-05,
4093
+ "loss": 0.7338,
4094
+ "step": 579
4095
+ },
4096
+ {
4097
+ "epoch": 0.841799709724238,
4098
+ "grad_norm": 0.25826606154441833,
4099
+ "learning_rate": 1.2449696606909655e-05,
4100
+ "loss": 0.4404,
4101
+ "step": 580
4102
+ },
4103
+ {
4104
+ "epoch": 0.8432510885341074,
4105
+ "grad_norm": 0.3804086744785309,
4106
+ "learning_rate": 1.222706201488536e-05,
4107
+ "loss": 0.4955,
4108
+ "step": 581
4109
+ },
4110
+ {
4111
+ "epoch": 0.8447024673439768,
4112
+ "grad_norm": 0.47231999039649963,
4113
+ "learning_rate": 1.200630639377609e-05,
4114
+ "loss": 1.6225,
4115
+ "step": 582
4116
+ },
4117
+ {
4118
+ "epoch": 0.8461538461538461,
4119
+ "grad_norm": 0.44939887523651123,
4120
+ "learning_rate": 1.1787434469336766e-05,
4121
+ "loss": 1.252,
4122
+ "step": 583
4123
+ },
4124
+ {
4125
+ "epoch": 0.8476052249637155,
4126
+ "grad_norm": 0.3787010610103607,
4127
+ "learning_rate": 1.1570450926997655e-05,
4128
+ "loss": 0.544,
4129
+ "step": 584
4130
+ },
4131
+ {
4132
+ "epoch": 0.8490566037735849,
4133
+ "grad_norm": 0.4327794909477234,
4134
+ "learning_rate": 1.135536041176406e-05,
4135
+ "loss": 0.3797,
4136
+ "step": 585
4137
+ },
4138
+ {
4139
+ "epoch": 0.8505079825834543,
4140
+ "grad_norm": 0.35290834307670593,
4141
+ "learning_rate": 1.1142167528116965e-05,
4142
+ "loss": 0.8393,
4143
+ "step": 586
4144
+ },
4145
+ {
4146
+ "epoch": 0.8519593613933236,
4147
+ "grad_norm": 0.24693018198013306,
4148
+ "learning_rate": 1.0930876839914416e-05,
4149
+ "loss": 0.1381,
4150
+ "step": 587
4151
+ },
4152
+ {
4153
+ "epoch": 0.8534107402031931,
4154
+ "grad_norm": 0.3256937265396118,
4155
+ "learning_rate": 1.072149287029377e-05,
4156
+ "loss": 0.5073,
4157
+ "step": 588
4158
+ },
4159
+ {
4160
+ "epoch": 0.8548621190130624,
4161
+ "grad_norm": 0.32373976707458496,
4162
+ "learning_rate": 1.0514020101574972e-05,
4163
+ "loss": 0.4282,
4164
+ "step": 589
4165
+ },
4166
+ {
4167
+ "epoch": 0.8563134978229318,
4168
+ "grad_norm": 0.3258521854877472,
4169
+ "learning_rate": 1.0308462975164545e-05,
4170
+ "loss": 0.348,
4171
+ "step": 590
4172
+ },
4173
+ {
4174
+ "epoch": 0.8577648766328012,
4175
+ "grad_norm": 0.3401307165622711,
4176
+ "learning_rate": 1.010482589146048e-05,
4177
+ "loss": 1.1884,
4178
+ "step": 591
4179
+ },
4180
+ {
4181
+ "epoch": 0.8592162554426706,
4182
+ "grad_norm": 0.39415818452835083,
4183
+ "learning_rate": 9.903113209758096e-06,
4184
+ "loss": 1.2618,
4185
+ "step": 592
4186
+ },
4187
+ {
4188
+ "epoch": 0.8606676342525399,
4189
+ "grad_norm": 0.3865334689617157,
4190
+ "learning_rate": 9.703329248156711e-06,
4191
+ "loss": 0.9641,
4192
+ "step": 593
4193
+ },
4194
+ {
4195
+ "epoch": 0.8621190130624092,
4196
+ "grad_norm": 0.5107492208480835,
4197
+ "learning_rate": 9.505478283467128e-06,
4198
+ "loss": 0.6755,
4199
+ "step": 594
4200
+ },
4201
+ {
4202
+ "epoch": 0.8635703918722787,
4203
+ "grad_norm": 0.3883417248725891,
4204
+ "learning_rate": 9.309564551120254e-06,
4205
+ "loss": 1.4845,
4206
+ "step": 595
4207
+ },
4208
+ {
4209
+ "epoch": 0.865021770682148,
4210
+ "grad_norm": 0.5274085998535156,
4211
+ "learning_rate": 9.115592245076177e-06,
4212
+ "loss": 1.4281,
4213
+ "step": 596
4214
+ },
4215
+ {
4216
+ "epoch": 0.8664731494920174,
4217
+ "grad_norm": 0.4148600399494171,
4218
+ "learning_rate": 8.923565517734633e-06,
4219
+ "loss": 1.4102,
4220
+ "step": 597
4221
+ },
4222
+ {
4223
+ "epoch": 0.8679245283018868,
4224
+ "grad_norm": 0.3156782388687134,
4225
+ "learning_rate": 8.733488479845997e-06,
4226
+ "loss": 0.7565,
4227
+ "step": 598
4228
+ },
4229
+ {
4230
+ "epoch": 0.8693759071117562,
4231
+ "grad_norm": 0.41858044266700745,
4232
+ "learning_rate": 8.545365200423217e-06,
4233
+ "loss": 1.3413,
4234
+ "step": 599
4235
+ },
4236
+ {
4237
+ "epoch": 0.8708272859216255,
4238
+ "grad_norm": 0.3816291391849518,
4239
+ "learning_rate": 8.359199706654853e-06,
4240
+ "loss": 0.9125,
4241
+ "step": 600
4242
+ },
4243
+ {
4244
+ "epoch": 0.872278664731495,
4245
+ "grad_norm": 0.3754681944847107,
4246
+ "learning_rate": 8.1749959838188e-06,
4247
+ "loss": 0.5213,
4248
+ "step": 601
4249
+ },
4250
+ {
4251
+ "epoch": 0.8737300435413643,
4252
+ "grad_norm": 0.32858067750930786,
4253
+ "learning_rate": 7.992757975196974e-06,
4254
+ "loss": 0.9199,
4255
+ "step": 602
4256
+ },
4257
+ {
4258
+ "epoch": 0.8751814223512336,
4259
+ "grad_norm": 0.4272098243236542,
4260
+ "learning_rate": 7.812489581990844e-06,
4261
+ "loss": 1.4278,
4262
+ "step": 603
4263
+ },
4264
+ {
4265
+ "epoch": 0.8766328011611031,
4266
+ "grad_norm": 0.35721269249916077,
4267
+ "learning_rate": 7.634194663238015e-06,
4268
+ "loss": 0.4783,
4269
+ "step": 604
4270
+ },
4271
+ {
4272
+ "epoch": 0.8780841799709724,
4273
+ "grad_norm": 0.38133150339126587,
4274
+ "learning_rate": 7.457877035729588e-06,
4275
+ "loss": 0.6231,
4276
+ "step": 605
4277
+ },
4278
+ {
4279
+ "epoch": 0.8795355587808418,
4280
+ "grad_norm": 0.3652817904949188,
4281
+ "learning_rate": 7.283540473928385e-06,
4282
+ "loss": 0.7934,
4283
+ "step": 606
4284
+ },
4285
+ {
4286
+ "epoch": 0.8809869375907112,
4287
+ "grad_norm": 0.3659121096134186,
4288
+ "learning_rate": 7.11118870988825e-06,
4289
+ "loss": 0.9149,
4290
+ "step": 607
4291
+ },
4292
+ {
4293
+ "epoch": 0.8824383164005806,
4294
+ "grad_norm": 0.4516978859901428,
4295
+ "learning_rate": 6.940825433174103e-06,
4296
+ "loss": 1.2015,
4297
+ "step": 608
4298
+ },
4299
+ {
4300
+ "epoch": 0.8838896952104499,
4301
+ "grad_norm": 0.3403629660606384,
4302
+ "learning_rate": 6.772454290782926e-06,
4303
+ "loss": 0.552,
4304
+ "step": 609
4305
+ },
4306
+ {
4307
+ "epoch": 0.8853410740203193,
4308
+ "grad_norm": 0.24706141650676727,
4309
+ "learning_rate": 6.606078887065748e-06,
4310
+ "loss": 0.2273,
4311
+ "step": 610
4312
+ },
4313
+ {
4314
+ "epoch": 0.8867924528301887,
4315
+ "grad_norm": 0.3536119759082794,
4316
+ "learning_rate": 6.441702783650494e-06,
4317
+ "loss": 0.7869,
4318
+ "step": 611
4319
+ },
4320
+ {
4321
+ "epoch": 0.888243831640058,
4322
+ "grad_norm": 0.43990838527679443,
4323
+ "learning_rate": 6.2793294993656494e-06,
4324
+ "loss": 0.745,
4325
+ "step": 612
4326
+ },
4327
+ {
4328
+ "epoch": 0.8896952104499274,
4329
+ "grad_norm": 0.560270369052887,
4330
+ "learning_rate": 6.118962510165038e-06,
4331
+ "loss": 1.311,
4332
+ "step": 613
4333
+ },
4334
+ {
4335
+ "epoch": 0.8911465892597968,
4336
+ "grad_norm": 0.3678494691848755,
4337
+ "learning_rate": 5.960605249053386e-06,
4338
+ "loss": 1.182,
4339
+ "step": 614
4340
+ },
4341
+ {
4342
+ "epoch": 0.8925979680696662,
4343
+ "grad_norm": 0.36771297454833984,
4344
+ "learning_rate": 5.8042611060127315e-06,
4345
+ "loss": 0.7735,
4346
+ "step": 615
4347
+ },
4348
+ {
4349
+ "epoch": 0.8940493468795355,
4350
+ "grad_norm": 0.43662601709365845,
4351
+ "learning_rate": 5.649933427930043e-06,
4352
+ "loss": 0.9749,
4353
+ "step": 616
4354
+ },
4355
+ {
4356
+ "epoch": 0.895500725689405,
4357
+ "grad_norm": 0.4699442386627197,
4358
+ "learning_rate": 5.497625518525373e-06,
4359
+ "loss": 0.9527,
4360
+ "step": 617
4361
+ },
4362
+ {
4363
+ "epoch": 0.8969521044992743,
4364
+ "grad_norm": 0.3796102702617645,
4365
+ "learning_rate": 5.347340638281273e-06,
4366
+ "loss": 0.7223,
4367
+ "step": 618
4368
+ },
4369
+ {
4370
+ "epoch": 0.8984034833091437,
4371
+ "grad_norm": 0.3132692277431488,
4372
+ "learning_rate": 5.199082004372957e-06,
4373
+ "loss": 0.3637,
4374
+ "step": 619
4375
+ },
4376
+ {
4377
+ "epoch": 0.8998548621190131,
4378
+ "grad_norm": 0.46878132224082947,
4379
+ "learning_rate": 5.052852790599383e-06,
4380
+ "loss": 1.131,
4381
+ "step": 620
4382
+ },
4383
+ {
4384
+ "epoch": 0.9013062409288825,
4385
+ "grad_norm": 0.359684556722641,
4386
+ "learning_rate": 4.908656127315359e-06,
4387
+ "loss": 0.6985,
4388
+ "step": 621
4389
+ },
4390
+ {
4391
+ "epoch": 0.9027576197387518,
4392
+ "grad_norm": 0.32585737109184265,
4393
+ "learning_rate": 4.7664951013645875e-06,
4394
+ "loss": 0.6099,
4395
+ "step": 622
4396
+ },
4397
+ {
4398
+ "epoch": 0.9042089985486212,
4399
+ "grad_norm": 0.4055173397064209,
4400
+ "learning_rate": 4.626372756013409e-06,
4401
+ "loss": 0.6696,
4402
+ "step": 623
4403
+ },
4404
+ {
4405
+ "epoch": 0.9056603773584906,
4406
+ "grad_norm": 0.3793414235115051,
4407
+ "learning_rate": 4.48829209088586e-06,
4408
+ "loss": 1.2006,
4409
+ "step": 624
4410
+ },
4411
+ {
4412
+ "epoch": 0.9071117561683599,
4413
+ "grad_norm": 0.41631418466567993,
4414
+ "learning_rate": 4.352256061899329e-06,
4415
+ "loss": 0.6615,
4416
+ "step": 625
4417
+ },
4418
+ {
4419
+ "epoch": 0.9085631349782293,
4420
+ "grad_norm": 0.28770431876182556,
4421
+ "learning_rate": 4.2182675812012965e-06,
4422
+ "loss": 0.4437,
4423
+ "step": 626
4424
+ },
4425
+ {
4426
+ "epoch": 0.9100145137880987,
4427
+ "grad_norm": 0.4755733609199524,
4428
+ "learning_rate": 4.086329517107046e-06,
4429
+ "loss": 0.9437,
4430
+ "step": 627
4431
+ },
4432
+ {
4433
+ "epoch": 0.9114658925979681,
4434
+ "grad_norm": 0.4184425473213196,
4435
+ "learning_rate": 3.9564446940382084e-06,
4436
+ "loss": 1.8477,
4437
+ "step": 628
4438
+ },
4439
+ {
4440
+ "epoch": 0.9129172714078374,
4441
+ "grad_norm": 0.515277087688446,
4442
+ "learning_rate": 3.828615892462328e-06,
4443
+ "loss": 1.0525,
4444
+ "step": 629
4445
+ },
4446
+ {
4447
+ "epoch": 0.9143686502177069,
4448
+ "grad_norm": 0.35891175270080566,
4449
+ "learning_rate": 3.7028458488333138e-06,
4450
+ "loss": 1.4299,
4451
+ "step": 630
4452
+ },
4453
+ {
4454
+ "epoch": 0.9158200290275762,
4455
+ "grad_norm": 0.34797385334968567,
4456
+ "learning_rate": 3.5791372555328937e-06,
4457
+ "loss": 0.3932,
4458
+ "step": 631
4459
+ },
4460
+ {
4461
+ "epoch": 0.9172714078374455,
4462
+ "grad_norm": 0.4482119679450989,
4463
+ "learning_rate": 3.457492760812975e-06,
4464
+ "loss": 0.9157,
4465
+ "step": 632
4466
+ },
4467
+ {
4468
+ "epoch": 0.918722786647315,
4469
+ "grad_norm": 0.43506234884262085,
4470
+ "learning_rate": 3.3379149687388867e-06,
4471
+ "loss": 0.6012,
4472
+ "step": 633
4473
+ },
4474
+ {
4475
+ "epoch": 0.9201741654571843,
4476
+ "grad_norm": 0.539933443069458,
4477
+ "learning_rate": 3.2204064391337273e-06,
4478
+ "loss": 0.8629,
4479
+ "step": 634
4480
+ },
4481
+ {
4482
+ "epoch": 0.9216255442670537,
4483
+ "grad_norm": 0.32083696126937866,
4484
+ "learning_rate": 3.1049696875235512e-06,
4485
+ "loss": 0.4713,
4486
+ "step": 635
4487
+ },
4488
+ {
4489
+ "epoch": 0.9230769230769231,
4490
+ "grad_norm": 0.5421581864356995,
4491
+ "learning_rate": 2.9916071850834005e-06,
4492
+ "loss": 1.2597,
4493
+ "step": 636
4494
+ },
4495
+ {
4496
+ "epoch": 0.9245283018867925,
4497
+ "grad_norm": 0.2753216028213501,
4498
+ "learning_rate": 2.8803213585846035e-06,
4499
+ "loss": 0.4526,
4500
+ "step": 637
4501
+ },
4502
+ {
4503
+ "epoch": 0.9259796806966618,
4504
+ "grad_norm": 0.5353147983551025,
4505
+ "learning_rate": 2.7711145903426496e-06,
4506
+ "loss": 1.0714,
4507
+ "step": 638
4508
+ },
4509
+ {
4510
+ "epoch": 0.9274310595065312,
4511
+ "grad_norm": 0.26769256591796875,
4512
+ "learning_rate": 2.663989218166274e-06,
4513
+ "loss": 1.1174,
4514
+ "step": 639
4515
+ },
4516
+ {
4517
+ "epoch": 0.9288824383164006,
4518
+ "grad_norm": 0.2907368838787079,
4519
+ "learning_rate": 2.5589475353073988e-06,
4520
+ "loss": 1.2706,
4521
+ "step": 640
4522
+ },
4523
+ {
4524
+ "epoch": 0.93033381712627,
4525
+ "grad_norm": 0.49619778990745544,
4526
+ "learning_rate": 2.455991790412027e-06,
4527
+ "loss": 1.0534,
4528
+ "step": 641
4529
+ },
4530
+ {
4531
+ "epoch": 0.9317851959361393,
4532
+ "grad_norm": 0.32665136456489563,
4533
+ "learning_rate": 2.355124187472135e-06,
4534
+ "loss": 0.9813,
4535
+ "step": 642
4536
+ },
4537
+ {
4538
+ "epoch": 0.9332365747460087,
4539
+ "grad_norm": 0.24648268520832062,
4540
+ "learning_rate": 2.2563468857784818e-06,
4541
+ "loss": 0.3728,
4542
+ "step": 643
4543
+ },
4544
+ {
4545
+ "epoch": 0.9346879535558781,
4546
+ "grad_norm": 0.45106983184814453,
4547
+ "learning_rate": 2.159661999874307e-06,
4548
+ "loss": 0.6732,
4549
+ "step": 644
4550
+ },
4551
+ {
4552
+ "epoch": 0.9361393323657474,
4553
+ "grad_norm": 0.4321921467781067,
4554
+ "learning_rate": 2.0650715995102267e-06,
4555
+ "loss": 1.9884,
4556
+ "step": 645
4557
+ },
4558
+ {
4559
+ "epoch": 0.9375907111756169,
4560
+ "grad_norm": 0.33670032024383545,
4561
+ "learning_rate": 1.9725777095997657e-06,
4562
+ "loss": 0.6079,
4563
+ "step": 646
4564
+ },
4565
+ {
4566
+ "epoch": 0.9390420899854862,
4567
+ "grad_norm": 0.324014276266098,
4568
+ "learning_rate": 1.882182310176095e-06,
4569
+ "loss": 0.2433,
4570
+ "step": 647
4571
+ },
4572
+ {
4573
+ "epoch": 0.9404934687953556,
4574
+ "grad_norm": 0.41200318932533264,
4575
+ "learning_rate": 1.7938873363496422e-06,
4576
+ "loss": 1.2887,
4577
+ "step": 648
4578
+ },
4579
+ {
4580
+ "epoch": 0.941944847605225,
4581
+ "grad_norm": 0.4197370707988739,
4582
+ "learning_rate": 1.707694678266636e-06,
4583
+ "loss": 1.1368,
4584
+ "step": 649
4585
+ },
4586
+ {
4587
+ "epoch": 0.9433962264150944,
4588
+ "grad_norm": 0.5854864120483398,
4589
+ "learning_rate": 1.6236061810686487e-06,
4590
+ "loss": 0.5592,
4591
+ "step": 650
4592
+ },
4593
+ {
4594
+ "epoch": 0.9448476052249637,
4595
+ "grad_norm": 0.426692932844162,
4596
+ "learning_rate": 1.5416236448531074e-06,
4597
+ "loss": 1.1019,
4598
+ "step": 651
4599
+ },
4600
+ {
4601
+ "epoch": 0.9462989840348331,
4602
+ "grad_norm": 0.4369201362133026,
4603
+ "learning_rate": 1.461748824634801e-06,
4604
+ "loss": 1.0856,
4605
+ "step": 652
4606
+ },
4607
+ {
4608
+ "epoch": 0.9477503628447025,
4609
+ "grad_norm": 0.3605095148086548,
4610
+ "learning_rate": 1.383983430308189e-06,
4611
+ "loss": 1.3698,
4612
+ "step": 653
4613
+ },
4614
+ {
4615
+ "epoch": 0.9492017416545718,
4616
+ "grad_norm": 0.28937241435050964,
4617
+ "learning_rate": 1.30832912661093e-06,
4618
+ "loss": 0.4791,
4619
+ "step": 654
4620
+ },
4621
+ {
4622
+ "epoch": 0.9506531204644412,
4623
+ "grad_norm": 0.38452112674713135,
4624
+ "learning_rate": 1.2347875330881886e-06,
4625
+ "loss": 1.3005,
4626
+ "step": 655
4627
+ },
4628
+ {
4629
+ "epoch": 0.9521044992743106,
4630
+ "grad_norm": 0.44588330388069153,
4631
+ "learning_rate": 1.1633602240579522e-06,
4632
+ "loss": 1.1278,
4633
+ "step": 656
4634
+ },
4635
+ {
4636
+ "epoch": 0.95355587808418,
4637
+ "grad_norm": 0.34775638580322266,
4638
+ "learning_rate": 1.0940487285773459e-06,
4639
+ "loss": 0.6701,
4640
+ "step": 657
4641
+ },
4642
+ {
4643
+ "epoch": 0.9550072568940493,
4644
+ "grad_norm": 0.34269067645072937,
4645
+ "learning_rate": 1.026854530409882e-06,
4646
+ "loss": 1.3742,
4647
+ "step": 658
4648
+ },
4649
+ {
4650
+ "epoch": 0.9564586357039188,
4651
+ "grad_norm": 0.3833567500114441,
4652
+ "learning_rate": 9.61779067993729e-07,
4653
+ "loss": 1.387,
4654
+ "step": 659
4655
+ },
4656
+ {
4657
+ "epoch": 0.9579100145137881,
4658
+ "grad_norm": 0.37239694595336914,
4659
+ "learning_rate": 8.988237344109251e-07,
4660
+ "loss": 0.8695,
4661
+ "step": 660
4662
+ },
4663
+ {
4664
+ "epoch": 0.9593613933236574,
4665
+ "grad_norm": 0.30777066946029663,
4666
+ "learning_rate": 8.379898773574924e-07,
4667
+ "loss": 0.7924,
4668
+ "step": 661
4669
+ },
4670
+ {
4671
+ "epoch": 0.9608127721335269,
4672
+ "grad_norm": 0.38899528980255127,
4673
+ "learning_rate": 7.792787991146355e-07,
4674
+ "loss": 1.4396,
4675
+ "step": 662
4676
+ },
4677
+ {
4678
+ "epoch": 0.9622641509433962,
4679
+ "grad_norm": 0.4895128309726715,
4680
+ "learning_rate": 7.226917565208657e-07,
4681
+ "loss": 1.3093,
4682
+ "step": 663
4683
+ },
4684
+ {
4685
+ "epoch": 0.9637155297532656,
4686
+ "grad_norm": 0.5337472558021545,
4687
+ "learning_rate": 6.682299609450993e-07,
4688
+ "loss": 0.8694,
4689
+ "step": 664
4690
+ },
4691
+ {
4692
+ "epoch": 0.965166908563135,
4693
+ "grad_norm": 0.32183799147605896,
4694
+ "learning_rate": 6.158945782606673e-07,
4695
+ "loss": 0.5548,
4696
+ "step": 665
4697
+ },
4698
+ {
4699
+ "epoch": 0.9666182873730044,
4700
+ "grad_norm": 0.3950204849243164,
4701
+ "learning_rate": 5.65686728820436e-07,
4702
+ "loss": 0.4285,
4703
+ "step": 666
4704
+ },
4705
+ {
4706
+ "epoch": 0.9680696661828737,
4707
+ "grad_norm": 0.4570080041885376,
4708
+ "learning_rate": 5.176074874327919e-07,
4709
+ "loss": 1.5729,
4710
+ "step": 667
4711
+ },
4712
+ {
4713
+ "epoch": 0.969521044992743,
4714
+ "grad_norm": 0.42208755016326904,
4715
+ "learning_rate": 4.7165788333860536e-07,
4716
+ "loss": 0.8863,
4717
+ "step": 668
4718
+ },
4719
+ {
4720
+ "epoch": 0.9709724238026125,
4721
+ "grad_norm": 0.29929739236831665,
4722
+ "learning_rate": 4.278389001892369e-07,
4723
+ "loss": 0.7099,
4724
+ "step": 669
4725
+ },
4726
+ {
4727
+ "epoch": 0.9724238026124818,
4728
+ "grad_norm": 0.46631020307540894,
4729
+ "learning_rate": 3.8615147602546473e-07,
4730
+ "loss": 0.4506,
4731
+ "step": 670
4732
+ },
4733
+ {
4734
+ "epoch": 0.9738751814223512,
4735
+ "grad_norm": 0.4783049523830414,
4736
+ "learning_rate": 3.4659650325740147e-07,
4737
+ "loss": 1.1751,
4738
+ "step": 671
4739
+ },
4740
+ {
4741
+ "epoch": 0.9753265602322206,
4742
+ "grad_norm": 0.3503381907939911,
4743
+ "learning_rate": 3.091748286453866e-07,
4744
+ "loss": 0.9895,
4745
+ "step": 672
4746
+ },
4747
+ {
4748
+ "epoch": 0.97677793904209,
4749
+ "grad_norm": 0.28899291157722473,
4750
+ "learning_rate": 2.7388725328189036e-07,
4751
+ "loss": 0.5174,
4752
+ "step": 673
4753
+ },
4754
+ {
4755
+ "epoch": 0.9782293178519593,
4756
+ "grad_norm": 0.33355581760406494,
4757
+ "learning_rate": 2.407345325743049e-07,
4758
+ "loss": 0.4006,
4759
+ "step": 674
4760
+ },
4761
+ {
4762
+ "epoch": 0.9796806966618288,
4763
+ "grad_norm": 0.40867161750793457,
4764
+ "learning_rate": 2.0971737622883515e-07,
4765
+ "loss": 1.0604,
4766
+ "step": 675
4767
+ },
4768
+ {
4769
+ "epoch": 0.9811320754716981,
4770
+ "grad_norm": 0.40149986743927,
4771
+ "learning_rate": 1.8083644823526647e-07,
4772
+ "loss": 0.7342,
4773
+ "step": 676
4774
+ },
4775
+ {
4776
+ "epoch": 0.9825834542815675,
4777
+ "grad_norm": 0.4048071503639221,
4778
+ "learning_rate": 1.5409236685277605e-07,
4779
+ "loss": 0.966,
4780
+ "step": 677
4781
+ },
4782
+ {
4783
+ "epoch": 0.9840348330914369,
4784
+ "grad_norm": 0.4240354895591736,
4785
+ "learning_rate": 1.2948570459667686e-07,
4786
+ "loss": 1.29,
4787
+ "step": 678
4788
+ },
4789
+ {
4790
+ "epoch": 0.9854862119013063,
4791
+ "grad_norm": 0.429208904504776,
4792
+ "learning_rate": 1.0701698822614959e-07,
4793
+ "loss": 0.985,
4794
+ "step": 679
4795
+ },
4796
+ {
4797
+ "epoch": 0.9869375907111756,
4798
+ "grad_norm": 0.29551103711128235,
4799
+ "learning_rate": 8.668669873304058e-08,
4800
+ "loss": 0.6212,
4801
+ "step": 680
4802
+ },
4803
+ {
4804
+ "epoch": 0.988388969521045,
4805
+ "grad_norm": 0.4513908922672272,
4806
+ "learning_rate": 6.849527133144796e-08,
4807
+ "loss": 1.3823,
4808
+ "step": 681
4809
+ },
4810
+ {
4811
+ "epoch": 0.9898403483309144,
4812
+ "grad_norm": 0.38480904698371887,
4813
+ "learning_rate": 5.2443095448506674e-08,
4814
+ "loss": 0.8696,
4815
+ "step": 682
4816
+ },
4817
+ {
4818
+ "epoch": 0.9912917271407837,
4819
+ "grad_norm": 0.3505820035934448,
4820
+ "learning_rate": 3.8530514716017544e-08,
4821
+ "loss": 0.3473,
4822
+ "step": 683
4823
+ },
4824
+ {
4825
+ "epoch": 0.9927431059506531,
4826
+ "grad_norm": 0.37889888882637024,
4827
+ "learning_rate": 2.6757826963053066e-08,
4828
+ "loss": 0.3755,
4829
+ "step": 684
4830
+ },
4831
+ {
4832
+ "epoch": 0.9941944847605225,
4833
+ "grad_norm": 0.3586353063583374,
4834
+ "learning_rate": 1.712528420966253e-08,
4835
+ "loss": 1.2269,
4836
+ "step": 685
4837
+ },
4838
+ {
4839
+ "epoch": 0.9956458635703919,
4840
+ "grad_norm": 0.2834562361240387,
4841
+ "learning_rate": 9.633092661376352e-09,
4842
+ "loss": 0.1832,
4843
+ "step": 686
4844
+ },
4845
+ {
4846
+ "epoch": 0.9970972423802612,
4847
+ "grad_norm": 0.3964359760284424,
4848
+ "learning_rate": 4.281412704887356e-09,
4849
+ "loss": 1.9892,
4850
+ "step": 687
4851
+ },
4852
+ {
4853
+ "epoch": 0.9985486211901307,
4854
+ "grad_norm": 0.543126106262207,
4855
+ "learning_rate": 1.070358904564639e-09,
4856
+ "loss": 0.7731,
4857
+ "step": 688
4858
+ },
4859
+ {
4860
+ "epoch": 1.0,
4861
+ "grad_norm": 0.22489748895168304,
4862
+ "learning_rate": 0.0,
4863
+ "loss": 0.4556,
4864
+ "step": 689
4865
  }
4866
  ],
4867
  "logging_steps": 1,
 
4876
  "should_evaluate": false,
4877
  "should_log": false,
4878
  "should_save": true,
4879
+ "should_training_stop": true
4880
  },
4881
  "attributes": {}
4882
  }
4883
  },
4884
+ "total_flos": 4.357516200247296e+17,
4885
  "train_batch_size": 2,
4886
  "trial_name": null,
4887
  "trial_params": null