{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.031583079199251185,
  "eval_steps": 393,
  "global_step": 1569,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.0129432249363408e-05,
      "grad_norm": 1.2282008356123697e-05,
      "learning_rate": 2e-05,
      "loss": 46.0,
      "step": 1
    },
    {
      "epoch": 2.0129432249363408e-05,
      "eval_loss": 11.5,
      "eval_runtime": 130.2655,
      "eval_samples_per_second": 160.58,
      "eval_steps_per_second": 80.29,
      "step": 1
    },
    {
      "epoch": 4.0258864498726816e-05,
      "grad_norm": 2.1924333850620314e-05,
      "learning_rate": 4e-05,
      "loss": 46.0,
      "step": 2
    },
    {
      "epoch": 6.038829674809022e-05,
      "grad_norm": 1.5113248082343489e-05,
      "learning_rate": 6e-05,
      "loss": 46.0,
      "step": 3
    },
    {
      "epoch": 8.051772899745363e-05,
      "grad_norm": 1.604706449143123e-05,
      "learning_rate": 8e-05,
      "loss": 46.0,
      "step": 4
    },
    {
      "epoch": 0.00010064716124681703,
      "grad_norm": 2.237814805994276e-05,
      "learning_rate": 0.0001,
      "loss": 46.0,
      "step": 5
    },
    {
      "epoch": 0.00012077659349618043,
      "grad_norm": 2.265493640152272e-05,
      "learning_rate": 0.00012,
      "loss": 46.0,
      "step": 6
    },
    {
      "epoch": 0.00014090602574554385,
      "grad_norm": 1.7647287677391432e-05,
      "learning_rate": 0.00014,
      "loss": 46.0,
      "step": 7
    },
    {
      "epoch": 0.00016103545799490726,
      "grad_norm": 2.9863820600439794e-05,
      "learning_rate": 0.00016,
      "loss": 46.0,
      "step": 8
    },
    {
      "epoch": 0.00018116489024427065,
      "grad_norm": 1.1106193596788216e-05,
      "learning_rate": 0.00018,
      "loss": 46.0,
      "step": 9
    },
    {
      "epoch": 0.00020129432249363407,
      "grad_norm": 1.795052594388835e-05,
      "learning_rate": 0.0002,
      "loss": 46.0,
      "step": 10
    },
    {
      "epoch": 0.00022142375474299748,
      "grad_norm": 1.9582959794206545e-05,
      "learning_rate": 0.0001999997969619787,
      "loss": 46.0,
      "step": 11
    },
    {
      "epoch": 0.00024155318699236087,
      "grad_norm": 3.401090725674294e-05,
      "learning_rate": 0.00019999918784873927,
      "loss": 46.0,
      "step": 12
    },
    {
      "epoch": 0.0002616826192417243,
      "grad_norm": 1.5809200704097748e-05,
      "learning_rate": 0.00019999817266275517,
      "loss": 46.0,
      "step": 13
    },
    {
      "epoch": 0.0002818120514910877,
      "grad_norm": 1.1195018487342168e-05,
      "learning_rate": 0.00019999675140814887,
      "loss": 46.0,
      "step": 14
    },
    {
      "epoch": 0.0003019414837404511,
      "grad_norm": 4.638621976482682e-05,
      "learning_rate": 0.00019999492409069166,
      "loss": 46.0,
      "step": 15
    },
    {
      "epoch": 0.00032207091598981453,
      "grad_norm": 2.1349093003664166e-05,
      "learning_rate": 0.00019999269071780394,
      "loss": 46.0,
      "step": 16
    },
    {
      "epoch": 0.0003422003482391779,
      "grad_norm": 1.8559087038738653e-05,
      "learning_rate": 0.0001999900512985548,
      "loss": 46.0,
      "step": 17
    },
    {
      "epoch": 0.0003623297804885413,
      "grad_norm": 9.40681820793543e-06,
      "learning_rate": 0.00019998700584366238,
      "loss": 46.0,
      "step": 18
    },
    {
      "epoch": 0.00038245921273790474,
      "grad_norm": 1.6563231838517822e-05,
      "learning_rate": 0.0001999835543654935,
      "loss": 46.0,
      "step": 19
    },
    {
      "epoch": 0.00040258864498726813,
      "grad_norm": 1.7772108549252152e-05,
      "learning_rate": 0.0001999796968780638,
      "loss": 46.0,
      "step": 20
    },
    {
      "epoch": 0.0004227180772366315,
      "grad_norm": 3.5355504223844036e-05,
      "learning_rate": 0.00019997543339703757,
      "loss": 46.0,
      "step": 21
    },
    {
      "epoch": 0.00044284750948599496,
      "grad_norm": 2.7386544388718903e-05,
      "learning_rate": 0.00019997076393972783,
      "loss": 46.0,
      "step": 22
    },
    {
      "epoch": 0.00046297694173535835,
      "grad_norm": 2.447705810482148e-05,
      "learning_rate": 0.0001999656885250961,
      "loss": 46.0,
      "step": 23
    },
    {
      "epoch": 0.00048310637398472174,
      "grad_norm": 2.7531290470506065e-05,
      "learning_rate": 0.00019996020717375247,
      "loss": 46.0,
      "step": 24
    },
    {
      "epoch": 0.0005032358062340851,
      "grad_norm": 1.6084130038507283e-05,
      "learning_rate": 0.00019995431990795531,
      "loss": 46.0,
      "step": 25
    },
    {
      "epoch": 0.0005233652384834486,
      "grad_norm": 1.0257011126668658e-05,
      "learning_rate": 0.00019994802675161148,
      "loss": 46.0,
      "step": 26
    },
    {
      "epoch": 0.000543494670732812,
      "grad_norm": 1.4123150322120637e-05,
      "learning_rate": 0.00019994132773027597,
      "loss": 46.0,
      "step": 27
    },
    {
      "epoch": 0.0005636241029821754,
      "grad_norm": 2.4509085051249713e-05,
      "learning_rate": 0.00019993422287115185,
      "loss": 46.0,
      "step": 28
    },
    {
      "epoch": 0.0005837535352315388,
      "grad_norm": 1.926114782691002e-05,
      "learning_rate": 0.0001999267122030903,
      "loss": 46.0,
      "step": 29
    },
    {
      "epoch": 0.0006038829674809022,
      "grad_norm": 2.307241447851993e-05,
      "learning_rate": 0.0001999187957565903,
      "loss": 46.0,
      "step": 30
    },
    {
      "epoch": 0.0006240123997302656,
      "grad_norm": 1.905815770442132e-05,
      "learning_rate": 0.00019991047356379866,
      "loss": 46.0,
      "step": 31
    },
    {
      "epoch": 0.0006441418319796291,
      "grad_norm": 3.1321374990511686e-05,
      "learning_rate": 0.00019990174565850984,
      "loss": 46.0,
      "step": 32
    },
    {
      "epoch": 0.0006642712642289924,
      "grad_norm": 1.4927420124877244e-05,
      "learning_rate": 0.00019989261207616573,
      "loss": 46.0,
      "step": 33
    },
    {
      "epoch": 0.0006844006964783558,
      "grad_norm": 6.0364993260009214e-05,
      "learning_rate": 0.00019988307285385566,
      "loss": 46.0,
      "step": 34
    },
    {
      "epoch": 0.0007045301287277192,
      "grad_norm": 3.057021604035981e-05,
      "learning_rate": 0.00019987312803031607,
      "loss": 46.0,
      "step": 35
    },
    {
      "epoch": 0.0007246595609770826,
      "grad_norm": 2.576452425273601e-05,
      "learning_rate": 0.00019986277764593057,
      "loss": 46.0,
      "step": 36
    },
    {
      "epoch": 0.0007447889932264461,
      "grad_norm": 1.8830834960681386e-05,
      "learning_rate": 0.00019985202174272956,
      "loss": 46.0,
      "step": 37
    },
    {
      "epoch": 0.0007649184254758095,
      "grad_norm": 2.4923951059463434e-05,
      "learning_rate": 0.0001998408603643902,
      "loss": 46.0,
      "step": 38
    },
    {
      "epoch": 0.0007850478577251729,
      "grad_norm": 2.467411286488641e-05,
      "learning_rate": 0.00019982929355623615,
      "loss": 46.0,
      "step": 39
    },
    {
      "epoch": 0.0008051772899745363,
      "grad_norm": 4.072063529747538e-05,
      "learning_rate": 0.00019981732136523746,
      "loss": 46.0,
      "step": 40
    },
    {
      "epoch": 0.0008253067222238997,
      "grad_norm": 2.181486888730433e-05,
      "learning_rate": 0.0001998049438400103,
      "loss": 46.0,
      "step": 41
    },
    {
      "epoch": 0.000845436154473263,
      "grad_norm": 1.713978599582333e-05,
      "learning_rate": 0.0001997921610308169,
      "loss": 46.0,
      "step": 42
    },
    {
      "epoch": 0.0008655655867226265,
      "grad_norm": 1.962662281584926e-05,
      "learning_rate": 0.00019977897298956515,
      "loss": 46.0,
      "step": 43
    },
    {
      "epoch": 0.0008856950189719899,
      "grad_norm": 2.0111776393605396e-05,
      "learning_rate": 0.0001997653797698085,
      "loss": 46.0,
      "step": 44
    },
    {
      "epoch": 0.0009058244512213533,
      "grad_norm": 3.424846727284603e-05,
      "learning_rate": 0.0001997513814267458,
      "loss": 46.0,
      "step": 45
    },
    {
      "epoch": 0.0009259538834707167,
      "grad_norm": 2.3754591893521138e-05,
      "learning_rate": 0.00019973697801722095,
      "loss": 46.0,
      "step": 46
    },
    {
      "epoch": 0.0009460833157200801,
      "grad_norm": 3.0731138394912705e-05,
      "learning_rate": 0.00019972216959972274,
      "loss": 46.0,
      "step": 47
    },
    {
      "epoch": 0.0009662127479694435,
      "grad_norm": 2.0918800146318972e-05,
      "learning_rate": 0.0001997069562343846,
      "loss": 46.0,
      "step": 48
    },
    {
      "epoch": 0.000986342180218807,
      "grad_norm": 3.504705455270596e-05,
      "learning_rate": 0.0001996913379829844,
      "loss": 46.0,
      "step": 49
    },
    {
      "epoch": 0.0010064716124681702,
      "grad_norm": 2.918963582487777e-05,
      "learning_rate": 0.0001996753149089441,
      "loss": 46.0,
      "step": 50
    },
    {
      "epoch": 0.0010266010447175337,
      "grad_norm": 3.13876080326736e-05,
      "learning_rate": 0.00019965888707732953,
      "loss": 46.0,
      "step": 51
    },
    {
      "epoch": 0.0010467304769668972,
      "grad_norm": 1.2979964594705962e-05,
      "learning_rate": 0.0001996420545548502,
      "loss": 46.0,
      "step": 52
    },
    {
      "epoch": 0.0010668599092162605,
      "grad_norm": 2.1166308215470053e-05,
      "learning_rate": 0.00019962481740985895,
      "loss": 46.0,
      "step": 53
    },
    {
      "epoch": 0.001086989341465624,
      "grad_norm": 2.2731392164132558e-05,
      "learning_rate": 0.00019960717571235173,
      "loss": 46.0,
      "step": 54
    },
    {
      "epoch": 0.0011071187737149873,
      "grad_norm": 2.4999253582791425e-05,
      "learning_rate": 0.00019958912953396723,
      "loss": 46.0,
      "step": 55
    },
    {
      "epoch": 0.0011272482059643508,
      "grad_norm": 6.292035686783493e-05,
      "learning_rate": 0.00019957067894798663,
      "loss": 46.0,
      "step": 56
    },
    {
      "epoch": 0.0011473776382137143,
      "grad_norm": 1.5206553143798374e-05,
      "learning_rate": 0.00019955182402933334,
      "loss": 46.0,
      "step": 57
    },
    {
      "epoch": 0.0011675070704630776,
      "grad_norm": 3.7179495848249644e-05,
      "learning_rate": 0.0001995325648545727,
      "loss": 46.0,
      "step": 58
    },
    {
      "epoch": 0.001187636502712441,
      "grad_norm": 2.6544912543613464e-05,
      "learning_rate": 0.00019951290150191158,
      "loss": 46.0,
      "step": 59
    },
    {
      "epoch": 0.0012077659349618043,
      "grad_norm": 1.6469433830934577e-05,
      "learning_rate": 0.00019949283405119815,
      "loss": 46.0,
      "step": 60
    },
    {
      "epoch": 0.0012278953672111678,
      "grad_norm": 2.489179496478755e-05,
      "learning_rate": 0.00019947236258392154,
      "loss": 46.0,
      "step": 61
    },
    {
      "epoch": 0.0012480247994605311,
      "grad_norm": 1.7665654013399035e-05,
      "learning_rate": 0.00019945148718321143,
      "loss": 46.0,
      "step": 62
    },
    {
      "epoch": 0.0012681542317098946,
      "grad_norm": 2.1060941435280256e-05,
      "learning_rate": 0.00019943020793383785,
      "loss": 46.0,
      "step": 63
    },
    {
      "epoch": 0.0012882836639592581,
      "grad_norm": 1.269436688744463e-05,
      "learning_rate": 0.00019940852492221075,
      "loss": 46.0,
      "step": 64
    },
    {
      "epoch": 0.0013084130962086214,
      "grad_norm": 2.0220479200361297e-05,
      "learning_rate": 0.0001993864382363796,
      "loss": 46.0,
      "step": 65
    },
    {
      "epoch": 0.0013285425284579849,
      "grad_norm": 2.53070866165217e-05,
      "learning_rate": 0.00019936394796603318,
      "loss": 46.0,
      "step": 66
    },
    {
      "epoch": 0.0013486719607073482,
      "grad_norm": 3.738961459021084e-05,
      "learning_rate": 0.00019934105420249908,
      "loss": 46.0,
      "step": 67
    },
    {
      "epoch": 0.0013688013929567117,
      "grad_norm": 2.57118354056729e-05,
      "learning_rate": 0.0001993177570387434,
      "loss": 46.0,
      "step": 68
    },
    {
      "epoch": 0.0013889308252060752,
      "grad_norm": 1.631179293326568e-05,
      "learning_rate": 0.00019929405656937032,
      "loss": 46.0,
      "step": 69
    },
    {
      "epoch": 0.0014090602574554384,
      "grad_norm": 2.4105151169351302e-05,
      "learning_rate": 0.00019926995289062176,
      "loss": 46.0,
      "step": 70
    },
    {
      "epoch": 0.001429189689704802,
      "grad_norm": 6.657966878265142e-05,
      "learning_rate": 0.00019924544610037698,
      "loss": 46.0,
      "step": 71
    },
    {
      "epoch": 0.0014493191219541652,
      "grad_norm": 2.2929831175133586e-05,
      "learning_rate": 0.00019922053629815224,
      "loss": 46.0,
      "step": 72
    },
    {
      "epoch": 0.0014694485542035287,
      "grad_norm": 2.9039501896477304e-05,
      "learning_rate": 0.00019919522358510024,
      "loss": 46.0,
      "step": 73
    },
    {
      "epoch": 0.0014895779864528922,
      "grad_norm": 5.170597432879731e-05,
      "learning_rate": 0.00019916950806400983,
      "loss": 46.0,
      "step": 74
    },
    {
      "epoch": 0.0015097074187022555,
      "grad_norm": 2.5796563932090066e-05,
      "learning_rate": 0.00019914338983930557,
      "loss": 46.0,
      "step": 75
    },
    {
      "epoch": 0.001529836850951619,
      "grad_norm": 2.303555083926767e-05,
      "learning_rate": 0.0001991168690170474,
      "loss": 46.0,
      "step": 76
    },
    {
      "epoch": 0.0015499662832009823,
      "grad_norm": 3.512139664962888e-05,
      "learning_rate": 0.00019908994570492993,
      "loss": 46.0,
      "step": 77
    },
    {
      "epoch": 0.0015700957154503458,
      "grad_norm": 3.3836728107417e-05,
      "learning_rate": 0.00019906262001228228,
      "loss": 46.0,
      "step": 78
    },
    {
      "epoch": 0.001590225147699709,
      "grad_norm": 2.430832500976976e-05,
      "learning_rate": 0.00019903489205006764,
      "loss": 46.0,
      "step": 79
    },
    {
      "epoch": 0.0016103545799490725,
      "grad_norm": 3.29008289554622e-05,
      "learning_rate": 0.0001990067619308825,
      "loss": 46.0,
      "step": 80
    },
    {
      "epoch": 0.001630484012198436,
      "grad_norm": 2.6406103643239476e-05,
      "learning_rate": 0.00019897822976895665,
      "loss": 46.0,
      "step": 81
    },
    {
      "epoch": 0.0016506134444477993,
      "grad_norm": 1.8888900740421377e-05,
      "learning_rate": 0.00019894929568015226,
      "loss": 46.0,
      "step": 82
    },
    {
      "epoch": 0.0016707428766971628,
      "grad_norm": 3.3854525099741295e-05,
      "learning_rate": 0.0001989199597819638,
      "loss": 46.0,
      "step": 83
    },
    {
      "epoch": 0.001690872308946526,
      "grad_norm": 2.4574126655352302e-05,
      "learning_rate": 0.0001988902221935173,
      "loss": 46.0,
      "step": 84
    },
    {
      "epoch": 0.0017110017411958896,
      "grad_norm": 1.5161581359279808e-05,
      "learning_rate": 0.00019886008303557,
      "loss": 46.0,
      "step": 85
    },
    {
      "epoch": 0.001731131173445253,
      "grad_norm": 2.6385316232335754e-05,
      "learning_rate": 0.00019882954243050972,
      "loss": 46.0,
      "step": 86
    },
    {
      "epoch": 0.0017512606056946164,
      "grad_norm": 1.925287324411329e-05,
      "learning_rate": 0.00019879860050235469,
      "loss": 46.0,
      "step": 87
    },
    {
      "epoch": 0.0017713900379439798,
      "grad_norm": 1.4624111827288289e-05,
      "learning_rate": 0.00019876725737675254,
      "loss": 46.0,
      "step": 88
    },
    {
      "epoch": 0.0017915194701933431,
      "grad_norm": 2.154901812900789e-05,
      "learning_rate": 0.00019873551318098026,
      "loss": 46.0,
      "step": 89
    },
    {
      "epoch": 0.0018116489024427066,
      "grad_norm": 1.7668218788458034e-05,
      "learning_rate": 0.00019870336804394338,
      "loss": 46.0,
      "step": 90
    },
    {
      "epoch": 0.0018317783346920701,
      "grad_norm": 2.4439837943646125e-05,
      "learning_rate": 0.00019867082209617563,
      "loss": 46.0,
      "step": 91
    },
    {
      "epoch": 0.0018519077669414334,
      "grad_norm": 2.07311622943962e-05,
      "learning_rate": 0.00019863787546983832,
      "loss": 46.0,
      "step": 92
    },
    {
      "epoch": 0.001872037199190797,
      "grad_norm": 2.8900734832859598e-05,
      "learning_rate": 0.00019860452829871975,
      "loss": 46.0,
      "step": 93
    },
    {
      "epoch": 0.0018921666314401602,
      "grad_norm": 2.661554935912136e-05,
      "learning_rate": 0.00019857078071823484,
      "loss": 46.0,
      "step": 94
    },
    {
      "epoch": 0.0019122960636895237,
      "grad_norm": 3.1965726520866156e-05,
      "learning_rate": 0.00019853663286542442,
      "loss": 46.0,
      "step": 95
    },
    {
      "epoch": 0.001932425495938887,
      "grad_norm": 2.5141018340946175e-05,
      "learning_rate": 0.00019850208487895475,
      "loss": 46.0,
      "step": 96
    },
    {
      "epoch": 0.0019525549281882504,
      "grad_norm": 2.246517760795541e-05,
      "learning_rate": 0.0001984671368991169,
      "loss": 46.0,
      "step": 97
    },
    {
      "epoch": 0.001972684360437614,
      "grad_norm": 1.959139626706019e-05,
      "learning_rate": 0.00019843178906782624,
      "loss": 46.0,
      "step": 98
    },
    {
      "epoch": 0.0019928137926869772,
      "grad_norm": 1.0403034139017109e-05,
      "learning_rate": 0.0001983960415286219,
      "loss": 46.0,
      "step": 99
    },
    {
      "epoch": 0.0020129432249363405,
      "grad_norm": 2.894277531595435e-05,
      "learning_rate": 0.000198359894426666,
      "loss": 46.0,
      "step": 100
    },
    {
      "epoch": 0.002033072657185704,
      "grad_norm": 2.3059441446093842e-05,
      "learning_rate": 0.00019832334790874332,
      "loss": 46.0,
      "step": 101
    },
    {
      "epoch": 0.0020532020894350675,
      "grad_norm": 3.624501550802961e-05,
      "learning_rate": 0.00019828640212326046,
      "loss": 46.0,
      "step": 102
    },
    {
      "epoch": 0.0020733315216844308,
      "grad_norm": 2.7872463761013933e-05,
      "learning_rate": 0.00019824905722024542,
      "loss": 46.0,
      "step": 103
    },
    {
      "epoch": 0.0020934609539337945,
      "grad_norm": 4.196954978397116e-05,
      "learning_rate": 0.00019821131335134696,
      "loss": 46.0,
      "step": 104
    },
    {
      "epoch": 0.0021135903861831578,
      "grad_norm": 4.869971962762065e-05,
      "learning_rate": 0.00019817317066983382,
      "loss": 46.0,
      "step": 105
    },
    {
      "epoch": 0.002133719818432521,
      "grad_norm": 2.689683788048569e-05,
      "learning_rate": 0.00019813462933059435,
      "loss": 46.0,
      "step": 106
    },
    {
      "epoch": 0.0021538492506818843,
      "grad_norm": 2.0828028937103227e-05,
      "learning_rate": 0.0001980956894901356,
      "loss": 46.0,
      "step": 107
    },
    {
      "epoch": 0.002173978682931248,
      "grad_norm": 1.8326103599974886e-05,
      "learning_rate": 0.00019805635130658306,
      "loss": 46.0,
      "step": 108
    },
    {
      "epoch": 0.0021941081151806113,
      "grad_norm": 2.5773550078156404e-05,
      "learning_rate": 0.00019801661493967955,
      "loss": 46.0,
      "step": 109
    },
    {
      "epoch": 0.0022142375474299746,
      "grad_norm": 1.7421609300072305e-05,
      "learning_rate": 0.00019797648055078503,
      "loss": 46.0,
      "step": 110
    },
    {
      "epoch": 0.0022343669796793383,
      "grad_norm": 2.473703534633387e-05,
      "learning_rate": 0.0001979359483028756,
      "loss": 46.0,
      "step": 111
    },
    {
      "epoch": 0.0022544964119287016,
      "grad_norm": 4.040896601509303e-05,
      "learning_rate": 0.00019789501836054297,
      "loss": 46.0,
      "step": 112
    },
    {
      "epoch": 0.002274625844178065,
      "grad_norm": 3.786892557400279e-05,
      "learning_rate": 0.00019785369088999387,
      "loss": 46.0,
      "step": 113
    },
    {
      "epoch": 0.0022947552764274286,
      "grad_norm": 2.3734346541459672e-05,
      "learning_rate": 0.0001978119660590493,
      "loss": 46.0,
      "step": 114
    },
    {
      "epoch": 0.002314884708676792,
      "grad_norm": 4.281644578441046e-05,
      "learning_rate": 0.0001977698440371437,
      "loss": 46.0,
      "step": 115
    },
    {
      "epoch": 0.002335014140926155,
      "grad_norm": 6.363167631207034e-05,
      "learning_rate": 0.0001977273249953246,
      "loss": 46.0,
      "step": 116
    },
    {
      "epoch": 0.0023551435731755184,
      "grad_norm": 2.2506428649649024e-05,
      "learning_rate": 0.00019768440910625162,
      "loss": 46.0,
      "step": 117
    },
    {
      "epoch": 0.002375273005424882,
      "grad_norm": 2.2376898414222524e-05,
      "learning_rate": 0.00019764109654419584,
      "loss": 46.0,
      "step": 118
    },
    {
      "epoch": 0.0023954024376742454,
      "grad_norm": 4.303127570892684e-05,
      "learning_rate": 0.0001975973874850393,
      "loss": 46.0,
      "step": 119
    },
    {
      "epoch": 0.0024155318699236087,
      "grad_norm": 2.1347035726648755e-05,
      "learning_rate": 0.00019755328210627394,
      "loss": 46.0,
      "step": 120
    },
    {
      "epoch": 0.0024356613021729724,
      "grad_norm": 3.3754025935195386e-05,
      "learning_rate": 0.00019750878058700117,
      "loss": 46.0,
      "step": 121
    },
    {
      "epoch": 0.0024557907344223357,
      "grad_norm": 1.353104380541481e-05,
      "learning_rate": 0.000197463883107931,
      "loss": 46.0,
      "step": 122
    },
    {
      "epoch": 0.002475920166671699,
      "grad_norm": 4.797324072569609e-05,
      "learning_rate": 0.00019741858985138132,
      "loss": 46.0,
      "step": 123
    },
    {
      "epoch": 0.0024960495989210622,
      "grad_norm": 2.413903348497115e-05,
      "learning_rate": 0.00019737290100127722,
      "loss": 46.0,
      "step": 124
    },
    {
      "epoch": 0.002516179031170426,
      "grad_norm": 1.5329667803598568e-05,
      "learning_rate": 0.00019732681674315014,
      "loss": 46.0,
      "step": 125
    },
    {
      "epoch": 0.0025363084634197892,
      "grad_norm": 1.7863807443063706e-05,
      "learning_rate": 0.00019728033726413723,
      "loss": 46.0,
      "step": 126
    },
    {
      "epoch": 0.0025564378956691525,
      "grad_norm": 3.0265011446317658e-05,
      "learning_rate": 0.00019723346275298052,
      "loss": 46.0,
      "step": 127
    },
    {
      "epoch": 0.0025765673279185162,
      "grad_norm": 2.1787187506561168e-05,
      "learning_rate": 0.00019718619340002618,
      "loss": 46.0,
      "step": 128
    },
    {
      "epoch": 0.0025966967601678795,
      "grad_norm": 2.615424818941392e-05,
      "learning_rate": 0.0001971385293972237,
      "loss": 46.0,
      "step": 129
    },
    {
      "epoch": 0.0026168261924172428,
      "grad_norm": 2.6950148821924813e-05,
      "learning_rate": 0.0001970904709381252,
      "loss": 46.0,
      "step": 130
    },
    {
      "epoch": 0.0026369556246666065,
      "grad_norm": 2.9200287826824933e-05,
      "learning_rate": 0.00019704201821788456,
      "loss": 46.0,
      "step": 131
    },
    {
      "epoch": 0.0026570850569159698,
      "grad_norm": 4.081759470864199e-05,
      "learning_rate": 0.00019699317143325666,
      "loss": 46.0,
      "step": 132
    },
    {
      "epoch": 0.002677214489165333,
      "grad_norm": 3.662336166598834e-05,
      "learning_rate": 0.0001969439307825966,
      "loss": 46.0,
      "step": 133
    },
    {
      "epoch": 0.0026973439214146963,
      "grad_norm": 3.1114377634366974e-05,
      "learning_rate": 0.0001968942964658589,
      "loss": 46.0,
      "step": 134
    },
    {
      "epoch": 0.00271747335366406,
      "grad_norm": 2.8435193598852493e-05,
      "learning_rate": 0.00019684426868459655,
      "loss": 46.0,
      "step": 135
    },
    {
      "epoch": 0.0027376027859134233,
      "grad_norm": 2.6921919925371185e-05,
      "learning_rate": 0.00019679384764196046,
      "loss": 46.0,
      "step": 136
    },
    {
      "epoch": 0.0027577322181627866,
      "grad_norm": 2.3536973458249122e-05,
      "learning_rate": 0.00019674303354269833,
      "loss": 46.0,
      "step": 137
    },
    {
      "epoch": 0.0027778616504121503,
      "grad_norm": 1.310581592406379e-05,
      "learning_rate": 0.00019669182659315412,
      "loss": 46.0,
      "step": 138
    },
    {
      "epoch": 0.0027979910826615136,
      "grad_norm": 3.121258123428561e-05,
      "learning_rate": 0.00019664022700126695,
      "loss": 46.0,
      "step": 139
    },
    {
      "epoch": 0.002818120514910877,
      "grad_norm": 3.1170515285339206e-05,
      "learning_rate": 0.00019658823497657038,
      "loss": 46.0,
      "step": 140
    },
    {
      "epoch": 0.00283824994716024,
      "grad_norm": 2.6195963073405437e-05,
      "learning_rate": 0.0001965358507301916,
      "loss": 46.0,
      "step": 141
    },
    {
      "epoch": 0.002858379379409604,
      "grad_norm": 2.5877065127133392e-05,
      "learning_rate": 0.00019648307447485048,
      "loss": 46.0,
      "step": 142
    },
    {
      "epoch": 0.002878508811658967,
      "grad_norm": 2.742213837336749e-05,
      "learning_rate": 0.00019642990642485875,
      "loss": 46.0,
      "step": 143
    },
    {
      "epoch": 0.0028986382439083304,
      "grad_norm": 3.452887904131785e-05,
      "learning_rate": 0.0001963763467961191,
      "loss": 46.0,
      "step": 144
    },
    {
      "epoch": 0.002918767676157694,
      "grad_norm": 3.4890996175818145e-05,
      "learning_rate": 0.00019632239580612436,
      "loss": 46.0,
      "step": 145
    },
    {
      "epoch": 0.0029388971084070574,
      "grad_norm": 2.4831771952449344e-05,
      "learning_rate": 0.0001962680536739566,
      "loss": 46.0,
      "step": 146
    },
    {
      "epoch": 0.0029590265406564207,
      "grad_norm": 2.926559500338044e-05,
      "learning_rate": 0.00019621332062028617,
      "loss": 46.0,
      "step": 147
    },
    {
      "epoch": 0.0029791559729057844,
      "grad_norm": 3.801756975008175e-05,
      "learning_rate": 0.00019615819686737092,
      "loss": 46.0,
      "step": 148
    },
    {
      "epoch": 0.0029992854051551477,
      "grad_norm": 4.2626343201845884e-05,
      "learning_rate": 0.00019610268263905515,
      "loss": 46.0,
      "step": 149
    },
    {
      "epoch": 0.003019414837404511,
      "grad_norm": 5.3078922064742073e-05,
      "learning_rate": 0.00019604677816076888,
      "loss": 46.0,
      "step": 150
    },
    {
      "epoch": 0.0030395442696538742,
      "grad_norm": 3.865420876536518e-05,
      "learning_rate": 0.00019599048365952682,
      "loss": 46.0,
      "step": 151
    },
    {
      "epoch": 0.003059673701903238,
      "grad_norm": 2.114846029144246e-05,
      "learning_rate": 0.00019593379936392742,
      "loss": 46.0,
      "step": 152
    },
    {
      "epoch": 0.0030798031341526012,
      "grad_norm": 2.966085958178155e-05,
      "learning_rate": 0.00019587672550415203,
      "loss": 46.0,
      "step": 153
    },
    {
      "epoch": 0.0030999325664019645,
      "grad_norm": 2.12357390410034e-05,
      "learning_rate": 0.00019581926231196391,
      "loss": 46.0,
      "step": 154
    },
    {
      "epoch": 0.0031200619986513282,
      "grad_norm": 2.5047153030755e-05,
      "learning_rate": 0.00019576141002070738,
      "loss": 46.0,
      "step": 155
    },
    {
      "epoch": 0.0031401914309006915,
      "grad_norm": 2.9844281016266905e-05,
      "learning_rate": 0.00019570316886530665,
      "loss": 46.0,
      "step": 156
    },
    {
      "epoch": 0.003160320863150055,
      "grad_norm": 2.7546439014258794e-05,
      "learning_rate": 0.00019564453908226515,
      "loss": 46.0,
      "step": 157
    },
    {
      "epoch": 0.003180450295399418,
      "grad_norm": 5.0760227168211713e-05,
      "learning_rate": 0.00019558552090966435,
      "loss": 46.0,
      "step": 158
    },
    {
      "epoch": 0.0032005797276487818,
      "grad_norm": 4.231133061693981e-05,
      "learning_rate": 0.00019552611458716296,
      "loss": 46.0,
      "step": 159
    },
    {
      "epoch": 0.003220709159898145,
      "grad_norm": 5.593464447883889e-05,
      "learning_rate": 0.0001954663203559958,
      "loss": 46.0,
      "step": 160
    },
    {
      "epoch": 0.0032408385921475083,
      "grad_norm": 3.7881276512052864e-05,
      "learning_rate": 0.00019540613845897288,
      "loss": 46.0,
      "step": 161
    },
    {
      "epoch": 0.003260968024396872,
      "grad_norm": 3.1589570426149294e-05,
      "learning_rate": 0.00019534556914047851,
      "loss": 46.0,
      "step": 162
    },
    {
      "epoch": 0.0032810974566462353,
      "grad_norm": 7.080697105266154e-05,
      "learning_rate": 0.00019528461264647014,
      "loss": 46.0,
      "step": 163
    },
    {
      "epoch": 0.0033012268888955986,
      "grad_norm": 6.128560198703781e-05,
      "learning_rate": 0.00019522326922447755,
      "loss": 46.0,
      "step": 164
    },
    {
      "epoch": 0.0033213563211449623,
      "grad_norm": 6.614374433411285e-05,
      "learning_rate": 0.00019516153912360165,
      "loss": 46.0,
      "step": 165
    },
    {
      "epoch": 0.0033414857533943256,
      "grad_norm": 4.147323124925606e-05,
      "learning_rate": 0.00019509942259451357,
      "loss": 46.0,
      "step": 166
    },
    {
      "epoch": 0.003361615185643689,
      "grad_norm": 3.474095865385607e-05,
      "learning_rate": 0.00019503691988945367,
      "loss": 46.0,
      "step": 167
    },
    {
      "epoch": 0.003381744617893052,
      "grad_norm": 2.644029700604733e-05,
      "learning_rate": 0.00019497403126223048,
      "loss": 46.0,
      "step": 168
    },
    {
      "epoch": 0.003401874050142416,
      "grad_norm": 2.13767089007888e-05,
      "learning_rate": 0.00019491075696821962,
      "loss": 46.0,
      "step": 169
    },
    {
      "epoch": 0.003422003482391779,
      "grad_norm": 2.432280598441139e-05,
      "learning_rate": 0.00019484709726436282,
      "loss": 46.0,
      "step": 170
    },
    {
      "epoch": 0.0034421329146411424,
      "grad_norm": 2.13755301956553e-05,
      "learning_rate": 0.00019478305240916698,
      "loss": 46.0,
      "step": 171
    },
    {
      "epoch": 0.003462262346890506,
      "grad_norm": 2.9935839847894385e-05,
      "learning_rate": 0.0001947186226627028,
      "loss": 46.0,
      "step": 172
    },
    {
      "epoch": 0.0034823917791398694,
      "grad_norm": 6.0657053836621344e-05,
      "learning_rate": 0.0001946538082866041,
      "loss": 46.0,
      "step": 173
    },
    {
      "epoch": 0.0035025212113892327,
      "grad_norm": 3.512473631417379e-05,
      "learning_rate": 0.00019458860954406655,
      "loss": 46.0,
      "step": 174
    },
    {
      "epoch": 0.003522650643638596,
      "grad_norm": 3.1507472158409655e-05,
      "learning_rate": 0.00019452302669984662,
      "loss": 46.0,
      "step": 175
    },
    {
      "epoch": 0.0035427800758879597,
      "grad_norm": 9.249807771993801e-05,
      "learning_rate": 0.00019445706002026048,
      "loss": 46.0,
      "step": 176
    },
    {
      "epoch": 0.003562909508137323,
      "grad_norm": 2.893183773267083e-05,
      "learning_rate": 0.000194390709773183,
      "loss": 46.0,
      "step": 177
    },
    {
      "epoch": 0.0035830389403866863,
      "grad_norm": 6.318593659671023e-05,
      "learning_rate": 0.00019432397622804674,
      "loss": 46.0,
      "step": 178
    },
    {
      "epoch": 0.00360316837263605,
      "grad_norm": 5.4799009376438335e-05,
      "learning_rate": 0.00019425685965584056,
      "loss": 46.0,
      "step": 179
    },
    {
      "epoch": 0.0036232978048854132,
      "grad_norm": 5.187106944504194e-05,
      "learning_rate": 0.0001941893603291088,
      "loss": 46.0,
      "step": 180
    },
    {
      "epoch": 0.0036434272371347765,
      "grad_norm": 3.32783383782953e-05,
      "learning_rate": 0.00019412147852195007,
      "loss": 46.0,
      "step": 181
    },
    {
      "epoch": 0.0036635566693841402,
      "grad_norm": 2.430532913422212e-05,
      "learning_rate": 0.00019405321451001605,
      "loss": 46.0,
      "step": 182
    },
    {
      "epoch": 0.0036836861016335035,
      "grad_norm": 5.952031642664224e-05,
      "learning_rate": 0.00019398456857051065,
      "loss": 46.0,
      "step": 183
    },
    {
      "epoch": 0.003703815533882867,
      "grad_norm": 3.728271258296445e-05,
      "learning_rate": 0.00019391554098218853,
      "loss": 46.0,
      "step": 184
    },
    {
      "epoch": 0.00372394496613223,
      "grad_norm": 4.849689139518887e-05,
      "learning_rate": 0.00019384613202535418,
      "loss": 46.0,
      "step": 185
    },
    {
      "epoch": 0.003744074398381594,
      "grad_norm": 6.212339212652296e-05,
      "learning_rate": 0.00019377634198186077,
      "loss": 46.0,
      "step": 186
    },
    {
      "epoch": 0.003764203830630957,
      "grad_norm": 0.00010154359188163653,
      "learning_rate": 0.0001937061711351089,
      "loss": 46.0,
      "step": 187
    },
    {
      "epoch": 0.0037843332628803203,
      "grad_norm": 4.9923335609491915e-05,
      "learning_rate": 0.00019363561977004564,
      "loss": 46.0,
      "step": 188
    },
    {
      "epoch": 0.003804462695129684,
      "grad_norm": 0.00011626673949649557,
      "learning_rate": 0.00019356468817316311,
      "loss": 46.0,
      "step": 189
    },
    {
      "epoch": 0.0038245921273790473,
      "grad_norm": 9.168522228719667e-05,
      "learning_rate": 0.0001934933766324976,
      "loss": 46.0,
      "step": 190
    },
    {
      "epoch": 0.0038447215596284106,
      "grad_norm": 3.176367317792028e-05,
      "learning_rate": 0.00019342168543762814,
      "loss": 46.0,
      "step": 191
    },
    {
      "epoch": 0.003864850991877774,
      "grad_norm": 5.836347190779634e-05,
      "learning_rate": 0.0001933496148796755,
      "loss": 46.0,
      "step": 192
    },
    {
      "epoch": 0.0038849804241271376,
      "grad_norm": 2.9238137358333915e-05,
      "learning_rate": 0.00019327716525130098,
      "loss": 46.0,
      "step": 193
    },
    {
      "epoch": 0.003905109856376501,
      "grad_norm": 3.159691550536081e-05,
      "learning_rate": 0.00019320433684670514,
      "loss": 46.0,
      "step": 194
    },
    {
      "epoch": 0.003925239288625864,
      "grad_norm": 3.837713666143827e-05,
      "learning_rate": 0.00019313112996162667,
      "loss": 46.0,
      "step": 195
    },
    {
      "epoch": 0.003945368720875228,
      "grad_norm": 3.1190254958346486e-05,
      "learning_rate": 0.00019305754489334125,
      "loss": 46.0,
      "step": 196
    },
    {
      "epoch": 0.003965498153124591,
      "grad_norm": 9.271525777876377e-05,
      "learning_rate": 0.00019298358194066016,
      "loss": 46.0,
      "step": 197
    },
    {
      "epoch": 0.0039856275853739544,
      "grad_norm": 3.302880941191688e-05,
      "learning_rate": 0.00019290924140392921,
      "loss": 46.0,
      "step": 198
    },
    {
      "epoch": 0.004005757017623318,
      "grad_norm": 3.4837332350434735e-05,
      "learning_rate": 0.0001928345235850276,
      "loss": 46.0,
      "step": 199
    },
    {
      "epoch": 0.004025886449872681,
      "grad_norm": 2.956252865260467e-05,
      "learning_rate": 0.0001927594287873664,
      "loss": 46.0,
      "step": 200
    },
    {
      "epoch": 0.004046015882122045,
      "grad_norm": 4.961419472238049e-05,
      "learning_rate": 0.00019268395731588764,
      "loss": 46.0,
      "step": 201
    },
    {
      "epoch": 0.004066145314371408,
      "grad_norm": 7.009352702880278e-05,
      "learning_rate": 0.00019260810947706287,
      "loss": 46.0,
      "step": 202
    },
    {
      "epoch": 0.004086274746620771,
      "grad_norm": 4.309253199608065e-05,
      "learning_rate": 0.000192531885578892,
      "loss": 46.0,
      "step": 203
    },
    {
      "epoch": 0.004106404178870135,
      "grad_norm": 0.00012773476191796362,
      "learning_rate": 0.00019245528593090204,
      "loss": 46.0,
      "step": 204
    },
    {
      "epoch": 0.004126533611119499,
      "grad_norm": 2.8066795493941754e-05,
      "learning_rate": 0.00019237831084414577,
      "loss": 46.0,
      "step": 205
    },
    {
      "epoch": 0.0041466630433688615,
      "grad_norm": 5.905181751586497e-05,
      "learning_rate": 0.0001923009606312006,
      "loss": 46.0,
      "step": 206
    },
    {
      "epoch": 0.004166792475618225,
      "grad_norm": 4.193755376036279e-05,
      "learning_rate": 0.0001922232356061672,
      "loss": 46.0,
      "step": 207
    },
    {
      "epoch": 0.004186921907867589,
      "grad_norm": 4.6146677050273865e-05,
      "learning_rate": 0.00019214513608466826,
      "loss": 46.0,
      "step": 208
    },
    {
      "epoch": 0.004207051340116952,
      "grad_norm": 2.824837065418251e-05,
      "learning_rate": 0.00019206666238384728,
      "loss": 46.0,
      "step": 209
    },
    {
      "epoch": 0.0042271807723663155,
      "grad_norm": 3.296473005320877e-05,
      "learning_rate": 0.0001919878148223671,
      "loss": 46.0,
      "step": 210
    },
    {
      "epoch": 0.004247310204615679,
      "grad_norm": 3.6901357816532254e-05,
      "learning_rate": 0.00019190859372040882,
      "loss": 46.0,
      "step": 211
    },
    {
      "epoch": 0.004267439636865042,
      "grad_norm": 4.644416912924498e-05,
      "learning_rate": 0.00019182899939967034,
      "loss": 46.0,
      "step": 212
    },
    {
      "epoch": 0.004287569069114406,
      "grad_norm": 5.2605690143536776e-05,
      "learning_rate": 0.00019174903218336511,
      "loss": 46.0,
      "step": 213
    },
    {
      "epoch": 0.004307698501363769,
      "grad_norm": 2.7000423870049417e-05,
      "learning_rate": 0.00019166869239622085,
      "loss": 46.0,
      "step": 214
    },
    {
      "epoch": 0.004327827933613132,
      "grad_norm": 4.584537600749172e-05,
      "learning_rate": 0.00019158798036447822,
      "loss": 46.0,
      "step": 215
    },
    {
      "epoch": 0.004347957365862496,
      "grad_norm": 2.657137156347744e-05,
      "learning_rate": 0.0001915068964158894,
      "loss": 46.0,
      "step": 216
    },
    {
      "epoch": 0.004368086798111859,
      "grad_norm": 4.8142963350983337e-05,
      "learning_rate": 0.00019142544087971693,
      "loss": 46.0,
      "step": 217
    },
    {
      "epoch": 0.004388216230361223,
      "grad_norm": 3.945273419958539e-05,
      "learning_rate": 0.00019134361408673216,
      "loss": 46.0,
      "step": 218
    },
    {
      "epoch": 0.004408345662610586,
      "grad_norm": 6.008298441884108e-05,
      "learning_rate": 0.00019126141636921414,
      "loss": 46.0,
      "step": 219
    },
    {
      "epoch": 0.004428475094859949,
      "grad_norm": 5.5544471251778305e-05,
      "learning_rate": 0.0001911788480609481,
      "loss": 46.0,
      "step": 220
    },
    {
      "epoch": 0.004448604527109313,
      "grad_norm": 5.3515970648732036e-05,
      "learning_rate": 0.00019109590949722413,
      "loss": 46.0,
      "step": 221
    },
    {
      "epoch": 0.004468733959358677,
      "grad_norm": 4.5226741349324584e-05,
      "learning_rate": 0.00019101260101483592,
      "loss": 46.0,
      "step": 222
    },
    {
      "epoch": 0.0044888633916080395,
      "grad_norm": 5.253091512713581e-05,
      "learning_rate": 0.0001909289229520792,
      "loss": 46.0,
      "step": 223
    },
    {
      "epoch": 0.004508992823857403,
      "grad_norm": 7.821829058229923e-05,
      "learning_rate": 0.0001908448756487506,
      "loss": 46.0,
      "step": 224
    },
    {
      "epoch": 0.004529122256106767,
      "grad_norm": 2.5970837668864988e-05,
      "learning_rate": 0.00019076045944614603,
      "loss": 46.0,
      "step": 225
    },
    {
      "epoch": 0.00454925168835613,
      "grad_norm": 8.79172730492428e-05,
      "learning_rate": 0.0001906756746870595,
      "loss": 46.0,
      "step": 226
    },
    {
      "epoch": 0.0045693811206054934,
      "grad_norm": 5.294303991831839e-05,
      "learning_rate": 0.00019059052171578155,
      "loss": 46.0,
      "step": 227
    },
    {
      "epoch": 0.004589510552854857,
      "grad_norm": 3.699533408507705e-05,
      "learning_rate": 0.00019050500087809807,
      "loss": 46.0,
      "step": 228
    },
    {
      "epoch": 0.00460963998510422,
      "grad_norm": 5.711496851290576e-05,
      "learning_rate": 0.00019041911252128864,
      "loss": 46.0,
      "step": 229
    },
    {
      "epoch": 0.004629769417353584,
      "grad_norm": 5.045397483627312e-05,
      "learning_rate": 0.00019033285699412533,
      "loss": 46.0,
      "step": 230
    },
    {
      "epoch": 0.0046498988496029466,
      "grad_norm": 7.707828626735136e-05,
      "learning_rate": 0.00019024623464687114,
      "loss": 46.0,
      "step": 231
    },
    {
      "epoch": 0.00467002828185231,
      "grad_norm": 4.182373595540412e-05,
      "learning_rate": 0.00019015924583127872,
      "loss": 46.0,
      "step": 232
    },
    {
      "epoch": 0.004690157714101674,
      "grad_norm": 4.5557317207567394e-05,
      "learning_rate": 0.00019007189090058878,
      "loss": 46.0,
      "step": 233
    },
    {
      "epoch": 0.004710287146351037,
      "grad_norm": 4.820396861759946e-05,
      "learning_rate": 0.0001899841702095287,
      "loss": 46.0,
      "step": 234
    },
    {
      "epoch": 0.0047304165786004005,
      "grad_norm": 5.630170926451683e-05,
      "learning_rate": 0.00018989608411431135,
      "loss": 46.0,
      "step": 235
    },
    {
      "epoch": 0.004750546010849764,
      "grad_norm": 3.503153857309371e-05,
      "learning_rate": 0.0001898076329726331,
      "loss": 46.0,
      "step": 236
    },
    {
      "epoch": 0.004770675443099127,
      "grad_norm": 8.119984704535455e-05,
      "learning_rate": 0.00018971881714367295,
      "loss": 46.0,
      "step": 237
    },
    {
      "epoch": 0.004790804875348491,
      "grad_norm": 0.0001170546529465355,
      "learning_rate": 0.00018962963698809063,
      "loss": 46.0,
      "step": 238
    },
    {
      "epoch": 0.0048109343075978545,
      "grad_norm": 3.3851210901048034e-05,
      "learning_rate": 0.00018954009286802545,
      "loss": 46.0,
      "step": 239
    },
    {
      "epoch": 0.004831063739847217,
      "grad_norm": 4.396312215249054e-05,
      "learning_rate": 0.0001894501851470946,
      "loss": 46.0,
      "step": 240
    },
    {
      "epoch": 0.004851193172096581,
      "grad_norm": 6.271849269978702e-05,
      "learning_rate": 0.00018935991419039176,
      "loss": 46.0,
      "step": 241
    },
    {
      "epoch": 0.004871322604345945,
      "grad_norm": 4.544670082395896e-05,
      "learning_rate": 0.00018926928036448572,
      "loss": 46.0,
      "step": 242
    },
    {
      "epoch": 0.004891452036595308,
      "grad_norm": 3.364813528605737e-05,
      "learning_rate": 0.0001891782840374187,
      "loss": 46.0,
      "step": 243
    },
    {
      "epoch": 0.004911581468844671,
      "grad_norm": 3.067620491492562e-05,
      "learning_rate": 0.000189086925578705,
      "loss": 46.0,
      "step": 244
    },
    {
      "epoch": 0.004931710901094035,
      "grad_norm": 3.76120260625612e-05,
      "learning_rate": 0.00018899520535932938,
      "loss": 46.0,
      "step": 245
    },
    {
      "epoch": 0.004951840333343398,
      "grad_norm": 5.46066730748862e-05,
      "learning_rate": 0.00018890312375174578,
      "loss": 46.0,
      "step": 246
    },
    {
      "epoch": 0.004971969765592762,
      "grad_norm": 4.003910726169124e-05,
      "learning_rate": 0.0001888106811298755,
      "loss": 46.0,
      "step": 247
    },
    {
      "epoch": 0.0049920991978421245,
      "grad_norm": 6.587472307728603e-05,
      "learning_rate": 0.00018871787786910583,
      "loss": 46.0,
      "step": 248
    },
    {
      "epoch": 0.005012228630091488,
      "grad_norm": 3.237745113437995e-05,
      "learning_rate": 0.0001886247143462886,
      "loss": 46.0,
      "step": 249
    },
    {
      "epoch": 0.005032358062340852,
      "grad_norm": 4.901425199932419e-05,
      "learning_rate": 0.00018853119093973863,
      "loss": 46.0,
      "step": 250
    },
    {
      "epoch": 0.005052487494590215,
      "grad_norm": 0.00016870767285581678,
      "learning_rate": 0.00018843730802923202,
      "loss": 46.0,
      "step": 251
    },
    {
      "epoch": 0.0050726169268395785,
      "grad_norm": 5.0396236474625766e-05,
      "learning_rate": 0.00018834306599600472,
      "loss": 46.0,
      "step": 252
    },
    {
      "epoch": 0.005092746359088942,
      "grad_norm": 4.4291067752055824e-05,
      "learning_rate": 0.00018824846522275113,
      "loss": 46.0,
      "step": 253
    },
    {
      "epoch": 0.005112875791338305,
      "grad_norm": 4.502312367549166e-05,
      "learning_rate": 0.0001881535060936223,
      "loss": 46.0,
      "step": 254
    },
    {
      "epoch": 0.005133005223587669,
      "grad_norm": 9.577722084941342e-05,
      "learning_rate": 0.00018805818899422447,
      "loss": 46.0,
      "step": 255
    },
    {
      "epoch": 0.0051531346558370324,
      "grad_norm": 3.933937841793522e-05,
      "learning_rate": 0.0001879625143116176,
      "loss": 46.0,
      "step": 256
    },
    {
      "epoch": 0.005173264088086395,
      "grad_norm": 5.2159059123368934e-05,
      "learning_rate": 0.00018786648243431363,
      "loss": 46.0,
      "step": 257
    },
    {
      "epoch": 0.005193393520335759,
      "grad_norm": 6.076386125641875e-05,
      "learning_rate": 0.000187770093752275,
      "loss": 46.0,
      "step": 258
    },
    {
      "epoch": 0.005213522952585123,
      "grad_norm": 7.954567990964279e-05,
      "learning_rate": 0.0001876733486569131,
      "loss": 46.0,
      "step": 259
    },
    {
      "epoch": 0.0052336523848344856,
      "grad_norm": 9.186089300783351e-05,
      "learning_rate": 0.0001875762475410865,
      "loss": 46.0,
      "step": 260
    },
    {
      "epoch": 0.005253781817083849,
      "grad_norm": 6.62936654407531e-05,
      "learning_rate": 0.00018747879079909963,
      "loss": 46.0,
      "step": 261
    },
    {
      "epoch": 0.005273911249333213,
      "grad_norm": 8.344445814145729e-05,
      "learning_rate": 0.00018738097882670097,
      "loss": 46.0,
      "step": 262
    },
    {
      "epoch": 0.005294040681582576,
      "grad_norm": 4.7971618187148124e-05,
      "learning_rate": 0.0001872828120210815,
      "loss": 46.0,
      "step": 263
    },
    {
      "epoch": 0.0053141701138319395,
      "grad_norm": 6.027764902682975e-05,
      "learning_rate": 0.00018718429078087306,
      "loss": 46.0,
      "step": 264
    },
    {
      "epoch": 0.005334299546081302,
      "grad_norm": 3.437638588366099e-05,
      "learning_rate": 0.00018708541550614688,
      "loss": 46.0,
      "step": 265
    },
    {
      "epoch": 0.005354428978330666,
      "grad_norm": 0.00015669086133129895,
      "learning_rate": 0.00018698618659841168,
      "loss": 46.0,
      "step": 266
    },
    {
      "epoch": 0.00537455841058003,
      "grad_norm": 5.7055072829825804e-05,
      "learning_rate": 0.00018688660446061235,
      "loss": 46.0,
      "step": 267
    },
    {
      "epoch": 0.005394687842829393,
      "grad_norm": 9.243898966815323e-05,
      "learning_rate": 0.00018678666949712805,
      "loss": 46.0,
      "step": 268
    },
    {
      "epoch": 0.005414817275078756,
      "grad_norm": 6.301647226791829e-05,
      "learning_rate": 0.00018668638211377075,
      "loss": 46.0,
      "step": 269
    },
    {
      "epoch": 0.00543494670732812,
      "grad_norm": 5.150353172211908e-05,
      "learning_rate": 0.00018658574271778345,
      "loss": 46.0,
      "step": 270
    },
    {
      "epoch": 0.005455076139577483,
      "grad_norm": 7.764642214169726e-05,
      "learning_rate": 0.0001864847517178387,
      "loss": 46.0,
      "step": 271
    },
    {
      "epoch": 0.005475205571826847,
      "grad_norm": 6.848713383078575e-05,
      "learning_rate": 0.0001863834095240367,
      "loss": 46.0,
      "step": 272
    },
    {
      "epoch": 0.00549533500407621,
      "grad_norm": 6.603610381716862e-05,
      "learning_rate": 0.00018628171654790383,
      "loss": 46.0,
      "step": 273
    },
    {
      "epoch": 0.005515464436325573,
      "grad_norm": 4.008671749033965e-05,
      "learning_rate": 0.00018617967320239088,
      "loss": 46.0,
      "step": 274
    },
    {
      "epoch": 0.005535593868574937,
      "grad_norm": 0.00014578885748051107,
      "learning_rate": 0.00018607727990187147,
      "loss": 46.0,
      "step": 275
    },
    {
      "epoch": 0.005555723300824301,
      "grad_norm": 0.0001186303561553359,
      "learning_rate": 0.00018597453706214025,
      "loss": 46.0,
      "step": 276
    },
    {
      "epoch": 0.0055758527330736635,
      "grad_norm": 6.097505320212804e-05,
      "learning_rate": 0.00018587144510041128,
      "loss": 46.0,
      "step": 277
    },
    {
      "epoch": 0.005595982165323027,
      "grad_norm": 3.146566086797975e-05,
      "learning_rate": 0.0001857680044353163,
      "loss": 46.0,
      "step": 278
    },
    {
      "epoch": 0.005616111597572391,
      "grad_norm": 3.7260102544678375e-05,
      "learning_rate": 0.0001856642154869031,
      "loss": 46.0,
      "step": 279
    },
    {
      "epoch": 0.005636241029821754,
      "grad_norm": 9.276531636714935e-05,
      "learning_rate": 0.0001855600786766337,
      "loss": 46.0,
      "step": 280
    },
    {
      "epoch": 0.0056563704620711175,
      "grad_norm": 4.5973942178534344e-05,
      "learning_rate": 0.00018545559442738273,
      "loss": 46.0,
      "step": 281
    },
    {
      "epoch": 0.00567649989432048,
      "grad_norm": 6.683785613859072e-05,
      "learning_rate": 0.00018535076316343575,
      "loss": 46.0,
      "step": 282
    },
    {
      "epoch": 0.005696629326569844,
      "grad_norm": 8.007440192159265e-05,
      "learning_rate": 0.00018524558531048737,
      "loss": 46.0,
      "step": 283
    },
    {
      "epoch": 0.005716758758819208,
      "grad_norm": 4.978142897016369e-05,
      "learning_rate": 0.00018514006129563966,
      "loss": 46.0,
      "step": 284
    },
    {
      "epoch": 0.005736888191068571,
      "grad_norm": 0.00010632863268256187,
      "learning_rate": 0.00018503419154740035,
      "loss": 46.0,
      "step": 285
    },
    {
      "epoch": 0.005757017623317934,
      "grad_norm": 2.972048423544038e-05,
      "learning_rate": 0.00018492797649568115,
      "loss": 46.0,
      "step": 286
    },
    {
      "epoch": 0.005777147055567298,
      "grad_norm": 4.370976603240706e-05,
      "learning_rate": 0.00018482141657179594,
      "loss": 46.0,
      "step": 287
    },
    {
      "epoch": 0.005797276487816661,
      "grad_norm": 7.786518835928291e-05,
      "learning_rate": 0.00018471451220845902,
      "loss": 46.0,
      "step": 288
    },
    {
      "epoch": 0.0058174059200660246,
      "grad_norm": 0.00010520989599172026,
      "learning_rate": 0.00018460726383978337,
      "loss": 46.0,
      "step": 289
    },
    {
      "epoch": 0.005837535352315388,
      "grad_norm": 7.304285827558488e-05,
      "learning_rate": 0.000184499671901279,
      "loss": 46.0,
      "step": 290
    },
    {
      "epoch": 0.005857664784564751,
      "grad_norm": 3.885753540089354e-05,
      "learning_rate": 0.00018439173682985094,
      "loss": 46.0,
      "step": 291
    },
    {
      "epoch": 0.005877794216814115,
      "grad_norm": 9.42759434110485e-05,
      "learning_rate": 0.00018428345906379767,
      "loss": 46.0,
      "step": 292
    },
    {
      "epoch": 0.0058979236490634785,
      "grad_norm": 7.605970313306898e-05,
      "learning_rate": 0.00018417483904280925,
      "loss": 46.0,
      "step": 293
    },
    {
      "epoch": 0.005918053081312841,
      "grad_norm": 7.958237983984873e-05,
      "learning_rate": 0.00018406587720796555,
      "loss": 46.0,
      "step": 294
    },
    {
      "epoch": 0.005938182513562205,
      "grad_norm": 0.00015608093235641718,
      "learning_rate": 0.00018395657400173453,
      "loss": 46.0,
      "step": 295
    },
    {
      "epoch": 0.005958311945811569,
      "grad_norm": 6.675553595414385e-05,
      "learning_rate": 0.00018384692986797026,
      "loss": 46.0,
      "step": 296
    },
    {
      "epoch": 0.005978441378060932,
      "grad_norm": 4.7439232730539516e-05,
      "learning_rate": 0.00018373694525191138,
      "loss": 46.0,
      "step": 297
    },
    {
      "epoch": 0.005998570810310295,
      "grad_norm": 7.789856317685917e-05,
      "learning_rate": 0.00018362662060017896,
      "loss": 46.0,
      "step": 298
    },
    {
      "epoch": 0.006018700242559658,
      "grad_norm": 5.332715591066517e-05,
      "learning_rate": 0.00018351595636077509,
      "loss": 46.0,
      "step": 299
    },
    {
      "epoch": 0.006038829674809022,
      "grad_norm": 9.82348938123323e-05,
      "learning_rate": 0.00018340495298308063,
      "loss": 46.0,
      "step": 300
    },
    {
      "epoch": 0.006058959107058386,
      "grad_norm": 0.00010532526357565075,
      "learning_rate": 0.0001832936109178538,
      "loss": 46.0,
      "step": 301
    },
    {
      "epoch": 0.0060790885393077485,
      "grad_norm": 5.648566730087623e-05,
      "learning_rate": 0.00018318193061722795,
      "loss": 46.0,
      "step": 302
    },
    {
      "epoch": 0.006099217971557112,
      "grad_norm": 0.00012587102537509054,
      "learning_rate": 0.00018306991253471013,
      "loss": 46.0,
      "step": 303
    },
    {
      "epoch": 0.006119347403806476,
      "grad_norm": 0.00011527648894116282,
      "learning_rate": 0.00018295755712517887,
      "loss": 46.0,
      "step": 304
    },
    {
      "epoch": 0.006139476836055839,
      "grad_norm": 6.82896061334759e-05,
      "learning_rate": 0.00018284486484488257,
      "loss": 46.0,
      "step": 305
    },
    {
      "epoch": 0.0061596062683052025,
      "grad_norm": 6.256354390643537e-05,
      "learning_rate": 0.00018273183615143764,
      "loss": 46.0,
      "step": 306
    },
    {
      "epoch": 0.006179735700554566,
      "grad_norm": 6.483913603005931e-05,
      "learning_rate": 0.00018261847150382644,
      "loss": 46.0,
      "step": 307
    },
    {
      "epoch": 0.006199865132803929,
      "grad_norm": 9.90207918221131e-05,
      "learning_rate": 0.00018250477136239572,
      "loss": 46.0,
      "step": 308
    },
    {
      "epoch": 0.006219994565053293,
      "grad_norm": 8.644862100481987e-05,
      "learning_rate": 0.00018239073618885447,
      "loss": 46.0,
      "step": 309
    },
    {
      "epoch": 0.0062401239973026565,
      "grad_norm": 5.34062746737618e-05,
      "learning_rate": 0.00018227636644627224,
      "loss": 46.0,
      "step": 310
    },
    {
      "epoch": 0.006260253429552019,
      "grad_norm": 5.9164103731745854e-05,
      "learning_rate": 0.00018216166259907713,
      "loss": 46.0,
      "step": 311
    },
    {
      "epoch": 0.006280382861801383,
      "grad_norm": 0.00011165209434693679,
      "learning_rate": 0.000182046625113054,
      "loss": 46.0,
      "step": 312
    },
    {
      "epoch": 0.006300512294050747,
      "grad_norm": 9.209036215906963e-05,
      "learning_rate": 0.00018193125445534252,
      "loss": 46.0,
      "step": 313
    },
    {
      "epoch": 0.00632064172630011,
      "grad_norm": 9.060541924554855e-05,
      "learning_rate": 0.00018181555109443527,
      "loss": 46.0,
      "step": 314
    },
    {
      "epoch": 0.006340771158549473,
      "grad_norm": 0.00010777592979138717,
      "learning_rate": 0.0001816995155001759,
      "loss": 46.0,
      "step": 315
    },
    {
      "epoch": 0.006360900590798836,
      "grad_norm": 6.211627623997629e-05,
      "learning_rate": 0.00018158314814375716,
      "loss": 46.0,
      "step": 316
    },
    {
      "epoch": 0.0063810300230482,
      "grad_norm": 0.00011468301818240434,
      "learning_rate": 0.000181466449497719,
      "loss": 46.0,
      "step": 317
    },
    {
      "epoch": 0.0064011594552975636,
      "grad_norm": 0.00011545659799594432,
      "learning_rate": 0.00018134942003594665,
      "loss": 46.0,
      "step": 318
    },
    {
      "epoch": 0.006421288887546926,
      "grad_norm": 7.793011172907427e-05,
      "learning_rate": 0.00018123206023366875,
      "loss": 46.0,
      "step": 319
    },
    {
      "epoch": 0.00644141831979629,
      "grad_norm": 6.144792860141024e-05,
      "learning_rate": 0.00018111437056745532,
      "loss": 46.0,
      "step": 320
    },
    {
      "epoch": 0.006461547752045654,
      "grad_norm": 7.264247687999159e-05,
      "learning_rate": 0.00018099635151521586,
      "loss": 46.0,
      "step": 321
    },
    {
      "epoch": 0.006481677184295017,
      "grad_norm": 0.0001523289829492569,
      "learning_rate": 0.00018087800355619753,
      "loss": 46.0,
      "step": 322
    },
    {
      "epoch": 0.00650180661654438,
      "grad_norm": 0.00013660687545780092,
      "learning_rate": 0.000180759327170983,
      "loss": 46.0,
      "step": 323
    },
    {
      "epoch": 0.006521936048793744,
      "grad_norm": 0.0001787421788321808,
      "learning_rate": 0.00018064032284148868,
      "loss": 46.0,
      "step": 324
    },
    {
      "epoch": 0.006542065481043107,
      "grad_norm": 0.00017624157771933824,
      "learning_rate": 0.0001805209910509626,
      "loss": 46.0,
      "step": 325
    },
    {
      "epoch": 0.006562194913292471,
      "grad_norm": 7.056714821374044e-05,
      "learning_rate": 0.0001804013322839826,
      "loss": 46.0,
      "step": 326
    },
    {
      "epoch": 0.006582324345541834,
      "grad_norm": 0.00016265243175439537,
      "learning_rate": 0.00018028134702645425,
      "loss": 46.0,
      "step": 327
    },
    {
      "epoch": 0.006602453777791197,
      "grad_norm": 0.00015621079364791512,
      "learning_rate": 0.00018016103576560895,
      "loss": 46.0,
      "step": 328
    },
    {
      "epoch": 0.006622583210040561,
      "grad_norm": 0.00015462673036381602,
      "learning_rate": 0.00018004039899000186,
      "loss": 46.0,
      "step": 329
    },
    {
      "epoch": 0.006642712642289925,
      "grad_norm": 9.932387911248952e-05,
      "learning_rate": 0.0001799194371895101,
      "loss": 46.0,
      "step": 330
    },
    {
      "epoch": 0.0066628420745392875,
      "grad_norm": 0.0001572413748363033,
      "learning_rate": 0.00017979815085533048,
      "loss": 46.0,
      "step": 331
    },
    {
      "epoch": 0.006682971506788651,
      "grad_norm": 0.00013717268302571028,
      "learning_rate": 0.00017967654047997784,
      "loss": 46.0,
      "step": 332
    },
    {
      "epoch": 0.006703100939038014,
      "grad_norm": 0.00017757757450453937,
      "learning_rate": 0.0001795546065572827,
      "loss": 46.0,
      "step": 333
    },
    {
      "epoch": 0.006723230371287378,
      "grad_norm": 9.130597027251497e-05,
      "learning_rate": 0.00017943234958238952,
      "loss": 46.0,
      "step": 334
    },
    {
      "epoch": 0.0067433598035367415,
      "grad_norm": 6.078862497815862e-05,
      "learning_rate": 0.00017930977005175465,
      "loss": 46.0,
      "step": 335
    },
    {
      "epoch": 0.006763489235786104,
      "grad_norm": 0.00018025643657892942,
      "learning_rate": 0.0001791868684631441,
      "loss": 46.0,
      "step": 336
    },
    {
      "epoch": 0.006783618668035468,
      "grad_norm": 0.0001726304617477581,
      "learning_rate": 0.00017906364531563185,
      "loss": 46.0,
      "step": 337
    },
    {
      "epoch": 0.006803748100284832,
      "grad_norm": 8.799460192676634e-05,
      "learning_rate": 0.00017894010110959755,
      "loss": 46.0,
      "step": 338
    },
    {
      "epoch": 0.006823877532534195,
      "grad_norm": 0.00012871818034909666,
      "learning_rate": 0.00017881623634672465,
      "loss": 46.0,
      "step": 339
    },
    {
      "epoch": 0.006844006964783558,
      "grad_norm": 0.00010133467003470287,
      "learning_rate": 0.00017869205152999822,
      "loss": 46.0,
      "step": 340
    },
    {
      "epoch": 0.006864136397032922,
      "grad_norm": 0.00010885829397011548,
      "learning_rate": 0.00017856754716370313,
      "loss": 46.0,
      "step": 341
    },
    {
      "epoch": 0.006884265829282285,
      "grad_norm": 9.338463132735342e-05,
      "learning_rate": 0.0001784427237534217,
      "loss": 46.0,
      "step": 342
    },
    {
      "epoch": 0.006904395261531649,
      "grad_norm": 9.378411778016016e-05,
      "learning_rate": 0.00017831758180603195,
      "loss": 46.0,
      "step": 343
    },
    {
      "epoch": 0.006924524693781012,
      "grad_norm": 0.00012989221431780607,
      "learning_rate": 0.00017819212182970535,
      "loss": 46.0,
      "step": 344
    },
    {
      "epoch": 0.006944654126030375,
      "grad_norm": 0.00017901930550578982,
      "learning_rate": 0.00017806634433390476,
      "loss": 46.0,
      "step": 345
    },
    {
      "epoch": 0.006964783558279739,
      "grad_norm": 9.102857438847423e-05,
      "learning_rate": 0.00017794024982938252,
      "loss": 46.0,
      "step": 346
    },
    {
      "epoch": 0.0069849129905291026,
      "grad_norm": 7.371963874902576e-05,
      "learning_rate": 0.00017781383882817811,
      "loss": 46.0,
      "step": 347
    },
    {
      "epoch": 0.007005042422778465,
      "grad_norm": 7.886077219154686e-05,
      "learning_rate": 0.00017768711184361645,
      "loss": 46.0,
      "step": 348
    },
    {
      "epoch": 0.007025171855027829,
      "grad_norm": 0.0001304990437347442,
      "learning_rate": 0.00017756006939030535,
      "loss": 46.0,
      "step": 349
    },
    {
      "epoch": 0.007045301287277192,
      "grad_norm": 0.00014258353621698916,
      "learning_rate": 0.00017743271198413386,
      "loss": 46.0,
      "step": 350
    },
    {
      "epoch": 0.007065430719526556,
      "grad_norm": 0.00015988641825970262,
      "learning_rate": 0.00017730504014226982,
      "loss": 46.0,
      "step": 351
    },
    {
      "epoch": 0.007085560151775919,
      "grad_norm": 8.829456783132628e-05,
      "learning_rate": 0.00017717705438315804,
      "loss": 46.0,
      "step": 352
    },
    {
      "epoch": 0.007105689584025282,
      "grad_norm": 0.00018408888718113303,
      "learning_rate": 0.00017704875522651806,
      "loss": 46.0,
      "step": 353
    },
    {
      "epoch": 0.007125819016274646,
      "grad_norm": 0.0001536206982564181,
      "learning_rate": 0.0001769201431933419,
      "loss": 46.0,
      "step": 354
    },
    {
      "epoch": 0.00714594844852401,
      "grad_norm": 0.00015358305245172232,
      "learning_rate": 0.00017679121880589236,
      "loss": 46.0,
      "step": 355
    },
    {
      "epoch": 0.0071660778807733725,
      "grad_norm": 0.00028956442838534713,
      "learning_rate": 0.00017666198258770038,
      "loss": 46.0,
      "step": 356
    },
    {
      "epoch": 0.007186207313022736,
      "grad_norm": 0.0001954118488356471,
      "learning_rate": 0.00017653243506356332,
      "loss": 46.0,
      "step": 357
    },
    {
      "epoch": 0.0072063367452721,
      "grad_norm": 0.0001885920064523816,
      "learning_rate": 0.00017640257675954264,
      "loss": 46.0,
      "step": 358
    },
    {
      "epoch": 0.007226466177521463,
      "grad_norm": 7.952122541610152e-05,
      "learning_rate": 0.00017627240820296177,
      "loss": 46.0,
      "step": 359
    },
    {
      "epoch": 0.0072465956097708265,
      "grad_norm": 6.53123643132858e-05,
      "learning_rate": 0.00017614192992240413,
      "loss": 46.0,
      "step": 360
    },
    {
      "epoch": 0.00726672504202019,
      "grad_norm": 0.0001405112270731479,
      "learning_rate": 0.00017601114244771067,
      "loss": 46.0,
      "step": 361
    },
    {
      "epoch": 0.007286854474269553,
      "grad_norm": 0.00019535009050741792,
      "learning_rate": 0.000175880046309978,
      "loss": 46.0,
      "step": 362
    },
    {
      "epoch": 0.007306983906518917,
      "grad_norm": 0.0003268709115218371,
      "learning_rate": 0.00017574864204155614,
      "loss": 46.0,
      "step": 363
    },
    {
      "epoch": 0.0073271133387682805,
      "grad_norm": 0.00010955316975014284,
      "learning_rate": 0.00017561693017604637,
      "loss": 46.0,
      "step": 364
    },
    {
      "epoch": 0.007347242771017643,
      "grad_norm": 0.00027552066603675485,
      "learning_rate": 0.000175484911248299,
      "loss": 46.0,
      "step": 365
    },
    {
      "epoch": 0.007367372203267007,
      "grad_norm": 0.00019117463671136647,
      "learning_rate": 0.0001753525857944112,
      "loss": 46.0,
      "step": 366
    },
    {
      "epoch": 0.00738750163551637,
      "grad_norm": 9.778481035027653e-05,
      "learning_rate": 0.00017521995435172504,
      "loss": 46.0,
      "step": 367
    },
    {
      "epoch": 0.007407631067765734,
      "grad_norm": 3.404769449844025e-05,
      "learning_rate": 0.000175087017458825,
      "loss": 46.0,
      "step": 368
    },
    {
      "epoch": 0.007427760500015097,
      "grad_norm": 0.00010666289017535746,
      "learning_rate": 0.00017495377565553594,
      "loss": 46.0,
      "step": 369
    },
    {
      "epoch": 0.00744788993226446,
      "grad_norm": 0.0001144933485193178,
      "learning_rate": 0.0001748202294829209,
      "loss": 46.0,
      "step": 370
    },
    {
      "epoch": 0.007468019364513824,
      "grad_norm": 0.00013504338858183473,
      "learning_rate": 0.00017468637948327894,
      "loss": 46.0,
      "step": 371
    },
    {
      "epoch": 0.007488148796763188,
      "grad_norm": 0.00010847981320694089,
      "learning_rate": 0.00017455222620014276,
      "loss": 46.0,
      "step": 372
    },
    {
      "epoch": 0.00750827822901255,
      "grad_norm": 0.00020060865790583193,
      "learning_rate": 0.00017441777017827677,
      "loss": 46.0,
      "step": 373
    },
    {
      "epoch": 0.007528407661261914,
      "grad_norm": 0.00019206189608667046,
      "learning_rate": 0.00017428301196367464,
      "loss": 46.0,
      "step": 374
    },
    {
      "epoch": 0.007548537093511278,
      "grad_norm": 0.00011548047768883407,
      "learning_rate": 0.0001741479521035572,
      "loss": 46.0,
      "step": 375
    },
    {
      "epoch": 0.007568666525760641,
      "grad_norm": 0.0001220878621097654,
      "learning_rate": 0.00017401259114637014,
      "loss": 46.0,
      "step": 376
    },
    {
      "epoch": 0.007588795958010004,
      "grad_norm": 0.00027704107924364507,
      "learning_rate": 0.00017387692964178198,
      "loss": 46.0,
      "step": 377
    },
    {
      "epoch": 0.007608925390259368,
      "grad_norm": 8.742011414142326e-05,
      "learning_rate": 0.00017374096814068145,
      "loss": 46.0,
      "step": 378
    },
    {
      "epoch": 0.007629054822508731,
      "grad_norm": 0.0001393141719745472,
      "learning_rate": 0.00017360470719517577,
      "loss": 46.0,
      "step": 379
    },
    {
      "epoch": 0.007649184254758095,
      "grad_norm": 0.00011780137720052153,
      "learning_rate": 0.00017346814735858792,
      "loss": 46.0,
      "step": 380
    },
    {
      "epoch": 0.007669313687007458,
      "grad_norm": 0.00013582094106823206,
      "learning_rate": 0.0001733312891854547,
      "loss": 46.0,
      "step": 381
    },
    {
      "epoch": 0.007689443119256821,
      "grad_norm": 7.030325650703162e-05,
      "learning_rate": 0.00017319413323152436,
      "loss": 46.0,
      "step": 382
    },
    {
      "epoch": 0.007709572551506185,
      "grad_norm": 0.00016730479546822608,
      "learning_rate": 0.00017305668005375435,
      "loss": 46.0,
      "step": 383
    },
    {
      "epoch": 0.007729701983755548,
      "grad_norm": 0.00027646831586025655,
      "learning_rate": 0.00017291893021030913,
      "loss": 46.0,
      "step": 384
    },
    {
      "epoch": 0.0077498314160049115,
      "grad_norm": 0.0001369678502669558,
      "learning_rate": 0.0001727808842605578,
      "loss": 46.0,
      "step": 385
    },
    {
      "epoch": 0.007769960848254275,
      "grad_norm": 0.00011632432142505422,
      "learning_rate": 0.00017264254276507188,
      "loss": 46.0,
      "step": 386
    },
    {
      "epoch": 0.007790090280503638,
      "grad_norm": 0.00013326382031664252,
      "learning_rate": 0.00017250390628562303,
      "loss": 46.0,
      "step": 387
    },
    {
      "epoch": 0.007810219712753002,
      "grad_norm": 0.0002664460625965148,
      "learning_rate": 0.00017236497538518082,
      "loss": 46.0,
      "step": 388
    },
    {
      "epoch": 0.007830349145002365,
      "grad_norm": 0.00013524248788598925,
      "learning_rate": 0.00017222575062791033,
      "loss": 46.0,
      "step": 389
    },
    {
      "epoch": 0.007850478577251728,
      "grad_norm": 0.00023887053248472512,
      "learning_rate": 0.00017208623257916993,
      "loss": 46.0,
      "step": 390
    },
    {
      "epoch": 0.007870608009501092,
      "grad_norm": 0.0002952871145680547,
      "learning_rate": 0.000171946421805509,
      "loss": 46.0,
      "step": 391
    },
    {
      "epoch": 0.007890737441750456,
      "grad_norm": 0.00014817963528912514,
      "learning_rate": 0.00017180631887466562,
      "loss": 46.0,
      "step": 392
    },
    {
      "epoch": 0.00791086687399982,
      "grad_norm": 5.316930764820427e-05,
      "learning_rate": 0.0001716659243555642,
      "loss": 46.0,
      "step": 393
    },
    {
      "epoch": 0.00791086687399982,
      "eval_loss": 11.5,
      "eval_runtime": 130.3433,
      "eval_samples_per_second": 160.484,
      "eval_steps_per_second": 80.242,
      "step": 393
    },
    {
      "epoch": 0.007930996306249181,
      "grad_norm": 7.42626580176875e-05,
      "learning_rate": 0.00017152523881831325,
      "loss": 46.0,
      "step": 394
    },
    {
      "epoch": 0.007951125738498545,
      "grad_norm": 0.00012892778613604605,
      "learning_rate": 0.00017138426283420304,
      "loss": 46.0,
      "step": 395
    },
    {
      "epoch": 0.007971255170747909,
      "grad_norm": 0.00026775835431180894,
      "learning_rate": 0.00017124299697570327,
      "loss": 46.0,
      "step": 396
    },
    {
      "epoch": 0.007991384602997273,
      "grad_norm": 0.00010640334949130192,
      "learning_rate": 0.00017110144181646072,
      "loss": 46.0,
      "step": 397
    },
    {
      "epoch": 0.008011514035246636,
      "grad_norm": 0.00044169218745082617,
      "learning_rate": 0.00017095959793129705,
      "loss": 46.0,
      "step": 398
    },
    {
      "epoch": 0.008031643467496,
      "grad_norm": 0.0001671955396886915,
      "learning_rate": 0.0001708174658962062,
      "loss": 46.0,
      "step": 399
    },
    {
      "epoch": 0.008051772899745362,
      "grad_norm": 0.0001959178625838831,
      "learning_rate": 0.00017067504628835237,
      "loss": 46.0,
      "step": 400
    },
    {
      "epoch": 0.008071902331994726,
      "grad_norm": 0.0003456271078903228,
      "learning_rate": 0.00017053233968606745,
      "loss": 46.0,
      "step": 401
    },
    {
      "epoch": 0.00809203176424409,
      "grad_norm": 0.00020114498329348862,
      "learning_rate": 0.00017038934666884878,
      "loss": 46.0,
      "step": 402
    },
    {
      "epoch": 0.008112161196493453,
      "grad_norm": 0.00013646352454088628,
      "learning_rate": 0.00017024606781735675,
      "loss": 46.0,
      "step": 403
    },
    {
      "epoch": 0.008132290628742817,
      "grad_norm": 0.0001477425394114107,
      "learning_rate": 0.00017010250371341244,
      "loss": 46.0,
      "step": 404
    },
    {
      "epoch": 0.00815242006099218,
      "grad_norm": 0.00025215549976564944,
      "learning_rate": 0.00016995865493999528,
      "loss": 46.0,
      "step": 405
    },
    {
      "epoch": 0.008172549493241543,
      "grad_norm": 0.0003362063434906304,
      "learning_rate": 0.00016981452208124064,
      "loss": 46.0,
      "step": 406
    },
    {
      "epoch": 0.008192678925490906,
      "grad_norm": 0.00011808531417045742,
      "learning_rate": 0.00016967010572243758,
      "loss": 46.0,
      "step": 407
    },
    {
      "epoch": 0.00821280835774027,
      "grad_norm": 0.00017271251999773085,
      "learning_rate": 0.00016952540645002632,
      "loss": 46.0,
      "step": 408
    },
    {
      "epoch": 0.008232937789989634,
      "grad_norm": 0.00015558266022708267,
      "learning_rate": 0.00016938042485159594,
      "loss": 46.0,
      "step": 409
    },
    {
      "epoch": 0.008253067222238997,
      "grad_norm": 0.00015800447727087885,
      "learning_rate": 0.000169235161515882,
      "loss": 46.0,
      "step": 410
    },
    {
      "epoch": 0.00827319665448836,
      "grad_norm": 0.00020300566393416375,
      "learning_rate": 0.00016908961703276406,
      "loss": 46.0,
      "step": 411
    },
    {
      "epoch": 0.008293326086737723,
      "grad_norm": 0.00015362584963440895,
      "learning_rate": 0.0001689437919932634,
      "loss": 46.0,
      "step": 412
    },
    {
      "epoch": 0.008313455518987087,
      "grad_norm": 0.0001552566682221368,
      "learning_rate": 0.0001687976869895406,
      "loss": 46.0,
      "step": 413
    },
    {
      "epoch": 0.00833358495123645,
      "grad_norm": 0.0002456993970554322,
      "learning_rate": 0.00016865130261489305,
      "loss": 46.0,
      "step": 414
    },
    {
      "epoch": 0.008353714383485814,
      "grad_norm": 0.00025272692437283695,
      "learning_rate": 0.00016850463946375266,
      "loss": 46.0,
      "step": 415
    },
    {
      "epoch": 0.008373843815735178,
      "grad_norm": 0.0001329753577010706,
      "learning_rate": 0.00016835769813168332,
      "loss": 46.0,
      "step": 416
    },
    {
      "epoch": 0.00839397324798454,
      "grad_norm": 0.00011102524149464443,
      "learning_rate": 0.00016821047921537858,
      "loss": 46.0,
      "step": 417
    },
    {
      "epoch": 0.008414102680233904,
      "grad_norm": 0.0005011210450902581,
      "learning_rate": 0.0001680629833126592,
      "loss": 46.0,
      "step": 418
    },
    {
      "epoch": 0.008434232112483267,
      "grad_norm": 0.00018243804515805095,
      "learning_rate": 0.0001679152110224707,
      "loss": 46.0,
      "step": 419
    },
    {
      "epoch": 0.008454361544732631,
      "grad_norm": 0.00032836064929142594,
      "learning_rate": 0.00016776716294488099,
      "loss": 46.0,
      "step": 420
    },
    {
      "epoch": 0.008474490976981995,
      "grad_norm": 0.00017685361672192812,
      "learning_rate": 0.00016761883968107775,
      "loss": 46.0,
      "step": 421
    },
    {
      "epoch": 0.008494620409231358,
      "grad_norm": 0.0002569703501649201,
      "learning_rate": 0.0001674702418333663,
      "loss": 46.0,
      "step": 422
    },
    {
      "epoch": 0.00851474984148072,
      "grad_norm": 8.305059600388631e-05,
      "learning_rate": 0.00016732137000516684,
      "loss": 46.0,
      "step": 423
    },
    {
      "epoch": 0.008534879273730084,
      "grad_norm": 0.0002819601504597813,
      "learning_rate": 0.00016717222480101221,
      "loss": 46.0,
      "step": 424
    },
    {
      "epoch": 0.008555008705979448,
      "grad_norm": 0.00020890127052552998,
      "learning_rate": 0.00016702280682654542,
      "loss": 46.0,
      "step": 425
    },
    {
      "epoch": 0.008575138138228812,
      "grad_norm": 0.00015081673336680979,
      "learning_rate": 0.00016687311668851703,
      "loss": 46.0,
      "step": 426
    },
    {
      "epoch": 0.008595267570478175,
      "grad_norm": 0.00014410317817237228,
      "learning_rate": 0.0001667231549947828,
      "loss": 46.0,
      "step": 427
    },
    {
      "epoch": 0.008615397002727537,
      "grad_norm": 0.0002492456405889243,
      "learning_rate": 0.00016657292235430126,
      "loss": 46.0,
      "step": 428
    },
    {
      "epoch": 0.008635526434976901,
      "grad_norm": 0.0001906536053866148,
      "learning_rate": 0.0001664224193771312,
      "loss": 46.0,
      "step": 429
    },
    {
      "epoch": 0.008655655867226265,
      "grad_norm": 0.00024850506451912224,
      "learning_rate": 0.0001662716466744291,
      "loss": 46.0,
      "step": 430
    },
    {
      "epoch": 0.008675785299475628,
      "grad_norm": 0.00013132646563462913,
      "learning_rate": 0.0001661206048584468,
      "loss": 46.0,
      "step": 431
    },
    {
      "epoch": 0.008695914731724992,
      "grad_norm": 0.00026154195074923337,
      "learning_rate": 0.00016596929454252895,
      "loss": 46.0,
      "step": 432
    },
    {
      "epoch": 0.008716044163974356,
      "grad_norm": 0.00015794049249961972,
      "learning_rate": 0.0001658177163411105,
      "loss": 46.0,
      "step": 433
    },
    {
      "epoch": 0.008736173596223718,
      "grad_norm": 0.00023368936672341079,
      "learning_rate": 0.00016566587086971416,
      "loss": 46.0,
      "step": 434
    },
    {
      "epoch": 0.008756303028473082,
      "grad_norm": 0.0002248157252324745,
      "learning_rate": 0.00016551375874494805,
      "loss": 46.0,
      "step": 435
    },
    {
      "epoch": 0.008776432460722445,
      "grad_norm": 0.0003579051699489355,
      "learning_rate": 0.00016536138058450309,
      "loss": 46.0,
      "step": 436
    },
    {
      "epoch": 0.008796561892971809,
      "grad_norm": 0.00031197903444990516,
      "learning_rate": 0.00016520873700715045,
      "loss": 46.0,
      "step": 437
    },
    {
      "epoch": 0.008816691325221173,
      "grad_norm": 0.0001378994493279606,
      "learning_rate": 0.0001650558286327391,
      "loss": 46.0,
      "step": 438
    },
    {
      "epoch": 0.008836820757470536,
      "grad_norm": 0.0001752666721586138,
      "learning_rate": 0.0001649026560821934,
      "loss": 46.0,
      "step": 439
    },
    {
      "epoch": 0.008856950189719898,
      "grad_norm": 0.00017502061382401735,
      "learning_rate": 0.0001647492199775103,
      "loss": 46.0,
      "step": 440
    },
    {
      "epoch": 0.008877079621969262,
      "grad_norm": 0.00026857954799197614,
      "learning_rate": 0.0001645955209417571,
      "loss": 46.0,
      "step": 441
    },
    {
      "epoch": 0.008897209054218626,
      "grad_norm": 0.0002821139642037451,
      "learning_rate": 0.00016444155959906875,
      "loss": 46.0,
      "step": 442
    },
    {
      "epoch": 0.00891733848646799,
      "grad_norm": 0.00018881195865105838,
      "learning_rate": 0.0001642873365746454,
      "loss": 46.0,
      "step": 443
    },
    {
      "epoch": 0.008937467918717353,
      "grad_norm": 0.00030518523999489844,
      "learning_rate": 0.00016413285249474975,
      "loss": 46.0,
      "step": 444
    },
    {
      "epoch": 0.008957597350966715,
      "grad_norm": 0.00015108012303244323,
      "learning_rate": 0.0001639781079867047,
      "loss": 46.0,
      "step": 445
    },
    {
      "epoch": 0.008977726783216079,
      "grad_norm": 0.00034121968201361597,
      "learning_rate": 0.0001638231036788906,
      "loss": 46.0,
      "step": 446
    },
    {
      "epoch": 0.008997856215465443,
      "grad_norm": 0.00013062897778581828,
      "learning_rate": 0.00016366784020074282,
      "loss": 46.0,
      "step": 447
    },
    {
      "epoch": 0.009017985647714806,
      "grad_norm": 0.00035385601222515106,
      "learning_rate": 0.0001635123181827491,
      "loss": 46.0,
      "step": 448
    },
    {
      "epoch": 0.00903811507996417,
      "grad_norm": 0.00014690958778373897,
      "learning_rate": 0.00016335653825644717,
      "loss": 46.0,
      "step": 449
    },
    {
      "epoch": 0.009058244512213534,
      "grad_norm": 0.0001020775962388143,
      "learning_rate": 0.00016320050105442192,
      "loss": 46.0,
      "step": 450
    },
    {
      "epoch": 0.009078373944462896,
      "grad_norm": 0.00028834721888415515,
      "learning_rate": 0.00016304420721030308,
      "loss": 46.0,
      "step": 451
    },
    {
      "epoch": 0.00909850337671226,
      "grad_norm": 0.00015054053801577538,
      "learning_rate": 0.00016288765735876254,
      "loss": 46.0,
      "step": 452
    },
    {
      "epoch": 0.009118632808961623,
      "grad_norm": 0.0002609645889606327,
      "learning_rate": 0.00016273085213551166,
      "loss": 46.0,
      "step": 453
    },
    {
      "epoch": 0.009138762241210987,
      "grad_norm": 0.00016032745770644397,
      "learning_rate": 0.00016257379217729897,
      "loss": 46.0,
      "step": 454
    },
    {
      "epoch": 0.00915889167346035,
      "grad_norm": 0.00017535104416310787,
      "learning_rate": 0.00016241647812190724,
      "loss": 46.0,
      "step": 455
    },
    {
      "epoch": 0.009179021105709714,
      "grad_norm": 0.00018267772975377738,
      "learning_rate": 0.00016225891060815128,
      "loss": 46.0,
      "step": 456
    },
    {
      "epoch": 0.009199150537959076,
      "grad_norm": 0.00029334655846469104,
      "learning_rate": 0.00016210109027587494,
      "loss": 46.0,
      "step": 457
    },
    {
      "epoch": 0.00921927997020844,
      "grad_norm": 0.000231573183555156,
      "learning_rate": 0.00016194301776594876,
      "loss": 46.0,
      "step": 458
    },
    {
      "epoch": 0.009239409402457804,
      "grad_norm": 0.0002635002601891756,
      "learning_rate": 0.0001617846937202674,
      "loss": 46.0,
      "step": 459
    },
    {
      "epoch": 0.009259538834707167,
      "grad_norm": 0.00032035779440775514,
      "learning_rate": 0.00016162611878174678,
      "loss": 46.0,
      "step": 460
    },
    {
      "epoch": 0.009279668266956531,
      "grad_norm": 0.00026974373031407595,
      "learning_rate": 0.00016146729359432183,
      "loss": 46.0,
      "step": 461
    },
    {
      "epoch": 0.009299797699205893,
      "grad_norm": 0.0001251360954483971,
      "learning_rate": 0.00016130821880294354,
      "loss": 46.0,
      "step": 462
    },
    {
      "epoch": 0.009319927131455257,
      "grad_norm": 0.00034513213904574513,
      "learning_rate": 0.00016114889505357654,
      "loss": 46.0,
      "step": 463
    },
    {
      "epoch": 0.00934005656370462,
      "grad_norm": 0.00021668773842975497,
      "learning_rate": 0.00016098932299319642,
      "loss": 46.0,
      "step": 464
    },
    {
      "epoch": 0.009360185995953984,
      "grad_norm": 0.00024054896493908018,
      "learning_rate": 0.00016082950326978707,
      "loss": 46.0,
      "step": 465
    },
    {
      "epoch": 0.009380315428203348,
      "grad_norm": 0.00023018120555207133,
      "learning_rate": 0.00016066943653233808,
      "loss": 46.0,
      "step": 466
    },
    {
      "epoch": 0.009400444860452712,
      "grad_norm": 0.000292058102786541,
      "learning_rate": 0.00016050912343084216,
      "loss": 46.0,
      "step": 467
    },
    {
      "epoch": 0.009420574292702074,
      "grad_norm": 0.00015946109488140792,
      "learning_rate": 0.0001603485646162924,
      "loss": 46.0,
      "step": 468
    },
    {
      "epoch": 0.009440703724951437,
      "grad_norm": 0.0003883461467921734,
      "learning_rate": 0.00016018776074067965,
      "loss": 46.0,
      "step": 469
    },
    {
      "epoch": 0.009460833157200801,
      "grad_norm": 0.00025555226602591574,
      "learning_rate": 0.00016002671245698999,
      "loss": 46.0,
      "step": 470
    },
    {
      "epoch": 0.009480962589450165,
      "grad_norm": 0.0003913817636203021,
      "learning_rate": 0.00015986542041920184,
      "loss": 46.0,
      "step": 471
    },
    {
      "epoch": 0.009501092021699529,
      "grad_norm": 0.00036681946949101985,
      "learning_rate": 0.00015970388528228354,
      "loss": 46.0,
      "step": 472
    },
    {
      "epoch": 0.009521221453948892,
      "grad_norm": 0.00019019895989913493,
      "learning_rate": 0.00015954210770219063,
      "loss": 46.0,
      "step": 473
    },
    {
      "epoch": 0.009541350886198254,
      "grad_norm": 0.00026755756698548794,
      "learning_rate": 0.00015938008833586307,
      "loss": 46.0,
      "step": 474
    },
    {
      "epoch": 0.009561480318447618,
      "grad_norm": 0.0002743021759670228,
      "learning_rate": 0.00015921782784122273,
      "loss": 46.0,
      "step": 475
    },
    {
      "epoch": 0.009581609750696982,
      "grad_norm": 0.0004780637682415545,
      "learning_rate": 0.00015905532687717053,
      "loss": 46.0,
      "step": 476
    },
    {
      "epoch": 0.009601739182946345,
      "grad_norm": 0.00016790846711955965,
      "learning_rate": 0.00015889258610358398,
      "loss": 46.0,
      "step": 477
    },
    {
      "epoch": 0.009621868615195709,
      "grad_norm": 0.0003423172456678003,
      "learning_rate": 0.00015872960618131443,
      "loss": 46.0,
      "step": 478
    },
    {
      "epoch": 0.009641998047445071,
      "grad_norm": 0.0004558518703561276,
      "learning_rate": 0.00015856638777218422,
      "loss": 46.0,
      "step": 479
    },
    {
      "epoch": 0.009662127479694435,
      "grad_norm": 0.0003485089691821486,
      "learning_rate": 0.00015840293153898428,
      "loss": 46.0,
      "step": 480
    },
    {
      "epoch": 0.009682256911943798,
      "grad_norm": 0.0005973344668745995,
      "learning_rate": 0.00015823923814547116,
      "loss": 46.0,
      "step": 481
    },
    {
      "epoch": 0.009702386344193162,
      "grad_norm": 0.0003374523075763136,
      "learning_rate": 0.0001580753082563645,
      "loss": 46.0,
      "step": 482
    },
    {
      "epoch": 0.009722515776442526,
      "grad_norm": 0.0004212489875499159,
      "learning_rate": 0.00015791114253734437,
      "loss": 46.0,
      "step": 483
    },
    {
      "epoch": 0.00974264520869189,
      "grad_norm": 0.0002458385133650154,
      "learning_rate": 0.0001577467416550484,
      "loss": 46.0,
      "step": 484
    },
    {
      "epoch": 0.009762774640941252,
      "grad_norm": 0.0006288140430115163,
      "learning_rate": 0.00015758210627706917,
      "loss": 46.0,
      "step": 485
    },
    {
      "epoch": 0.009782904073190615,
      "grad_norm": 0.0004315089900046587,
      "learning_rate": 0.0001574172370719515,
      "loss": 46.0,
      "step": 486
    },
    {
      "epoch": 0.009803033505439979,
      "grad_norm": 0.00017863186076283455,
      "learning_rate": 0.00015725213470918977,
      "loss": 46.0,
      "step": 487
    },
    {
      "epoch": 0.009823162937689343,
      "grad_norm": 0.00017475250933784992,
      "learning_rate": 0.0001570867998592251,
      "loss": 46.0,
      "step": 488
    },
    {
      "epoch": 0.009843292369938706,
      "grad_norm": 0.0007568314322270453,
      "learning_rate": 0.00015692123319344272,
      "loss": 46.0,
      "step": 489
    },
    {
      "epoch": 0.00986342180218807,
      "grad_norm": 0.00021712924353778362,
      "learning_rate": 0.00015675543538416916,
      "loss": 46.0,
      "step": 490
    },
    {
      "epoch": 0.009883551234437432,
      "grad_norm": 0.0002802063536364585,
      "learning_rate": 0.00015658940710466964,
      "loss": 46.0,
      "step": 491
    },
    {
      "epoch": 0.009903680666686796,
      "grad_norm": 0.0004394160059746355,
      "learning_rate": 0.0001564231490291452,
      "loss": 46.0,
      "step": 492
    },
    {
      "epoch": 0.00992381009893616,
      "grad_norm": 0.0002763732336461544,
      "learning_rate": 0.0001562566618327301,
      "loss": 46.0,
      "step": 493
    },
    {
      "epoch": 0.009943939531185523,
      "grad_norm": 0.0002444623096380383,
      "learning_rate": 0.00015608994619148886,
      "loss": 46.0,
      "step": 494
    },
    {
      "epoch": 0.009964068963434887,
      "grad_norm": 0.0003327823942527175,
      "learning_rate": 0.00015592300278241384,
      "loss": 46.0,
      "step": 495
    },
    {
      "epoch": 0.009984198395684249,
      "grad_norm": 0.00026393041480332613,
      "learning_rate": 0.00015575583228342226,
      "loss": 46.0,
      "step": 496
    },
    {
      "epoch": 0.010004327827933613,
      "grad_norm": 0.00029584183357656,
      "learning_rate": 0.00015558843537335338,
      "loss": 46.0,
      "step": 497
    },
    {
      "epoch": 0.010024457260182976,
      "grad_norm": 0.0006626403192058206,
      "learning_rate": 0.00015542081273196598,
      "loss": 46.0,
      "step": 498
    },
    {
      "epoch": 0.01004458669243234,
      "grad_norm": 0.0007279766141436994,
      "learning_rate": 0.00015525296503993548,
      "loss": 46.0,
      "step": 499
    },
    {
      "epoch": 0.010064716124681704,
      "grad_norm": 0.0004254883388057351,
      "learning_rate": 0.0001550848929788511,
      "loss": 46.0,
      "step": 500
    },
    {
      "epoch": 0.010084845556931068,
      "grad_norm": 0.0004729441716335714,
      "learning_rate": 0.00015491659723121325,
      "loss": 46.0,
      "step": 501
    },
    {
      "epoch": 0.01010497498918043,
      "grad_norm": 0.00022078775509726256,
      "learning_rate": 0.0001547480784804306,
      "loss": 46.0,
      "step": 502
    },
    {
      "epoch": 0.010125104421429793,
      "grad_norm": 0.0003951303951907903,
      "learning_rate": 0.00015457933741081745,
      "loss": 46.0,
      "step": 503
    },
    {
      "epoch": 0.010145233853679157,
      "grad_norm": 0.00033989755320362747,
      "learning_rate": 0.0001544103747075909,
      "loss": 46.0,
      "step": 504
    },
    {
      "epoch": 0.01016536328592852,
      "grad_norm": 0.0005033800262026489,
      "learning_rate": 0.00015424119105686792,
      "loss": 46.0,
      "step": 505
    },
    {
      "epoch": 0.010185492718177884,
      "grad_norm": 0.0004154644557274878,
      "learning_rate": 0.00015407178714566287,
      "loss": 46.0,
      "step": 506
    },
    {
      "epoch": 0.010205622150427248,
      "grad_norm": 0.00023219191643875092,
      "learning_rate": 0.0001539021636618844,
      "loss": 46.0,
      "step": 507
    },
    {
      "epoch": 0.01022575158267661,
      "grad_norm": 0.0003007667255587876,
      "learning_rate": 0.0001537323212943328,
      "loss": 46.0,
      "step": 508
    },
    {
      "epoch": 0.010245881014925974,
      "grad_norm": 0.00034918496385216713,
      "learning_rate": 0.00015356226073269736,
      "loss": 46.0,
      "step": 509
    },
    {
      "epoch": 0.010266010447175337,
      "grad_norm": 0.00021761894458904862,
      "learning_rate": 0.00015339198266755316,
      "loss": 46.0,
      "step": 510
    },
    {
      "epoch": 0.010286139879424701,
      "grad_norm": 0.0007356986752711236,
      "learning_rate": 0.00015322148779035869,
      "loss": 46.0,
      "step": 511
    },
    {
      "epoch": 0.010306269311674065,
      "grad_norm": 0.000666945765260607,
      "learning_rate": 0.00015305077679345276,
      "loss": 46.0,
      "step": 512
    },
    {
      "epoch": 0.010326398743923427,
      "grad_norm": 0.0003056855348404497,
      "learning_rate": 0.00015287985037005182,
      "loss": 46.0,
      "step": 513
    },
    {
      "epoch": 0.01034652817617279,
      "grad_norm": 0.0006946488283574581,
      "learning_rate": 0.00015270870921424721,
      "loss": 46.0,
      "step": 514
    },
    {
      "epoch": 0.010366657608422154,
      "grad_norm": 0.0005418279324658215,
      "learning_rate": 0.0001525373540210021,
      "loss": 46.0,
      "step": 515
    },
    {
      "epoch": 0.010386787040671518,
      "grad_norm": 0.00027551245875656605,
      "learning_rate": 0.00015236578548614887,
      "loss": 46.0,
      "step": 516
    },
    {
      "epoch": 0.010406916472920882,
      "grad_norm": 0.0004473023291211575,
      "learning_rate": 0.0001521940043063863,
      "loss": 46.0,
      "step": 517
    },
    {
      "epoch": 0.010427045905170245,
      "grad_norm": 0.0005578941782005131,
      "learning_rate": 0.00015202201117927656,
      "loss": 46.0,
      "step": 518
    },
    {
      "epoch": 0.010447175337419607,
      "grad_norm": 0.00012505419726949185,
      "learning_rate": 0.00015184980680324248,
      "loss": 46.0,
      "step": 519
    },
    {
      "epoch": 0.010467304769668971,
      "grad_norm": 0.00039335055043920875,
      "learning_rate": 0.00015167739187756487,
      "loss": 46.0,
      "step": 520
    },
    {
      "epoch": 0.010487434201918335,
      "grad_norm": 0.00027171571855433285,
      "learning_rate": 0.0001515047671023794,
      "loss": 46.0,
      "step": 521
    },
    {
      "epoch": 0.010507563634167699,
      "grad_norm": 0.0005522433784790337,
      "learning_rate": 0.00015133193317867392,
      "loss": 46.0,
      "step": 522
    },
    {
      "epoch": 0.010527693066417062,
      "grad_norm": 0.00027844231226481497,
      "learning_rate": 0.00015115889080828557,
      "loss": 46.0,
      "step": 523
    },
    {
      "epoch": 0.010547822498666426,
      "grad_norm": 0.0007594394846819341,
      "learning_rate": 0.000150985640693898,
      "loss": 46.0,
      "step": 524
    },
    {
      "epoch": 0.010567951930915788,
      "grad_norm": 0.00045375648187473416,
      "learning_rate": 0.00015081218353903838,
      "loss": 46.0,
      "step": 525
    },
    {
      "epoch": 0.010588081363165152,
      "grad_norm": 0.0003794842632487416,
      "learning_rate": 0.0001506385200480747,
      "loss": 46.0,
      "step": 526
    },
    {
      "epoch": 0.010608210795414515,
      "grad_norm": 0.0006085368804633617,
      "learning_rate": 0.00015046465092621278,
      "loss": 46.0,
      "step": 527
    },
    {
      "epoch": 0.010628340227663879,
      "grad_norm": 0.00021481052681338042,
      "learning_rate": 0.00015029057687949347,
      "loss": 46.0,
      "step": 528
    },
    {
      "epoch": 0.010648469659913243,
      "grad_norm": 0.0003222424420528114,
      "learning_rate": 0.0001501162986147897,
      "loss": 46.0,
      "step": 529
    },
    {
      "epoch": 0.010668599092162605,
      "grad_norm": 0.0006585840019397438,
      "learning_rate": 0.00014994181683980387,
      "loss": 46.0,
      "step": 530
    },
    {
      "epoch": 0.010688728524411968,
      "grad_norm": 0.0006019362481310964,
      "learning_rate": 0.00014976713226306457,
      "loss": 46.0,
      "step": 531
    },
    {
      "epoch": 0.010708857956661332,
      "grad_norm": 0.0005435794009827077,
      "learning_rate": 0.00014959224559392406,
      "loss": 46.0,
      "step": 532
    },
    {
      "epoch": 0.010728987388910696,
      "grad_norm": 0.0007174118072725832,
      "learning_rate": 0.00014941715754255522,
      "loss": 46.0,
      "step": 533
    },
    {
      "epoch": 0.01074911682116006,
      "grad_norm": 0.001002269797027111,
      "learning_rate": 0.00014924186881994867,
      "loss": 46.0,
      "step": 534
    },
    {
      "epoch": 0.010769246253409423,
      "grad_norm": 0.0002345768007216975,
      "learning_rate": 0.0001490663801379099,
      "loss": 46.0,
      "step": 535
    },
    {
      "epoch": 0.010789375685658785,
      "grad_norm": 0.0002722369390539825,
      "learning_rate": 0.00014889069220905637,
      "loss": 46.0,
      "step": 536
    },
    {
      "epoch": 0.010809505117908149,
      "grad_norm": 0.0003452429664321244,
      "learning_rate": 0.00014871480574681477,
      "loss": 46.0,
      "step": 537
    },
    {
      "epoch": 0.010829634550157513,
      "grad_norm": 0.000284095061942935,
      "learning_rate": 0.0001485387214654178,
      "loss": 46.0,
      "step": 538
    },
    {
      "epoch": 0.010849763982406876,
      "grad_norm": 0.0008951055933721364,
      "learning_rate": 0.00014836244007990156,
      "loss": 46.0,
      "step": 539
    },
    {
      "epoch": 0.01086989341465624,
      "grad_norm": 0.0003389440244063735,
      "learning_rate": 0.00014818596230610254,
      "loss": 46.0,
      "step": 540
    },
    {
      "epoch": 0.010890022846905604,
      "grad_norm": 0.0007038054754957557,
      "learning_rate": 0.0001480092888606547,
      "loss": 46.0,
      "step": 541
    },
    {
      "epoch": 0.010910152279154966,
      "grad_norm": 0.0007687349570915103,
      "learning_rate": 0.00014783242046098653,
      "loss": 46.0,
      "step": 542
    },
    {
      "epoch": 0.01093028171140433,
      "grad_norm": 0.00043066966463811696,
      "learning_rate": 0.00014765535782531832,
      "loss": 46.0,
      "step": 543
    },
    {
      "epoch": 0.010950411143653693,
      "grad_norm": 0.0010278800036758184,
      "learning_rate": 0.00014747810167265894,
      "loss": 46.0,
      "step": 544
    },
    {
      "epoch": 0.010970540575903057,
      "grad_norm": 0.0003619072958827019,
      "learning_rate": 0.0001473006527228032,
      "loss": 46.0,
      "step": 545
    },
    {
      "epoch": 0.01099067000815242,
      "grad_norm": 0.0005286497180350125,
      "learning_rate": 0.0001471230116963287,
      "loss": 46.0,
      "step": 546
    },
    {
      "epoch": 0.011010799440401783,
      "grad_norm": 0.00021179212490096688,
      "learning_rate": 0.00014694517931459317,
      "loss": 46.0,
      "step": 547
    },
    {
      "epoch": 0.011030928872651146,
      "grad_norm": 0.0006336824735626578,
      "learning_rate": 0.0001467671562997313,
      "loss": 46.0,
      "step": 548
    },
    {
      "epoch": 0.01105105830490051,
      "grad_norm": 0.00037215143674984574,
      "learning_rate": 0.00014658894337465187,
      "loss": 46.0,
      "step": 549
    },
    {
      "epoch": 0.011071187737149874,
      "grad_norm": 0.0005753418663516641,
      "learning_rate": 0.0001464105412630349,
      "loss": 46.0,
      "step": 550
    },
    {
      "epoch": 0.011091317169399238,
      "grad_norm": 0.0005321545177139342,
      "learning_rate": 0.0001462319506893286,
      "loss": 46.0,
      "step": 551
    },
    {
      "epoch": 0.011111446601648601,
      "grad_norm": 0.0005020481185056269,
      "learning_rate": 0.00014605317237874655,
      "loss": 46.0,
      "step": 552
    },
    {
      "epoch": 0.011131576033897963,
      "grad_norm": 0.0007133895414881408,
      "learning_rate": 0.00014587420705726458,
      "loss": 46.0,
      "step": 553
    },
    {
      "epoch": 0.011151705466147327,
      "grad_norm": 0.0005411367164924741,
      "learning_rate": 0.000145695055451618,
      "loss": 46.0,
      "step": 554
    },
    {
      "epoch": 0.01117183489839669,
      "grad_norm": 0.00042524756281636655,
      "learning_rate": 0.0001455157182892986,
      "loss": 46.0,
      "step": 555
    },
    {
      "epoch": 0.011191964330646054,
      "grad_norm": 0.0009429487981833518,
      "learning_rate": 0.00014533619629855158,
      "loss": 46.0,
      "step": 556
    },
    {
      "epoch": 0.011212093762895418,
      "grad_norm": 0.0002087266038870439,
      "learning_rate": 0.00014515649020837277,
      "loss": 46.0,
      "step": 557
    },
    {
      "epoch": 0.011232223195144782,
      "grad_norm": 0.0004085498512722552,
      "learning_rate": 0.00014497660074850552,
      "loss": 46.0,
      "step": 558
    },
    {
      "epoch": 0.011252352627394144,
      "grad_norm": 0.0005626246565952897,
      "learning_rate": 0.00014479652864943788,
      "loss": 46.0,
      "step": 559
    },
    {
      "epoch": 0.011272482059643507,
      "grad_norm": 0.00035937741631641984,
      "learning_rate": 0.00014461627464239948,
      "loss": 46.0,
      "step": 560
    },
    {
      "epoch": 0.011292611491892871,
      "grad_norm": 0.0006745000719092786,
      "learning_rate": 0.0001444358394593586,
      "loss": 46.0,
      "step": 561
    },
    {
      "epoch": 0.011312740924142235,
      "grad_norm": 0.0004202695854473859,
      "learning_rate": 0.0001442552238330194,
      "loss": 46.0,
      "step": 562
    },
    {
      "epoch": 0.011332870356391599,
      "grad_norm": 0.0005303717334754765,
      "learning_rate": 0.0001440744284968186,
      "loss": 46.0,
      "step": 563
    },
    {
      "epoch": 0.01135299978864096,
      "grad_norm": 0.001384332892484963,
      "learning_rate": 0.00014389345418492272,
      "loss": 46.0,
      "step": 564
    },
    {
      "epoch": 0.011373129220890324,
      "grad_norm": 0.0014389336574822664,
      "learning_rate": 0.00014371230163222516,
      "loss": 46.0,
      "step": 565
    },
    {
      "epoch": 0.011393258653139688,
      "grad_norm": 0.0007280406425707042,
      "learning_rate": 0.00014353097157434298,
      "loss": 46.0,
      "step": 566
    },
    {
      "epoch": 0.011413388085389052,
      "grad_norm": 0.0007825639913789928,
      "learning_rate": 0.00014334946474761412,
      "loss": 46.0,
      "step": 567
    },
    {
      "epoch": 0.011433517517638415,
      "grad_norm": 0.0011532383505254984,
      "learning_rate": 0.0001431677818890943,
      "loss": 46.0,
      "step": 568
    },
    {
      "epoch": 0.01145364694988778,
      "grad_norm": 0.00037170344148762524,
      "learning_rate": 0.00014298592373655414,
      "loss": 46.0,
      "step": 569
    },
    {
      "epoch": 0.011473776382137141,
      "grad_norm": 0.0004953424213454127,
      "learning_rate": 0.00014280389102847596,
      "loss": 46.0,
      "step": 570
    },
    {
      "epoch": 0.011493905814386505,
      "grad_norm": 0.00048051271005533636,
      "learning_rate": 0.000142621684504051,
      "loss": 46.0,
      "step": 571
    },
    {
      "epoch": 0.011514035246635869,
      "grad_norm": 0.0007658221293240786,
      "learning_rate": 0.0001424393049031763,
      "loss": 46.0,
      "step": 572
    },
    {
      "epoch": 0.011534164678885232,
      "grad_norm": 0.00040762132266536355,
      "learning_rate": 0.00014225675296645178,
      "loss": 46.0,
      "step": 573
    },
    {
      "epoch": 0.011554294111134596,
      "grad_norm": 0.00042765153921209276,
      "learning_rate": 0.00014207402943517707,
      "loss": 46.0,
      "step": 574
    },
    {
      "epoch": 0.01157442354338396,
      "grad_norm": 0.0006899941363371909,
      "learning_rate": 0.00014189113505134866,
      "loss": 46.0,
      "step": 575
    },
    {
      "epoch": 0.011594552975633322,
      "grad_norm": 0.0007566651329398155,
      "learning_rate": 0.00014170807055765682,
      "loss": 46.0,
      "step": 576
    },
    {
      "epoch": 0.011614682407882685,
      "grad_norm": 0.0007781738531775773,
      "learning_rate": 0.0001415248366974826,
      "loss": 46.0,
      "step": 577
    },
    {
      "epoch": 0.011634811840132049,
      "grad_norm": 0.0006567966192960739,
      "learning_rate": 0.00014134143421489482,
      "loss": 46.0,
      "step": 578
    },
    {
      "epoch": 0.011654941272381413,
      "grad_norm": 0.0004726462357211858,
      "learning_rate": 0.00014115786385464704,
      "loss": 46.0,
      "step": 579
    },
    {
      "epoch": 0.011675070704630777,
      "grad_norm": 0.0003221970109734684,
      "learning_rate": 0.00014097412636217448,
      "loss": 46.0,
      "step": 580
    },
    {
      "epoch": 0.011695200136880139,
      "grad_norm": 0.0010498060146346688,
      "learning_rate": 0.00014079022248359113,
      "loss": 46.0,
      "step": 581
    },
    {
      "epoch": 0.011715329569129502,
      "grad_norm": 0.0005236300639808178,
      "learning_rate": 0.0001406061529656865,
      "loss": 46.0,
      "step": 582
    },
    {
      "epoch": 0.011735459001378866,
      "grad_norm": 0.0005190221127122641,
      "learning_rate": 0.00014042191855592284,
      "loss": 46.0,
      "step": 583
    },
    {
      "epoch": 0.01175558843362823,
      "grad_norm": 0.0009433837258256972,
      "learning_rate": 0.000140237520002432,
      "loss": 46.0,
      "step": 584
    },
    {
      "epoch": 0.011775717865877593,
      "grad_norm": 0.0005737603642046452,
      "learning_rate": 0.00014005295805401226,
      "loss": 46.0,
      "step": 585
    },
    {
      "epoch": 0.011795847298126957,
      "grad_norm": 0.0004122421960346401,
      "learning_rate": 0.00013986823346012552,
      "loss": 46.0,
      "step": 586
    },
    {
      "epoch": 0.011815976730376319,
      "grad_norm": 0.000715672445949167,
      "learning_rate": 0.00013968334697089406,
      "loss": 46.0,
      "step": 587
    },
    {
      "epoch": 0.011836106162625683,
      "grad_norm": 0.0009091185638681054,
      "learning_rate": 0.00013949829933709767,
      "loss": 46.0,
      "step": 588
    },
    {
      "epoch": 0.011856235594875046,
      "grad_norm": 0.0005809186259284616,
      "learning_rate": 0.00013931309131017046,
      "loss": 46.0,
      "step": 589
    },
    {
      "epoch": 0.01187636502712441,
      "grad_norm": 0.0011282520135864615,
      "learning_rate": 0.0001391277236421978,
      "loss": 46.0,
      "step": 590
    },
    {
      "epoch": 0.011896494459373774,
      "grad_norm": 0.0011749881086871028,
      "learning_rate": 0.0001389421970859134,
      "loss": 46.0,
      "step": 591
    },
    {
      "epoch": 0.011916623891623138,
      "grad_norm": 0.0011483165435492992,
      "learning_rate": 0.0001387565123946962,
      "loss": 46.0,
      "step": 592
    },
    {
      "epoch": 0.0119367533238725,
      "grad_norm": 0.0005833703908137977,
      "learning_rate": 0.0001385706703225672,
      "loss": 46.0,
      "step": 593
    },
    {
      "epoch": 0.011956882756121863,
      "grad_norm": 0.0004976568161509931,
      "learning_rate": 0.00013838467162418652,
      "loss": 46.0,
      "step": 594
    },
    {
      "epoch": 0.011977012188371227,
      "grad_norm": 0.0004910955904051661,
      "learning_rate": 0.00013819851705485035,
      "loss": 46.0,
      "step": 595
    },
    {
      "epoch": 0.01199714162062059,
      "grad_norm": 0.0005480287945829332,
      "learning_rate": 0.00013801220737048777,
      "loss": 46.0,
      "step": 596
    },
    {
      "epoch": 0.012017271052869954,
      "grad_norm": 0.0005976366810500622,
      "learning_rate": 0.0001378257433276578,
      "loss": 46.0,
      "step": 597
    },
    {
      "epoch": 0.012037400485119316,
      "grad_norm": 0.0009460100554861128,
      "learning_rate": 0.00013763912568354625,
      "loss": 46.0,
      "step": 598
    },
    {
      "epoch": 0.01205752991736868,
      "grad_norm": 0.00048293505096808076,
      "learning_rate": 0.00013745235519596263,
      "loss": 46.0,
      "step": 599
    },
    {
      "epoch": 0.012077659349618044,
      "grad_norm": 0.0014719015453010798,
      "learning_rate": 0.00013726543262333721,
      "loss": 46.0,
      "step": 600
    },
    {
      "epoch": 0.012097788781867408,
      "grad_norm": 0.000909750466234982,
      "learning_rate": 0.00013707835872471771,
      "loss": 46.0,
      "step": 601
    },
    {
      "epoch": 0.012117918214116771,
      "grad_norm": 0.0008785554673522711,
      "learning_rate": 0.0001368911342597664,
      "loss": 46.0,
      "step": 602
    },
    {
      "epoch": 0.012138047646366135,
      "grad_norm": 0.0009704609983600676,
      "learning_rate": 0.00013670375998875708,
      "loss": 46.0,
      "step": 603
    },
    {
      "epoch": 0.012158177078615497,
      "grad_norm": 0.0004874320875387639,
      "learning_rate": 0.00013651623667257164,
      "loss": 46.0,
      "step": 604
    },
    {
      "epoch": 0.01217830651086486,
      "grad_norm": 0.0003640170325525105,
      "learning_rate": 0.00013632856507269744,
      "loss": 46.0,
      "step": 605
    },
    {
      "epoch": 0.012198435943114224,
      "grad_norm": 0.00045160859008319676,
      "learning_rate": 0.00013614074595122387,
      "loss": 46.0,
      "step": 606
    },
    {
      "epoch": 0.012218565375363588,
      "grad_norm": 0.0012795224320143461,
      "learning_rate": 0.00013595278007083933,
      "loss": 46.0,
      "step": 607
    },
    {
      "epoch": 0.012238694807612952,
      "grad_norm": 0.0006611685384996235,
      "learning_rate": 0.00013576466819482832,
      "loss": 46.0,
      "step": 608
    },
    {
      "epoch": 0.012258824239862316,
      "grad_norm": 0.001468715607188642,
      "learning_rate": 0.000135576411087068,
      "loss": 46.0,
      "step": 609
    },
    {
      "epoch": 0.012278953672111678,
      "grad_norm": 0.0008079329272732139,
      "learning_rate": 0.00013538800951202546,
      "loss": 46.0,
      "step": 610
    },
    {
      "epoch": 0.012299083104361041,
      "grad_norm": 0.0014699992025271058,
      "learning_rate": 0.0001351994642347543,
      "loss": 46.0,
      "step": 611
    },
    {
      "epoch": 0.012319212536610405,
      "grad_norm": 0.000965460145380348,
      "learning_rate": 0.0001350107760208918,
      "loss": 46.0,
      "step": 612
    },
    {
      "epoch": 0.012339341968859769,
      "grad_norm": 0.0008890178869478405,
      "learning_rate": 0.00013482194563665554,
      "loss": 46.0,
      "step": 613
    },
    {
      "epoch": 0.012359471401109132,
      "grad_norm": 0.0003420621796976775,
      "learning_rate": 0.00013463297384884047,
      "loss": 46.0,
      "step": 614
    },
    {
      "epoch": 0.012379600833358494,
      "grad_norm": 0.0015883035957813263,
      "learning_rate": 0.00013444386142481574,
      "loss": 46.0,
      "step": 615
    },
    {
      "epoch": 0.012399730265607858,
      "grad_norm": 0.0009616951574571431,
      "learning_rate": 0.00013425460913252165,
      "loss": 46.0,
      "step": 616
    },
    {
      "epoch": 0.012419859697857222,
      "grad_norm": 0.0015981622273102403,
      "learning_rate": 0.00013406521774046636,
      "loss": 46.0,
      "step": 617
    },
    {
      "epoch": 0.012439989130106585,
      "grad_norm": 0.0006598389591090381,
      "learning_rate": 0.000133875688017723,
      "loss": 46.0,
      "step": 618
    },
    {
      "epoch": 0.01246011856235595,
      "grad_norm": 0.0010206311708316207,
      "learning_rate": 0.00013368602073392626,
      "loss": 46.0,
      "step": 619
    },
    {
      "epoch": 0.012480247994605313,
      "grad_norm": 0.0010332156671211123,
      "learning_rate": 0.00013349621665926966,
      "loss": 46.0,
      "step": 620
    },
    {
      "epoch": 0.012500377426854675,
      "grad_norm": 0.0005513799260370433,
      "learning_rate": 0.00013330627656450199,
      "loss": 46.0,
      "step": 621
    },
    {
      "epoch": 0.012520506859104039,
      "grad_norm": 0.0005332003347575665,
      "learning_rate": 0.00013311620122092454,
      "loss": 46.0,
      "step": 622
    },
    {
      "epoch": 0.012540636291353402,
      "grad_norm": 0.0011620650766417384,
      "learning_rate": 0.0001329259914003877,
      "loss": 46.0,
      "step": 623
    },
    {
      "epoch": 0.012560765723602766,
      "grad_norm": 0.0013277794932946563,
      "learning_rate": 0.00013273564787528796,
      "loss": 46.0,
      "step": 624
    },
    {
      "epoch": 0.01258089515585213,
      "grad_norm": 0.0003230631700716913,
      "learning_rate": 0.00013254517141856483,
      "loss": 46.0,
      "step": 625
    },
    {
      "epoch": 0.012601024588101493,
      "grad_norm": 0.0017639078432694077,
      "learning_rate": 0.00013235456280369753,
      "loss": 46.0,
      "step": 626
    },
    {
      "epoch": 0.012621154020350855,
      "grad_norm": 0.0006892398814670742,
      "learning_rate": 0.000132163822804702,
      "loss": 46.0,
      "step": 627
    },
    {
      "epoch": 0.01264128345260022,
      "grad_norm": 0.0012958202278241515,
      "learning_rate": 0.00013197295219612767,
      "loss": 46.0,
      "step": 628
    },
    {
      "epoch": 0.012661412884849583,
      "grad_norm": 0.00060327781829983,
      "learning_rate": 0.00013178195175305438,
      "loss": 46.0,
      "step": 629
    },
    {
      "epoch": 0.012681542317098947,
      "grad_norm": 0.002238104585558176,
      "learning_rate": 0.0001315908222510891,
      "loss": 46.0,
      "step": 630
    },
    {
      "epoch": 0.01270167174934831,
      "grad_norm": 0.0008538436959497631,
      "learning_rate": 0.00013139956446636304,
      "loss": 46.0,
      "step": 631
    },
    {
      "epoch": 0.012721801181597672,
      "grad_norm": 0.0005125590832903981,
      "learning_rate": 0.00013120817917552816,
      "loss": 46.0,
      "step": 632
    },
    {
      "epoch": 0.012741930613847036,
      "grad_norm": 0.0015798620879650116,
      "learning_rate": 0.00013101666715575435,
      "loss": 46.0,
      "step": 633
    },
    {
      "epoch": 0.0127620600460964,
      "grad_norm": 0.0008807751582935452,
      "learning_rate": 0.000130825029184726,
      "loss": 46.0,
      "step": 634
    },
    {
      "epoch": 0.012782189478345763,
      "grad_norm": 0.0018555463757365942,
      "learning_rate": 0.00013063326604063896,
      "loss": 46.0,
      "step": 635
    },
    {
      "epoch": 0.012802318910595127,
      "grad_norm": 0.0017406801925972104,
      "learning_rate": 0.0001304413785021975,
      "loss": 46.0,
      "step": 636
    },
    {
      "epoch": 0.01282244834284449,
      "grad_norm": 0.0007575178751721978,
      "learning_rate": 0.00013024936734861087,
      "loss": 46.0,
      "step": 637
    },
    {
      "epoch": 0.012842577775093853,
      "grad_norm": 0.000731868261937052,
      "learning_rate": 0.0001300572333595904,
      "loss": 46.0,
      "step": 638
    },
    {
      "epoch": 0.012862707207343217,
      "grad_norm": 0.0005787058616988361,
      "learning_rate": 0.00012986497731534618,
      "loss": 46.0,
      "step": 639
    },
    {
      "epoch": 0.01288283663959258,
      "grad_norm": 0.0014929514145478606,
      "learning_rate": 0.00012967259999658402,
      "loss": 46.0,
      "step": 640
    },
    {
      "epoch": 0.012902966071841944,
      "grad_norm": 0.0007031034911051393,
      "learning_rate": 0.00012948010218450198,
      "loss": 46.0,
      "step": 641
    },
    {
      "epoch": 0.012923095504091308,
      "grad_norm": 0.0007022300269454718,
      "learning_rate": 0.00012928748466078767,
      "loss": 46.0,
      "step": 642
    },
    {
      "epoch": 0.012943224936340671,
      "grad_norm": 0.0007273477385751903,
      "learning_rate": 0.00012909474820761463,
      "loss": 46.0,
      "step": 643
    },
    {
      "epoch": 0.012963354368590033,
      "grad_norm": 0.0007245743181556463,
      "learning_rate": 0.0001289018936076395,
      "loss": 46.0,
      "step": 644
    },
    {
      "epoch": 0.012983483800839397,
      "grad_norm": 0.0007635979563929141,
      "learning_rate": 0.00012870892164399856,
      "loss": 46.0,
      "step": 645
    },
    {
      "epoch": 0.01300361323308876,
      "grad_norm": 0.0019391605164855719,
      "learning_rate": 0.00012851583310030467,
      "loss": 46.0,
      "step": 646
    },
    {
      "epoch": 0.013023742665338124,
      "grad_norm": 0.0011577141704037786,
      "learning_rate": 0.00012832262876064427,
      "loss": 46.0,
      "step": 647
    },
    {
      "epoch": 0.013043872097587488,
      "grad_norm": 0.0006742352270521224,
      "learning_rate": 0.00012812930940957386,
      "loss": 46.0,
      "step": 648
    },
    {
      "epoch": 0.01306400152983685,
      "grad_norm": 0.001967653399333358,
      "learning_rate": 0.00012793587583211693,
      "loss": 46.0,
      "step": 649
    },
    {
      "epoch": 0.013084130962086214,
      "grad_norm": 0.001183089567348361,
      "learning_rate": 0.000127742328813761,
      "loss": 46.0,
      "step": 650
    },
    {
      "epoch": 0.013104260394335578,
      "grad_norm": 0.000578385079279542,
      "learning_rate": 0.00012754866914045402,
      "loss": 46.0,
      "step": 651
    },
    {
      "epoch": 0.013124389826584941,
      "grad_norm": 0.0018523032777011395,
      "learning_rate": 0.00012735489759860166,
      "loss": 46.0,
      "step": 652
    },
    {
      "epoch": 0.013144519258834305,
      "grad_norm": 0.0011416682973504066,
      "learning_rate": 0.00012716101497506365,
      "loss": 46.0,
      "step": 653
    },
    {
      "epoch": 0.013164648691083669,
      "grad_norm": 0.0004267425974830985,
      "learning_rate": 0.00012696702205715088,
      "loss": 46.0,
      "step": 654
    },
    {
      "epoch": 0.01318477812333303,
      "grad_norm": 0.0008205072954297066,
      "learning_rate": 0.00012677291963262218,
      "loss": 46.0,
      "step": 655
    },
    {
      "epoch": 0.013204907555582394,
      "grad_norm": 0.0016005141660571098,
      "learning_rate": 0.00012657870848968092,
      "loss": 46.0,
      "step": 656
    },
    {
      "epoch": 0.013225036987831758,
      "grad_norm": 0.0009871599031612277,
      "learning_rate": 0.00012638438941697206,
      "loss": 46.0,
      "step": 657
    },
    {
      "epoch": 0.013245166420081122,
      "grad_norm": 0.0008003399707376957,
      "learning_rate": 0.00012618996320357877,
      "loss": 46.0,
      "step": 658
    },
    {
      "epoch": 0.013265295852330486,
      "grad_norm": 0.0006417233380489051,
      "learning_rate": 0.00012599543063901935,
      "loss": 46.0,
      "step": 659
    },
    {
      "epoch": 0.01328542528457985,
      "grad_norm": 0.001283377525396645,
      "learning_rate": 0.00012580079251324394,
      "loss": 46.0,
      "step": 660
    },
    {
      "epoch": 0.013305554716829211,
      "grad_norm": 0.0003393135848455131,
      "learning_rate": 0.00012560604961663128,
      "loss": 46.0,
      "step": 661
    },
    {
      "epoch": 0.013325684149078575,
      "grad_norm": 0.0011401561787351966,
      "learning_rate": 0.0001254112027399857,
      "loss": 46.0,
      "step": 662
    },
    {
      "epoch": 0.013345813581327939,
      "grad_norm": 0.000948408676777035,
      "learning_rate": 0.0001252162526745337,
      "loss": 46.0,
      "step": 663
    },
    {
      "epoch": 0.013365943013577302,
      "grad_norm": 0.0010545322438701987,
      "learning_rate": 0.0001250212002119207,
      "loss": 46.0,
      "step": 664
    },
    {
      "epoch": 0.013386072445826666,
      "grad_norm": 0.0008792767766863108,
      "learning_rate": 0.00012482604614420806,
      "loss": 46.0,
      "step": 665
    },
    {
      "epoch": 0.013406201878076028,
      "grad_norm": 0.0009220750071108341,
      "learning_rate": 0.0001246307912638697,
      "loss": 46.0,
      "step": 666
    },
    {
      "epoch": 0.013426331310325392,
      "grad_norm": 0.002156344009563327,
      "learning_rate": 0.0001244354363637889,
      "loss": 46.0,
      "step": 667
    },
    {
      "epoch": 0.013446460742574756,
      "grad_norm": 0.0012686087284237146,
      "learning_rate": 0.00012423998223725513,
      "loss": 46.0,
      "step": 668
    },
    {
      "epoch": 0.01346659017482412,
      "grad_norm": 0.0011429619044065475,
      "learning_rate": 0.00012404442967796077,
      "loss": 46.0,
      "step": 669
    },
    {
      "epoch": 0.013486719607073483,
      "grad_norm": 0.0009963412303477526,
      "learning_rate": 0.00012384877947999793,
      "loss": 46.0,
      "step": 670
    },
    {
      "epoch": 0.013506849039322847,
      "grad_norm": 0.0010601101676002145,
      "learning_rate": 0.00012365303243785513,
      "loss": 46.0,
      "step": 671
    },
    {
      "epoch": 0.013526978471572209,
      "grad_norm": 0.0007964776596054435,
      "learning_rate": 0.00012345718934641425,
      "loss": 46.0,
      "step": 672
    },
    {
      "epoch": 0.013547107903821572,
      "grad_norm": 0.0011618619319051504,
      "learning_rate": 0.00012326125100094716,
      "loss": 46.0,
      "step": 673
    },
    {
      "epoch": 0.013567237336070936,
      "grad_norm": 0.002301463857293129,
      "learning_rate": 0.0001230652181971126,
      "loss": 46.0,
      "step": 674
    },
    {
      "epoch": 0.0135873667683203,
      "grad_norm": 0.0009032952948473394,
      "learning_rate": 0.0001228690917309527,
      "loss": 46.0,
      "step": 675
    },
    {
      "epoch": 0.013607496200569663,
      "grad_norm": 0.001307567348703742,
      "learning_rate": 0.00012267287239889013,
      "loss": 46.0,
      "step": 676
    },
    {
      "epoch": 0.013627625632819027,
      "grad_norm": 0.0010554592590779066,
      "learning_rate": 0.0001224765609977246,
      "loss": 46.0,
      "step": 677
    },
    {
      "epoch": 0.01364775506506839,
      "grad_norm": 0.0008427874417975545,
      "learning_rate": 0.0001222801583246296,
      "loss": 46.0,
      "step": 678
    },
    {
      "epoch": 0.013667884497317753,
      "grad_norm": 0.0006029874202795327,
      "learning_rate": 0.00012208366517714946,
      "loss": 46.0,
      "step": 679
    },
    {
      "epoch": 0.013688013929567117,
      "grad_norm": 0.0012924791080877185,
      "learning_rate": 0.00012188708235319565,
      "loss": 46.0,
      "step": 680
    },
    {
      "epoch": 0.01370814336181648,
      "grad_norm": 0.00043431291123852134,
      "learning_rate": 0.00012169041065104401,
      "loss": 46.0,
      "step": 681
    },
    {
      "epoch": 0.013728272794065844,
      "grad_norm": 0.000775394553784281,
      "learning_rate": 0.00012149365086933115,
      "loss": 46.0,
      "step": 682
    },
    {
      "epoch": 0.013748402226315206,
      "grad_norm": 0.0011056979419663548,
      "learning_rate": 0.00012129680380705144,
      "loss": 46.0,
      "step": 683
    },
    {
      "epoch": 0.01376853165856457,
      "grad_norm": 0.0011312337592244148,
      "learning_rate": 0.0001210998702635536,
      "loss": 46.0,
      "step": 684
    },
    {
      "epoch": 0.013788661090813933,
      "grad_norm": 0.0010967912385240197,
      "learning_rate": 0.00012090285103853764,
      "loss": 46.0,
      "step": 685
    },
    {
      "epoch": 0.013808790523063297,
      "grad_norm": 0.002103852340951562,
      "learning_rate": 0.00012070574693205138,
      "loss": 46.0,
      "step": 686
    },
    {
      "epoch": 0.01382891995531266,
      "grad_norm": 0.0009759899112395942,
      "learning_rate": 0.00012050855874448737,
      "loss": 46.0,
      "step": 687
    },
    {
      "epoch": 0.013849049387562025,
      "grad_norm": 0.0012610235717147589,
      "learning_rate": 0.00012031128727657963,
      "loss": 46.0,
      "step": 688
    },
    {
      "epoch": 0.013869178819811387,
      "grad_norm": 0.0022020682226866484,
      "learning_rate": 0.0001201139333294003,
      "loss": 46.0,
      "step": 689
    },
    {
      "epoch": 0.01388930825206075,
      "grad_norm": 0.0011697233421728015,
      "learning_rate": 0.0001199164977043565,
      "loss": 46.0,
      "step": 690
    },
    {
      "epoch": 0.013909437684310114,
      "grad_norm": 0.001423744368366897,
      "learning_rate": 0.00011971898120318699,
      "loss": 46.0,
      "step": 691
    },
    {
      "epoch": 0.013929567116559478,
      "grad_norm": 0.001964231953024864,
      "learning_rate": 0.00011952138462795897,
      "loss": 46.0,
      "step": 692
    },
    {
      "epoch": 0.013949696548808841,
      "grad_norm": 0.001953084603883326,
      "learning_rate": 0.00011932370878106477,
      "loss": 46.0,
      "step": 693
    },
    {
      "epoch": 0.013969825981058205,
      "grad_norm": 0.0017078432720154524,
      "learning_rate": 0.00011912595446521868,
      "loss": 46.0,
      "step": 694
    },
    {
      "epoch": 0.013989955413307567,
      "grad_norm": 0.0008826723205856979,
      "learning_rate": 0.00011892812248345358,
      "loss": 46.0,
      "step": 695
    },
    {
      "epoch": 0.01401008484555693,
      "grad_norm": 0.00104327907320112,
      "learning_rate": 0.00011873021363911779,
      "loss": 46.0,
      "step": 696
    },
    {
      "epoch": 0.014030214277806295,
      "grad_norm": 0.001443845801986754,
      "learning_rate": 0.00011853222873587167,
      "loss": 46.0,
      "step": 697
    },
    {
      "epoch": 0.014050343710055658,
      "grad_norm": 0.0008737000171095133,
      "learning_rate": 0.00011833416857768447,
      "loss": 46.0,
      "step": 698
    },
    {
      "epoch": 0.014070473142305022,
      "grad_norm": 0.001212298753671348,
      "learning_rate": 0.00011813603396883108,
      "loss": 46.0,
      "step": 699
    },
    {
      "epoch": 0.014090602574554384,
      "grad_norm": 0.0007996526546776295,
      "learning_rate": 0.00011793782571388865,
      "loss": 46.0,
      "step": 700
    },
    {
      "epoch": 0.014110732006803748,
      "grad_norm": 0.0007419726462103426,
      "learning_rate": 0.00011773954461773344,
      "loss": 46.0,
      "step": 701
    },
    {
      "epoch": 0.014130861439053111,
      "grad_norm": 0.001537975505925715,
      "learning_rate": 0.00011754119148553746,
      "loss": 46.0,
      "step": 702
    },
    {
      "epoch": 0.014150990871302475,
      "grad_norm": 0.001321911346167326,
      "learning_rate": 0.00011734276712276528,
      "loss": 46.0,
      "step": 703
    },
    {
      "epoch": 0.014171120303551839,
      "grad_norm": 0.0020747899543493986,
      "learning_rate": 0.00011714427233517069,
      "loss": 46.0,
      "step": 704
    },
    {
      "epoch": 0.014191249735801202,
      "grad_norm": 0.0013918217737227678,
      "learning_rate": 0.00011694570792879345,
      "loss": 46.0,
      "step": 705
    },
    {
      "epoch": 0.014211379168050564,
      "grad_norm": 0.0008428136934526265,
      "learning_rate": 0.00011674707470995608,
      "loss": 46.0,
      "step": 706
    },
    {
      "epoch": 0.014231508600299928,
      "grad_norm": 0.0007644314900971949,
      "learning_rate": 0.00011654837348526044,
      "loss": 46.0,
      "step": 707
    },
    {
      "epoch": 0.014251638032549292,
      "grad_norm": 0.001396226929500699,
      "learning_rate": 0.00011634960506158465,
      "loss": 46.0,
      "step": 708
    },
    {
      "epoch": 0.014271767464798656,
      "grad_norm": 0.0008691162220202386,
      "learning_rate": 0.00011615077024607965,
      "loss": 46.0,
      "step": 709
    },
    {
      "epoch": 0.01429189689704802,
      "grad_norm": 0.0011933896457776427,
      "learning_rate": 0.00011595186984616598,
      "loss": 46.0,
      "step": 710
    },
    {
      "epoch": 0.014312026329297383,
      "grad_norm": 0.001465336186811328,
      "learning_rate": 0.00011575290466953054,
      "loss": 46.0,
      "step": 711
    },
    {
      "epoch": 0.014332155761546745,
      "grad_norm": 0.0012663495726883411,
      "learning_rate": 0.0001155538755241232,
      "loss": 46.0,
      "step": 712
    },
    {
      "epoch": 0.014352285193796109,
      "grad_norm": 0.0017936511430889368,
      "learning_rate": 0.00011535478321815366,
      "loss": 46.0,
      "step": 713
    },
    {
      "epoch": 0.014372414626045472,
      "grad_norm": 0.0014043014962226152,
      "learning_rate": 0.00011515562856008808,
      "loss": 46.0,
      "step": 714
    },
    {
      "epoch": 0.014392544058294836,
      "grad_norm": 0.001723953988403082,
      "learning_rate": 0.00011495641235864581,
      "loss": 46.0,
      "step": 715
    },
    {
      "epoch": 0.0144126734905442,
      "grad_norm": 0.0010820517782121897,
      "learning_rate": 0.00011475713542279612,
      "loss": 46.0,
      "step": 716
    },
    {
      "epoch": 0.014432802922793562,
      "grad_norm": 0.001104168244637549,
      "learning_rate": 0.00011455779856175488,
      "loss": 46.0,
      "step": 717
    },
    {
      "epoch": 0.014452932355042926,
      "grad_norm": 0.0006916458951309323,
      "learning_rate": 0.00011435840258498139,
      "loss": 46.0,
      "step": 718
    },
    {
      "epoch": 0.01447306178729229,
      "grad_norm": 0.002727057319134474,
      "learning_rate": 0.00011415894830217486,
      "loss": 46.0,
      "step": 719
    },
    {
      "epoch": 0.014493191219541653,
      "grad_norm": 0.002052182564511895,
      "learning_rate": 0.00011395943652327141,
      "loss": 46.0,
      "step": 720
    },
    {
      "epoch": 0.014513320651791017,
      "grad_norm": 0.0010489120613783598,
      "learning_rate": 0.00011375986805844054,
      "loss": 46.0,
      "step": 721
    },
    {
      "epoch": 0.01453345008404038,
      "grad_norm": 0.0022547508124262094,
      "learning_rate": 0.000113560243718082,
      "loss": 46.0,
      "step": 722
    },
    {
      "epoch": 0.014553579516289742,
      "grad_norm": 0.0013953811721876264,
      "learning_rate": 0.00011336056431282238,
      "loss": 46.0,
      "step": 723
    },
    {
      "epoch": 0.014573708948539106,
      "grad_norm": 0.0013688750332221389,
      "learning_rate": 0.00011316083065351195,
      "loss": 46.0,
      "step": 724
    },
    {
      "epoch": 0.01459383838078847,
      "grad_norm": 0.0008434226620011032,
      "learning_rate": 0.00011296104355122126,
      "loss": 46.0,
      "step": 725
    },
    {
      "epoch": 0.014613967813037834,
      "grad_norm": 0.0006940681487321854,
      "learning_rate": 0.00011276120381723779,
      "loss": 46.0,
      "step": 726
    },
    {
      "epoch": 0.014634097245287197,
      "grad_norm": 0.0007419649627991021,
      "learning_rate": 0.00011256131226306288,
      "loss": 46.0,
      "step": 727
    },
    {
      "epoch": 0.014654226677536561,
      "grad_norm": 0.0008091644267551601,
      "learning_rate": 0.00011236136970040823,
      "loss": 46.0,
      "step": 728
    },
    {
      "epoch": 0.014674356109785923,
      "grad_norm": 0.0014601018046960235,
      "learning_rate": 0.00011216137694119271,
      "loss": 46.0,
      "step": 729
    },
    {
      "epoch": 0.014694485542035287,
      "grad_norm": 0.0016407629009336233,
      "learning_rate": 0.00011196133479753894,
      "loss": 46.0,
      "step": 730
    },
    {
      "epoch": 0.01471461497428465,
      "grad_norm": 0.0018916395492851734,
      "learning_rate": 0.0001117612440817702,
      "loss": 46.0,
      "step": 731
    },
    {
      "epoch": 0.014734744406534014,
      "grad_norm": 0.0015089900698512793,
      "learning_rate": 0.00011156110560640693,
      "loss": 46.0,
      "step": 732
    },
    {
      "epoch": 0.014754873838783378,
      "grad_norm": 0.0007338857976719737,
      "learning_rate": 0.00011136092018416356,
      "loss": 46.0,
      "step": 733
    },
    {
      "epoch": 0.01477500327103274,
      "grad_norm": 0.001960652880370617,
      "learning_rate": 0.00011116068862794506,
      "loss": 46.0,
      "step": 734
    },
    {
      "epoch": 0.014795132703282103,
      "grad_norm": 0.0007831032853573561,
      "learning_rate": 0.00011096041175084386,
      "loss": 46.0,
      "step": 735
    },
    {
      "epoch": 0.014815262135531467,
      "grad_norm": 0.0011528864270076156,
      "learning_rate": 0.00011076009036613637,
      "loss": 46.0,
      "step": 736
    },
    {
      "epoch": 0.014835391567780831,
      "grad_norm": 0.001367407850921154,
      "learning_rate": 0.00011055972528727973,
      "loss": 46.0,
      "step": 737
    },
    {
      "epoch": 0.014855521000030195,
      "grad_norm": 0.0011748921824619174,
      "learning_rate": 0.00011035931732790856,
      "loss": 46.0,
      "step": 738
    },
    {
      "epoch": 0.014875650432279558,
      "grad_norm": 0.0011586399050429463,
      "learning_rate": 0.00011015886730183152,
      "loss": 46.0,
      "step": 739
    },
    {
      "epoch": 0.01489577986452892,
      "grad_norm": 0.001220092410221696,
      "learning_rate": 0.00010995837602302819,
      "loss": 46.0,
      "step": 740
    },
    {
      "epoch": 0.014915909296778284,
      "grad_norm": 0.0012157183373346925,
      "learning_rate": 0.00010975784430564558,
      "loss": 46.0,
      "step": 741
    },
    {
      "epoch": 0.014936038729027648,
      "grad_norm": 0.000734594592358917,
      "learning_rate": 0.00010955727296399496,
      "loss": 46.0,
      "step": 742
    },
    {
      "epoch": 0.014956168161277011,
      "grad_norm": 0.0010155554627999663,
      "learning_rate": 0.00010935666281254853,
      "loss": 46.0,
      "step": 743
    },
    {
      "epoch": 0.014976297593526375,
      "grad_norm": 0.001596163841895759,
      "learning_rate": 0.00010915601466593604,
      "loss": 46.0,
      "step": 744
    },
    {
      "epoch": 0.014996427025775739,
      "grad_norm": 0.0010005077347159386,
      "learning_rate": 0.0001089553293389415,
      "loss": 46.0,
      "step": 745
    },
    {
      "epoch": 0.0150165564580251,
      "grad_norm": 0.0015270253643393517,
      "learning_rate": 0.00010875460764649998,
      "loss": 46.0,
      "step": 746
    },
    {
      "epoch": 0.015036685890274465,
      "grad_norm": 0.0010284383315593004,
      "learning_rate": 0.00010855385040369419,
      "loss": 46.0,
      "step": 747
    },
    {
      "epoch": 0.015056815322523828,
      "grad_norm": 0.0006261624512262642,
      "learning_rate": 0.00010835305842575119,
      "loss": 46.0,
      "step": 748
    },
    {
      "epoch": 0.015076944754773192,
      "grad_norm": 0.0009544830536469817,
      "learning_rate": 0.0001081522325280391,
      "loss": 46.0,
      "step": 749
    },
    {
      "epoch": 0.015097074187022556,
      "grad_norm": 0.0009903472382575274,
      "learning_rate": 0.00010795137352606377,
      "loss": 46.0,
      "step": 750
    },
    {
      "epoch": 0.015117203619271918,
      "grad_norm": 0.0013472349382936954,
      "learning_rate": 0.00010775048223546551,
      "loss": 46.0,
      "step": 751
    },
    {
      "epoch": 0.015137333051521281,
      "grad_norm": 0.0021951631642878056,
      "learning_rate": 0.00010754955947201571,
      "loss": 46.0,
      "step": 752
    },
    {
      "epoch": 0.015157462483770645,
      "grad_norm": 0.0011615699622780085,
      "learning_rate": 0.00010734860605161355,
      "loss": 46.0,
      "step": 753
    },
    {
      "epoch": 0.015177591916020009,
      "grad_norm": 0.001115267863497138,
      "learning_rate": 0.00010714762279028275,
      "loss": 46.0,
      "step": 754
    },
    {
      "epoch": 0.015197721348269373,
      "grad_norm": 0.0008176314877346158,
      "learning_rate": 0.00010694661050416819,
      "loss": 46.0,
      "step": 755
    },
    {
      "epoch": 0.015217850780518736,
      "grad_norm": 0.0013733267551288009,
      "learning_rate": 0.00010674557000953258,
      "loss": 46.0,
      "step": 756
    },
    {
      "epoch": 0.015237980212768098,
      "grad_norm": 0.0006811064085923135,
      "learning_rate": 0.00010654450212275324,
      "loss": 46.0,
      "step": 757
    },
    {
      "epoch": 0.015258109645017462,
      "grad_norm": 0.0011723422212526202,
      "learning_rate": 0.00010634340766031868,
      "loss": 46.0,
      "step": 758
    },
    {
      "epoch": 0.015278239077266826,
      "grad_norm": 0.0008587664924561977,
      "learning_rate": 0.0001061422874388253,
      "loss": 46.0,
      "step": 759
    },
    {
      "epoch": 0.01529836850951619,
      "grad_norm": 0.0007777991122566164,
      "learning_rate": 0.00010594114227497419,
      "loss": 46.0,
      "step": 760
    },
    {
      "epoch": 0.015318497941765553,
      "grad_norm": 0.0022410592064261436,
      "learning_rate": 0.00010573997298556762,
      "loss": 46.0,
      "step": 761
    },
    {
      "epoch": 0.015338627374014917,
      "grad_norm": 0.0009275775519199669,
      "learning_rate": 0.00010553878038750591,
      "loss": 46.0,
      "step": 762
    },
    {
      "epoch": 0.015358756806264279,
      "grad_norm": 0.0011703603668138385,
      "learning_rate": 0.000105337565297784,
      "loss": 46.0,
      "step": 763
    },
    {
      "epoch": 0.015378886238513642,
      "grad_norm": 0.0008710163529030979,
      "learning_rate": 0.00010513632853348817,
      "loss": 46.0,
      "step": 764
    },
    {
      "epoch": 0.015399015670763006,
      "grad_norm": 0.0014304480282589793,
      "learning_rate": 0.00010493507091179267,
      "loss": 46.0,
      "step": 765
    },
    {
      "epoch": 0.01541914510301237,
      "grad_norm": 0.0010722475126385689,
      "learning_rate": 0.00010473379324995654,
      "loss": 46.0,
      "step": 766
    },
    {
      "epoch": 0.015439274535261734,
      "grad_norm": 0.0015626356471329927,
      "learning_rate": 0.00010453249636532007,
      "loss": 46.0,
      "step": 767
    },
    {
      "epoch": 0.015459403967511096,
      "grad_norm": 0.0005589164211414754,
      "learning_rate": 0.00010433118107530175,
      "loss": 46.0,
      "step": 768
    },
    {
      "epoch": 0.01547953339976046,
      "grad_norm": 0.0016512125730514526,
      "learning_rate": 0.00010412984819739473,
      "loss": 46.0,
      "step": 769
    },
    {
      "epoch": 0.015499662832009823,
      "grad_norm": 0.0007763996836729348,
      "learning_rate": 0.0001039284985491636,
      "loss": 46.0,
      "step": 770
    },
    {
      "epoch": 0.015519792264259187,
      "grad_norm": 0.0013635704526677728,
      "learning_rate": 0.00010372713294824102,
      "loss": 46.0,
      "step": 771
    },
    {
      "epoch": 0.01553992169650855,
      "grad_norm": 0.0014162887819111347,
      "learning_rate": 0.00010352575221232443,
      "loss": 46.0,
      "step": 772
    },
    {
      "epoch": 0.015560051128757914,
      "grad_norm": 0.002015099162235856,
      "learning_rate": 0.00010332435715917282,
      "loss": 46.0,
      "step": 773
    },
    {
      "epoch": 0.015580180561007276,
      "grad_norm": 0.0012888460187241435,
      "learning_rate": 0.00010312294860660319,
      "loss": 46.0,
      "step": 774
    },
    {
      "epoch": 0.01560030999325664,
      "grad_norm": 0.0013061100617051125,
      "learning_rate": 0.00010292152737248746,
      "loss": 46.0,
      "step": 775
    },
    {
      "epoch": 0.015620439425506004,
      "grad_norm": 0.001647230121307075,
      "learning_rate": 0.00010272009427474898,
      "loss": 46.0,
      "step": 776
    },
    {
      "epoch": 0.015640568857755367,
      "grad_norm": 0.0006732527981512249,
      "learning_rate": 0.00010251865013135931,
      "loss": 46.0,
      "step": 777
    },
    {
      "epoch": 0.01566069829000473,
      "grad_norm": 0.0018044369062408805,
      "learning_rate": 0.00010231719576033487,
      "loss": 46.0,
      "step": 778
    },
    {
      "epoch": 0.015680827722254095,
      "grad_norm": 0.002040853723883629,
      "learning_rate": 0.00010211573197973356,
      "loss": 46.0,
      "step": 779
    },
    {
      "epoch": 0.015700957154503457,
      "grad_norm": 0.0008194477995857596,
      "learning_rate": 0.00010191425960765159,
      "loss": 46.0,
      "step": 780
    },
    {
      "epoch": 0.015721086586752822,
      "grad_norm": 0.0022002204786986113,
      "learning_rate": 0.00010171277946221998,
      "loss": 46.0,
      "step": 781
    },
    {
      "epoch": 0.015741216019002184,
      "grad_norm": 0.001327970647253096,
      "learning_rate": 0.00010151129236160126,
      "loss": 46.0,
      "step": 782
    },
    {
      "epoch": 0.015761345451251546,
      "grad_norm": 0.0012340659741312265,
      "learning_rate": 0.00010130979912398635,
      "loss": 46.0,
      "step": 783
    },
    {
      "epoch": 0.01578147488350091,
      "grad_norm": 0.0020124921575188637,
      "learning_rate": 0.000101108300567591,
      "loss": 46.0,
      "step": 784
    },
    {
      "epoch": 0.015801604315750273,
      "grad_norm": 0.0010386345675215125,
      "learning_rate": 0.00010090679751065255,
      "loss": 46.0,
      "step": 785
    },
    {
      "epoch": 0.01582173374799964,
      "grad_norm": 0.0010036143939942122,
      "learning_rate": 0.00010070529077142665,
      "loss": 46.0,
      "step": 786
    },
    {
      "epoch": 0.01582173374799964,
      "eval_loss": 11.5,
      "eval_runtime": 129.297,
      "eval_samples_per_second": 161.783,
      "eval_steps_per_second": 80.891,
      "step": 786
    },
    {
      "epoch": 0.015841863180249,
      "grad_norm": 0.0016748905181884766,
      "learning_rate": 0.00010050378116818391,
      "loss": 46.0,
      "step": 787
    },
    {
      "epoch": 0.015861992612498363,
      "grad_norm": 0.0021699341014027596,
      "learning_rate": 0.00010030226951920654,
      "loss": 46.0,
      "step": 788
    },
    {
      "epoch": 0.01588212204474773,
      "grad_norm": 0.0006511384854093194,
      "learning_rate": 0.00010010075664278507,
      "loss": 46.0,
      "step": 789
    },
    {
      "epoch": 0.01590225147699709,
      "grad_norm": 0.00087300396990031,
      "learning_rate": 9.9899243357215e-05,
      "loss": 46.0,
      "step": 790
    },
    {
      "epoch": 0.015922380909246456,
      "grad_norm": 0.0009463525493629277,
      "learning_rate": 9.969773048079351e-05,
      "loss": 46.0,
      "step": 791
    },
    {
      "epoch": 0.015942510341495818,
      "grad_norm": 0.0008198622381314635,
      "learning_rate": 9.949621883181612e-05,
      "loss": 46.0,
      "step": 792
    },
    {
      "epoch": 0.015962639773745183,
      "grad_norm": 0.0014198910212144256,
      "learning_rate": 9.929470922857337e-05,
      "loss": 46.0,
      "step": 793
    },
    {
      "epoch": 0.015982769205994545,
      "grad_norm": 0.000913235591724515,
      "learning_rate": 9.909320248934747e-05,
      "loss": 46.0,
      "step": 794
    },
    {
      "epoch": 0.016002898638243907,
      "grad_norm": 0.0006811967468820512,
      "learning_rate": 9.889169943240903e-05,
      "loss": 46.0,
      "step": 795
    },
    {
      "epoch": 0.016023028070493273,
      "grad_norm": 0.0009198164916597307,
      "learning_rate": 9.869020087601365e-05,
      "loss": 46.0,
      "step": 796
    },
    {
      "epoch": 0.016043157502742635,
      "grad_norm": 0.0016066880198195577,
      "learning_rate": 9.848870763839877e-05,
      "loss": 46.0,
      "step": 797
    },
    {
      "epoch": 0.016063286934992,
      "grad_norm": 0.0016591864405199885,
      "learning_rate": 9.828722053778008e-05,
      "loss": 46.0,
      "step": 798
    },
    {
      "epoch": 0.016083416367241362,
      "grad_norm": 0.0011810685973614454,
      "learning_rate": 9.808574039234843e-05,
      "loss": 46.0,
      "step": 799
    },
    {
      "epoch": 0.016103545799490724,
      "grad_norm": 0.0011113261571153998,
      "learning_rate": 9.788426802026645e-05,
      "loss": 46.0,
      "step": 800
    },
    {
      "epoch": 0.01612367523174009,
      "grad_norm": 0.0012657048646360636,
      "learning_rate": 9.768280423966516e-05,
      "loss": 46.0,
      "step": 801
    },
    {
      "epoch": 0.01614380466398945,
      "grad_norm": 0.0013318355195224285,
      "learning_rate": 9.748134986864072e-05,
      "loss": 46.0,
      "step": 802
    },
    {
      "epoch": 0.016163934096238817,
      "grad_norm": 0.0016351451631635427,
      "learning_rate": 9.727990572525105e-05,
      "loss": 46.0,
      "step": 803
    },
    {
      "epoch": 0.01618406352848818,
      "grad_norm": 0.0012661207001656294,
      "learning_rate": 9.707847262751257e-05,
      "loss": 46.0,
      "step": 804
    },
    {
      "epoch": 0.01620419296073754,
      "grad_norm": 0.0015301022212952375,
      "learning_rate": 9.687705139339685e-05,
      "loss": 46.0,
      "step": 805
    },
    {
      "epoch": 0.016224322392986906,
      "grad_norm": 0.0011811705771833658,
      "learning_rate": 9.667564284082723e-05,
      "loss": 46.0,
      "step": 806
    },
    {
      "epoch": 0.016244451825236268,
      "grad_norm": 0.0018039242131635547,
      "learning_rate": 9.64742477876756e-05,
      "loss": 46.0,
      "step": 807
    },
    {
      "epoch": 0.016264581257485634,
      "grad_norm": 0.0017712223343551159,
      "learning_rate": 9.627286705175902e-05,
      "loss": 46.0,
      "step": 808
    },
    {
      "epoch": 0.016284710689734996,
      "grad_norm": 0.0017983190482482314,
      "learning_rate": 9.607150145083642e-05,
      "loss": 46.0,
      "step": 809
    },
    {
      "epoch": 0.01630484012198436,
      "grad_norm": 0.0020130311604589224,
      "learning_rate": 9.587015180260526e-05,
      "loss": 46.0,
      "step": 810
    },
    {
      "epoch": 0.016324969554233723,
      "grad_norm": 0.001603461685590446,
      "learning_rate": 9.566881892469824e-05,
      "loss": 46.0,
      "step": 811
    },
    {
      "epoch": 0.016345098986483085,
      "grad_norm": 0.001214203075505793,
      "learning_rate": 9.546750363467997e-05,
      "loss": 46.0,
      "step": 812
    },
    {
      "epoch": 0.01636522841873245,
      "grad_norm": 0.0015112390974536538,
      "learning_rate": 9.526620675004352e-05,
      "loss": 46.0,
      "step": 813
    },
    {
      "epoch": 0.016385357850981812,
      "grad_norm": 0.001148148556239903,
      "learning_rate": 9.506492908820737e-05,
      "loss": 46.0,
      "step": 814
    },
    {
      "epoch": 0.016405487283231178,
      "grad_norm": 0.0008940810221247375,
      "learning_rate": 9.486367146651187e-05,
      "loss": 46.0,
      "step": 815
    },
    {
      "epoch": 0.01642561671548054,
      "grad_norm": 0.0011365159880369902,
      "learning_rate": 9.466243470221602e-05,
      "loss": 46.0,
      "step": 816
    },
    {
      "epoch": 0.016445746147729902,
      "grad_norm": 0.0012618922628462315,
      "learning_rate": 9.44612196124941e-05,
      "loss": 46.0,
      "step": 817
    },
    {
      "epoch": 0.016465875579979267,
      "grad_norm": 0.0018559535965323448,
      "learning_rate": 9.42600270144324e-05,
      "loss": 46.0,
      "step": 818
    },
    {
      "epoch": 0.01648600501222863,
      "grad_norm": 0.0010438922327011824,
      "learning_rate": 9.405885772502582e-05,
      "loss": 46.0,
      "step": 819
    },
    {
      "epoch": 0.016506134444477995,
      "grad_norm": 0.0019696117378771305,
      "learning_rate": 9.385771256117473e-05,
      "loss": 46.0,
      "step": 820
    },
    {
      "epoch": 0.016526263876727357,
      "grad_norm": 0.0009223187807947397,
      "learning_rate": 9.365659233968136e-05,
      "loss": 46.0,
      "step": 821
    },
    {
      "epoch": 0.01654639330897672,
      "grad_norm": 0.001138696214184165,
      "learning_rate": 9.345549787724679e-05,
      "loss": 46.0,
      "step": 822
    },
    {
      "epoch": 0.016566522741226084,
      "grad_norm": 0.0010714115342125297,
      "learning_rate": 9.325442999046744e-05,
      "loss": 46.0,
      "step": 823
    },
    {
      "epoch": 0.016586652173475446,
      "grad_norm": 0.0024443184956908226,
      "learning_rate": 9.305338949583183e-05,
      "loss": 46.0,
      "step": 824
    },
    {
      "epoch": 0.01660678160572481,
      "grad_norm": 0.0012600711779668927,
      "learning_rate": 9.285237720971726e-05,
      "loss": 46.0,
      "step": 825
    },
    {
      "epoch": 0.016626911037974174,
      "grad_norm": 0.001455603982321918,
      "learning_rate": 9.265139394838646e-05,
      "loss": 46.0,
      "step": 826
    },
    {
      "epoch": 0.01664704047022354,
      "grad_norm": 0.001236987765878439,
      "learning_rate": 9.245044052798435e-05,
      "loss": 46.0,
      "step": 827
    },
    {
      "epoch": 0.0166671699024729,
      "grad_norm": 0.0023520609829574823,
      "learning_rate": 9.224951776453454e-05,
      "loss": 46.0,
      "step": 828
    },
    {
      "epoch": 0.016687299334722263,
      "grad_norm": 0.0009862706065177917,
      "learning_rate": 9.204862647393625e-05,
      "loss": 46.0,
      "step": 829
    },
    {
      "epoch": 0.01670742876697163,
      "grad_norm": 0.000905146764125675,
      "learning_rate": 9.184776747196092e-05,
      "loss": 46.0,
      "step": 830
    },
    {
      "epoch": 0.01672755819922099,
      "grad_norm": 0.0013413660926744342,
      "learning_rate": 9.164694157424882e-05,
      "loss": 46.0,
      "step": 831
    },
    {
      "epoch": 0.016747687631470356,
      "grad_norm": 0.0010207323357462883,
      "learning_rate": 9.144614959630583e-05,
      "loss": 46.0,
      "step": 832
    },
    {
      "epoch": 0.016767817063719718,
      "grad_norm": 0.001262878649868071,
      "learning_rate": 9.124539235350004e-05,
      "loss": 46.0,
      "step": 833
    },
    {
      "epoch": 0.01678794649596908,
      "grad_norm": 0.0010532139567658305,
      "learning_rate": 9.104467066105855e-05,
      "loss": 46.0,
      "step": 834
    },
    {
      "epoch": 0.016808075928218445,
      "grad_norm": 0.0016255469527095556,
      "learning_rate": 9.084398533406401e-05,
      "loss": 46.0,
      "step": 835
    },
    {
      "epoch": 0.016828205360467807,
      "grad_norm": 0.001301302807405591,
      "learning_rate": 9.06433371874515e-05,
      "loss": 46.0,
      "step": 836
    },
    {
      "epoch": 0.016848334792717173,
      "grad_norm": 0.0009694418986327946,
      "learning_rate": 9.044272703600505e-05,
      "loss": 46.0,
      "step": 837
    },
    {
      "epoch": 0.016868464224966535,
      "grad_norm": 0.0006478687282651663,
      "learning_rate": 9.024215569435443e-05,
      "loss": 46.0,
      "step": 838
    },
    {
      "epoch": 0.016888593657215897,
      "grad_norm": 0.0010540804360061884,
      "learning_rate": 9.004162397697183e-05,
      "loss": 46.0,
      "step": 839
    },
    {
      "epoch": 0.016908723089465262,
      "grad_norm": 0.001823692349717021,
      "learning_rate": 8.984113269816849e-05,
      "loss": 46.0,
      "step": 840
    },
    {
      "epoch": 0.016928852521714624,
      "grad_norm": 0.0018427857430651784,
      "learning_rate": 8.964068267209145e-05,
      "loss": 46.0,
      "step": 841
    },
    {
      "epoch": 0.01694898195396399,
      "grad_norm": 0.0011904591228812933,
      "learning_rate": 8.94402747127203e-05,
      "loss": 46.0,
      "step": 842
    },
    {
      "epoch": 0.01696911138621335,
      "grad_norm": 0.001324215205386281,
      "learning_rate": 8.923990963386367e-05,
      "loss": 46.0,
      "step": 843
    },
    {
      "epoch": 0.016989240818462717,
      "grad_norm": 0.0005633292021229863,
      "learning_rate": 8.903958824915616e-05,
      "loss": 46.0,
      "step": 844
    },
    {
      "epoch": 0.01700937025071208,
      "grad_norm": 0.0008070293697528541,
      "learning_rate": 8.883931137205496e-05,
      "loss": 46.0,
      "step": 845
    },
    {
      "epoch": 0.01702949968296144,
      "grad_norm": 0.0009888982167467475,
      "learning_rate": 8.863907981583648e-05,
      "loss": 46.0,
      "step": 846
    },
    {
      "epoch": 0.017049629115210806,
      "grad_norm": 0.001973828999325633,
      "learning_rate": 8.843889439359308e-05,
      "loss": 46.0,
      "step": 847
    },
    {
      "epoch": 0.01706975854746017,
      "grad_norm": 0.0009145489893853664,
      "learning_rate": 8.82387559182298e-05,
      "loss": 46.0,
      "step": 848
    },
    {
      "epoch": 0.017089887979709534,
      "grad_norm": 0.0007107039564289153,
      "learning_rate": 8.803866520246111e-05,
      "loss": 46.0,
      "step": 849
    },
    {
      "epoch": 0.017110017411958896,
      "grad_norm": 0.0007881961646489799,
      "learning_rate": 8.783862305880734e-05,
      "loss": 46.0,
      "step": 850
    },
    {
      "epoch": 0.017130146844208258,
      "grad_norm": 0.00188835512381047,
      "learning_rate": 8.76386302995918e-05,
      "loss": 46.0,
      "step": 851
    },
    {
      "epoch": 0.017150276276457623,
      "grad_norm": 0.0015146104851737618,
      "learning_rate": 8.743868773693715e-05,
      "loss": 46.0,
      "step": 852
    },
    {
      "epoch": 0.017170405708706985,
      "grad_norm": 0.0020661610178649426,
      "learning_rate": 8.723879618276224e-05,
      "loss": 46.0,
      "step": 853
    },
    {
      "epoch": 0.01719053514095635,
      "grad_norm": 0.0013012840645387769,
      "learning_rate": 8.703895644877877e-05,
      "loss": 46.0,
      "step": 854
    },
    {
      "epoch": 0.017210664573205713,
      "grad_norm": 0.0008128953049890697,
      "learning_rate": 8.683916934648804e-05,
      "loss": 46.0,
      "step": 855
    },
    {
      "epoch": 0.017230794005455075,
      "grad_norm": 0.0026957583613693714,
      "learning_rate": 8.663943568717763e-05,
      "loss": 46.0,
      "step": 856
    },
    {
      "epoch": 0.01725092343770444,
      "grad_norm": 0.002083521569147706,
      "learning_rate": 8.643975628191802e-05,
      "loss": 46.0,
      "step": 857
    },
    {
      "epoch": 0.017271052869953802,
      "grad_norm": 0.0012452022638171911,
      "learning_rate": 8.624013194155949e-05,
      "loss": 46.0,
      "step": 858
    },
    {
      "epoch": 0.017291182302203167,
      "grad_norm": 0.0020928813610225916,
      "learning_rate": 8.604056347672862e-05,
      "loss": 46.0,
      "step": 859
    },
    {
      "epoch": 0.01731131173445253,
      "grad_norm": 0.0012156859738752246,
      "learning_rate": 8.584105169782516e-05,
      "loss": 46.0,
      "step": 860
    },
    {
      "epoch": 0.017331441166701895,
      "grad_norm": 0.000812519050668925,
      "learning_rate": 8.564159741501863e-05,
      "loss": 46.0,
      "step": 861
    },
    {
      "epoch": 0.017351570598951257,
      "grad_norm": 0.0012246136320754886,
      "learning_rate": 8.544220143824511e-05,
      "loss": 46.0,
      "step": 862
    },
    {
      "epoch": 0.01737170003120062,
      "grad_norm": 0.0006080910097807646,
      "learning_rate": 8.524286457720389e-05,
      "loss": 46.0,
      "step": 863
    },
    {
      "epoch": 0.017391829463449984,
      "grad_norm": 0.0011760718189179897,
      "learning_rate": 8.504358764135423e-05,
      "loss": 46.0,
      "step": 864
    },
    {
      "epoch": 0.017411958895699346,
      "grad_norm": 0.0011525904992595315,
      "learning_rate": 8.484437143991195e-05,
      "loss": 46.0,
      "step": 865
    },
    {
      "epoch": 0.01743208832794871,
      "grad_norm": 0.0016694519435986876,
      "learning_rate": 8.464521678184637e-05,
      "loss": 46.0,
      "step": 866
    },
    {
      "epoch": 0.017452217760198074,
      "grad_norm": 0.0010596023639664054,
      "learning_rate": 8.444612447587683e-05,
      "loss": 46.0,
      "step": 867
    },
    {
      "epoch": 0.017472347192447436,
      "grad_norm": 0.0011948344763368368,
      "learning_rate": 8.424709533046948e-05,
      "loss": 46.0,
      "step": 868
    },
    {
      "epoch": 0.0174924766246968,
      "grad_norm": 0.0011381066869944334,
      "learning_rate": 8.404813015383402e-05,
      "loss": 46.0,
      "step": 869
    },
    {
      "epoch": 0.017512606056946163,
      "grad_norm": 0.000726166705135256,
      "learning_rate": 8.384922975392035e-05,
      "loss": 46.0,
      "step": 870
    },
    {
      "epoch": 0.01753273548919553,
      "grad_norm": 0.0010629513999447227,
      "learning_rate": 8.365039493841537e-05,
      "loss": 46.0,
      "step": 871
    },
    {
      "epoch": 0.01755286492144489,
      "grad_norm": 0.0018847067840397358,
      "learning_rate": 8.345162651473958e-05,
      "loss": 46.0,
      "step": 872
    },
    {
      "epoch": 0.017572994353694252,
      "grad_norm": 0.001316647743806243,
      "learning_rate": 8.325292529004396e-05,
      "loss": 46.0,
      "step": 873
    },
    {
      "epoch": 0.017593123785943618,
      "grad_norm": 0.0013123692478984594,
      "learning_rate": 8.305429207120657e-05,
      "loss": 46.0,
      "step": 874
    },
    {
      "epoch": 0.01761325321819298,
      "grad_norm": 0.0015352462651208043,
      "learning_rate": 8.285572766482934e-05,
      "loss": 46.0,
      "step": 875
    },
    {
      "epoch": 0.017633382650442345,
      "grad_norm": 0.0008602976449765265,
      "learning_rate": 8.265723287723474e-05,
      "loss": 46.0,
      "step": 876
    },
    {
      "epoch": 0.017653512082691707,
      "grad_norm": 0.0016124986577779055,
      "learning_rate": 8.245880851446255e-05,
      "loss": 46.0,
      "step": 877
    },
    {
      "epoch": 0.017673641514941073,
      "grad_norm": 0.0013488165568560362,
      "learning_rate": 8.226045538226657e-05,
      "loss": 46.0,
      "step": 878
    },
    {
      "epoch": 0.017693770947190435,
      "grad_norm": 0.0016552746528759599,
      "learning_rate": 8.20621742861114e-05,
      "loss": 46.0,
      "step": 879
    },
    {
      "epoch": 0.017713900379439797,
      "grad_norm": 0.0012239515781402588,
      "learning_rate": 8.186396603116897e-05,
      "loss": 46.0,
      "step": 880
    },
    {
      "epoch": 0.017734029811689162,
      "grad_norm": 0.0016428582603111863,
      "learning_rate": 8.166583142231557e-05,
      "loss": 46.0,
      "step": 881
    },
    {
      "epoch": 0.017754159243938524,
      "grad_norm": 0.0007930579595267773,
      "learning_rate": 8.146777126412837e-05,
      "loss": 46.0,
      "step": 882
    },
    {
      "epoch": 0.01777428867618789,
      "grad_norm": 0.002032884396612644,
      "learning_rate": 8.126978636088222e-05,
      "loss": 46.0,
      "step": 883
    },
    {
      "epoch": 0.01779441810843725,
      "grad_norm": 0.0012445595348253846,
      "learning_rate": 8.107187751654642e-05,
      "loss": 46.0,
      "step": 884
    },
    {
      "epoch": 0.017814547540686614,
      "grad_norm": 0.0013359242584556341,
      "learning_rate": 8.087404553478132e-05,
      "loss": 46.0,
      "step": 885
    },
    {
      "epoch": 0.01783467697293598,
      "grad_norm": 0.0010678229155018926,
      "learning_rate": 8.067629121893525e-05,
      "loss": 46.0,
      "step": 886
    },
    {
      "epoch": 0.01785480640518534,
      "grad_norm": 0.0020257853902876377,
      "learning_rate": 8.047861537204107e-05,
      "loss": 46.0,
      "step": 887
    },
    {
      "epoch": 0.017874935837434706,
      "grad_norm": 0.001662117661908269,
      "learning_rate": 8.028101879681304e-05,
      "loss": 46.0,
      "step": 888
    },
    {
      "epoch": 0.01789506526968407,
      "grad_norm": 0.0010253286454826593,
      "learning_rate": 8.008350229564351e-05,
      "loss": 46.0,
      "step": 889
    },
    {
      "epoch": 0.01791519470193343,
      "grad_norm": 0.001274330890737474,
      "learning_rate": 7.988606667059972e-05,
      "loss": 46.0,
      "step": 890
    },
    {
      "epoch": 0.017935324134182796,
      "grad_norm": 0.0004866346425842494,
      "learning_rate": 7.968871272342038e-05,
      "loss": 46.0,
      "step": 891
    },
    {
      "epoch": 0.017955453566432158,
      "grad_norm": 0.0010090246796607971,
      "learning_rate": 7.949144125551263e-05,
      "loss": 46.0,
      "step": 892
    },
    {
      "epoch": 0.017975582998681523,
      "grad_norm": 0.0010872040875256062,
      "learning_rate": 7.929425306794867e-05,
      "loss": 46.0,
      "step": 893
    },
    {
      "epoch": 0.017995712430930885,
      "grad_norm": 0.0005780845531262457,
      "learning_rate": 7.909714896146239e-05,
      "loss": 46.0,
      "step": 894
    },
    {
      "epoch": 0.01801584186318025,
      "grad_norm": 0.0005967924953438342,
      "learning_rate": 7.89001297364464e-05,
      "loss": 46.0,
      "step": 895
    },
    {
      "epoch": 0.018035971295429613,
      "grad_norm": 0.001067324192263186,
      "learning_rate": 7.870319619294859e-05,
      "loss": 46.0,
      "step": 896
    },
    {
      "epoch": 0.018056100727678975,
      "grad_norm": 0.0020970788318663836,
      "learning_rate": 7.850634913066887e-05,
      "loss": 46.0,
      "step": 897
    },
    {
      "epoch": 0.01807623015992834,
      "grad_norm": 0.00045425730058923364,
      "learning_rate": 7.830958934895602e-05,
      "loss": 46.0,
      "step": 898
    },
    {
      "epoch": 0.018096359592177702,
      "grad_norm": 0.000873990764375776,
      "learning_rate": 7.811291764680436e-05,
      "loss": 46.0,
      "step": 899
    },
    {
      "epoch": 0.018116489024427068,
      "grad_norm": 0.0016305999597534537,
      "learning_rate": 7.791633482285056e-05,
      "loss": 46.0,
      "step": 900
    },
    {
      "epoch": 0.01813661845667643,
      "grad_norm": 0.0013768981443718076,
      "learning_rate": 7.771984167537041e-05,
      "loss": 46.0,
      "step": 901
    },
    {
      "epoch": 0.01815674788892579,
      "grad_norm": 0.0021705874241888523,
      "learning_rate": 7.752343900227545e-05,
      "loss": 46.0,
      "step": 902
    },
    {
      "epoch": 0.018176877321175157,
      "grad_norm": 0.0011179293505847454,
      "learning_rate": 7.73271276011099e-05,
      "loss": 46.0,
      "step": 903
    },
    {
      "epoch": 0.01819700675342452,
      "grad_norm": 0.0013696214882656932,
      "learning_rate": 7.713090826904732e-05,
      "loss": 46.0,
      "step": 904
    },
    {
      "epoch": 0.018217136185673884,
      "grad_norm": 0.0017531095072627068,
      "learning_rate": 7.693478180288745e-05,
      "loss": 46.0,
      "step": 905
    },
    {
      "epoch": 0.018237265617923246,
      "grad_norm": 0.0006764591089449823,
      "learning_rate": 7.673874899905284e-05,
      "loss": 46.0,
      "step": 906
    },
    {
      "epoch": 0.01825739505017261,
      "grad_norm": 0.0009293857146985829,
      "learning_rate": 7.654281065358575e-05,
      "loss": 46.0,
      "step": 907
    },
    {
      "epoch": 0.018277524482421974,
      "grad_norm": 0.0015665123937651515,
      "learning_rate": 7.634696756214492e-05,
      "loss": 46.0,
      "step": 908
    },
    {
      "epoch": 0.018297653914671336,
      "grad_norm": 0.0009497758583165705,
      "learning_rate": 7.615122052000212e-05,
      "loss": 46.0,
      "step": 909
    },
    {
      "epoch": 0.0183177833469207,
      "grad_norm": 0.0009189668344333768,
      "learning_rate": 7.595557032203924e-05,
      "loss": 46.0,
      "step": 910
    },
    {
      "epoch": 0.018337912779170063,
      "grad_norm": 0.0012546752113848925,
      "learning_rate": 7.576001776274488e-05,
      "loss": 46.0,
      "step": 911
    },
    {
      "epoch": 0.01835804221141943,
      "grad_norm": 0.000910087488591671,
      "learning_rate": 7.556456363621112e-05,
      "loss": 46.0,
      "step": 912
    },
    {
      "epoch": 0.01837817164366879,
      "grad_norm": 0.0017347291577607393,
      "learning_rate": 7.536920873613034e-05,
      "loss": 46.0,
      "step": 913
    },
    {
      "epoch": 0.018398301075918153,
      "grad_norm": 0.0019821105524897575,
      "learning_rate": 7.517395385579198e-05,
      "loss": 46.0,
      "step": 914
    },
    {
      "epoch": 0.018418430508167518,
      "grad_norm": 0.0015641407808288932,
      "learning_rate": 7.497879978807934e-05,
      "loss": 46.0,
      "step": 915
    },
    {
      "epoch": 0.01843855994041688,
      "grad_norm": 0.0009593809954822063,
      "learning_rate": 7.478374732546635e-05,
      "loss": 46.0,
      "step": 916
    },
    {
      "epoch": 0.018458689372666245,
      "grad_norm": 0.001673855702392757,
      "learning_rate": 7.458879726001431e-05,
      "loss": 46.0,
      "step": 917
    },
    {
      "epoch": 0.018478818804915607,
      "grad_norm": 0.0014266518410295248,
      "learning_rate": 7.439395038336871e-05,
      "loss": 46.0,
      "step": 918
    },
    {
      "epoch": 0.01849894823716497,
      "grad_norm": 0.0007388940430246294,
      "learning_rate": 7.41992074867561e-05,
      "loss": 46.0,
      "step": 919
    },
    {
      "epoch": 0.018519077669414335,
      "grad_norm": 0.0009342276025563478,
      "learning_rate": 7.400456936098066e-05,
      "loss": 46.0,
      "step": 920
    },
    {
      "epoch": 0.018539207101663697,
      "grad_norm": 0.0018599577015265822,
      "learning_rate": 7.381003679642124e-05,
      "loss": 46.0,
      "step": 921
    },
    {
      "epoch": 0.018559336533913062,
      "grad_norm": 0.0007527298876084387,
      "learning_rate": 7.361561058302795e-05,
      "loss": 46.0,
      "step": 922
    },
    {
      "epoch": 0.018579465966162424,
      "grad_norm": 0.0017750163096934557,
      "learning_rate": 7.342129151031911e-05,
      "loss": 46.0,
      "step": 923
    },
    {
      "epoch": 0.018599595398411786,
      "grad_norm": 0.0008317319443449378,
      "learning_rate": 7.322708036737784e-05,
      "loss": 46.0,
      "step": 924
    },
    {
      "epoch": 0.01861972483066115,
      "grad_norm": 0.0015617224853485823,
      "learning_rate": 7.303297794284911e-05,
      "loss": 46.0,
      "step": 925
    },
    {
      "epoch": 0.018639854262910514,
      "grad_norm": 0.0008347228285856545,
      "learning_rate": 7.283898502493637e-05,
      "loss": 46.0,
      "step": 926
    },
    {
      "epoch": 0.01865998369515988,
      "grad_norm": 0.0011498607927933335,
      "learning_rate": 7.264510240139836e-05,
      "loss": 46.0,
      "step": 927
    },
    {
      "epoch": 0.01868011312740924,
      "grad_norm": 0.001822000602260232,
      "learning_rate": 7.245133085954598e-05,
      "loss": 46.0,
      "step": 928
    },
    {
      "epoch": 0.018700242559658607,
      "grad_norm": 0.001454083132557571,
      "learning_rate": 7.225767118623906e-05,
      "loss": 46.0,
      "step": 929
    },
    {
      "epoch": 0.01872037199190797,
      "grad_norm": 0.0008696825243532658,
      "learning_rate": 7.206412416788311e-05,
      "loss": 46.0,
      "step": 930
    },
    {
      "epoch": 0.01874050142415733,
      "grad_norm": 0.002098069293424487,
      "learning_rate": 7.18706905904262e-05,
      "loss": 46.0,
      "step": 931
    },
    {
      "epoch": 0.018760630856406696,
      "grad_norm": 0.0014457725919783115,
      "learning_rate": 7.167737123935574e-05,
      "loss": 46.0,
      "step": 932
    },
    {
      "epoch": 0.018780760288656058,
      "grad_norm": 0.0008266555378213525,
      "learning_rate": 7.148416689969533e-05,
      "loss": 46.0,
      "step": 933
    },
    {
      "epoch": 0.018800889720905423,
      "grad_norm": 0.0019346848130226135,
      "learning_rate": 7.129107835600149e-05,
      "loss": 46.0,
      "step": 934
    },
    {
      "epoch": 0.018821019153154785,
      "grad_norm": 0.0005959367263130844,
      "learning_rate": 7.109810639236051e-05,
      "loss": 46.0,
      "step": 935
    },
    {
      "epoch": 0.018841148585404147,
      "grad_norm": 0.0015214644372463226,
      "learning_rate": 7.090525179238538e-05,
      "loss": 46.0,
      "step": 936
    },
    {
      "epoch": 0.018861278017653513,
      "grad_norm": 0.0008678233716636896,
      "learning_rate": 7.071251533921235e-05,
      "loss": 46.0,
      "step": 937
    },
    {
      "epoch": 0.018881407449902875,
      "grad_norm": 0.0013784753391519189,
      "learning_rate": 7.051989781549806e-05,
      "loss": 46.0,
      "step": 938
    },
    {
      "epoch": 0.01890153688215224,
      "grad_norm": 0.000639195553958416,
      "learning_rate": 7.032740000341604e-05,
      "loss": 46.0,
      "step": 939
    },
    {
      "epoch": 0.018921666314401602,
      "grad_norm": 0.002253234386444092,
      "learning_rate": 7.013502268465382e-05,
      "loss": 46.0,
      "step": 940
    },
    {
      "epoch": 0.018941795746650964,
      "grad_norm": 0.0008279394824057817,
      "learning_rate": 6.994276664040962e-05,
      "loss": 46.0,
      "step": 941
    },
    {
      "epoch": 0.01896192517890033,
      "grad_norm": 0.0014505049912258983,
      "learning_rate": 6.975063265138915e-05,
      "loss": 46.0,
      "step": 942
    },
    {
      "epoch": 0.01898205461114969,
      "grad_norm": 0.0012778080999851227,
      "learning_rate": 6.955862149780251e-05,
      "loss": 46.0,
      "step": 943
    },
    {
      "epoch": 0.019002184043399057,
      "grad_norm": 0.0007853744900785387,
      "learning_rate": 6.936673395936103e-05,
      "loss": 46.0,
      "step": 944
    },
    {
      "epoch": 0.01902231347564842,
      "grad_norm": 0.0013930387794971466,
      "learning_rate": 6.917497081527405e-05,
      "loss": 46.0,
      "step": 945
    },
    {
      "epoch": 0.019042442907897784,
      "grad_norm": 0.0013855951838195324,
      "learning_rate": 6.898333284424568e-05,
      "loss": 46.0,
      "step": 946
    },
    {
      "epoch": 0.019062572340147146,
      "grad_norm": 0.000777574663516134,
      "learning_rate": 6.879182082447185e-05,
      "loss": 46.0,
      "step": 947
    },
    {
      "epoch": 0.01908270177239651,
      "grad_norm": 0.0013940025819465518,
      "learning_rate": 6.860043553363697e-05,
      "loss": 46.0,
      "step": 948
    },
    {
      "epoch": 0.019102831204645874,
      "grad_norm": 0.0007605886785313487,
      "learning_rate": 6.840917774891089e-05,
      "loss": 46.0,
      "step": 949
    },
    {
      "epoch": 0.019122960636895236,
      "grad_norm": 0.0017117703100666404,
      "learning_rate": 6.821804824694564e-05,
      "loss": 46.0,
      "step": 950
    },
    {
      "epoch": 0.0191430900691446,
      "grad_norm": 0.0016022106865420938,
      "learning_rate": 6.802704780387233e-05,
      "loss": 46.0,
      "step": 951
    },
    {
      "epoch": 0.019163219501393963,
      "grad_norm": 0.0010787155479192734,
      "learning_rate": 6.7836177195298e-05,
      "loss": 46.0,
      "step": 952
    },
    {
      "epoch": 0.019183348933643325,
      "grad_norm": 0.0008270377875305712,
      "learning_rate": 6.764543719630247e-05,
      "loss": 46.0,
      "step": 953
    },
    {
      "epoch": 0.01920347836589269,
      "grad_norm": 0.0011401512892916799,
      "learning_rate": 6.745482858143519e-05,
      "loss": 46.0,
      "step": 954
    },
    {
      "epoch": 0.019223607798142053,
      "grad_norm": 0.0010015569860115647,
      "learning_rate": 6.726435212471205e-05,
      "loss": 46.0,
      "step": 955
    },
    {
      "epoch": 0.019243737230391418,
      "grad_norm": 0.0017946928273886442,
      "learning_rate": 6.707400859961233e-05,
      "loss": 46.0,
      "step": 956
    },
    {
      "epoch": 0.01926386666264078,
      "grad_norm": 0.0014648939250037074,
      "learning_rate": 6.688379877907548e-05,
      "loss": 46.0,
      "step": 957
    },
    {
      "epoch": 0.019283996094890142,
      "grad_norm": 0.0010645122965797782,
      "learning_rate": 6.6693723435498e-05,
      "loss": 46.0,
      "step": 958
    },
    {
      "epoch": 0.019304125527139507,
      "grad_norm": 0.0009420996648259461,
      "learning_rate": 6.650378334073036e-05,
      "loss": 46.0,
      "step": 959
    },
    {
      "epoch": 0.01932425495938887,
      "grad_norm": 0.0014943941496312618,
      "learning_rate": 6.631397926607376e-05,
      "loss": 46.0,
      "step": 960
    },
    {
      "epoch": 0.019344384391638235,
      "grad_norm": 0.001286246464587748,
      "learning_rate": 6.612431198227707e-05,
      "loss": 46.0,
      "step": 961
    },
    {
      "epoch": 0.019364513823887597,
      "grad_norm": 0.0008747805841267109,
      "learning_rate": 6.593478225953366e-05,
      "loss": 46.0,
      "step": 962
    },
    {
      "epoch": 0.019384643256136962,
      "grad_norm": 0.0008240886963903904,
      "learning_rate": 6.574539086747837e-05,
      "loss": 46.0,
      "step": 963
    },
    {
      "epoch": 0.019404772688386324,
      "grad_norm": 0.001010640524327755,
      "learning_rate": 6.555613857518425e-05,
      "loss": 46.0,
      "step": 964
    },
    {
      "epoch": 0.019424902120635686,
      "grad_norm": 0.0015888873022049665,
      "learning_rate": 6.536702615115954e-05,
      "loss": 46.0,
      "step": 965
    },
    {
      "epoch": 0.019445031552885052,
      "grad_norm": 0.0007431007106788456,
      "learning_rate": 6.517805436334447e-05,
      "loss": 46.0,
      "step": 966
    },
    {
      "epoch": 0.019465160985134414,
      "grad_norm": 0.0009085469646379352,
      "learning_rate": 6.498922397910822e-05,
      "loss": 46.0,
      "step": 967
    },
    {
      "epoch": 0.01948529041738378,
      "grad_norm": 0.0016039622714743018,
      "learning_rate": 6.48005357652457e-05,
      "loss": 46.0,
      "step": 968
    },
    {
      "epoch": 0.01950541984963314,
      "grad_norm": 0.0009189580450765789,
      "learning_rate": 6.461199048797457e-05,
      "loss": 46.0,
      "step": 969
    },
    {
      "epoch": 0.019525549281882503,
      "grad_norm": 0.0008581737638451159,
      "learning_rate": 6.442358891293201e-05,
      "loss": 46.0,
      "step": 970
    },
    {
      "epoch": 0.01954567871413187,
      "grad_norm": 0.001615122426301241,
      "learning_rate": 6.423533180517171e-05,
      "loss": 46.0,
      "step": 971
    },
    {
      "epoch": 0.01956580814638123,
      "grad_norm": 0.001174842705950141,
      "learning_rate": 6.404721992916068e-05,
      "loss": 46.0,
      "step": 972
    },
    {
      "epoch": 0.019585937578630596,
      "grad_norm": 0.002247781027108431,
      "learning_rate": 6.385925404877616e-05,
      "loss": 46.0,
      "step": 973
    },
    {
      "epoch": 0.019606067010879958,
      "grad_norm": 0.0019079920602962375,
      "learning_rate": 6.367143492730257e-05,
      "loss": 46.0,
      "step": 974
    },
    {
      "epoch": 0.01962619644312932,
      "grad_norm": 0.0024635521695017815,
      "learning_rate": 6.34837633274284e-05,
      "loss": 46.0,
      "step": 975
    },
    {
      "epoch": 0.019646325875378685,
      "grad_norm": 0.0010142981773242354,
      "learning_rate": 6.329624001124297e-05,
      "loss": 46.0,
      "step": 976
    },
    {
      "epoch": 0.019666455307628047,
      "grad_norm": 0.0005801988299936056,
      "learning_rate": 6.310886574023362e-05,
      "loss": 46.0,
      "step": 977
    },
    {
      "epoch": 0.019686584739877413,
      "grad_norm": 0.001695830374956131,
      "learning_rate": 6.292164127528232e-05,
      "loss": 46.0,
      "step": 978
    },
    {
      "epoch": 0.019706714172126775,
      "grad_norm": 0.0005539690027944744,
      "learning_rate": 6.273456737666281e-05,
      "loss": 46.0,
      "step": 979
    },
    {
      "epoch": 0.01972684360437614,
      "grad_norm": 0.0017788794357329607,
      "learning_rate": 6.254764480403736e-05,
      "loss": 46.0,
      "step": 980
    },
    {
      "epoch": 0.019746973036625502,
      "grad_norm": 0.0011399569921195507,
      "learning_rate": 6.236087431645376e-05,
      "loss": 46.0,
      "step": 981
    },
    {
      "epoch": 0.019767102468874864,
      "grad_norm": 0.000878597202245146,
      "learning_rate": 6.217425667234223e-05,
      "loss": 46.0,
      "step": 982
    },
    {
      "epoch": 0.01978723190112423,
      "grad_norm": 0.0008538950351066887,
      "learning_rate": 6.198779262951225e-05,
      "loss": 46.0,
      "step": 983
    },
    {
      "epoch": 0.01980736133337359,
      "grad_norm": 0.0015830093761906028,
      "learning_rate": 6.180148294514969e-05,
      "loss": 46.0,
      "step": 984
    },
    {
      "epoch": 0.019827490765622957,
      "grad_norm": 0.0015480596339330077,
      "learning_rate": 6.161532837581352e-05,
      "loss": 46.0,
      "step": 985
    },
    {
      "epoch": 0.01984762019787232,
      "grad_norm": 0.001011079759337008,
      "learning_rate": 6.142932967743284e-05,
      "loss": 46.0,
      "step": 986
    },
    {
      "epoch": 0.01986774963012168,
      "grad_norm": 0.0016792715759947896,
      "learning_rate": 6.124348760530383e-05,
      "loss": 46.0,
      "step": 987
    },
    {
      "epoch": 0.019887879062371046,
      "grad_norm": 0.002266524126753211,
      "learning_rate": 6.10578029140866e-05,
      "loss": 46.0,
      "step": 988
    },
    {
      "epoch": 0.01990800849462041,
      "grad_norm": 0.0016408918891102076,
      "learning_rate": 6.087227635780225e-05,
      "loss": 46.0,
      "step": 989
    },
    {
      "epoch": 0.019928137926869774,
      "grad_norm": 0.00135867390781641,
      "learning_rate": 6.068690868982958e-05,
      "loss": 46.0,
      "step": 990
    },
    {
      "epoch": 0.019948267359119136,
      "grad_norm": 0.0008750604465603828,
      "learning_rate": 6.050170066290234e-05,
      "loss": 46.0,
      "step": 991
    },
    {
      "epoch": 0.019968396791368498,
      "grad_norm": 0.0007627055747434497,
      "learning_rate": 6.031665302910594e-05,
      "loss": 46.0,
      "step": 992
    },
    {
      "epoch": 0.019988526223617863,
      "grad_norm": 0.0013612302718684077,
      "learning_rate": 6.01317665398745e-05,
      "loss": 46.0,
      "step": 993
    },
    {
      "epoch": 0.020008655655867225,
      "grad_norm": 0.0021670411806553602,
      "learning_rate": 5.994704194598775e-05,
      "loss": 46.0,
      "step": 994
    },
    {
      "epoch": 0.02002878508811659,
      "grad_norm": 0.0007712049409747124,
      "learning_rate": 5.976247999756802e-05,
      "loss": 46.0,
      "step": 995
    },
    {
      "epoch": 0.020048914520365953,
      "grad_norm": 0.0009516848367638886,
      "learning_rate": 5.957808144407716e-05,
      "loss": 46.0,
      "step": 996
    },
    {
      "epoch": 0.020069043952615318,
      "grad_norm": 0.0009539611055515707,
      "learning_rate": 5.9393847034313544e-05,
      "loss": 46.0,
      "step": 997
    },
    {
      "epoch": 0.02008917338486468,
      "grad_norm": 0.0009101739851757884,
      "learning_rate": 5.9209777516408924e-05,
      "loss": 46.0,
      "step": 998
    },
    {
      "epoch": 0.020109302817114042,
      "grad_norm": 0.0015371376648545265,
      "learning_rate": 5.902587363782553e-05,
      "loss": 46.0,
      "step": 999
    },
    {
      "epoch": 0.020129432249363408,
      "grad_norm": 0.0016428247326985002,
      "learning_rate": 5.884213614535296e-05,
      "loss": 46.0,
      "step": 1000
    },
    {
      "epoch": 0.02014956168161277,
      "grad_norm": 0.0010306787444278598,
      "learning_rate": 5.865856578510518e-05,
      "loss": 46.0,
      "step": 1001
    },
    {
      "epoch": 0.020169691113862135,
      "grad_norm": 0.0024218596518039703,
      "learning_rate": 5.847516330251741e-05,
      "loss": 46.0,
      "step": 1002
    },
    {
      "epoch": 0.020189820546111497,
      "grad_norm": 0.0018254693131893873,
      "learning_rate": 5.829192944234321e-05,
      "loss": 46.0,
      "step": 1003
    },
    {
      "epoch": 0.02020994997836086,
      "grad_norm": 0.00036783842369914055,
      "learning_rate": 5.8108864948651385e-05,
      "loss": 46.0,
      "step": 1004
    },
    {
      "epoch": 0.020230079410610224,
      "grad_norm": 0.0017898846417665482,
      "learning_rate": 5.792597056482294e-05,
      "loss": 46.0,
      "step": 1005
    },
    {
      "epoch": 0.020250208842859586,
      "grad_norm": 0.0014998508850112557,
      "learning_rate": 5.774324703354824e-05,
      "loss": 46.0,
      "step": 1006
    },
    {
      "epoch": 0.020270338275108952,
      "grad_norm": 0.001964285271242261,
      "learning_rate": 5.756069509682368e-05,
      "loss": 46.0,
      "step": 1007
    },
    {
      "epoch": 0.020290467707358314,
      "grad_norm": 0.00039214518619701266,
      "learning_rate": 5.737831549594903e-05,
      "loss": 46.0,
      "step": 1008
    },
    {
      "epoch": 0.020310597139607676,
      "grad_norm": 0.001849150750786066,
      "learning_rate": 5.719610897152405e-05,
      "loss": 46.0,
      "step": 1009
    },
    {
      "epoch": 0.02033072657185704,
      "grad_norm": 0.0016469095135107636,
      "learning_rate": 5.70140762634459e-05,
      "loss": 46.0,
      "step": 1010
    },
    {
      "epoch": 0.020350856004106403,
      "grad_norm": 0.0011177381966263056,
      "learning_rate": 5.683221811090569e-05,
      "loss": 46.0,
      "step": 1011
    },
    {
      "epoch": 0.02037098543635577,
      "grad_norm": 0.001804203144274652,
      "learning_rate": 5.665053525238595e-05,
      "loss": 46.0,
      "step": 1012
    },
    {
      "epoch": 0.02039111486860513,
      "grad_norm": 0.0011881906539201736,
      "learning_rate": 5.646902842565707e-05,
      "loss": 46.0,
      "step": 1013
    },
    {
      "epoch": 0.020411244300854496,
      "grad_norm": 0.0010538804344832897,
      "learning_rate": 5.6287698367774897e-05,
      "loss": 46.0,
      "step": 1014
    },
    {
      "epoch": 0.020431373733103858,
      "grad_norm": 0.0013392227701842785,
      "learning_rate": 5.610654581507728e-05,
      "loss": 46.0,
      "step": 1015
    },
    {
      "epoch": 0.02045150316535322,
      "grad_norm": 0.0014469471061602235,
      "learning_rate": 5.592557150318145e-05,
      "loss": 46.0,
      "step": 1016
    },
    {
      "epoch": 0.020471632597602585,
      "grad_norm": 0.0014158233534544706,
      "learning_rate": 5.574477616698061e-05,
      "loss": 46.0,
      "step": 1017
    },
    {
      "epoch": 0.020491762029851947,
      "grad_norm": 0.00027452100766822696,
      "learning_rate": 5.5564160540641394e-05,
      "loss": 46.0,
      "step": 1018
    },
    {
      "epoch": 0.020511891462101313,
      "grad_norm": 0.001543979742564261,
      "learning_rate": 5.538372535760057e-05,
      "loss": 46.0,
      "step": 1019
    },
    {
      "epoch": 0.020532020894350675,
      "grad_norm": 0.0013933605514466763,
      "learning_rate": 5.520347135056212e-05,
      "loss": 46.0,
      "step": 1020
    },
    {
      "epoch": 0.020552150326600037,
      "grad_norm": 0.0014277072623372078,
      "learning_rate": 5.502339925149449e-05,
      "loss": 46.0,
      "step": 1021
    },
    {
      "epoch": 0.020572279758849402,
      "grad_norm": 0.001016245107166469,
      "learning_rate": 5.484350979162724e-05,
      "loss": 46.0,
      "step": 1022
    },
    {
      "epoch": 0.020592409191098764,
      "grad_norm": 0.0023218076676130295,
      "learning_rate": 5.466380370144843e-05,
      "loss": 46.0,
      "step": 1023
    },
    {
      "epoch": 0.02061253862334813,
      "grad_norm": 0.0016895633889362216,
      "learning_rate": 5.448428171070141e-05,
      "loss": 46.0,
      "step": 1024
    },
    {
      "epoch": 0.02063266805559749,
      "grad_norm": 0.0021997210569679737,
      "learning_rate": 5.430494454838202e-05,
      "loss": 46.0,
      "step": 1025
    },
    {
      "epoch": 0.020652797487846854,
      "grad_norm": 0.0012985903304070234,
      "learning_rate": 5.412579294273547e-05,
      "loss": 46.0,
      "step": 1026
    },
    {
      "epoch": 0.02067292692009622,
      "grad_norm": 0.0007923256489448249,
      "learning_rate": 5.3946827621253514e-05,
      "loss": 46.0,
      "step": 1027
    },
    {
      "epoch": 0.02069305635234558,
      "grad_norm": 0.0030492981895804405,
      "learning_rate": 5.376804931067141e-05,
      "loss": 46.0,
      "step": 1028
    },
    {
      "epoch": 0.020713185784594947,
      "grad_norm": 0.0012466289335861802,
      "learning_rate": 5.358945873696514e-05,
      "loss": 46.0,
      "step": 1029
    },
    {
      "epoch": 0.02073331521684431,
      "grad_norm": 0.0008472254266962409,
      "learning_rate": 5.3411056625348135e-05,
      "loss": 46.0,
      "step": 1030
    },
    {
      "epoch": 0.020753444649093674,
      "grad_norm": 0.00038629811024293303,
      "learning_rate": 5.323284370026873e-05,
      "loss": 46.0,
      "step": 1031
    },
    {
      "epoch": 0.020773574081343036,
      "grad_norm": 0.001900954986922443,
      "learning_rate": 5.3054820685406817e-05,
      "loss": 46.0,
      "step": 1032
    },
    {
      "epoch": 0.020793703513592398,
      "grad_norm": 0.0016893133288249373,
      "learning_rate": 5.2876988303671316e-05,
      "loss": 46.0,
      "step": 1033
    },
    {
      "epoch": 0.020813832945841763,
      "grad_norm": 0.001547012128867209,
      "learning_rate": 5.269934727719685e-05,
      "loss": 46.0,
      "step": 1034
    },
    {
      "epoch": 0.020833962378091125,
      "grad_norm": 0.0023651174269616604,
      "learning_rate": 5.252189832734108e-05,
      "loss": 46.0,
      "step": 1035
    },
    {
      "epoch": 0.02085409181034049,
      "grad_norm": 0.0014698889572173357,
      "learning_rate": 5.2344642174681716e-05,
      "loss": 46.0,
      "step": 1036
    },
    {
      "epoch": 0.020874221242589853,
      "grad_norm": 0.0011334436712786555,
      "learning_rate": 5.2167579539013456e-05,
      "loss": 46.0,
      "step": 1037
    },
    {
      "epoch": 0.020894350674839215,
      "grad_norm": 0.0009258949430659413,
      "learning_rate": 5.199071113934533e-05,
      "loss": 46.0,
      "step": 1038
    },
    {
      "epoch": 0.02091448010708858,
      "grad_norm": 0.0010753805981948972,
      "learning_rate": 5.1814037693897464e-05,
      "loss": 46.0,
      "step": 1039
    },
    {
      "epoch": 0.020934609539337942,
      "grad_norm": 0.0010734342504292727,
      "learning_rate": 5.1637559920098466e-05,
      "loss": 46.0,
      "step": 1040
    },
    {
      "epoch": 0.020954738971587308,
      "grad_norm": 0.0009417047840543091,
      "learning_rate": 5.146127853458225e-05,
      "loss": 46.0,
      "step": 1041
    },
    {
      "epoch": 0.02097486840383667,
      "grad_norm": 0.0005359546048566699,
      "learning_rate": 5.1285194253185295e-05,
      "loss": 46.0,
      "step": 1042
    },
    {
      "epoch": 0.02099499783608603,
      "grad_norm": 0.0010907109826803207,
      "learning_rate": 5.110930779094365e-05,
      "loss": 46.0,
      "step": 1043
    },
    {
      "epoch": 0.021015127268335397,
      "grad_norm": 0.0011225123889744282,
      "learning_rate": 5.093361986209015e-05,
      "loss": 46.0,
      "step": 1044
    },
    {
      "epoch": 0.02103525670058476,
      "grad_norm": 0.0012630325509235263,
      "learning_rate": 5.075813118005135e-05,
      "loss": 46.0,
      "step": 1045
    },
    {
      "epoch": 0.021055386132834124,
      "grad_norm": 0.00048818206414580345,
      "learning_rate": 5.05828424574448e-05,
      "loss": 46.0,
      "step": 1046
    },
    {
      "epoch": 0.021075515565083486,
      "grad_norm": 0.0010798347648233175,
      "learning_rate": 5.0407754406075926e-05,
      "loss": 46.0,
      "step": 1047
    },
    {
      "epoch": 0.021095644997332852,
      "grad_norm": 0.0014009498991072178,
      "learning_rate": 5.023286773693541e-05,
      "loss": 46.0,
      "step": 1048
    },
    {
      "epoch": 0.021115774429582214,
      "grad_norm": 0.0009970440296456218,
      "learning_rate": 5.005818316019618e-05,
      "loss": 46.0,
      "step": 1049
    },
    {
      "epoch": 0.021135903861831576,
      "grad_norm": 0.0017505913274362683,
      "learning_rate": 4.988370138521031e-05,
      "loss": 46.0,
      "step": 1050
    },
    {
      "epoch": 0.02115603329408094,
      "grad_norm": 0.0009628281113691628,
      "learning_rate": 4.970942312050659e-05,
      "loss": 46.0,
      "step": 1051
    },
    {
      "epoch": 0.021176162726330303,
      "grad_norm": 0.0015546507202088833,
      "learning_rate": 4.953534907378724e-05,
      "loss": 46.0,
      "step": 1052
    },
    {
      "epoch": 0.02119629215857967,
      "grad_norm": 0.0013801100431010127,
      "learning_rate": 4.9361479951925314e-05,
      "loss": 46.0,
      "step": 1053
    },
    {
      "epoch": 0.02121642159082903,
      "grad_norm": 0.0011676892172545195,
      "learning_rate": 4.918781646096161e-05,
      "loss": 46.0,
      "step": 1054
    },
    {
      "epoch": 0.021236551023078393,
      "grad_norm": 0.0014288736274465919,
      "learning_rate": 4.901435930610202e-05,
      "loss": 46.0,
      "step": 1055
    },
    {
      "epoch": 0.021256680455327758,
      "grad_norm": 0.0012626959942281246,
      "learning_rate": 4.884110919171446e-05,
      "loss": 46.0,
      "step": 1056
    },
    {
      "epoch": 0.02127680988757712,
      "grad_norm": 0.0011516448576003313,
      "learning_rate": 4.866806682132611e-05,
      "loss": 46.0,
      "step": 1057
    },
    {
      "epoch": 0.021296939319826486,
      "grad_norm": 0.0008224432240240276,
      "learning_rate": 4.849523289762063e-05,
      "loss": 46.0,
      "step": 1058
    },
    {
      "epoch": 0.021317068752075848,
      "grad_norm": 0.0012658087071031332,
      "learning_rate": 4.832260812243513e-05,
      "loss": 46.0,
      "step": 1059
    },
    {
      "epoch": 0.02133719818432521,
      "grad_norm": 0.0008029589662328362,
      "learning_rate": 4.8150193196757533e-05,
      "loss": 46.0,
      "step": 1060
    },
    {
      "epoch": 0.021357327616574575,
      "grad_norm": 0.0006066640489734709,
      "learning_rate": 4.7977988820723454e-05,
      "loss": 46.0,
      "step": 1061
    },
    {
      "epoch": 0.021377457048823937,
      "grad_norm": 0.0006281784153543413,
      "learning_rate": 4.78059956936137e-05,
      "loss": 46.0,
      "step": 1062
    },
    {
      "epoch": 0.021397586481073302,
      "grad_norm": 0.0009794096695259213,
      "learning_rate": 4.7634214513851125e-05,
      "loss": 46.0,
      "step": 1063
    },
    {
      "epoch": 0.021417715913322664,
      "grad_norm": 0.0011547215981408954,
      "learning_rate": 4.746264597899792e-05,
      "loss": 46.0,
      "step": 1064
    },
    {
      "epoch": 0.02143784534557203,
      "grad_norm": 0.0010263699805364013,
      "learning_rate": 4.7291290785752795e-05,
      "loss": 46.0,
      "step": 1065
    },
    {
      "epoch": 0.021457974777821392,
      "grad_norm": 0.0010340644512325525,
      "learning_rate": 4.7120149629948185e-05,
      "loss": 46.0,
      "step": 1066
    },
    {
      "epoch": 0.021478104210070754,
      "grad_norm": 0.0010057146428152919,
      "learning_rate": 4.694922320654727e-05,
      "loss": 46.0,
      "step": 1067
    },
    {
      "epoch": 0.02149823364232012,
      "grad_norm": 0.0014495252398774028,
      "learning_rate": 4.677851220964136e-05,
      "loss": 46.0,
      "step": 1068
    },
    {
      "epoch": 0.02151836307456948,
      "grad_norm": 0.0006856170948594809,
      "learning_rate": 4.660801733244685e-05,
      "loss": 46.0,
      "step": 1069
    },
    {
      "epoch": 0.021538492506818847,
      "grad_norm": 0.0015510255470871925,
      "learning_rate": 4.643773926730267e-05,
      "loss": 46.0,
      "step": 1070
    },
    {
      "epoch": 0.02155862193906821,
      "grad_norm": 0.0007154226186685264,
      "learning_rate": 4.626767870566722e-05,
      "loss": 46.0,
      "step": 1071
    },
    {
      "epoch": 0.02157875137131757,
      "grad_norm": 0.0010833822889253497,
      "learning_rate": 4.6097836338115626e-05,
      "loss": 46.0,
      "step": 1072
    },
    {
      "epoch": 0.021598880803566936,
      "grad_norm": 0.001424678135663271,
      "learning_rate": 4.592821285433716e-05,
      "loss": 46.0,
      "step": 1073
    },
    {
      "epoch": 0.021619010235816298,
      "grad_norm": 0.0014538168907165527,
      "learning_rate": 4.575880894313207e-05,
      "loss": 46.0,
      "step": 1074
    },
    {
      "epoch": 0.021639139668065663,
      "grad_norm": 0.0019246512092649937,
      "learning_rate": 4.558962529240913e-05,
      "loss": 46.0,
      "step": 1075
    },
    {
      "epoch": 0.021659269100315025,
      "grad_norm": 0.0011248672381043434,
      "learning_rate": 4.5420662589182525e-05,
      "loss": 46.0,
      "step": 1076
    },
    {
      "epoch": 0.021679398532564387,
      "grad_norm": 0.0008184146718122065,
      "learning_rate": 4.5251921519569404e-05,
      "loss": 46.0,
      "step": 1077
    },
    {
      "epoch": 0.021699527964813753,
      "grad_norm": 0.0008564481395296752,
      "learning_rate": 4.508340276878679e-05,
      "loss": 46.0,
      "step": 1078
    },
    {
      "epoch": 0.021719657397063115,
      "grad_norm": 0.001509159803390503,
      "learning_rate": 4.491510702114894e-05,
      "loss": 46.0,
      "step": 1079
    },
    {
      "epoch": 0.02173978682931248,
      "grad_norm": 0.0011579337297007442,
      "learning_rate": 4.474703496006455e-05,
      "loss": 46.0,
      "step": 1080
    },
    {
      "epoch": 0.021759916261561842,
      "grad_norm": 0.0016495742602273822,
      "learning_rate": 4.457918726803404e-05,
      "loss": 46.0,
      "step": 1081
    },
    {
      "epoch": 0.021780045693811208,
      "grad_norm": 0.002713642781600356,
      "learning_rate": 4.441156462664663e-05,
      "loss": 46.0,
      "step": 1082
    },
    {
      "epoch": 0.02180017512606057,
      "grad_norm": 0.0008418328943662345,
      "learning_rate": 4.424416771657778e-05,
      "loss": 46.0,
      "step": 1083
    },
    {
      "epoch": 0.02182030455830993,
      "grad_norm": 0.0012767758453264832,
      "learning_rate": 4.407699721758614e-05,
      "loss": 46.0,
      "step": 1084
    },
    {
      "epoch": 0.021840433990559297,
      "grad_norm": 0.0016243146965280175,
      "learning_rate": 4.391005380851115e-05,
      "loss": 46.0,
      "step": 1085
    },
    {
      "epoch": 0.02186056342280866,
      "grad_norm": 0.0008095060475170612,
      "learning_rate": 4.3743338167269955e-05,
      "loss": 46.0,
      "step": 1086
    },
    {
      "epoch": 0.021880692855058025,
      "grad_norm": 0.0006309272139333189,
      "learning_rate": 4.35768509708548e-05,
      "loss": 46.0,
      "step": 1087
    },
    {
      "epoch": 0.021900822287307387,
      "grad_norm": 0.0012147061061114073,
      "learning_rate": 4.3410592895330385e-05,
      "loss": 46.0,
      "step": 1088
    },
    {
      "epoch": 0.02192095171955675,
      "grad_norm": 0.0011163427261635661,
      "learning_rate": 4.324456461583084e-05,
      "loss": 46.0,
      "step": 1089
    },
    {
      "epoch": 0.021941081151806114,
      "grad_norm": 0.0011350977001711726,
      "learning_rate": 4.30787668065573e-05,
      "loss": 46.0,
      "step": 1090
    },
    {
      "epoch": 0.021961210584055476,
      "grad_norm": 0.0018085506744682789,
      "learning_rate": 4.291320014077488e-05,
      "loss": 46.0,
      "step": 1091
    },
    {
      "epoch": 0.02198134001630484,
      "grad_norm": 0.0012762745609506965,
      "learning_rate": 4.274786529081023e-05,
      "loss": 46.0,
      "step": 1092
    },
    {
      "epoch": 0.022001469448554203,
      "grad_norm": 0.0016571565065532923,
      "learning_rate": 4.2582762928048516e-05,
      "loss": 46.0,
      "step": 1093
    },
    {
      "epoch": 0.022021598880803565,
      "grad_norm": 0.0008646156056784093,
      "learning_rate": 4.241789372293087e-05,
      "loss": 46.0,
      "step": 1094
    },
    {
      "epoch": 0.02204172831305293,
      "grad_norm": 0.0007551155867986381,
      "learning_rate": 4.2253258344951616e-05,
      "loss": 46.0,
      "step": 1095
    },
    {
      "epoch": 0.022061857745302293,
      "grad_norm": 0.0014457149663940072,
      "learning_rate": 4.208885746265565e-05,
      "loss": 46.0,
      "step": 1096
    },
    {
      "epoch": 0.022081987177551658,
      "grad_norm": 0.0011477636871859431,
      "learning_rate": 4.1924691743635504e-05,
      "loss": 46.0,
      "step": 1097
    },
    {
      "epoch": 0.02210211660980102,
      "grad_norm": 0.0014144850429147482,
      "learning_rate": 4.1760761854528886e-05,
      "loss": 46.0,
      "step": 1098
    },
    {
      "epoch": 0.022122246042050386,
      "grad_norm": 0.0015863333828747272,
      "learning_rate": 4.159706846101574e-05,
      "loss": 46.0,
      "step": 1099
    },
    {
      "epoch": 0.022142375474299748,
      "grad_norm": 0.0018363846465945244,
      "learning_rate": 4.14336122278158e-05,
      "loss": 46.0,
      "step": 1100
    },
    {
      "epoch": 0.02216250490654911,
      "grad_norm": 0.0014002359239384532,
      "learning_rate": 4.127039381868561e-05,
      "loss": 46.0,
      "step": 1101
    },
    {
      "epoch": 0.022182634338798475,
      "grad_norm": 0.0017210880760103464,
      "learning_rate": 4.1107413896416026e-05,
      "loss": 46.0,
      "step": 1102
    },
    {
      "epoch": 0.022202763771047837,
      "grad_norm": 0.0009532097028568387,
      "learning_rate": 4.0944673122829515e-05,
      "loss": 46.0,
      "step": 1103
    },
    {
      "epoch": 0.022222893203297202,
      "grad_norm": 0.0010593609185889363,
      "learning_rate": 4.0782172158777296e-05,
      "loss": 46.0,
      "step": 1104
    },
    {
      "epoch": 0.022243022635546564,
      "grad_norm": 0.0011959448456764221,
      "learning_rate": 4.0619911664136935e-05,
      "loss": 46.0,
      "step": 1105
    },
    {
      "epoch": 0.022263152067795926,
      "grad_norm": 0.0016879923641681671,
      "learning_rate": 4.045789229780935e-05,
      "loss": 46.0,
      "step": 1106
    },
    {
      "epoch": 0.022283281500045292,
      "grad_norm": 0.002184116980060935,
      "learning_rate": 4.029611471771646e-05,
      "loss": 46.0,
      "step": 1107
    },
    {
      "epoch": 0.022303410932294654,
      "grad_norm": 0.0009731932077556849,
      "learning_rate": 4.0134579580798196e-05,
      "loss": 46.0,
      "step": 1108
    },
    {
      "epoch": 0.02232354036454402,
      "grad_norm": 0.0014566800091415644,
      "learning_rate": 3.9973287543010064e-05,
      "loss": 46.0,
      "step": 1109
    },
    {
      "epoch": 0.02234366979679338,
      "grad_norm": 0.0013168570585548878,
      "learning_rate": 3.981223925932036e-05,
      "loss": 46.0,
      "step": 1110
    },
    {
      "epoch": 0.022363799229042743,
      "grad_norm": 0.0014629282522946596,
      "learning_rate": 3.965143538370763e-05,
      "loss": 46.0,
      "step": 1111
    },
    {
      "epoch": 0.02238392866129211,
      "grad_norm": 0.00042389618465676904,
      "learning_rate": 3.949087656915784e-05,
      "loss": 46.0,
      "step": 1112
    },
    {
      "epoch": 0.02240405809354147,
      "grad_norm": 0.0019659867975860834,
      "learning_rate": 3.933056346766194e-05,
      "loss": 46.0,
      "step": 1113
    },
    {
      "epoch": 0.022424187525790836,
      "grad_norm": 0.000607995898462832,
      "learning_rate": 3.9170496730212944e-05,
      "loss": 46.0,
      "step": 1114
    },
    {
      "epoch": 0.022444316958040198,
      "grad_norm": 0.0007831425755284727,
      "learning_rate": 3.901067700680361e-05,
      "loss": 46.0,
      "step": 1115
    },
    {
      "epoch": 0.022464446390289564,
      "grad_norm": 0.0012595128500834107,
      "learning_rate": 3.885110494642349e-05,
      "loss": 46.0,
      "step": 1116
    },
    {
      "epoch": 0.022484575822538926,
      "grad_norm": 0.0016614568885415792,
      "learning_rate": 3.869178119705648e-05,
      "loss": 46.0,
      "step": 1117
    },
    {
      "epoch": 0.022504705254788288,
      "grad_norm": 0.001990359975025058,
      "learning_rate": 3.853270640567821e-05,
      "loss": 46.0,
      "step": 1118
    },
    {
      "epoch": 0.022524834687037653,
      "grad_norm": 0.0012406391324475408,
      "learning_rate": 3.837388121825323e-05,
      "loss": 46.0,
      "step": 1119
    },
    {
      "epoch": 0.022544964119287015,
      "grad_norm": 0.0009122318006120622,
      "learning_rate": 3.8215306279732654e-05,
      "loss": 46.0,
      "step": 1120
    },
    {
      "epoch": 0.02256509355153638,
      "grad_norm": 0.0009612272842787206,
      "learning_rate": 3.805698223405124e-05,
      "loss": 46.0,
      "step": 1121
    },
    {
      "epoch": 0.022585222983785742,
      "grad_norm": 0.0005490859039127827,
      "learning_rate": 3.789890972412512e-05,
      "loss": 46.0,
      "step": 1122
    },
    {
      "epoch": 0.022605352416035104,
      "grad_norm": 0.0007892982102930546,
      "learning_rate": 3.774108939184874e-05,
      "loss": 46.0,
      "step": 1123
    },
    {
      "epoch": 0.02262548184828447,
      "grad_norm": 0.0013402944896370173,
      "learning_rate": 3.7583521878092766e-05,
      "loss": 46.0,
      "step": 1124
    },
    {
      "epoch": 0.022645611280533832,
      "grad_norm": 0.001181807485409081,
      "learning_rate": 3.7426207822701055e-05,
      "loss": 46.0,
      "step": 1125
    },
    {
      "epoch": 0.022665740712783197,
      "grad_norm": 0.0008717543096281588,
      "learning_rate": 3.7269147864488366e-05,
      "loss": 46.0,
      "step": 1126
    },
    {
      "epoch": 0.02268587014503256,
      "grad_norm": 0.0019186298595741391,
      "learning_rate": 3.711234264123747e-05,
      "loss": 46.0,
      "step": 1127
    },
    {
      "epoch": 0.02270599957728192,
      "grad_norm": 0.0011491653276607394,
      "learning_rate": 3.695579278969693e-05,
      "loss": 46.0,
      "step": 1128
    },
    {
      "epoch": 0.022726129009531287,
      "grad_norm": 0.0029499900992959738,
      "learning_rate": 3.679949894557808e-05,
      "loss": 46.0,
      "step": 1129
    },
    {
      "epoch": 0.02274625844178065,
      "grad_norm": 0.0024853902868926525,
      "learning_rate": 3.664346174355285e-05,
      "loss": 46.0,
      "step": 1130
    },
    {
      "epoch": 0.022766387874030014,
      "grad_norm": 0.0006707283901050687,
      "learning_rate": 3.648768181725093e-05,
      "loss": 46.0,
      "step": 1131
    },
    {
      "epoch": 0.022786517306279376,
      "grad_norm": 0.0017571687931194901,
      "learning_rate": 3.6332159799257205e-05,
      "loss": 46.0,
      "step": 1132
    },
    {
      "epoch": 0.02280664673852874,
      "grad_norm": 0.0012608080869540572,
      "learning_rate": 3.617689632110942e-05,
      "loss": 46.0,
      "step": 1133
    },
    {
      "epoch": 0.022826776170778103,
      "grad_norm": 0.002300672000274062,
      "learning_rate": 3.60218920132953e-05,
      "loss": 46.0,
      "step": 1134
    },
    {
      "epoch": 0.022846905603027465,
      "grad_norm": 0.0005551140056923032,
      "learning_rate": 3.586714750525026e-05,
      "loss": 46.0,
      "step": 1135
    },
    {
      "epoch": 0.02286703503527683,
      "grad_norm": 0.0018374003702774644,
      "learning_rate": 3.571266342535461e-05,
      "loss": 46.0,
      "step": 1136
    },
    {
      "epoch": 0.022887164467526193,
      "grad_norm": 0.0016562079545110464,
      "learning_rate": 3.555844040093129e-05,
      "loss": 46.0,
      "step": 1137
    },
    {
      "epoch": 0.02290729389977556,
      "grad_norm": 0.0018418794497847557,
      "learning_rate": 3.540447905824293e-05,
      "loss": 46.0,
      "step": 1138
    },
    {
      "epoch": 0.02292742333202492,
      "grad_norm": 0.00215694191865623,
      "learning_rate": 3.525078002248974e-05,
      "loss": 46.0,
      "step": 1139
    },
    {
      "epoch": 0.022947552764274282,
      "grad_norm": 0.0012009814381599426,
      "learning_rate": 3.509734391780663e-05,
      "loss": 46.0,
      "step": 1140
    },
    {
      "epoch": 0.022967682196523648,
      "grad_norm": 0.0014138978440314531,
      "learning_rate": 3.494417136726091e-05,
      "loss": 46.0,
      "step": 1141
    },
    {
      "epoch": 0.02298781162877301,
      "grad_norm": 0.0006015094695612788,
      "learning_rate": 3.479126299284957e-05,
      "loss": 46.0,
      "step": 1142
    },
    {
      "epoch": 0.023007941061022375,
      "grad_norm": 0.0021540354937314987,
      "learning_rate": 3.463861941549693e-05,
      "loss": 46.0,
      "step": 1143
    },
    {
      "epoch": 0.023028070493271737,
      "grad_norm": 0.0009588591638021171,
      "learning_rate": 3.448624125505194e-05,
      "loss": 46.0,
      "step": 1144
    },
    {
      "epoch": 0.0230481999255211,
      "grad_norm": 0.0016799316508695483,
      "learning_rate": 3.433412913028585e-05,
      "loss": 46.0,
      "step": 1145
    },
    {
      "epoch": 0.023068329357770465,
      "grad_norm": 0.0027795052155852318,
      "learning_rate": 3.418228365888955e-05,
      "loss": 46.0,
      "step": 1146
    },
    {
      "epoch": 0.023088458790019827,
      "grad_norm": 0.001736775622703135,
      "learning_rate": 3.403070545747107e-05,
      "loss": 46.0,
      "step": 1147
    },
    {
      "epoch": 0.023108588222269192,
      "grad_norm": 0.0009725184645503759,
      "learning_rate": 3.3879395141553225e-05,
      "loss": 46.0,
      "step": 1148
    },
    {
      "epoch": 0.023128717654518554,
      "grad_norm": 0.001233744085766375,
      "learning_rate": 3.3728353325570915e-05,
      "loss": 46.0,
      "step": 1149
    },
    {
      "epoch": 0.02314884708676792,
      "grad_norm": 0.0011711488477885723,
      "learning_rate": 3.357758062286883e-05,
      "loss": 46.0,
      "step": 1150
    },
    {
      "epoch": 0.02316897651901728,
      "grad_norm": 0.0012906735064461827,
      "learning_rate": 3.342707764569873e-05,
      "loss": 46.0,
      "step": 1151
    },
    {
      "epoch": 0.023189105951266643,
      "grad_norm": 0.0016261962009593844,
      "learning_rate": 3.327684500521724e-05,
      "loss": 46.0,
      "step": 1152
    },
    {
      "epoch": 0.02320923538351601,
      "grad_norm": 0.0019309535855427384,
      "learning_rate": 3.312688331148299e-05,
      "loss": 46.0,
      "step": 1153
    },
    {
      "epoch": 0.02322936481576537,
      "grad_norm": 0.0018105946946889162,
      "learning_rate": 3.29771931734546e-05,
      "loss": 46.0,
      "step": 1154
    },
    {
      "epoch": 0.023249494248014736,
      "grad_norm": 0.0008983553270809352,
      "learning_rate": 3.282777519898779e-05,
      "loss": 46.0,
      "step": 1155
    },
    {
      "epoch": 0.023269623680264098,
      "grad_norm": 0.0006004321039654315,
      "learning_rate": 3.267862999483318e-05,
      "loss": 46.0,
      "step": 1156
    },
    {
      "epoch": 0.02328975311251346,
      "grad_norm": 0.0009507157956250012,
      "learning_rate": 3.252975816663375e-05,
      "loss": 46.0,
      "step": 1157
    },
    {
      "epoch": 0.023309882544762826,
      "grad_norm": 0.0011066242586821318,
      "learning_rate": 3.238116031892227e-05,
      "loss": 46.0,
      "step": 1158
    },
    {
      "epoch": 0.023330011977012188,
      "grad_norm": 0.0016599443042650819,
      "learning_rate": 3.223283705511908e-05,
      "loss": 46.0,
      "step": 1159
    },
    {
      "epoch": 0.023350141409261553,
      "grad_norm": 0.0013380105374380946,
      "learning_rate": 3.208478897752931e-05,
      "loss": 46.0,
      "step": 1160
    },
    {
      "epoch": 0.023370270841510915,
      "grad_norm": 0.0011801571818068624,
      "learning_rate": 3.193701668734083e-05,
      "loss": 46.0,
      "step": 1161
    },
    {
      "epoch": 0.023390400273760277,
      "grad_norm": 0.00122344761621207,
      "learning_rate": 3.178952078462143e-05,
      "loss": 46.0,
      "step": 1162
    },
    {
      "epoch": 0.023410529706009642,
      "grad_norm": 0.0011970382183790207,
      "learning_rate": 3.164230186831671e-05,
      "loss": 46.0,
      "step": 1163
    },
    {
      "epoch": 0.023430659138259004,
      "grad_norm": 0.0010325321927666664,
      "learning_rate": 3.149536053624735e-05,
      "loss": 46.0,
      "step": 1164
    },
    {
      "epoch": 0.02345078857050837,
      "grad_norm": 0.0016847345978021622,
      "learning_rate": 3.134869738510697e-05,
      "loss": 46.0,
      "step": 1165
    },
    {
      "epoch": 0.023470918002757732,
      "grad_norm": 0.0009486636845394969,
      "learning_rate": 3.1202313010459414e-05,
      "loss": 46.0,
      "step": 1166
    },
    {
      "epoch": 0.023491047435007097,
      "grad_norm": 0.0013974226312711835,
      "learning_rate": 3.1056208006736634e-05,
      "loss": 46.0,
      "step": 1167
    },
    {
      "epoch": 0.02351117686725646,
      "grad_norm": 0.002067785942927003,
      "learning_rate": 3.0910382967235995e-05,
      "loss": 46.0,
      "step": 1168
    },
    {
      "epoch": 0.02353130629950582,
      "grad_norm": 0.0010473356815055013,
      "learning_rate": 3.076483848411803e-05,
      "loss": 46.0,
      "step": 1169
    },
    {
      "epoch": 0.023551435731755187,
      "grad_norm": 0.0015565203502774239,
      "learning_rate": 3.061957514840408e-05,
      "loss": 46.0,
      "step": 1170
    },
    {
      "epoch": 0.02357156516400455,
      "grad_norm": 0.002033325145021081,
      "learning_rate": 3.0474593549973673e-05,
      "loss": 46.0,
      "step": 1171
    },
    {
      "epoch": 0.023591694596253914,
      "grad_norm": 0.001210788730531931,
      "learning_rate": 3.032989427756243e-05,
      "loss": 46.0,
      "step": 1172
    },
    {
      "epoch": 0.023611824028503276,
      "grad_norm": 0.0010564669501036406,
      "learning_rate": 3.0185477918759353e-05,
      "loss": 46.0,
      "step": 1173
    },
    {
      "epoch": 0.023631953460752638,
      "grad_norm": 0.0009370659245178103,
      "learning_rate": 3.0041345060004776e-05,
      "loss": 46.0,
      "step": 1174
    },
    {
      "epoch": 0.023652082893002004,
      "grad_norm": 0.0006624764064326882,
      "learning_rate": 2.989749628658759e-05,
      "loss": 46.0,
      "step": 1175
    },
    {
      "epoch": 0.023672212325251366,
      "grad_norm": 0.002800694201141596,
      "learning_rate": 2.9753932182643272e-05,
      "loss": 46.0,
      "step": 1176
    },
    {
      "epoch": 0.02369234175750073,
      "grad_norm": 0.001191929099150002,
      "learning_rate": 2.9610653331151216e-05,
      "loss": 46.0,
      "step": 1177
    },
    {
      "epoch": 0.023712471189750093,
      "grad_norm": 0.0008834595791995525,
      "learning_rate": 2.9467660313932565e-05,
      "loss": 46.0,
      "step": 1178
    },
    {
      "epoch": 0.023732600621999455,
      "grad_norm": 0.0013140215305611491,
      "learning_rate": 2.932495371164764e-05,
      "loss": 46.0,
      "step": 1179
    },
    {
      "epoch": 0.023732600621999455,
      "eval_loss": 11.5,
      "eval_runtime": 126.7638,
      "eval_samples_per_second": 165.016,
      "eval_steps_per_second": 82.508,
      "step": 1179
    },
    {
      "epoch": 0.02375273005424882,
      "grad_norm": 0.00120261637493968,
      "learning_rate": 2.9182534103793825e-05,
      "loss": 46.0,
      "step": 1180
    },
    {
      "epoch": 0.023772859486498182,
      "grad_norm": 0.0013184483395889401,
      "learning_rate": 2.9040402068702977e-05,
      "loss": 46.0,
      "step": 1181
    },
    {
      "epoch": 0.023792988918747548,
      "grad_norm": 0.0016656328225508332,
      "learning_rate": 2.8898558183539283e-05,
      "loss": 46.0,
      "step": 1182
    },
    {
      "epoch": 0.02381311835099691,
      "grad_norm": 0.0012708749854937196,
      "learning_rate": 2.8757003024296768e-05,
      "loss": 46.0,
      "step": 1183
    },
    {
      "epoch": 0.023833247783246275,
      "grad_norm": 0.0014254259876906872,
      "learning_rate": 2.8615737165796974e-05,
      "loss": 46.0,
      "step": 1184
    },
    {
      "epoch": 0.023853377215495637,
      "grad_norm": 0.0012592601124197245,
      "learning_rate": 2.8474761181686772e-05,
      "loss": 46.0,
      "step": 1185
    },
    {
      "epoch": 0.023873506647745,
      "grad_norm": 0.001736863050609827,
      "learning_rate": 2.8334075644435807e-05,
      "loss": 46.0,
      "step": 1186
    },
    {
      "epoch": 0.023893636079994365,
      "grad_norm": 0.0012428145855665207,
      "learning_rate": 2.8193681125334393e-05,
      "loss": 46.0,
      "step": 1187
    },
    {
      "epoch": 0.023913765512243727,
      "grad_norm": 0.001219391357153654,
      "learning_rate": 2.805357819449098e-05,
      "loss": 46.0,
      "step": 1188
    },
    {
      "epoch": 0.023933894944493092,
      "grad_norm": 0.0018028883496299386,
      "learning_rate": 2.7913767420830105e-05,
      "loss": 46.0,
      "step": 1189
    },
    {
      "epoch": 0.023954024376742454,
      "grad_norm": 0.001996217295527458,
      "learning_rate": 2.7774249372089688e-05,
      "loss": 46.0,
      "step": 1190
    },
    {
      "epoch": 0.023974153808991816,
      "grad_norm": 0.0012688592541962862,
      "learning_rate": 2.7635024614819205e-05,
      "loss": 46.0,
      "step": 1191
    },
    {
      "epoch": 0.02399428324124118,
      "grad_norm": 0.0006761057302355766,
      "learning_rate": 2.749609371437697e-05,
      "loss": 46.0,
      "step": 1192
    },
    {
      "epoch": 0.024014412673490543,
      "grad_norm": 0.0008217204012908041,
      "learning_rate": 2.735745723492815e-05,
      "loss": 46.0,
      "step": 1193
    },
    {
      "epoch": 0.02403454210573991,
      "grad_norm": 0.0008044593851082027,
      "learning_rate": 2.7219115739442215e-05,
      "loss": 46.0,
      "step": 1194
    },
    {
      "epoch": 0.02405467153798927,
      "grad_norm": 0.0011257351143285632,
      "learning_rate": 2.7081069789690883e-05,
      "loss": 46.0,
      "step": 1195
    },
    {
      "epoch": 0.024074800970238633,
      "grad_norm": 0.001486622029915452,
      "learning_rate": 2.694331994624567e-05,
      "loss": 46.0,
      "step": 1196
    },
    {
      "epoch": 0.024094930402488,
      "grad_norm": 0.0016462607309222221,
      "learning_rate": 2.6805866768475663e-05,
      "loss": 46.0,
      "step": 1197
    },
    {
      "epoch": 0.02411505983473736,
      "grad_norm": 0.0006136257434263825,
      "learning_rate": 2.666871081454533e-05,
      "loss": 46.0,
      "step": 1198
    },
    {
      "epoch": 0.024135189266986726,
      "grad_norm": 0.0008609068463556468,
      "learning_rate": 2.6531852641412082e-05,
      "loss": 46.0,
      "step": 1199
    },
    {
      "epoch": 0.024155318699236088,
      "grad_norm": 0.0013050955021753907,
      "learning_rate": 2.6395292804824244e-05,
      "loss": 46.0,
      "step": 1200
    },
    {
      "epoch": 0.024175448131485453,
      "grad_norm": 0.00126195780467242,
      "learning_rate": 2.625903185931853e-05,
      "loss": 46.0,
      "step": 1201
    },
    {
      "epoch": 0.024195577563734815,
      "grad_norm": 0.001260231714695692,
      "learning_rate": 2.612307035821805e-05,
      "loss": 46.0,
      "step": 1202
    },
    {
      "epoch": 0.024215706995984177,
      "grad_norm": 0.0008337153121829033,
      "learning_rate": 2.5987408853629846e-05,
      "loss": 46.0,
      "step": 1203
    },
    {
      "epoch": 0.024235836428233543,
      "grad_norm": 0.0017989326734095812,
      "learning_rate": 2.5852047896442853e-05,
      "loss": 46.0,
      "step": 1204
    },
    {
      "epoch": 0.024255965860482905,
      "grad_norm": 0.0008393581956624985,
      "learning_rate": 2.5716988036325386e-05,
      "loss": 46.0,
      "step": 1205
    },
    {
      "epoch": 0.02427609529273227,
      "grad_norm": 0.001092239166609943,
      "learning_rate": 2.5582229821723257e-05,
      "loss": 46.0,
      "step": 1206
    },
    {
      "epoch": 0.024296224724981632,
      "grad_norm": 0.0011140386341139674,
      "learning_rate": 2.5447773799857244e-05,
      "loss": 46.0,
      "step": 1207
    },
    {
      "epoch": 0.024316354157230994,
      "grad_norm": 0.0014232645044103265,
      "learning_rate": 2.5313620516721105e-05,
      "loss": 46.0,
      "step": 1208
    },
    {
      "epoch": 0.02433648358948036,
      "grad_norm": 0.0015523895854130387,
      "learning_rate": 2.5179770517079093e-05,
      "loss": 46.0,
      "step": 1209
    },
    {
      "epoch": 0.02435661302172972,
      "grad_norm": 0.0020029693841934204,
      "learning_rate": 2.5046224344464074e-05,
      "loss": 46.0,
      "step": 1210
    },
    {
      "epoch": 0.024376742453979087,
      "grad_norm": 0.0009358267998322845,
      "learning_rate": 2.4912982541175033e-05,
      "loss": 46.0,
      "step": 1211
    },
    {
      "epoch": 0.02439687188622845,
      "grad_norm": 0.0009875150863081217,
      "learning_rate": 2.4780045648274975e-05,
      "loss": 46.0,
      "step": 1212
    },
    {
      "epoch": 0.02441700131847781,
      "grad_norm": 0.0015209922567009926,
      "learning_rate": 2.4647414205588827e-05,
      "loss": 46.0,
      "step": 1213
    },
    {
      "epoch": 0.024437130750727176,
      "grad_norm": 0.0008846839773468673,
      "learning_rate": 2.451508875170104e-05,
      "loss": 46.0,
      "step": 1214
    },
    {
      "epoch": 0.024457260182976538,
      "grad_norm": 0.0009960222523659468,
      "learning_rate": 2.4383069823953663e-05,
      "loss": 46.0,
      "step": 1215
    },
    {
      "epoch": 0.024477389615225904,
      "grad_norm": 0.001352517050690949,
      "learning_rate": 2.4251357958443867e-05,
      "loss": 46.0,
      "step": 1216
    },
    {
      "epoch": 0.024497519047475266,
      "grad_norm": 0.0013506961986422539,
      "learning_rate": 2.4119953690022025e-05,
      "loss": 46.0,
      "step": 1217
    },
    {
      "epoch": 0.02451764847972463,
      "grad_norm": 0.0009574260911904275,
      "learning_rate": 2.3988857552289333e-05,
      "loss": 46.0,
      "step": 1218
    },
    {
      "epoch": 0.024537777911973993,
      "grad_norm": 0.0007269601919688284,
      "learning_rate": 2.3858070077595908e-05,
      "loss": 46.0,
      "step": 1219
    },
    {
      "epoch": 0.024557907344223355,
      "grad_norm": 0.0006699137156829238,
      "learning_rate": 2.372759179703822e-05,
      "loss": 46.0,
      "step": 1220
    },
    {
      "epoch": 0.02457803677647272,
      "grad_norm": 0.0015291008166968822,
      "learning_rate": 2.3597423240457395e-05,
      "loss": 46.0,
      "step": 1221
    },
    {
      "epoch": 0.024598166208722082,
      "grad_norm": 0.002010623225942254,
      "learning_rate": 2.3467564936436703e-05,
      "loss": 46.0,
      "step": 1222
    },
    {
      "epoch": 0.024618295640971448,
      "grad_norm": 0.0008198167197406292,
      "learning_rate": 2.3338017412299663e-05,
      "loss": 46.0,
      "step": 1223
    },
    {
      "epoch": 0.02463842507322081,
      "grad_norm": 0.0006754023488610983,
      "learning_rate": 2.3208781194107664e-05,
      "loss": 46.0,
      "step": 1224
    },
    {
      "epoch": 0.024658554505470172,
      "grad_norm": 0.0009974197018891573,
      "learning_rate": 2.3079856806658107e-05,
      "loss": 46.0,
      "step": 1225
    },
    {
      "epoch": 0.024678683937719537,
      "grad_norm": 0.0012742577819153666,
      "learning_rate": 2.2951244773481995e-05,
      "loss": 46.0,
      "step": 1226
    },
    {
      "epoch": 0.0246988133699689,
      "grad_norm": 0.0013213262427598238,
      "learning_rate": 2.2822945616841963e-05,
      "loss": 46.0,
      "step": 1227
    },
    {
      "epoch": 0.024718942802218265,
      "grad_norm": 0.0006177601171657443,
      "learning_rate": 2.2694959857730204e-05,
      "loss": 46.0,
      "step": 1228
    },
    {
      "epoch": 0.024739072234467627,
      "grad_norm": 0.0009409788763150573,
      "learning_rate": 2.256728801586616e-05,
      "loss": 46.0,
      "step": 1229
    },
    {
      "epoch": 0.02475920166671699,
      "grad_norm": 0.001638015266507864,
      "learning_rate": 2.2439930609694658e-05,
      "loss": 46.0,
      "step": 1230
    },
    {
      "epoch": 0.024779331098966354,
      "grad_norm": 0.000653863069601357,
      "learning_rate": 2.231288815638356e-05,
      "loss": 46.0,
      "step": 1231
    },
    {
      "epoch": 0.024799460531215716,
      "grad_norm": 0.001285205944441259,
      "learning_rate": 2.2186161171821885e-05,
      "loss": 46.0,
      "step": 1232
    },
    {
      "epoch": 0.02481958996346508,
      "grad_norm": 0.001490448834374547,
      "learning_rate": 2.2059750170617523e-05,
      "loss": 46.0,
      "step": 1233
    },
    {
      "epoch": 0.024839719395714444,
      "grad_norm": 0.0018423368455842137,
      "learning_rate": 2.1933655666095275e-05,
      "loss": 46.0,
      "step": 1234
    },
    {
      "epoch": 0.02485984882796381,
      "grad_norm": 0.0011196645209565759,
      "learning_rate": 2.180787817029468e-05,
      "loss": 46.0,
      "step": 1235
    },
    {
      "epoch": 0.02487997826021317,
      "grad_norm": 0.000826796458568424,
      "learning_rate": 2.168241819396808e-05,
      "loss": 46.0,
      "step": 1236
    },
    {
      "epoch": 0.024900107692462533,
      "grad_norm": 0.0005586759652942419,
      "learning_rate": 2.1557276246578307e-05,
      "loss": 46.0,
      "step": 1237
    },
    {
      "epoch": 0.0249202371247119,
      "grad_norm": 0.0011236952850595117,
      "learning_rate": 2.1432452836296914e-05,
      "loss": 46.0,
      "step": 1238
    },
    {
      "epoch": 0.02494036655696126,
      "grad_norm": 0.001449284260161221,
      "learning_rate": 2.1307948470001782e-05,
      "loss": 46.0,
      "step": 1239
    },
    {
      "epoch": 0.024960495989210626,
      "grad_norm": 0.0007015886367298663,
      "learning_rate": 2.1183763653275378e-05,
      "loss": 46.0,
      "step": 1240
    },
    {
      "epoch": 0.024980625421459988,
      "grad_norm": 0.0011342605575919151,
      "learning_rate": 2.1059898890402473e-05,
      "loss": 46.0,
      "step": 1241
    },
    {
      "epoch": 0.02500075485370935,
      "grad_norm": 0.0012565052602440119,
      "learning_rate": 2.0936354684368163e-05,
      "loss": 46.0,
      "step": 1242
    },
    {
      "epoch": 0.025020884285958715,
      "grad_norm": 0.001035231165587902,
      "learning_rate": 2.0813131536855913e-05,
      "loss": 46.0,
      "step": 1243
    },
    {
      "epoch": 0.025041013718208077,
      "grad_norm": 0.0019417139701545238,
      "learning_rate": 2.0690229948245365e-05,
      "loss": 46.0,
      "step": 1244
    },
    {
      "epoch": 0.025061143150457443,
      "grad_norm": 0.0015144539065659046,
      "learning_rate": 2.0567650417610485e-05,
      "loss": 46.0,
      "step": 1245
    },
    {
      "epoch": 0.025081272582706805,
      "grad_norm": 0.0014989918563514948,
      "learning_rate": 2.0445393442717308e-05,
      "loss": 46.0,
      "step": 1246
    },
    {
      "epoch": 0.025101402014956167,
      "grad_norm": 0.0018054774263873696,
      "learning_rate": 2.032345952002218e-05,
      "loss": 46.0,
      "step": 1247
    },
    {
      "epoch": 0.025121531447205532,
      "grad_norm": 0.0009529809467494488,
      "learning_rate": 2.0201849144669525e-05,
      "loss": 46.0,
      "step": 1248
    },
    {
      "epoch": 0.025141660879454894,
      "grad_norm": 0.002208658494055271,
      "learning_rate": 2.0080562810489935e-05,
      "loss": 46.0,
      "step": 1249
    },
    {
      "epoch": 0.02516179031170426,
      "grad_norm": 0.0007078711641952395,
      "learning_rate": 1.995960100999814e-05,
      "loss": 46.0,
      "step": 1250
    },
    {
      "epoch": 0.02518191974395362,
      "grad_norm": 0.0010136510245501995,
      "learning_rate": 1.983896423439109e-05,
      "loss": 46.0,
      "step": 1251
    },
    {
      "epoch": 0.025202049176202987,
      "grad_norm": 0.0018538066651672125,
      "learning_rate": 1.9718652973545758e-05,
      "loss": 46.0,
      "step": 1252
    },
    {
      "epoch": 0.02522217860845235,
      "grad_norm": 0.0011417543282732368,
      "learning_rate": 1.9598667716017417e-05,
      "loss": 46.0,
      "step": 1253
    },
    {
      "epoch": 0.02524230804070171,
      "grad_norm": 0.000996634247712791,
      "learning_rate": 1.947900894903739e-05,
      "loss": 46.0,
      "step": 1254
    },
    {
      "epoch": 0.025262437472951076,
      "grad_norm": 0.001392628182657063,
      "learning_rate": 1.9359677158511334e-05,
      "loss": 46.0,
      "step": 1255
    },
    {
      "epoch": 0.02528256690520044,
      "grad_norm": 0.000720780692063272,
      "learning_rate": 1.9240672829017014e-05,
      "loss": 46.0,
      "step": 1256
    },
    {
      "epoch": 0.025302696337449804,
      "grad_norm": 0.001160190673545003,
      "learning_rate": 1.9121996443802482e-05,
      "loss": 46.0,
      "step": 1257
    },
    {
      "epoch": 0.025322825769699166,
      "grad_norm": 0.002251293743029237,
      "learning_rate": 1.9003648484784166e-05,
      "loss": 46.0,
      "step": 1258
    },
    {
      "epoch": 0.025342955201948528,
      "grad_norm": 0.00043235233169980347,
      "learning_rate": 1.8885629432544717e-05,
      "loss": 46.0,
      "step": 1259
    },
    {
      "epoch": 0.025363084634197893,
      "grad_norm": 0.0007145190611481667,
      "learning_rate": 1.8767939766331287e-05,
      "loss": 46.0,
      "step": 1260
    },
    {
      "epoch": 0.025383214066447255,
      "grad_norm": 0.0006261473754420877,
      "learning_rate": 1.865057996405336e-05,
      "loss": 46.0,
      "step": 1261
    },
    {
      "epoch": 0.02540334349869662,
      "grad_norm": 0.0010795745765790343,
      "learning_rate": 1.8533550502281015e-05,
      "loss": 46.0,
      "step": 1262
    },
    {
      "epoch": 0.025423472930945983,
      "grad_norm": 0.0009830056224018335,
      "learning_rate": 1.8416851856242868e-05,
      "loss": 46.0,
      "step": 1263
    },
    {
      "epoch": 0.025443602363195345,
      "grad_norm": 0.0014814147725701332,
      "learning_rate": 1.83004844998241e-05,
      "loss": 46.0,
      "step": 1264
    },
    {
      "epoch": 0.02546373179544471,
      "grad_norm": 0.0011380594223737717,
      "learning_rate": 1.8184448905564743e-05,
      "loss": 46.0,
      "step": 1265
    },
    {
      "epoch": 0.025483861227694072,
      "grad_norm": 0.0006726674037054181,
      "learning_rate": 1.8068745544657484e-05,
      "loss": 46.0,
      "step": 1266
    },
    {
      "epoch": 0.025503990659943437,
      "grad_norm": 0.0017750106053426862,
      "learning_rate": 1.7953374886946006e-05,
      "loss": 46.0,
      "step": 1267
    },
    {
      "epoch": 0.0255241200921928,
      "grad_norm": 0.0011626784689724445,
      "learning_rate": 1.7838337400922855e-05,
      "loss": 46.0,
      "step": 1268
    },
    {
      "epoch": 0.025544249524442165,
      "grad_norm": 0.0009647434344515204,
      "learning_rate": 1.772363355372776e-05,
      "loss": 46.0,
      "step": 1269
    },
    {
      "epoch": 0.025564378956691527,
      "grad_norm": 0.002181010087952018,
      "learning_rate": 1.7609263811145537e-05,
      "loss": 46.0,
      "step": 1270
    },
    {
      "epoch": 0.02558450838894089,
      "grad_norm": 0.0012630668934434652,
      "learning_rate": 1.74952286376043e-05,
      "loss": 46.0,
      "step": 1271
    },
    {
      "epoch": 0.025604637821190254,
      "grad_norm": 0.0009720325469970703,
      "learning_rate": 1.738152849617356e-05,
      "loss": 46.0,
      "step": 1272
    },
    {
      "epoch": 0.025624767253439616,
      "grad_norm": 0.0007137717329896986,
      "learning_rate": 1.72681638485624e-05,
      "loss": 46.0,
      "step": 1273
    },
    {
      "epoch": 0.02564489668568898,
      "grad_norm": 0.0014153624651953578,
      "learning_rate": 1.715513515511743e-05,
      "loss": 46.0,
      "step": 1274
    },
    {
      "epoch": 0.025665026117938344,
      "grad_norm": 0.001465531880967319,
      "learning_rate": 1.7042442874821164e-05,
      "loss": 46.0,
      "step": 1275
    },
    {
      "epoch": 0.025685155550187706,
      "grad_norm": 0.0011013116454705596,
      "learning_rate": 1.693008746528988e-05,
      "loss": 46.0,
      "step": 1276
    },
    {
      "epoch": 0.02570528498243707,
      "grad_norm": 0.0005420059314928949,
      "learning_rate": 1.681806938277205e-05,
      "loss": 46.0,
      "step": 1277
    },
    {
      "epoch": 0.025725414414686433,
      "grad_norm": 0.0007358815055340528,
      "learning_rate": 1.6706389082146244e-05,
      "loss": 46.0,
      "step": 1278
    },
    {
      "epoch": 0.0257455438469358,
      "grad_norm": 0.00043846582411788404,
      "learning_rate": 1.6595047016919373e-05,
      "loss": 46.0,
      "step": 1279
    },
    {
      "epoch": 0.02576567327918516,
      "grad_norm": 0.0007551101734861732,
      "learning_rate": 1.6484043639224955e-05,
      "loss": 46.0,
      "step": 1280
    },
    {
      "epoch": 0.025785802711434522,
      "grad_norm": 0.0011386704863980412,
      "learning_rate": 1.6373379399821033e-05,
      "loss": 46.0,
      "step": 1281
    },
    {
      "epoch": 0.025805932143683888,
      "grad_norm": 0.0014110167976468801,
      "learning_rate": 1.6263054748088658e-05,
      "loss": 46.0,
      "step": 1282
    },
    {
      "epoch": 0.02582606157593325,
      "grad_norm": 0.0017660473240539432,
      "learning_rate": 1.6153070132029723e-05,
      "loss": 46.0,
      "step": 1283
    },
    {
      "epoch": 0.025846191008182615,
      "grad_norm": 0.0021693487651646137,
      "learning_rate": 1.604342599826548e-05,
      "loss": 46.0,
      "step": 1284
    },
    {
      "epoch": 0.025866320440431977,
      "grad_norm": 0.001375765772536397,
      "learning_rate": 1.593412279203447e-05,
      "loss": 46.0,
      "step": 1285
    },
    {
      "epoch": 0.025886449872681343,
      "grad_norm": 0.001402435707859695,
      "learning_rate": 1.5825160957190798e-05,
      "loss": 46.0,
      "step": 1286
    },
    {
      "epoch": 0.025906579304930705,
      "grad_norm": 0.002132172929123044,
      "learning_rate": 1.5716540936202363e-05,
      "loss": 46.0,
      "step": 1287
    },
    {
      "epoch": 0.025926708737180067,
      "grad_norm": 0.00138960184995085,
      "learning_rate": 1.5608263170149095e-05,
      "loss": 46.0,
      "step": 1288
    },
    {
      "epoch": 0.025946838169429432,
      "grad_norm": 0.0014584781602025032,
      "learning_rate": 1.5500328098721017e-05,
      "loss": 46.0,
      "step": 1289
    },
    {
      "epoch": 0.025966967601678794,
      "grad_norm": 0.0017797322943806648,
      "learning_rate": 1.5392736160216635e-05,
      "loss": 46.0,
      "step": 1290
    },
    {
      "epoch": 0.02598709703392816,
      "grad_norm": 0.002001388929784298,
      "learning_rate": 1.5285487791541e-05,
      "loss": 46.0,
      "step": 1291
    },
    {
      "epoch": 0.02600722646617752,
      "grad_norm": 0.0019802197348326445,
      "learning_rate": 1.5178583428204085e-05,
      "loss": 46.0,
      "step": 1292
    },
    {
      "epoch": 0.026027355898426884,
      "grad_norm": 0.0018057803390547633,
      "learning_rate": 1.5072023504318867e-05,
      "loss": 46.0,
      "step": 1293
    },
    {
      "epoch": 0.02604748533067625,
      "grad_norm": 0.0009447969496250153,
      "learning_rate": 1.496580845259965e-05,
      "loss": 46.0,
      "step": 1294
    },
    {
      "epoch": 0.02606761476292561,
      "grad_norm": 0.0007863205391913652,
      "learning_rate": 1.4859938704360365e-05,
      "loss": 46.0,
      "step": 1295
    },
    {
      "epoch": 0.026087744195174976,
      "grad_norm": 0.0012318964581936598,
      "learning_rate": 1.475441468951263e-05,
      "loss": 46.0,
      "step": 1296
    },
    {
      "epoch": 0.02610787362742434,
      "grad_norm": 0.001262698438949883,
      "learning_rate": 1.4649236836564263e-05,
      "loss": 46.0,
      "step": 1297
    },
    {
      "epoch": 0.0261280030596737,
      "grad_norm": 0.0010360804153606296,
      "learning_rate": 1.4544405572617259e-05,
      "loss": 46.0,
      "step": 1298
    },
    {
      "epoch": 0.026148132491923066,
      "grad_norm": 0.001501108636148274,
      "learning_rate": 1.4439921323366323e-05,
      "loss": 46.0,
      "step": 1299
    },
    {
      "epoch": 0.026168261924172428,
      "grad_norm": 0.0006216936744749546,
      "learning_rate": 1.4335784513096929e-05,
      "loss": 46.0,
      "step": 1300
    },
    {
      "epoch": 0.026188391356421793,
      "grad_norm": 0.0012842519208788872,
      "learning_rate": 1.4231995564683732e-05,
      "loss": 46.0,
      "step": 1301
    },
    {
      "epoch": 0.026208520788671155,
      "grad_norm": 0.0013369874795898795,
      "learning_rate": 1.412855489958873e-05,
      "loss": 46.0,
      "step": 1302
    },
    {
      "epoch": 0.02622865022092052,
      "grad_norm": 0.0011299944017082453,
      "learning_rate": 1.4025462937859768e-05,
      "loss": 46.0,
      "step": 1303
    },
    {
      "epoch": 0.026248779653169883,
      "grad_norm": 0.0018812668276950717,
      "learning_rate": 1.3922720098128527e-05,
      "loss": 46.0,
      "step": 1304
    },
    {
      "epoch": 0.026268909085419245,
      "grad_norm": 0.001090607256628573,
      "learning_rate": 1.3820326797609129e-05,
      "loss": 46.0,
      "step": 1305
    },
    {
      "epoch": 0.02628903851766861,
      "grad_norm": 0.0012601654743775725,
      "learning_rate": 1.371828345209618e-05,
      "loss": 46.0,
      "step": 1306
    },
    {
      "epoch": 0.026309167949917972,
      "grad_norm": 0.0014517259551212192,
      "learning_rate": 1.361659047596332e-05,
      "loss": 46.0,
      "step": 1307
    },
    {
      "epoch": 0.026329297382167337,
      "grad_norm": 0.0019068201072514057,
      "learning_rate": 1.3515248282161319e-05,
      "loss": 46.0,
      "step": 1308
    },
    {
      "epoch": 0.0263494268144167,
      "grad_norm": 0.0016664571594446898,
      "learning_rate": 1.3414257282216535e-05,
      "loss": 46.0,
      "step": 1309
    },
    {
      "epoch": 0.02636955624666606,
      "grad_norm": 0.0009046939085237682,
      "learning_rate": 1.3313617886229269e-05,
      "loss": 46.0,
      "step": 1310
    },
    {
      "epoch": 0.026389685678915427,
      "grad_norm": 0.0005408989964053035,
      "learning_rate": 1.3213330502871956e-05,
      "loss": 46.0,
      "step": 1311
    },
    {
      "epoch": 0.02640981511116479,
      "grad_norm": 0.0008995769894681871,
      "learning_rate": 1.3113395539387674e-05,
      "loss": 46.0,
      "step": 1312
    },
    {
      "epoch": 0.026429944543414154,
      "grad_norm": 0.0012701263185590506,
      "learning_rate": 1.3013813401588315e-05,
      "loss": 46.0,
      "step": 1313
    },
    {
      "epoch": 0.026450073975663516,
      "grad_norm": 0.001226755790412426,
      "learning_rate": 1.2914584493853144e-05,
      "loss": 46.0,
      "step": 1314
    },
    {
      "epoch": 0.026470203407912878,
      "grad_norm": 0.0015894804382696748,
      "learning_rate": 1.2815709219126959e-05,
      "loss": 46.0,
      "step": 1315
    },
    {
      "epoch": 0.026490332840162244,
      "grad_norm": 0.0014253195840865374,
      "learning_rate": 1.2717187978918544e-05,
      "loss": 46.0,
      "step": 1316
    },
    {
      "epoch": 0.026510462272411606,
      "grad_norm": 0.0009140470647253096,
      "learning_rate": 1.2619021173299051e-05,
      "loss": 46.0,
      "step": 1317
    },
    {
      "epoch": 0.02653059170466097,
      "grad_norm": 0.0010910599958151579,
      "learning_rate": 1.2521209200900397e-05,
      "loss": 46.0,
      "step": 1318
    },
    {
      "epoch": 0.026550721136910333,
      "grad_norm": 0.000989207299426198,
      "learning_rate": 1.2423752458913518e-05,
      "loss": 46.0,
      "step": 1319
    },
    {
      "epoch": 0.0265708505691597,
      "grad_norm": 0.0009602408390492201,
      "learning_rate": 1.2326651343086937e-05,
      "loss": 46.0,
      "step": 1320
    },
    {
      "epoch": 0.02659098000140906,
      "grad_norm": 0.0004408732638694346,
      "learning_rate": 1.2229906247724998e-05,
      "loss": 46.0,
      "step": 1321
    },
    {
      "epoch": 0.026611109433658423,
      "grad_norm": 0.0010518889175727963,
      "learning_rate": 1.2133517565686381e-05,
      "loss": 46.0,
      "step": 1322
    },
    {
      "epoch": 0.026631238865907788,
      "grad_norm": 0.002272763755172491,
      "learning_rate": 1.2037485688382421e-05,
      "loss": 46.0,
      "step": 1323
    },
    {
      "epoch": 0.02665136829815715,
      "grad_norm": 0.0020480677485466003,
      "learning_rate": 1.1941811005775538e-05,
      "loss": 46.0,
      "step": 1324
    },
    {
      "epoch": 0.026671497730406515,
      "grad_norm": 0.002253229497000575,
      "learning_rate": 1.1846493906377743e-05,
      "loss": 46.0,
      "step": 1325
    },
    {
      "epoch": 0.026691627162655877,
      "grad_norm": 0.0012804159196093678,
      "learning_rate": 1.1751534777248885e-05,
      "loss": 46.0,
      "step": 1326
    },
    {
      "epoch": 0.02671175659490524,
      "grad_norm": 0.0019482570933178067,
      "learning_rate": 1.1656934003995302e-05,
      "loss": 46.0,
      "step": 1327
    },
    {
      "epoch": 0.026731886027154605,
      "grad_norm": 0.0008674189448356628,
      "learning_rate": 1.1562691970768014e-05,
      "loss": 46.0,
      "step": 1328
    },
    {
      "epoch": 0.026752015459403967,
      "grad_norm": 0.0017704269848763943,
      "learning_rate": 1.1468809060261399e-05,
      "loss": 46.0,
      "step": 1329
    },
    {
      "epoch": 0.026772144891653332,
      "grad_norm": 0.0014249221421778202,
      "learning_rate": 1.1375285653711399e-05,
      "loss": 46.0,
      "step": 1330
    },
    {
      "epoch": 0.026792274323902694,
      "grad_norm": 0.0009925938211381435,
      "learning_rate": 1.1282122130894202e-05,
      "loss": 46.0,
      "step": 1331
    },
    {
      "epoch": 0.026812403756152056,
      "grad_norm": 0.0008217705762945116,
      "learning_rate": 1.1189318870124531e-05,
      "loss": 46.0,
      "step": 1332
    },
    {
      "epoch": 0.02683253318840142,
      "grad_norm": 0.002136779949069023,
      "learning_rate": 1.1096876248254228e-05,
      "loss": 46.0,
      "step": 1333
    },
    {
      "epoch": 0.026852662620650784,
      "grad_norm": 0.0014067484298720956,
      "learning_rate": 1.1004794640670602e-05,
      "loss": 46.0,
      "step": 1334
    },
    {
      "epoch": 0.02687279205290015,
      "grad_norm": 0.0008450828609056771,
      "learning_rate": 1.0913074421295022e-05,
      "loss": 46.0,
      "step": 1335
    },
    {
      "epoch": 0.02689292148514951,
      "grad_norm": 0.0005786415422335267,
      "learning_rate": 1.0821715962581302e-05,
      "loss": 46.0,
      "step": 1336
    },
    {
      "epoch": 0.026913050917398876,
      "grad_norm": 0.0012445749016478658,
      "learning_rate": 1.0730719635514296e-05,
      "loss": 46.0,
      "step": 1337
    },
    {
      "epoch": 0.02693318034964824,
      "grad_norm": 0.0017225752817466855,
      "learning_rate": 1.0640085809608257e-05,
      "loss": 46.0,
      "step": 1338
    },
    {
      "epoch": 0.0269533097818976,
      "grad_norm": 0.0006431869696825743,
      "learning_rate": 1.0549814852905427e-05,
      "loss": 46.0,
      "step": 1339
    },
    {
      "epoch": 0.026973439214146966,
      "grad_norm": 0.0006889837677590549,
      "learning_rate": 1.0459907131974578e-05,
      "loss": 46.0,
      "step": 1340
    },
    {
      "epoch": 0.026993568646396328,
      "grad_norm": 0.0007062877994030714,
      "learning_rate": 1.0370363011909368e-05,
      "loss": 46.0,
      "step": 1341
    },
    {
      "epoch": 0.027013698078645693,
      "grad_norm": 0.0020865912083536386,
      "learning_rate": 1.0281182856327075e-05,
      "loss": 46.0,
      "step": 1342
    },
    {
      "epoch": 0.027033827510895055,
      "grad_norm": 0.000669913541059941,
      "learning_rate": 1.019236702736689e-05,
      "loss": 46.0,
      "step": 1343
    },
    {
      "epoch": 0.027053956943144417,
      "grad_norm": 0.001416582614183426,
      "learning_rate": 1.0103915885688686e-05,
      "loss": 46.0,
      "step": 1344
    },
    {
      "epoch": 0.027074086375393783,
      "grad_norm": 0.0017289246898144484,
      "learning_rate": 1.0015829790471288e-05,
      "loss": 46.0,
      "step": 1345
    },
    {
      "epoch": 0.027094215807643145,
      "grad_norm": 0.0016691044438630342,
      "learning_rate": 9.928109099411265e-06,
      "loss": 46.0,
      "step": 1346
    },
    {
      "epoch": 0.02711434523989251,
      "grad_norm": 0.0008188973879441619,
      "learning_rate": 9.840754168721289e-06,
      "loss": 46.0,
      "step": 1347
    },
    {
      "epoch": 0.027134474672141872,
      "grad_norm": 0.0006958742160350084,
      "learning_rate": 9.753765353128863e-06,
      "loss": 46.0,
      "step": 1348
    },
    {
      "epoch": 0.027154604104391234,
      "grad_norm": 0.0026792276185005903,
      "learning_rate": 9.667143005874679e-06,
      "loss": 46.0,
      "step": 1349
    },
    {
      "epoch": 0.0271747335366406,
      "grad_norm": 0.0012797150993719697,
      "learning_rate": 9.580887478711376e-06,
      "loss": 46.0,
      "step": 1350
    },
    {
      "epoch": 0.02719486296888996,
      "grad_norm": 0.0014746271772310138,
      "learning_rate": 9.494999121901948e-06,
      "loss": 46.0,
      "step": 1351
    },
    {
      "epoch": 0.027214992401139327,
      "grad_norm": 0.0014366628602147102,
      "learning_rate": 9.409478284218465e-06,
      "loss": 46.0,
      "step": 1352
    },
    {
      "epoch": 0.02723512183338869,
      "grad_norm": 0.0005030794418416917,
      "learning_rate": 9.32432531294054e-06,
      "loss": 46.0,
      "step": 1353
    },
    {
      "epoch": 0.027255251265638054,
      "grad_norm": 0.001364423893392086,
      "learning_rate": 9.239540553853987e-06,
      "loss": 46.0,
      "step": 1354
    },
    {
      "epoch": 0.027275380697887416,
      "grad_norm": 0.002308462280780077,
      "learning_rate": 9.155124351249434e-06,
      "loss": 46.0,
      "step": 1355
    },
    {
      "epoch": 0.02729551013013678,
      "grad_norm": 0.0006031619850546122,
      "learning_rate": 9.071077047920807e-06,
      "loss": 46.0,
      "step": 1356
    },
    {
      "epoch": 0.027315639562386144,
      "grad_norm": 0.0008249651291407645,
      "learning_rate": 8.987398985164108e-06,
      "loss": 46.0,
      "step": 1357
    },
    {
      "epoch": 0.027335768994635506,
      "grad_norm": 0.0013380757300183177,
      "learning_rate": 8.904090502775875e-06,
      "loss": 46.0,
      "step": 1358
    },
    {
      "epoch": 0.02735589842688487,
      "grad_norm": 0.0014677924336865544,
      "learning_rate": 8.821151939051953e-06,
      "loss": 46.0,
      "step": 1359
    },
    {
      "epoch": 0.027376027859134233,
      "grad_norm": 0.0010561698582023382,
      "learning_rate": 8.73858363078589e-06,
      "loss": 46.0,
      "step": 1360
    },
    {
      "epoch": 0.027396157291383595,
      "grad_norm": 0.0006301281973719597,
      "learning_rate": 8.656385913267872e-06,
      "loss": 46.0,
      "step": 1361
    },
    {
      "epoch": 0.02741628672363296,
      "grad_norm": 0.0013005957007408142,
      "learning_rate": 8.574559120283099e-06,
      "loss": 46.0,
      "step": 1362
    },
    {
      "epoch": 0.027436416155882323,
      "grad_norm": 0.0025409117806702852,
      "learning_rate": 8.493103584110595e-06,
      "loss": 46.0,
      "step": 1363
    },
    {
      "epoch": 0.027456545588131688,
      "grad_norm": 0.0010066272225230932,
      "learning_rate": 8.412019635521784e-06,
      "loss": 46.0,
      "step": 1364
    },
    {
      "epoch": 0.02747667502038105,
      "grad_norm": 0.0008767693652771413,
      "learning_rate": 8.331307603779137e-06,
      "loss": 46.0,
      "step": 1365
    },
    {
      "epoch": 0.027496804452630412,
      "grad_norm": 0.0005324966041371226,
      "learning_rate": 8.250967816634914e-06,
      "loss": 46.0,
      "step": 1366
    },
    {
      "epoch": 0.027516933884879777,
      "grad_norm": 0.0013536482583731413,
      "learning_rate": 8.171000600329682e-06,
      "loss": 46.0,
      "step": 1367
    },
    {
      "epoch": 0.02753706331712914,
      "grad_norm": 0.0011457927757874131,
      "learning_rate": 8.091406279591207e-06,
      "loss": 46.0,
      "step": 1368
    },
    {
      "epoch": 0.027557192749378505,
      "grad_norm": 0.0013843755004927516,
      "learning_rate": 8.012185177632914e-06,
      "loss": 46.0,
      "step": 1369
    },
    {
      "epoch": 0.027577322181627867,
      "grad_norm": 0.0013142600655555725,
      "learning_rate": 7.933337616152747e-06,
      "loss": 46.0,
      "step": 1370
    },
    {
      "epoch": 0.027597451613877232,
      "grad_norm": 0.0014753196155652404,
      "learning_rate": 7.854863915331745e-06,
      "loss": 46.0,
      "step": 1371
    },
    {
      "epoch": 0.027617581046126594,
      "grad_norm": 0.0005986293544992805,
      "learning_rate": 7.776764393832825e-06,
      "loss": 46.0,
      "step": 1372
    },
    {
      "epoch": 0.027637710478375956,
      "grad_norm": 0.001749454764649272,
      "learning_rate": 7.69903936879941e-06,
      "loss": 46.0,
      "step": 1373
    },
    {
      "epoch": 0.02765783991062532,
      "grad_norm": 0.0005876508657820523,
      "learning_rate": 7.6216891558542395e-06,
      "loss": 46.0,
      "step": 1374
    },
    {
      "epoch": 0.027677969342874684,
      "grad_norm": 0.0010496970498934388,
      "learning_rate": 7.54471406909798e-06,
      "loss": 46.0,
      "step": 1375
    },
    {
      "epoch": 0.02769809877512405,
      "grad_norm": 0.0011588014895096421,
      "learning_rate": 7.468114421107997e-06,
      "loss": 46.0,
      "step": 1376
    },
    {
      "epoch": 0.02771822820737341,
      "grad_norm": 0.002383692190051079,
      "learning_rate": 7.391890522937139e-06,
      "loss": 46.0,
      "step": 1377
    },
    {
      "epoch": 0.027738357639622773,
      "grad_norm": 0.0012233968591317534,
      "learning_rate": 7.3160426841123676e-06,
      "loss": 46.0,
      "step": 1378
    },
    {
      "epoch": 0.02775848707187214,
      "grad_norm": 0.0014729154063388705,
      "learning_rate": 7.240571212633618e-06,
      "loss": 46.0,
      "step": 1379
    },
    {
      "epoch": 0.0277786165041215,
      "grad_norm": 0.0009423243463970721,
      "learning_rate": 7.165476414972416e-06,
      "loss": 46.0,
      "step": 1380
    },
    {
      "epoch": 0.027798745936370866,
      "grad_norm": 0.0012143378844484687,
      "learning_rate": 7.090758596070801e-06,
      "loss": 46.0,
      "step": 1381
    },
    {
      "epoch": 0.027818875368620228,
      "grad_norm": 0.0015902521554380655,
      "learning_rate": 7.016418059339879e-06,
      "loss": 46.0,
      "step": 1382
    },
    {
      "epoch": 0.02783900480086959,
      "grad_norm": 0.001080973306670785,
      "learning_rate": 6.942455106658785e-06,
      "loss": 46.0,
      "step": 1383
    },
    {
      "epoch": 0.027859134233118955,
      "grad_norm": 0.0013885076623409986,
      "learning_rate": 6.868870038373332e-06,
      "loss": 46.0,
      "step": 1384
    },
    {
      "epoch": 0.027879263665368317,
      "grad_norm": 0.0010777899296954274,
      "learning_rate": 6.795663153294896e-06,
      "loss": 46.0,
      "step": 1385
    },
    {
      "epoch": 0.027899393097617683,
      "grad_norm": 0.0009947263170033693,
      "learning_rate": 6.7228347486990365e-06,
      "loss": 46.0,
      "step": 1386
    },
    {
      "epoch": 0.027919522529867045,
      "grad_norm": 0.0010311356745660305,
      "learning_rate": 6.6503851203245205e-06,
      "loss": 46.0,
      "step": 1387
    },
    {
      "epoch": 0.02793965196211641,
      "grad_norm": 0.0013068821281194687,
      "learning_rate": 6.57831456237189e-06,
      "loss": 46.0,
      "step": 1388
    },
    {
      "epoch": 0.027959781394365772,
      "grad_norm": 0.0010487777180969715,
      "learning_rate": 6.506623367502418e-06,
      "loss": 46.0,
      "step": 1389
    },
    {
      "epoch": 0.027979910826615134,
      "grad_norm": 0.0008867370779626071,
      "learning_rate": 6.4353118268368986e-06,
      "loss": 46.0,
      "step": 1390
    },
    {
      "epoch": 0.0280000402588645,
      "grad_norm": 0.0019279102561995387,
      "learning_rate": 6.3643802299543696e-06,
      "loss": 46.0,
      "step": 1391
    },
    {
      "epoch": 0.02802016969111386,
      "grad_norm": 0.001558710471726954,
      "learning_rate": 6.293828864891105e-06,
      "loss": 46.0,
      "step": 1392
    },
    {
      "epoch": 0.028040299123363227,
      "grad_norm": 0.0016826376086100936,
      "learning_rate": 6.223658018139245e-06,
      "loss": 46.0,
      "step": 1393
    },
    {
      "epoch": 0.02806042855561259,
      "grad_norm": 0.0012350878678262234,
      "learning_rate": 6.153867974645833e-06,
      "loss": 46.0,
      "step": 1394
    },
    {
      "epoch": 0.02808055798786195,
      "grad_norm": 0.0007092293817549944,
      "learning_rate": 6.084459017811473e-06,
      "loss": 46.0,
      "step": 1395
    },
    {
      "epoch": 0.028100687420111316,
      "grad_norm": 0.0008784400415606797,
      "learning_rate": 6.015431429489371e-06,
      "loss": 46.0,
      "step": 1396
    },
    {
      "epoch": 0.02812081685236068,
      "grad_norm": 0.0008223768672905862,
      "learning_rate": 5.946785489983941e-06,
      "loss": 46.0,
      "step": 1397
    },
    {
      "epoch": 0.028140946284610044,
      "grad_norm": 0.0015569966053590178,
      "learning_rate": 5.87852147804997e-06,
      "loss": 46.0,
      "step": 1398
    },
    {
      "epoch": 0.028161075716859406,
      "grad_norm": 0.002495410619303584,
      "learning_rate": 5.810639670891216e-06,
      "loss": 46.0,
      "step": 1399
    },
    {
      "epoch": 0.028181205149108768,
      "grad_norm": 0.0011495595099404454,
      "learning_rate": 5.743140344159459e-06,
      "loss": 46.0,
      "step": 1400
    },
    {
      "epoch": 0.028201334581358133,
      "grad_norm": 0.0013125301338732243,
      "learning_rate": 5.676023771953265e-06,
      "loss": 46.0,
      "step": 1401
    },
    {
      "epoch": 0.028221464013607495,
      "grad_norm": 0.0009252792806364596,
      "learning_rate": 5.6092902268169986e-06,
      "loss": 46.0,
      "step": 1402
    },
    {
      "epoch": 0.02824159344585686,
      "grad_norm": 0.0010239135008305311,
      "learning_rate": 5.542939979739559e-06,
      "loss": 46.0,
      "step": 1403
    },
    {
      "epoch": 0.028261722878106223,
      "grad_norm": 0.0005345203098841012,
      "learning_rate": 5.47697330015341e-06,
      "loss": 46.0,
      "step": 1404
    },
    {
      "epoch": 0.028281852310355588,
      "grad_norm": 0.0007038481417112052,
      "learning_rate": 5.411390455933463e-06,
      "loss": 46.0,
      "step": 1405
    },
    {
      "epoch": 0.02830198174260495,
      "grad_norm": 0.0007712701335549355,
      "learning_rate": 5.346191713395888e-06,
      "loss": 46.0,
      "step": 1406
    },
    {
      "epoch": 0.028322111174854312,
      "grad_norm": 0.0020393729209899902,
      "learning_rate": 5.2813773372971995e-06,
      "loss": 46.0,
      "step": 1407
    },
    {
      "epoch": 0.028342240607103678,
      "grad_norm": 0.0008605642360635102,
      "learning_rate": 5.216947590833032e-06,
      "loss": 46.0,
      "step": 1408
    },
    {
      "epoch": 0.02836237003935304,
      "grad_norm": 0.0014623169554397464,
      "learning_rate": 5.152902735637166e-06,
      "loss": 46.0,
      "step": 1409
    },
    {
      "epoch": 0.028382499471602405,
      "grad_norm": 0.0009752597543410957,
      "learning_rate": 5.089243031780389e-06,
      "loss": 46.0,
      "step": 1410
    },
    {
      "epoch": 0.028402628903851767,
      "grad_norm": 0.0009389633196406066,
      "learning_rate": 5.025968737769548e-06,
      "loss": 46.0,
      "step": 1411
    },
    {
      "epoch": 0.02842275833610113,
      "grad_norm": 0.0007833832642063498,
      "learning_rate": 4.963080110546336e-06,
      "loss": 46.0,
      "step": 1412
    },
    {
      "epoch": 0.028442887768350494,
      "grad_norm": 0.0010727515909820795,
      "learning_rate": 4.90057740548645e-06,
      "loss": 46.0,
      "step": 1413
    },
    {
      "epoch": 0.028463017200599856,
      "grad_norm": 0.0007540370570495725,
      "learning_rate": 4.838460876398365e-06,
      "loss": 46.0,
      "step": 1414
    },
    {
      "epoch": 0.028483146632849222,
      "grad_norm": 0.0013027754612267017,
      "learning_rate": 4.776730775522464e-06,
      "loss": 46.0,
      "step": 1415
    },
    {
      "epoch": 0.028503276065098584,
      "grad_norm": 0.0007448008400388062,
      "learning_rate": 4.715387353529855e-06,
      "loss": 46.0,
      "step": 1416
    },
    {
      "epoch": 0.028523405497347946,
      "grad_norm": 0.0008857371867634356,
      "learning_rate": 4.654430859521519e-06,
      "loss": 46.0,
      "step": 1417
    },
    {
      "epoch": 0.02854353492959731,
      "grad_norm": 0.002097605960443616,
      "learning_rate": 4.593861541027155e-06,
      "loss": 46.0,
      "step": 1418
    },
    {
      "epoch": 0.028563664361846673,
      "grad_norm": 0.0014931777259334922,
      "learning_rate": 4.53367964400423e-06,
      "loss": 46.0,
      "step": 1419
    },
    {
      "epoch": 0.02858379379409604,
      "grad_norm": 0.001164833316579461,
      "learning_rate": 4.473885412837065e-06,
      "loss": 46.0,
      "step": 1420
    },
    {
      "epoch": 0.0286039232263454,
      "grad_norm": 0.0007305769831873477,
      "learning_rate": 4.414479090335644e-06,
      "loss": 46.0,
      "step": 1421
    },
    {
      "epoch": 0.028624052658594766,
      "grad_norm": 0.0013536449987441301,
      "learning_rate": 4.355460917734866e-06,
      "loss": 46.0,
      "step": 1422
    },
    {
      "epoch": 0.028644182090844128,
      "grad_norm": 0.0007573326583951712,
      "learning_rate": 4.296831134693358e-06,
      "loss": 46.0,
      "step": 1423
    },
    {
      "epoch": 0.02866431152309349,
      "grad_norm": 0.0009904114995151758,
      "learning_rate": 4.238589979292651e-06,
      "loss": 46.0,
      "step": 1424
    },
    {
      "epoch": 0.028684440955342855,
      "grad_norm": 0.0010607121512293816,
      "learning_rate": 4.180737688036096e-06,
      "loss": 46.0,
      "step": 1425
    },
    {
      "epoch": 0.028704570387592217,
      "grad_norm": 0.001276315306313336,
      "learning_rate": 4.1232744958479955e-06,
      "loss": 46.0,
      "step": 1426
    },
    {
      "epoch": 0.028724699819841583,
      "grad_norm": 0.0012834541266784072,
      "learning_rate": 4.066200636072604e-06,
      "loss": 46.0,
      "step": 1427
    },
    {
      "epoch": 0.028744829252090945,
      "grad_norm": 0.001088009332306683,
      "learning_rate": 4.0095163404732075e-06,
      "loss": 46.0,
      "step": 1428
    },
    {
      "epoch": 0.028764958684340307,
      "grad_norm": 0.0008485732250846922,
      "learning_rate": 3.953221839231125e-06,
      "loss": 46.0,
      "step": 1429
    },
    {
      "epoch": 0.028785088116589672,
      "grad_norm": 0.00294103124178946,
      "learning_rate": 3.897317360944874e-06,
      "loss": 46.0,
      "step": 1430
    },
    {
      "epoch": 0.028805217548839034,
      "grad_norm": 0.0009056427516043186,
      "learning_rate": 3.841803132629107e-06,
      "loss": 46.0,
      "step": 1431
    },
    {
      "epoch": 0.0288253469810884,
      "grad_norm": 0.001108710654079914,
      "learning_rate": 3.786679379713842e-06,
      "loss": 46.0,
      "step": 1432
    },
    {
      "epoch": 0.02884547641333776,
      "grad_norm": 0.0013181203976273537,
      "learning_rate": 3.731946326043423e-06,
      "loss": 46.0,
      "step": 1433
    },
    {
      "epoch": 0.028865605845587124,
      "grad_norm": 0.0015622148057445884,
      "learning_rate": 3.677604193875639e-06,
      "loss": 46.0,
      "step": 1434
    },
    {
      "epoch": 0.02888573527783649,
      "grad_norm": 0.001430216245353222,
      "learning_rate": 3.6236532038809167e-06,
      "loss": 46.0,
      "step": 1435
    },
    {
      "epoch": 0.02890586471008585,
      "grad_norm": 0.0011702035553753376,
      "learning_rate": 3.5700935751412644e-06,
      "loss": 46.0,
      "step": 1436
    },
    {
      "epoch": 0.028925994142335217,
      "grad_norm": 0.0011749324621632695,
      "learning_rate": 3.5169255251495283e-06,
      "loss": 46.0,
      "step": 1437
    },
    {
      "epoch": 0.02894612357458458,
      "grad_norm": 0.0011062580160796642,
      "learning_rate": 3.464149269808392e-06,
      "loss": 46.0,
      "step": 1438
    },
    {
      "epoch": 0.028966253006833944,
      "grad_norm": 0.001277309376746416,
      "learning_rate": 3.411765023429625e-06,
      "loss": 46.0,
      "step": 1439
    },
    {
      "epoch": 0.028986382439083306,
      "grad_norm": 0.002218514448031783,
      "learning_rate": 3.3597729987330796e-06,
      "loss": 46.0,
      "step": 1440
    },
    {
      "epoch": 0.029006511871332668,
      "grad_norm": 0.0007218050304800272,
      "learning_rate": 3.3081734068459045e-06,
      "loss": 46.0,
      "step": 1441
    },
    {
      "epoch": 0.029026641303582033,
      "grad_norm": 0.0006786247249692678,
      "learning_rate": 3.25696645730168e-06,
      "loss": 46.0,
      "step": 1442
    },
    {
      "epoch": 0.029046770735831395,
      "grad_norm": 0.0008958657272160053,
      "learning_rate": 3.2061523580395824e-06,
      "loss": 46.0,
      "step": 1443
    },
    {
      "epoch": 0.02906690016808076,
      "grad_norm": 0.0011994513915851712,
      "learning_rate": 3.155731315403465e-06,
      "loss": 46.0,
      "step": 1444
    },
    {
      "epoch": 0.029087029600330123,
      "grad_norm": 0.001030144514515996,
      "learning_rate": 3.1057035341411357e-06,
      "loss": 46.0,
      "step": 1445
    },
    {
      "epoch": 0.029107159032579485,
      "grad_norm": 0.0007214623037725687,
      "learning_rate": 3.056069217403401e-06,
      "loss": 46.0,
      "step": 1446
    },
    {
      "epoch": 0.02912728846482885,
      "grad_norm": 0.001022842712700367,
      "learning_rate": 3.006828566743358e-06,
      "loss": 46.0,
      "step": 1447
    },
    {
      "epoch": 0.029147417897078212,
      "grad_norm": 0.0007378848385997117,
      "learning_rate": 2.957981782115471e-06,
      "loss": 46.0,
      "step": 1448
    },
    {
      "epoch": 0.029167547329327578,
      "grad_norm": 0.001484018168412149,
      "learning_rate": 2.909529061874816e-06,
      "loss": 46.0,
      "step": 1449
    },
    {
      "epoch": 0.02918767676157694,
      "grad_norm": 0.0008505574078299105,
      "learning_rate": 2.861470602776317e-06,
      "loss": 46.0,
      "step": 1450
    },
    {
      "epoch": 0.0292078061938263,
      "grad_norm": 0.0009992974810302258,
      "learning_rate": 2.8138065999738337e-06,
      "loss": 46.0,
      "step": 1451
    },
    {
      "epoch": 0.029227935626075667,
      "grad_norm": 0.000925807689782232,
      "learning_rate": 2.766537247019485e-06,
      "loss": 46.0,
      "step": 1452
    },
    {
      "epoch": 0.02924806505832503,
      "grad_norm": 0.001936123939231038,
      "learning_rate": 2.7196627358627713e-06,
      "loss": 46.0,
      "step": 1453
    },
    {
      "epoch": 0.029268194490574394,
      "grad_norm": 0.0008978885016404092,
      "learning_rate": 2.673183256849876e-06,
      "loss": 46.0,
      "step": 1454
    },
    {
      "epoch": 0.029288323922823756,
      "grad_norm": 0.001972075318917632,
      "learning_rate": 2.627098998722799e-06,
      "loss": 46.0,
      "step": 1455
    },
    {
      "epoch": 0.029308453355073122,
      "grad_norm": 0.001669483259320259,
      "learning_rate": 2.58141014861869e-06,
      "loss": 46.0,
      "step": 1456
    },
    {
      "epoch": 0.029328582787322484,
      "grad_norm": 0.0009523354819975793,
      "learning_rate": 2.536116892069007e-06,
      "loss": 46.0,
      "step": 1457
    },
    {
      "epoch": 0.029348712219571846,
      "grad_norm": 0.0025416603311896324,
      "learning_rate": 2.4912194129988353e-06,
      "loss": 46.0,
      "step": 1458
    },
    {
      "epoch": 0.02936884165182121,
      "grad_norm": 0.0017256122082471848,
      "learning_rate": 2.4467178937260692e-06,
      "loss": 46.0,
      "step": 1459
    },
    {
      "epoch": 0.029388971084070573,
      "grad_norm": 0.0008214665576815605,
      "learning_rate": 2.4026125149607225e-06,
      "loss": 46.0,
      "step": 1460
    },
    {
      "epoch": 0.02940910051631994,
      "grad_norm": 0.0010338842403143644,
      "learning_rate": 2.3589034558041624e-06,
      "loss": 46.0,
      "step": 1461
    },
    {
      "epoch": 0.0294292299485693,
      "grad_norm": 0.001692043850198388,
      "learning_rate": 2.3155908937484093e-06,
      "loss": 46.0,
      "step": 1462
    },
    {
      "epoch": 0.029449359380818663,
      "grad_norm": 0.0017285742796957493,
      "learning_rate": 2.2726750046754175e-06,
      "loss": 46.0,
      "step": 1463
    },
    {
      "epoch": 0.029469488813068028,
      "grad_norm": 0.0008371649892069399,
      "learning_rate": 2.2301559628563062e-06,
      "loss": 46.0,
      "step": 1464
    },
    {
      "epoch": 0.02948961824531739,
      "grad_norm": 0.0009613548754714429,
      "learning_rate": 2.1880339409507288e-06,
      "loss": 46.0,
      "step": 1465
    },
    {
      "epoch": 0.029509747677566756,
      "grad_norm": 0.0014171084621921182,
      "learning_rate": 2.146309110006128e-06,
      "loss": 46.0,
      "step": 1466
    },
    {
      "epoch": 0.029529877109816118,
      "grad_norm": 0.0011761389905586839,
      "learning_rate": 2.1049816394570486e-06,
      "loss": 46.0,
      "step": 1467
    },
    {
      "epoch": 0.02955000654206548,
      "grad_norm": 0.0011704186908900738,
      "learning_rate": 2.064051697124425e-06,
      "loss": 46.0,
      "step": 1468
    },
    {
      "epoch": 0.029570135974314845,
      "grad_norm": 0.0009983095806092024,
      "learning_rate": 2.0235194492149832e-06,
      "loss": 46.0,
      "step": 1469
    },
    {
      "epoch": 0.029590265406564207,
      "grad_norm": 0.0015483457827940583,
      "learning_rate": 1.983385060320453e-06,
      "loss": 46.0,
      "step": 1470
    },
    {
      "epoch": 0.029610394838813572,
      "grad_norm": 0.0015588031383231282,
      "learning_rate": 1.943648693416966e-06,
      "loss": 46.0,
      "step": 1471
    },
    {
      "epoch": 0.029630524271062934,
      "grad_norm": 0.00174389174208045,
      "learning_rate": 1.9043105098643931e-06,
      "loss": 46.0,
      "step": 1472
    },
    {
      "epoch": 0.0296506537033123,
      "grad_norm": 0.001446812180802226,
      "learning_rate": 1.865370669405675e-06,
      "loss": 46.0,
      "step": 1473
    },
    {
      "epoch": 0.029670783135561662,
      "grad_norm": 0.000889250251930207,
      "learning_rate": 1.82682933016618e-06,
      "loss": 46.0,
      "step": 1474
    },
    {
      "epoch": 0.029690912567811024,
      "grad_norm": 0.0005816498887725174,
      "learning_rate": 1.7886866486530374e-06,
      "loss": 46.0,
      "step": 1475
    },
    {
      "epoch": 0.02971104200006039,
      "grad_norm": 0.0008788988925516605,
      "learning_rate": 1.7509427797545718e-06,
      "loss": 46.0,
      "step": 1476
    },
    {
      "epoch": 0.02973117143230975,
      "grad_norm": 0.0010405541397631168,
      "learning_rate": 1.7135978767395588e-06,
      "loss": 46.0,
      "step": 1477
    },
    {
      "epoch": 0.029751300864559117,
      "grad_norm": 0.0011400578077882528,
      "learning_rate": 1.676652091256714e-06,
      "loss": 46.0,
      "step": 1478
    },
    {
      "epoch": 0.02977143029680848,
      "grad_norm": 0.0015322790713980794,
      "learning_rate": 1.6401055733340164e-06,
      "loss": 46.0,
      "step": 1479
    },
    {
      "epoch": 0.02979155972905784,
      "grad_norm": 0.0007389390957541764,
      "learning_rate": 1.6039584713781308e-06,
      "loss": 46.0,
      "step": 1480
    },
    {
      "epoch": 0.029811689161307206,
      "grad_norm": 0.0011101323179900646,
      "learning_rate": 1.5682109321737637e-06,
      "loss": 46.0,
      "step": 1481
    },
    {
      "epoch": 0.029831818593556568,
      "grad_norm": 0.0012494990369305015,
      "learning_rate": 1.5328631008831197e-06,
      "loss": 46.0,
      "step": 1482
    },
    {
      "epoch": 0.029851948025805933,
      "grad_norm": 0.0019527755212038755,
      "learning_rate": 1.497915121045268e-06,
      "loss": 46.0,
      "step": 1483
    },
    {
      "epoch": 0.029872077458055295,
      "grad_norm": 0.0015624084044247866,
      "learning_rate": 1.4633671345755884e-06,
      "loss": 46.0,
      "step": 1484
    },
    {
      "epoch": 0.029892206890304657,
      "grad_norm": 0.0013834653655067086,
      "learning_rate": 1.4292192817651706e-06,
      "loss": 46.0,
      "step": 1485
    },
    {
      "epoch": 0.029912336322554023,
      "grad_norm": 0.0015040615107864141,
      "learning_rate": 1.3954717012802599e-06,
      "loss": 46.0,
      "step": 1486
    },
    {
      "epoch": 0.029932465754803385,
      "grad_norm": 0.001884009805507958,
      "learning_rate": 1.3621245301617014e-06,
      "loss": 46.0,
      "step": 1487
    },
    {
      "epoch": 0.02995259518705275,
      "grad_norm": 0.0003439519787207246,
      "learning_rate": 1.3291779038243745e-06,
      "loss": 46.0,
      "step": 1488
    },
    {
      "epoch": 0.029972724619302112,
      "grad_norm": 0.0009911386296153069,
      "learning_rate": 1.2966319560566264e-06,
      "loss": 46.0,
      "step": 1489
    },
    {
      "epoch": 0.029992854051551478,
      "grad_norm": 0.001606732839718461,
      "learning_rate": 1.2644868190197501e-06,
      "loss": 46.0,
      "step": 1490
    },
    {
      "epoch": 0.03001298348380084,
      "grad_norm": 0.0009569000103510916,
      "learning_rate": 1.2327426232474626e-06,
      "loss": 46.0,
      "step": 1491
    },
    {
      "epoch": 0.0300331129160502,
      "grad_norm": 0.002008062554523349,
      "learning_rate": 1.201399497645328e-06,
      "loss": 46.0,
      "step": 1492
    },
    {
      "epoch": 0.030053242348299567,
      "grad_norm": 0.0015580368926748633,
      "learning_rate": 1.1704575694902686e-06,
      "loss": 46.0,
      "step": 1493
    },
    {
      "epoch": 0.03007337178054893,
      "grad_norm": 0.0017130931373685598,
      "learning_rate": 1.1399169644300323e-06,
      "loss": 46.0,
      "step": 1494
    },
    {
      "epoch": 0.030093501212798295,
      "grad_norm": 0.0015125928912311792,
      "learning_rate": 1.1097778064827257e-06,
      "loss": 46.0,
      "step": 1495
    },
    {
      "epoch": 0.030113630645047657,
      "grad_norm": 0.0013810923555865884,
      "learning_rate": 1.0800402180362158e-06,
      "loss": 46.0,
      "step": 1496
    },
    {
      "epoch": 0.03013376007729702,
      "grad_norm": 0.0014871679013594985,
      "learning_rate": 1.0507043198477617e-06,
      "loss": 46.0,
      "step": 1497
    },
    {
      "epoch": 0.030153889509546384,
      "grad_norm": 0.0008668963564559817,
      "learning_rate": 1.0217702310433842e-06,
      "loss": 46.0,
      "step": 1498
    },
    {
      "epoch": 0.030174018941795746,
      "grad_norm": 0.00135446572676301,
      "learning_rate": 9.93238069117508e-07,
      "loss": 46.0,
      "step": 1499
    },
    {
      "epoch": 0.03019414837404511,
      "grad_norm": 0.0012777193915098906,
      "learning_rate": 9.651079499323978e-07,
      "loss": 46.0,
      "step": 1500
    },
    {
      "epoch": 0.030214277806294473,
      "grad_norm": 0.0014395661419257522,
      "learning_rate": 9.373799877177236e-07,
      "loss": 46.0,
      "step": 1501
    },
    {
      "epoch": 0.030234407238543835,
      "grad_norm": 0.0019440649775788188,
      "learning_rate": 9.100542950701063e-07,
      "loss": 46.0,
      "step": 1502
    },
    {
      "epoch": 0.0302545366707932,
      "grad_norm": 0.0007341218297369778,
      "learning_rate": 8.831309829526291e-07,
      "loss": 46.0,
      "step": 1503
    },
    {
      "epoch": 0.030274666103042563,
      "grad_norm": 0.0010124749969691038,
      "learning_rate": 8.566101606944266e-07,
      "loss": 46.0,
      "step": 1504
    },
    {
      "epoch": 0.030294795535291928,
      "grad_norm": 0.0011337018804624677,
      "learning_rate": 8.304919359901963e-07,
      "loss": 46.0,
      "step": 1505
    },
    {
      "epoch": 0.03031492496754129,
      "grad_norm": 0.0006525327335111797,
      "learning_rate": 8.047764148997883e-07,
      "loss": 46.0,
      "step": 1506
    },
    {
      "epoch": 0.030335054399790656,
      "grad_norm": 0.0014475996140390635,
      "learning_rate": 7.794637018477824e-07,
      "loss": 46.0,
      "step": 1507
    },
    {
      "epoch": 0.030355183832040018,
      "grad_norm": 0.0006679428042843938,
      "learning_rate": 7.545538996230228e-07,
      "loss": 46.0,
      "step": 1508
    },
    {
      "epoch": 0.03037531326428938,
      "grad_norm": 0.001389230601489544,
      "learning_rate": 7.300471093782624e-07,
      "loss": 46.0,
      "step": 1509
    },
    {
      "epoch": 0.030395442696538745,
      "grad_norm": 0.0013642680132761598,
      "learning_rate": 7.059434306297075e-07,
      "loss": 46.0,
      "step": 1510
    },
    {
      "epoch": 0.030415572128788107,
      "grad_norm": 0.000978952506557107,
      "learning_rate": 6.822429612566184e-07,
      "loss": 46.0,
      "step": 1511
    },
    {
      "epoch": 0.030435701561037472,
      "grad_norm": 0.0009029002394527197,
      "learning_rate": 6.589457975009205e-07,
      "loss": 46.0,
      "step": 1512
    },
    {
      "epoch": 0.030455830993286834,
      "grad_norm": 0.002438169904053211,
      "learning_rate": 6.360520339668163e-07,
      "loss": 46.0,
      "step": 1513
    },
    {
      "epoch": 0.030475960425536196,
      "grad_norm": 0.0020010732114315033,
      "learning_rate": 6.135617636204072e-07,
      "loss": 46.0,
      "step": 1514
    },
    {
      "epoch": 0.030496089857785562,
      "grad_norm": 0.0014994231751188636,
      "learning_rate": 5.91475077789272e-07,
      "loss": 46.0,
      "step": 1515
    },
    {
      "epoch": 0.030516219290034924,
      "grad_norm": 0.0007911003194749355,
      "learning_rate": 5.697920661621558e-07,
      "loss": 46.0,
      "step": 1516
    },
    {
      "epoch": 0.03053634872228429,
      "grad_norm": 0.0011950345942750573,
      "learning_rate": 5.485128167885933e-07,
      "loss": 46.0,
      "step": 1517
    },
    {
      "epoch": 0.03055647815453365,
      "grad_norm": 0.0012883899034932256,
      "learning_rate": 5.276374160784858e-07,
      "loss": 46.0,
      "step": 1518
    },
    {
      "epoch": 0.030576607586783013,
      "grad_norm": 0.002167344558984041,
      "learning_rate": 5.071659488018688e-07,
      "loss": 46.0,
      "step": 1519
    },
    {
      "epoch": 0.03059673701903238,
      "grad_norm": 0.0008158805430866778,
      "learning_rate": 4.870984980884341e-07,
      "loss": 46.0,
      "step": 1520
    },
    {
      "epoch": 0.03061686645128174,
      "grad_norm": 0.00043665210250765085,
      "learning_rate": 4.674351454273307e-07,
      "loss": 46.0,
      "step": 1521
    },
    {
      "epoch": 0.030636995883531106,
      "grad_norm": 0.0012343135895207524,
      "learning_rate": 4.481759706666755e-07,
      "loss": 46.0,
      "step": 1522
    },
    {
      "epoch": 0.030657125315780468,
      "grad_norm": 0.0007814933778718114,
      "learning_rate": 4.2932105201339835e-07,
      "loss": 46.0,
      "step": 1523
    },
    {
      "epoch": 0.030677254748029834,
      "grad_norm": 0.0008475257782265544,
      "learning_rate": 4.1087046603279777e-07,
      "loss": 46.0,
      "step": 1524
    },
    {
      "epoch": 0.030697384180279196,
      "grad_norm": 0.0010306923650205135,
      "learning_rate": 3.9282428764827463e-07,
      "loss": 46.0,
      "step": 1525
    },
    {
      "epoch": 0.030717513612528557,
      "grad_norm": 0.0033900176640599966,
      "learning_rate": 3.751825901410433e-07,
      "loss": 46.0,
      "step": 1526
    },
    {
      "epoch": 0.030737643044777923,
      "grad_norm": 0.0020439354702830315,
      "learning_rate": 3.579454451498099e-07,
      "loss": 46.0,
      "step": 1527
    },
    {
      "epoch": 0.030757772477027285,
      "grad_norm": 0.0013795166742056608,
      "learning_rate": 3.411129226704945e-07,
      "loss": 46.0,
      "step": 1528
    },
    {
      "epoch": 0.03077790190927665,
      "grad_norm": 0.0019550782162696123,
      "learning_rate": 3.246850910559318e-07,
      "loss": 46.0,
      "step": 1529
    },
    {
      "epoch": 0.030798031341526012,
      "grad_norm": 0.001795038697309792,
      "learning_rate": 3.0866201701560406e-07,
      "loss": 46.0,
      "step": 1530
    },
    {
      "epoch": 0.030818160773775374,
      "grad_norm": 0.0019376088166609406,
      "learning_rate": 2.9304376561539726e-07,
      "loss": 46.0,
      "step": 1531
    },
    {
      "epoch": 0.03083829020602474,
      "grad_norm": 0.0016611182363703847,
      "learning_rate": 2.7783040027726804e-07,
      "loss": 46.0,
      "step": 1532
    },
    {
      "epoch": 0.030858419638274102,
      "grad_norm": 0.0016700802370905876,
      "learning_rate": 2.630219827790659e-07,
      "loss": 46.0,
      "step": 1533
    },
    {
      "epoch": 0.030878549070523467,
      "grad_norm": 0.0005473219207488,
      "learning_rate": 2.4861857325421123e-07,
      "loss": 46.0,
      "step": 1534
    },
    {
      "epoch": 0.03089867850277283,
      "grad_norm": 0.0012385062873363495,
      "learning_rate": 2.346202301915068e-07,
      "loss": 46.0,
      "step": 1535
    },
    {
      "epoch": 0.03091880793502219,
      "grad_norm": 0.0020921400282531977,
      "learning_rate": 2.2102701043487105e-07,
      "loss": 46.0,
      "step": 1536
    },
    {
      "epoch": 0.030938937367271557,
      "grad_norm": 0.0009741144021973014,
      "learning_rate": 2.0783896918310508e-07,
      "loss": 46.0,
      "step": 1537
    },
    {
      "epoch": 0.03095906679952092,
      "grad_norm": 0.0017884114058688283,
      "learning_rate": 1.9505615998969274e-07,
      "loss": 46.0,
      "step": 1538
    },
    {
      "epoch": 0.030979196231770284,
      "grad_norm": 0.0012170026311650872,
      "learning_rate": 1.8267863476255643e-07,
      "loss": 46.0,
      "step": 1539
    },
    {
      "epoch": 0.030999325664019646,
      "grad_norm": 0.0015093215042725205,
      "learning_rate": 1.7070644376386835e-07,
      "loss": 46.0,
      "step": 1540
    },
    {
      "epoch": 0.03101945509626901,
      "grad_norm": 0.0010898308828473091,
      "learning_rate": 1.5913963560981738e-07,
      "loss": 46.0,
      "step": 1541
    },
    {
      "epoch": 0.031039584528518373,
      "grad_norm": 0.000764137483201921,
      "learning_rate": 1.4797825727044246e-07,
      "loss": 46.0,
      "step": 1542
    },
    {
      "epoch": 0.031059713960767735,
      "grad_norm": 0.0012036709813401103,
      "learning_rate": 1.3722235406943285e-07,
      "loss": 46.0,
      "step": 1543
    },
    {
      "epoch": 0.0310798433930171,
      "grad_norm": 0.0012527679791674018,
      "learning_rate": 1.2687196968392822e-07,
      "loss": 46.0,
      "step": 1544
    },
    {
      "epoch": 0.031099972825266463,
      "grad_norm": 0.0017057860968634486,
      "learning_rate": 1.1692714614436329e-07,
      "loss": 46.0,
      "step": 1545
    },
    {
      "epoch": 0.03112010225751583,
      "grad_norm": 0.0012995371362194419,
      "learning_rate": 1.0738792383427898e-07,
      "loss": 46.0,
      "step": 1546
    },
    {
      "epoch": 0.03114023168976519,
      "grad_norm": 0.0005156396073289216,
      "learning_rate": 9.82543414901782e-08,
      "loss": 46.0,
      "step": 1547
    },
    {
      "epoch": 0.031160361122014552,
      "grad_norm": 0.0023463049437850714,
      "learning_rate": 8.952643620134815e-08,
      "loss": 46.0,
      "step": 1548
    },
    {
      "epoch": 0.031180490554263918,
      "grad_norm": 0.0005802233936265111,
      "learning_rate": 8.1204243409716e-08,
      "loss": 46.0,
      "step": 1549
    },
    {
      "epoch": 0.03120061998651328,
      "grad_norm": 0.0014365284005180001,
      "learning_rate": 7.328779690972675e-08,
      "loss": 46.0,
      "step": 1550
    },
    {
      "epoch": 0.031220749418762645,
      "grad_norm": 0.0009548702510073781,
      "learning_rate": 6.577712884816566e-08,
      "loss": 46.0,
      "step": 1551
    },
    {
      "epoch": 0.031240878851012007,
      "grad_norm": 0.000912398740183562,
      "learning_rate": 5.867226972404716e-08,
      "loss": 46.0,
      "step": 1552
    },
    {
      "epoch": 0.03126100828326137,
      "grad_norm": 0.0013671774649992585,
      "learning_rate": 5.197324838851492e-08,
      "loss": 46.0,
      "step": 1553
    },
    {
      "epoch": 0.031281137715510735,
      "grad_norm": 0.001228424021974206,
      "learning_rate": 4.5680092044686486e-08,
      "loss": 46.0,
      "step": 1554
    },
    {
      "epoch": 0.031301267147760096,
      "grad_norm": 0.0009602979407645762,
      "learning_rate": 3.9792826247553315e-08,
      "loss": 46.0,
      "step": 1555
    },
    {
      "epoch": 0.03132139658000946,
      "grad_norm": 0.0012437499826774001,
      "learning_rate": 3.431147490390307e-08,
      "loss": 46.0,
      "step": 1556
    },
    {
      "epoch": 0.03134152601225883,
      "grad_norm": 0.0018415412632748485,
      "learning_rate": 2.9236060272186395e-08,
      "loss": 46.0,
      "step": 1557
    },
    {
      "epoch": 0.03136165544450819,
      "grad_norm": 0.0008638726430945098,
      "learning_rate": 2.4566602962450282e-08,
      "loss": 46.0,
      "step": 1558
    },
    {
      "epoch": 0.03138178487675755,
      "grad_norm": 0.001755962148308754,
      "learning_rate": 2.0303121936227077e-08,
      "loss": 46.0,
      "step": 1559
    },
    {
      "epoch": 0.03140191430900691,
      "grad_norm": 0.000949330220464617,
      "learning_rate": 1.6445634506512265e-08,
      "loss": 46.0,
      "step": 1560
    },
    {
      "epoch": 0.031422043741256275,
      "grad_norm": 0.000980414217337966,
      "learning_rate": 1.2994156337620134e-08,
      "loss": 46.0,
      "step": 1561
    },
    {
      "epoch": 0.031442173173505644,
      "grad_norm": 0.001756619312800467,
      "learning_rate": 9.948701445194885e-09,
      "loss": 46.0,
      "step": 1562
    },
    {
      "epoch": 0.031462302605755006,
      "grad_norm": 0.0016652131453156471,
      "learning_rate": 7.3092821960774046e-09,
      "loss": 46.0,
      "step": 1563
    },
    {
      "epoch": 0.03148243203800437,
      "grad_norm": 0.0008927117451094091,
      "learning_rate": 5.0759093083385665e-09,
      "loss": 46.0,
      "step": 1564
    },
    {
      "epoch": 0.03150256147025373,
      "grad_norm": 0.0012494467664510012,
      "learning_rate": 3.24859185114601e-09,
      "loss": 46.0,
      "step": 1565
    },
    {
      "epoch": 0.03152269090250309,
      "grad_norm": 0.0012330285971984267,
      "learning_rate": 1.8273372448307512e-09,
      "loss": 46.0,
      "step": 1566
    },
    {
      "epoch": 0.03154282033475246,
      "grad_norm": 0.002677972661331296,
      "learning_rate": 8.121512607317528e-10,
      "loss": 46.0,
      "step": 1567
    },
    {
      "epoch": 0.03156294976700182,
      "grad_norm": 0.0020114348735660315,
      "learning_rate": 2.0303802130694493e-10,
      "loss": 46.0,
      "step": 1568
    },
    {
      "epoch": 0.031583079199251185,
      "grad_norm": 0.0010916402097791433,
      "learning_rate": 0.0,
      "loss": 46.0,
      "step": 1569
    }
  ],
  "logging_steps": 1,
  "max_steps": 1569,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 393,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 36155043692544.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}