{ "best_metric": 0.7434989809989929, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 1.0049566294919454, "eval_steps": 50, "global_step": 101, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009913258983890954, "grad_norm": 0.48874759674072266, "learning_rate": 2e-05, "loss": 1.1772, "step": 1 }, { "epoch": 0.009913258983890954, "eval_loss": 1.1726865768432617, "eval_runtime": 45.2328, "eval_samples_per_second": 7.517, "eval_steps_per_second": 1.879, "step": 1 }, { "epoch": 0.01982651796778191, "grad_norm": 0.474133163690567, "learning_rate": 4e-05, "loss": 1.1973, "step": 2 }, { "epoch": 0.02973977695167286, "grad_norm": 0.5018295645713806, "learning_rate": 6e-05, "loss": 1.2222, "step": 3 }, { "epoch": 0.03965303593556382, "grad_norm": 0.45751234889030457, "learning_rate": 8e-05, "loss": 1.2127, "step": 4 }, { "epoch": 0.04956629491945477, "grad_norm": 0.5277777314186096, "learning_rate": 0.0001, "loss": 1.1466, "step": 5 }, { "epoch": 0.05947955390334572, "grad_norm": 0.4355424642562866, "learning_rate": 0.00012, "loss": 1.0944, "step": 6 }, { "epoch": 0.06939281288723669, "grad_norm": 0.4266590476036072, "learning_rate": 0.00014, "loss": 1.0243, "step": 7 }, { "epoch": 0.07930607187112763, "grad_norm": 0.8596389293670654, "learning_rate": 0.00016, "loss": 0.9531, "step": 8 }, { "epoch": 0.08921933085501858, "grad_norm": 0.39760446548461914, "learning_rate": 0.00018, "loss": 0.915, "step": 9 }, { "epoch": 0.09913258983890955, "grad_norm": 0.3741200268268585, "learning_rate": 0.0002, "loss": 0.9735, "step": 10 }, { "epoch": 0.1090458488228005, "grad_norm": 0.3944251835346222, "learning_rate": 0.00019994041405510705, "loss": 0.9357, "step": 11 }, { "epoch": 0.11895910780669144, "grad_norm": 0.3373732268810272, "learning_rate": 0.0001997617272301248, "loss": 0.9593, "step": 12 }, { "epoch": 0.1288723667905824, "grad_norm": 0.3242512047290802, "learning_rate": 0.0001994641524695193, "loss": 0.9296, "step": 13 }, { "epoch": 0.13878562577447337, "grad_norm": 0.3163102865219116, "learning_rate": 0.00019904804439875633, "loss": 0.9026, "step": 14 }, { "epoch": 0.14869888475836432, "grad_norm": 0.3128542900085449, "learning_rate": 0.0001985138989016874, "loss": 0.9143, "step": 15 }, { "epoch": 0.15861214374225527, "grad_norm": 0.29705294966697693, "learning_rate": 0.00019786235252959553, "loss": 0.9004, "step": 16 }, { "epoch": 0.16852540272614622, "grad_norm": 0.3244360089302063, "learning_rate": 0.0001970941817426052, "loss": 0.7955, "step": 17 }, { "epoch": 0.17843866171003717, "grad_norm": 0.2974328398704529, "learning_rate": 0.00019621030198436006, "loss": 0.9283, "step": 18 }, { "epoch": 0.18835192069392812, "grad_norm": 0.2881193459033966, "learning_rate": 0.00019521176659107142, "loss": 0.8537, "step": 19 }, { "epoch": 0.1982651796778191, "grad_norm": 0.2800627052783966, "learning_rate": 0.00019409976553623766, "loss": 0.8994, "step": 20 }, { "epoch": 0.20817843866171004, "grad_norm": 0.2811860740184784, "learning_rate": 0.00019287562401253022, "loss": 0.8763, "step": 21 }, { "epoch": 0.218091697645601, "grad_norm": 0.25567537546157837, "learning_rate": 0.00019154080085253666, "loss": 0.8475, "step": 22 }, { "epoch": 0.22800495662949194, "grad_norm": 0.2695324718952179, "learning_rate": 0.0001900968867902419, "loss": 0.87, "step": 23 }, { "epoch": 0.2379182156133829, "grad_norm": 0.2898944914340973, "learning_rate": 0.000188545602565321, "loss": 0.8119, "step": 24 }, { "epoch": 
0.24783147459727387, "grad_norm": 0.2776627838611603, "learning_rate": 0.00018688879687250067, "loss": 0.8292, "step": 25 }, { "epoch": 0.2577447335811648, "grad_norm": 0.28858432173728943, "learning_rate": 0.00018512844415843514, "loss": 0.8165, "step": 26 }, { "epoch": 0.26765799256505574, "grad_norm": 0.31470319628715515, "learning_rate": 0.00018326664226872065, "loss": 0.8366, "step": 27 }, { "epoch": 0.27757125154894674, "grad_norm": 0.3070776164531708, "learning_rate": 0.00018130560994785325, "loss": 0.8505, "step": 28 }, { "epoch": 0.2874845105328377, "grad_norm": 0.3148271441459656, "learning_rate": 0.00017924768419510904, "loss": 0.8784, "step": 29 }, { "epoch": 0.29739776951672864, "grad_norm": 0.281838983297348, "learning_rate": 0.00017709531747949796, "loss": 0.8435, "step": 30 }, { "epoch": 0.3073110285006196, "grad_norm": 0.2795185446739197, "learning_rate": 0.00017485107481711012, "loss": 0.8421, "step": 31 }, { "epoch": 0.31722428748451054, "grad_norm": 0.2741798758506775, "learning_rate": 0.00017251763071433765, "loss": 0.7954, "step": 32 }, { "epoch": 0.3271375464684015, "grad_norm": 0.28247004747390747, "learning_rate": 0.00017009776598061495, "loss": 0.7691, "step": 33 }, { "epoch": 0.33705080545229243, "grad_norm": 0.3221910893917084, "learning_rate": 0.00016759436441447545, "loss": 0.8613, "step": 34 }, { "epoch": 0.3469640644361834, "grad_norm": 0.300059974193573, "learning_rate": 0.00016501040936687443, "loss": 0.814, "step": 35 }, { "epoch": 0.35687732342007433, "grad_norm": 0.3095373213291168, "learning_rate": 0.00016234898018587337, "loss": 0.8116, "step": 36 }, { "epoch": 0.3667905824039653, "grad_norm": 0.28610774874687195, "learning_rate": 0.00015961324854692254, "loss": 0.8363, "step": 37 }, { "epoch": 0.37670384138785623, "grad_norm": 0.3051545023918152, "learning_rate": 0.00015680647467311557, "loss": 0.8035, "step": 38 }, { "epoch": 0.38661710037174724, "grad_norm": 0.28369075059890747, "learning_rate": 0.00015393200344991995, "loss": 0.8328, "step": 39 }, { "epoch": 0.3965303593556382, "grad_norm": 0.2789386510848999, "learning_rate": 0.0001509932604390136, "loss": 0.8211, "step": 40 }, { "epoch": 0.40644361833952913, "grad_norm": 0.3046523928642273, "learning_rate": 0.00014799374779597867, "loss": 0.804, "step": 41 }, { "epoch": 0.4163568773234201, "grad_norm": 0.29437047243118286, "learning_rate": 0.00014493704009671613, "loss": 0.799, "step": 42 }, { "epoch": 0.42627013630731103, "grad_norm": 0.2865321934223175, "learning_rate": 0.0001418267800775565, "loss": 0.7877, "step": 43 }, { "epoch": 0.436183395291202, "grad_norm": 0.2856364846229553, "learning_rate": 0.0001386666742941419, "loss": 0.8098, "step": 44 }, { "epoch": 0.44609665427509293, "grad_norm": 0.2996920049190521, "learning_rate": 0.00013546048870425356, "loss": 0.8478, "step": 45 }, { "epoch": 0.4560099132589839, "grad_norm": 0.30788877606391907, "learning_rate": 0.00013221204417984908, "loss": 0.8112, "step": 46 }, { "epoch": 0.46592317224287483, "grad_norm": 0.29653915762901306, "learning_rate": 0.00012892521195365678, "loss": 0.7905, "step": 47 }, { "epoch": 0.4758364312267658, "grad_norm": 0.3146958351135254, "learning_rate": 0.0001256039090057547, "loss": 0.8241, "step": 48 }, { "epoch": 0.4857496902106567, "grad_norm": 0.3192234933376312, "learning_rate": 0.00012225209339563145, "loss": 0.7645, "step": 49 }, { "epoch": 0.49566294919454773, "grad_norm": 0.32355308532714844, "learning_rate": 0.00011887375954529168, "loss": 0.7881, "step": 50 }, { "epoch": 0.49566294919454773, 
"eval_loss": 0.7790313959121704, "eval_runtime": 45.7186, "eval_samples_per_second": 7.437, "eval_steps_per_second": 1.859, "step": 50 }, { "epoch": 0.5055762081784386, "grad_norm": 0.3305985927581787, "learning_rate": 0.00011547293347902812, "loss": 0.7902, "step": 51 }, { "epoch": 0.5154894671623296, "grad_norm": 0.331398069858551, "learning_rate": 0.0001120536680255323, "loss": 0.7915, "step": 52 }, { "epoch": 0.5254027261462205, "grad_norm": 0.34784436225891113, "learning_rate": 0.00010862003798806196, "loss": 0.7742, "step": 53 }, { "epoch": 0.5353159851301115, "grad_norm": 0.3431916832923889, "learning_rate": 0.00010517613528842097, "loss": 0.8222, "step": 54 }, { "epoch": 0.5452292441140025, "grad_norm": 0.3108989894390106, "learning_rate": 0.00010172606409053886, "loss": 0.753, "step": 55 }, { "epoch": 0.5551425030978935, "grad_norm": 0.3834403157234192, "learning_rate": 9.827393590946116e-05, "loss": 0.7495, "step": 56 }, { "epoch": 0.5650557620817844, "grad_norm": 0.3325962722301483, "learning_rate": 9.482386471157904e-05, "loss": 0.7572, "step": 57 }, { "epoch": 0.5749690210656754, "grad_norm": 0.34333735704421997, "learning_rate": 9.137996201193805e-05, "loss": 0.7767, "step": 58 }, { "epoch": 0.5848822800495663, "grad_norm": 0.32998785376548767, "learning_rate": 8.79463319744677e-05, "loss": 0.7841, "step": 59 }, { "epoch": 0.5947955390334573, "grad_norm": 0.33316344022750854, "learning_rate": 8.452706652097186e-05, "loss": 0.7639, "step": 60 }, { "epoch": 0.6047087980173482, "grad_norm": 0.3633474111557007, "learning_rate": 8.112624045470835e-05, "loss": 0.8559, "step": 61 }, { "epoch": 0.6146220570012392, "grad_norm": 0.3570536971092224, "learning_rate": 7.774790660436858e-05, "loss": 0.7631, "step": 62 }, { "epoch": 0.6245353159851301, "grad_norm": 0.3394259214401245, "learning_rate": 7.43960909942453e-05, "loss": 0.7786, "step": 63 }, { "epoch": 0.6344485749690211, "grad_norm": 0.3467733860015869, "learning_rate": 7.107478804634325e-05, "loss": 0.7614, "step": 64 }, { "epoch": 0.644361833952912, "grad_norm": 0.3295450210571289, "learning_rate": 6.778795582015097e-05, "loss": 0.8523, "step": 65 }, { "epoch": 0.654275092936803, "grad_norm": 0.3386925756931305, "learning_rate": 6.453951129574644e-05, "loss": 0.8138, "step": 66 }, { "epoch": 0.6641883519206939, "grad_norm": 0.32922279834747314, "learning_rate": 6.133332570585812e-05, "loss": 0.7738, "step": 67 }, { "epoch": 0.6741016109045849, "grad_norm": 0.3545457720756531, "learning_rate": 5.817321992244351e-05, "loss": 0.7795, "step": 68 }, { "epoch": 0.6840148698884758, "grad_norm": 0.34623822569847107, "learning_rate": 5.506295990328385e-05, "loss": 0.7551, "step": 69 }, { "epoch": 0.6939281288723668, "grad_norm": 0.3696240186691284, "learning_rate": 5.200625220402139e-05, "loss": 0.7543, "step": 70 }, { "epoch": 0.7038413878562577, "grad_norm": 0.34937718510627747, "learning_rate": 4.900673956098644e-05, "loss": 0.7364, "step": 71 }, { "epoch": 0.7137546468401487, "grad_norm": 0.36205634474754333, "learning_rate": 4.606799655008009e-05, "loss": 0.7579, "step": 72 }, { "epoch": 0.7236679058240396, "grad_norm": 0.3667196035385132, "learning_rate": 4.3193525326884435e-05, "loss": 0.7343, "step": 73 }, { "epoch": 0.7335811648079306, "grad_norm": 0.36573389172554016, "learning_rate": 4.038675145307747e-05, "loss": 0.7218, "step": 74 }, { "epoch": 0.7434944237918215, "grad_norm": 0.367421954870224, "learning_rate": 3.7651019814126654e-05, "loss": 0.8087, "step": 75 }, { "epoch": 0.7534076827757125, "grad_norm": 
0.3466510474681854, "learning_rate": 3.498959063312558e-05, "loss": 0.797, "step": 76 }, { "epoch": 0.7633209417596035, "grad_norm": 0.3628087341785431, "learning_rate": 3.2405635585524565e-05, "loss": 0.7412, "step": 77 }, { "epoch": 0.7732342007434945, "grad_norm": 0.3664226830005646, "learning_rate": 2.9902234019385057e-05, "loss": 0.7456, "step": 78 }, { "epoch": 0.7831474597273854, "grad_norm": 0.346771240234375, "learning_rate": 2.7482369285662378e-05, "loss": 0.769, "step": 79 }, { "epoch": 0.7930607187112764, "grad_norm": 0.38797229528427124, "learning_rate": 2.514892518288988e-05, "loss": 0.7946, "step": 80 }, { "epoch": 0.8029739776951673, "grad_norm": 0.33963850140571594, "learning_rate": 2.290468252050204e-05, "loss": 0.7541, "step": 81 }, { "epoch": 0.8128872366790583, "grad_norm": 0.3752409517765045, "learning_rate": 2.0752315804890977e-05, "loss": 0.8104, "step": 82 }, { "epoch": 0.8228004956629492, "grad_norm": 0.33254724740982056, "learning_rate": 1.8694390052146737e-05, "loss": 0.7511, "step": 83 }, { "epoch": 0.8327137546468402, "grad_norm": 0.3548758029937744, "learning_rate": 1.6733357731279377e-05, "loss": 0.7255, "step": 84 }, { "epoch": 0.8426270136307311, "grad_norm": 0.34011703729629517, "learning_rate": 1.4871555841564887e-05, "loss": 0.7506, "step": 85 }, { "epoch": 0.8525402726146221, "grad_norm": 0.35038140416145325, "learning_rate": 1.311120312749935e-05, "loss": 0.7778, "step": 86 }, { "epoch": 0.862453531598513, "grad_norm": 0.35488229990005493, "learning_rate": 1.1454397434679021e-05, "loss": 0.7953, "step": 87 }, { "epoch": 0.872366790582404, "grad_norm": 0.3349529206752777, "learning_rate": 9.903113209758096e-06, "loss": 0.7046, "step": 88 }, { "epoch": 0.8822800495662949, "grad_norm": 0.3759743869304657, "learning_rate": 8.45919914746337e-06, "loss": 0.763, "step": 89 }, { "epoch": 0.8921933085501859, "grad_norm": 0.3336428999900818, "learning_rate": 7.124375987469767e-06, "loss": 0.7372, "step": 90 }, { "epoch": 0.9021065675340768, "grad_norm": 0.3244943916797638, "learning_rate": 5.900234463762366e-06, "loss": 0.7363, "step": 91 }, { "epoch": 0.9120198265179678, "grad_norm": 0.3384454846382141, "learning_rate": 4.788233408928589e-06, "loss": 0.7456, "step": 92 }, { "epoch": 0.9219330855018587, "grad_norm": 0.3504377603530884, "learning_rate": 3.789698015639953e-06, "loss": 0.826, "step": 93 }, { "epoch": 0.9318463444857497, "grad_norm": 0.36054572463035583, "learning_rate": 2.905818257394799e-06, "loss": 0.8068, "step": 94 }, { "epoch": 0.9417596034696406, "grad_norm": 0.3620031476020813, "learning_rate": 2.137647470404469e-06, "loss": 0.7452, "step": 95 }, { "epoch": 0.9516728624535316, "grad_norm": 0.345386266708374, "learning_rate": 1.48610109831262e-06, "loss": 0.7458, "step": 96 }, { "epoch": 0.9615861214374225, "grad_norm": 0.3529147505760193, "learning_rate": 9.519556012436815e-07, "loss": 0.7703, "step": 97 }, { "epoch": 0.9714993804213135, "grad_norm": 0.3554355204105377, "learning_rate": 5.358475304807375e-07, "loss": 0.7556, "step": 98 }, { "epoch": 0.9814126394052045, "grad_norm": 0.35240188241004944, "learning_rate": 2.382727698752474e-07, "loss": 0.7362, "step": 99 }, { "epoch": 0.9913258983890955, "grad_norm": 0.3343020975589752, "learning_rate": 5.958594489295921e-08, "loss": 0.7531, "step": 100 }, { "epoch": 0.9913258983890955, "eval_loss": 0.7434989809989929, "eval_runtime": 45.6881, "eval_samples_per_second": 7.442, "eval_steps_per_second": 1.86, "step": 100 }, { "epoch": 1.0049566294919454, "grad_norm": 0.5763421654701233, 
"learning_rate": 0.0, "loss": 1.0404, "step": 101 } ], "logging_steps": 1, "max_steps": 101, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.27994227137708e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }