{
"best_metric": 10.709113121032715,
"best_model_checkpoint": "miner_id_24/checkpoint-150",
"epoch": 0.055699962866691426,
"eval_steps": 50,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0003713330857779428,
"grad_norm": 0.4906452000141144,
"learning_rate": 1.007e-05,
"loss": 10.835,
"step": 1
},
{
"epoch": 0.0003713330857779428,
"eval_loss": 10.832501411437988,
"eval_runtime": 8.608,
"eval_samples_per_second": 131.738,
"eval_steps_per_second": 32.993,
"step": 1
},
{
"epoch": 0.0007426661715558856,
"grad_norm": 0.4328306317329407,
"learning_rate": 2.014e-05,
"loss": 10.8341,
"step": 2
},
{
"epoch": 0.0011139992573338284,
"grad_norm": 0.37087035179138184,
"learning_rate": 3.0209999999999997e-05,
"loss": 10.8293,
"step": 3
},
{
"epoch": 0.0014853323431117712,
"grad_norm": 0.39447692036628723,
"learning_rate": 4.028e-05,
"loss": 10.8318,
"step": 4
},
{
"epoch": 0.001856665428889714,
"grad_norm": 0.4673115611076355,
"learning_rate": 5.035e-05,
"loss": 10.8282,
"step": 5
},
{
"epoch": 0.0022279985146676567,
"grad_norm": 0.4667337238788605,
"learning_rate": 6.0419999999999994e-05,
"loss": 10.8325,
"step": 6
},
{
"epoch": 0.0025993316004456,
"grad_norm": 0.37647679448127747,
"learning_rate": 7.049e-05,
"loss": 10.8326,
"step": 7
},
{
"epoch": 0.0029706646862235424,
"grad_norm": 0.4516909420490265,
"learning_rate": 8.056e-05,
"loss": 10.8271,
"step": 8
},
{
"epoch": 0.0033419977720014855,
"grad_norm": 0.5389593839645386,
"learning_rate": 9.062999999999999e-05,
"loss": 10.8291,
"step": 9
},
{
"epoch": 0.003713330857779428,
"grad_norm": 0.40566104650497437,
"learning_rate": 0.0001007,
"loss": 10.8375,
"step": 10
},
{
"epoch": 0.004084663943557371,
"grad_norm": 0.48166805505752563,
"learning_rate": 0.00010017,
"loss": 10.8247,
"step": 11
},
{
"epoch": 0.004455997029335313,
"grad_norm": 0.4881480038166046,
"learning_rate": 9.963999999999999e-05,
"loss": 10.8119,
"step": 12
},
{
"epoch": 0.004827330115113257,
"grad_norm": 0.4888553321361542,
"learning_rate": 9.910999999999999e-05,
"loss": 10.8045,
"step": 13
},
{
"epoch": 0.0051986632008912,
"grad_norm": 0.5155038237571716,
"learning_rate": 9.858e-05,
"loss": 10.8187,
"step": 14
},
{
"epoch": 0.005569996286669142,
"grad_norm": 0.5028515458106995,
"learning_rate": 9.805e-05,
"loss": 10.8058,
"step": 15
},
{
"epoch": 0.005941329372447085,
"grad_norm": 0.5659723877906799,
"learning_rate": 9.752e-05,
"loss": 10.8044,
"step": 16
},
{
"epoch": 0.0063126624582250275,
"grad_norm": 0.552946150302887,
"learning_rate": 9.698999999999999e-05,
"loss": 10.8163,
"step": 17
},
{
"epoch": 0.006683995544002971,
"grad_norm": 0.575535774230957,
"learning_rate": 9.646e-05,
"loss": 10.7946,
"step": 18
},
{
"epoch": 0.007055328629780914,
"grad_norm": 0.5085407495498657,
"learning_rate": 9.593e-05,
"loss": 10.7978,
"step": 19
},
{
"epoch": 0.007426661715558856,
"grad_norm": 0.48418793082237244,
"learning_rate": 9.539999999999999e-05,
"loss": 10.8028,
"step": 20
},
{
"epoch": 0.007797994801336799,
"grad_norm": 0.5124921202659607,
"learning_rate": 9.487e-05,
"loss": 10.8111,
"step": 21
},
{
"epoch": 0.008169327887114742,
"grad_norm": 0.629709780216217,
"learning_rate": 9.434e-05,
"loss": 10.7865,
"step": 22
},
{
"epoch": 0.008540660972892685,
"grad_norm": 0.5611943006515503,
"learning_rate": 9.381e-05,
"loss": 10.7937,
"step": 23
},
{
"epoch": 0.008911994058670627,
"grad_norm": 0.5063443183898926,
"learning_rate": 9.327999999999999e-05,
"loss": 10.7651,
"step": 24
},
{
"epoch": 0.00928332714444857,
"grad_norm": 0.6020315885543823,
"learning_rate": 9.274999999999999e-05,
"loss": 10.7925,
"step": 25
},
{
"epoch": 0.009654660230226514,
"grad_norm": 0.5812113285064697,
"learning_rate": 9.222e-05,
"loss": 10.7925,
"step": 26
},
{
"epoch": 0.010025993316004456,
"grad_norm": 0.561542272567749,
"learning_rate": 9.169e-05,
"loss": 10.761,
"step": 27
},
{
"epoch": 0.0103973264017824,
"grad_norm": 0.5838537812232971,
"learning_rate": 9.116e-05,
"loss": 10.753,
"step": 28
},
{
"epoch": 0.010768659487560341,
"grad_norm": 0.5075663328170776,
"learning_rate": 9.062999999999999e-05,
"loss": 10.777,
"step": 29
},
{
"epoch": 0.011139992573338284,
"grad_norm": 0.5487096309661865,
"learning_rate": 9.01e-05,
"loss": 10.7593,
"step": 30
},
{
"epoch": 0.011511325659116228,
"grad_norm": 0.4488137662410736,
"learning_rate": 8.957e-05,
"loss": 10.7719,
"step": 31
},
{
"epoch": 0.01188265874489417,
"grad_norm": 0.5524377226829529,
"learning_rate": 8.903999999999999e-05,
"loss": 10.7879,
"step": 32
},
{
"epoch": 0.012253991830672113,
"grad_norm": 0.5395157933235168,
"learning_rate": 8.850999999999999e-05,
"loss": 10.775,
"step": 33
},
{
"epoch": 0.012625324916450055,
"grad_norm": 0.5032263994216919,
"learning_rate": 8.798e-05,
"loss": 10.7386,
"step": 34
},
{
"epoch": 0.012996658002227999,
"grad_norm": 0.5099305510520935,
"learning_rate": 8.745e-05,
"loss": 10.7548,
"step": 35
},
{
"epoch": 0.013367991088005942,
"grad_norm": 0.5281259417533875,
"learning_rate": 8.692e-05,
"loss": 10.7409,
"step": 36
},
{
"epoch": 0.013739324173783884,
"grad_norm": 0.5443010330200195,
"learning_rate": 8.638999999999999e-05,
"loss": 10.7454,
"step": 37
},
{
"epoch": 0.014110657259561827,
"grad_norm": 0.5503028631210327,
"learning_rate": 8.586e-05,
"loss": 10.7472,
"step": 38
},
{
"epoch": 0.014481990345339769,
"grad_norm": 0.5560601949691772,
"learning_rate": 8.533e-05,
"loss": 10.7289,
"step": 39
},
{
"epoch": 0.014853323431117713,
"grad_norm": 0.5327764749526978,
"learning_rate": 8.479999999999999e-05,
"loss": 10.7278,
"step": 40
},
{
"epoch": 0.015224656516895656,
"grad_norm": 0.49340856075286865,
"learning_rate": 8.427e-05,
"loss": 10.7534,
"step": 41
},
{
"epoch": 0.015595989602673598,
"grad_norm": 0.5683452486991882,
"learning_rate": 8.374e-05,
"loss": 10.7333,
"step": 42
},
{
"epoch": 0.01596732268845154,
"grad_norm": 0.5234541893005371,
"learning_rate": 8.321e-05,
"loss": 10.7106,
"step": 43
},
{
"epoch": 0.016338655774229483,
"grad_norm": 0.5029370188713074,
"learning_rate": 8.268e-05,
"loss": 10.7453,
"step": 44
},
{
"epoch": 0.01670998886000743,
"grad_norm": 0.5557736158370972,
"learning_rate": 8.214999999999999e-05,
"loss": 10.7304,
"step": 45
},
{
"epoch": 0.01708132194578537,
"grad_norm": 0.5473191142082214,
"learning_rate": 8.162e-05,
"loss": 10.7416,
"step": 46
},
{
"epoch": 0.017452655031563312,
"grad_norm": 0.6747854948043823,
"learning_rate": 8.108999999999998e-05,
"loss": 10.7589,
"step": 47
},
{
"epoch": 0.017823988117341254,
"grad_norm": 0.5004291534423828,
"learning_rate": 8.056e-05,
"loss": 10.7202,
"step": 48
},
{
"epoch": 0.0181953212031192,
"grad_norm": 0.524412214756012,
"learning_rate": 8.003e-05,
"loss": 10.7175,
"step": 49
},
{
"epoch": 0.01856665428889714,
"grad_norm": 0.5781667828559875,
"learning_rate": 7.95e-05,
"loss": 10.7257,
"step": 50
},
{
"epoch": 0.01856665428889714,
"eval_loss": 10.735980033874512,
"eval_runtime": 8.5155,
"eval_samples_per_second": 133.168,
"eval_steps_per_second": 33.351,
"step": 50
},
{
"epoch": 0.018937987374675083,
"grad_norm": 0.4127328395843506,
"learning_rate": 7.897e-05,
"loss": 10.7444,
"step": 51
},
{
"epoch": 0.019309320460453028,
"grad_norm": 0.3050692081451416,
"learning_rate": 7.843999999999999e-05,
"loss": 10.7566,
"step": 52
},
{
"epoch": 0.01968065354623097,
"grad_norm": 0.3271315395832062,
"learning_rate": 7.790999999999999e-05,
"loss": 10.7442,
"step": 53
},
{
"epoch": 0.02005198663200891,
"grad_norm": 0.32859012484550476,
"learning_rate": 7.738e-05,
"loss": 10.7451,
"step": 54
},
{
"epoch": 0.020423319717786857,
"grad_norm": 0.3305034637451172,
"learning_rate": 7.685e-05,
"loss": 10.7568,
"step": 55
},
{
"epoch": 0.0207946528035648,
"grad_norm": 0.3300938308238983,
"learning_rate": 7.632e-05,
"loss": 10.7359,
"step": 56
},
{
"epoch": 0.02116598588934274,
"grad_norm": 0.3706151247024536,
"learning_rate": 7.578999999999999e-05,
"loss": 10.7507,
"step": 57
},
{
"epoch": 0.021537318975120682,
"grad_norm": 0.3721340000629425,
"learning_rate": 7.526e-05,
"loss": 10.7443,
"step": 58
},
{
"epoch": 0.021908652060898627,
"grad_norm": 0.30731654167175293,
"learning_rate": 7.473e-05,
"loss": 10.7477,
"step": 59
},
{
"epoch": 0.02227998514667657,
"grad_norm": 0.3816874623298645,
"learning_rate": 7.419999999999999e-05,
"loss": 10.7548,
"step": 60
},
{
"epoch": 0.02265131823245451,
"grad_norm": 0.3089187443256378,
"learning_rate": 7.367e-05,
"loss": 10.7363,
"step": 61
},
{
"epoch": 0.023022651318232456,
"grad_norm": 0.3170040547847748,
"learning_rate": 7.314e-05,
"loss": 10.7385,
"step": 62
},
{
"epoch": 0.023393984404010398,
"grad_norm": 0.3177466094493866,
"learning_rate": 7.261e-05,
"loss": 10.7296,
"step": 63
},
{
"epoch": 0.02376531748978834,
"grad_norm": 0.32649269700050354,
"learning_rate": 7.208e-05,
"loss": 10.7279,
"step": 64
},
{
"epoch": 0.024136650575566285,
"grad_norm": 0.31671473383903503,
"learning_rate": 7.154999999999999e-05,
"loss": 10.7275,
"step": 65
},
{
"epoch": 0.024507983661344226,
"grad_norm": 0.3684324622154236,
"learning_rate": 7.102e-05,
"loss": 10.7562,
"step": 66
},
{
"epoch": 0.024879316747122168,
"grad_norm": 0.35255691409111023,
"learning_rate": 7.049e-05,
"loss": 10.7501,
"step": 67
},
{
"epoch": 0.02525064983290011,
"grad_norm": 0.3559291362762451,
"learning_rate": 6.996e-05,
"loss": 10.7445,
"step": 68
},
{
"epoch": 0.025621982918678055,
"grad_norm": 0.29838132858276367,
"learning_rate": 6.943e-05,
"loss": 10.7373,
"step": 69
},
{
"epoch": 0.025993316004455997,
"grad_norm": 0.32272014021873474,
"learning_rate": 6.89e-05,
"loss": 10.7402,
"step": 70
},
{
"epoch": 0.02636464909023394,
"grad_norm": 0.29716819524765015,
"learning_rate": 6.837e-05,
"loss": 10.7499,
"step": 71
},
{
"epoch": 0.026735982176011884,
"grad_norm": 0.2992607057094574,
"learning_rate": 6.784e-05,
"loss": 10.7316,
"step": 72
},
{
"epoch": 0.027107315261789826,
"grad_norm": 0.3100895285606384,
"learning_rate": 6.730999999999999e-05,
"loss": 10.7266,
"step": 73
},
{
"epoch": 0.027478648347567768,
"grad_norm": 0.3643105924129486,
"learning_rate": 6.678e-05,
"loss": 10.7325,
"step": 74
},
{
"epoch": 0.027849981433345713,
"grad_norm": 0.3808096945285797,
"learning_rate": 6.625e-05,
"loss": 10.715,
"step": 75
},
{
"epoch": 0.028221314519123655,
"grad_norm": 0.3222029209136963,
"learning_rate": 6.572e-05,
"loss": 10.7365,
"step": 76
},
{
"epoch": 0.028592647604901596,
"grad_norm": 0.32192742824554443,
"learning_rate": 6.519e-05,
"loss": 10.7343,
"step": 77
},
{
"epoch": 0.028963980690679538,
"grad_norm": 0.40252402424812317,
"learning_rate": 6.466e-05,
"loss": 10.7201,
"step": 78
},
{
"epoch": 0.029335313776457483,
"grad_norm": 0.3803711533546448,
"learning_rate": 6.413e-05,
"loss": 10.7316,
"step": 79
},
{
"epoch": 0.029706646862235425,
"grad_norm": 0.3504716455936432,
"learning_rate": 6.359999999999999e-05,
"loss": 10.7356,
"step": 80
},
{
"epoch": 0.030077979948013367,
"grad_norm": 0.35962575674057007,
"learning_rate": 6.306999999999999e-05,
"loss": 10.7201,
"step": 81
},
{
"epoch": 0.030449313033791312,
"grad_norm": 0.36944904923439026,
"learning_rate": 6.254000000000001e-05,
"loss": 10.7067,
"step": 82
},
{
"epoch": 0.030820646119569254,
"grad_norm": 0.38394877314567566,
"learning_rate": 6.201e-05,
"loss": 10.7409,
"step": 83
},
{
"epoch": 0.031191979205347196,
"grad_norm": 0.41426077485084534,
"learning_rate": 6.148e-05,
"loss": 10.748,
"step": 84
},
{
"epoch": 0.03156331229112514,
"grad_norm": 0.3880663812160492,
"learning_rate": 6.095e-05,
"loss": 10.7121,
"step": 85
},
{
"epoch": 0.03193464537690308,
"grad_norm": 0.4019233286380768,
"learning_rate": 6.0419999999999994e-05,
"loss": 10.7155,
"step": 86
},
{
"epoch": 0.032305978462681025,
"grad_norm": 0.4152543544769287,
"learning_rate": 5.988999999999999e-05,
"loss": 10.7029,
"step": 87
},
{
"epoch": 0.032677311548458966,
"grad_norm": 0.47186049818992615,
"learning_rate": 5.9359999999999994e-05,
"loss": 10.7208,
"step": 88
},
{
"epoch": 0.03304864463423691,
"grad_norm": 0.4307488203048706,
"learning_rate": 5.8830000000000004e-05,
"loss": 10.701,
"step": 89
},
{
"epoch": 0.03341997772001486,
"grad_norm": 0.47585317492485046,
"learning_rate": 5.83e-05,
"loss": 10.7097,
"step": 90
},
{
"epoch": 0.0337913108057928,
"grad_norm": 0.4662454426288605,
"learning_rate": 5.777e-05,
"loss": 10.6999,
"step": 91
},
{
"epoch": 0.03416264389157074,
"grad_norm": 0.45331087708473206,
"learning_rate": 5.7239999999999994e-05,
"loss": 10.6841,
"step": 92
},
{
"epoch": 0.03453397697734868,
"grad_norm": 0.5249789357185364,
"learning_rate": 5.671e-05,
"loss": 10.7172,
"step": 93
},
{
"epoch": 0.034905310063126624,
"grad_norm": 0.5332587361335754,
"learning_rate": 5.6179999999999994e-05,
"loss": 10.7276,
"step": 94
},
{
"epoch": 0.035276643148904566,
"grad_norm": 0.4843442440032959,
"learning_rate": 5.5650000000000004e-05,
"loss": 10.7224,
"step": 95
},
{
"epoch": 0.03564797623468251,
"grad_norm": 0.5148628950119019,
"learning_rate": 5.512e-05,
"loss": 10.6775,
"step": 96
},
{
"epoch": 0.036019309320460456,
"grad_norm": 0.5223995447158813,
"learning_rate": 5.459e-05,
"loss": 10.7094,
"step": 97
},
{
"epoch": 0.0363906424062384,
"grad_norm": 0.5281713604927063,
"learning_rate": 5.406e-05,
"loss": 10.6981,
"step": 98
},
{
"epoch": 0.03676197549201634,
"grad_norm": 0.6166062951087952,
"learning_rate": 5.353e-05,
"loss": 10.7082,
"step": 99
},
{
"epoch": 0.03713330857779428,
"grad_norm": 0.7465858459472656,
"learning_rate": 5.2999999999999994e-05,
"loss": 10.6922,
"step": 100
},
{
"epoch": 0.03713330857779428,
"eval_loss": 10.71716022491455,
"eval_runtime": 8.5368,
"eval_samples_per_second": 132.837,
"eval_steps_per_second": 33.268,
"step": 100
},
{
"epoch": 0.03750464166357222,
"grad_norm": 0.30814334750175476,
"learning_rate": 5.246999999999999e-05,
"loss": 10.7375,
"step": 101
},
{
"epoch": 0.037875974749350165,
"grad_norm": 0.34333688020706177,
"learning_rate": 5.194e-05,
"loss": 10.7405,
"step": 102
},
{
"epoch": 0.03824730783512811,
"grad_norm": 0.396445631980896,
"learning_rate": 5.141e-05,
"loss": 10.7225,
"step": 103
},
{
"epoch": 0.038618640920906055,
"grad_norm": 0.3750787079334259,
"learning_rate": 5.088e-05,
"loss": 10.7264,
"step": 104
},
{
"epoch": 0.038989974006684,
"grad_norm": 0.30554020404815674,
"learning_rate": 5.035e-05,
"loss": 10.7261,
"step": 105
},
{
"epoch": 0.03936130709246194,
"grad_norm": 0.35283979773521423,
"learning_rate": 4.9819999999999994e-05,
"loss": 10.7309,
"step": 106
},
{
"epoch": 0.03973264017823988,
"grad_norm": 0.39950183033943176,
"learning_rate": 4.929e-05,
"loss": 10.7223,
"step": 107
},
{
"epoch": 0.04010397326401782,
"grad_norm": 0.38336852192878723,
"learning_rate": 4.876e-05,
"loss": 10.7209,
"step": 108
},
{
"epoch": 0.040475306349795764,
"grad_norm": 0.362358421087265,
"learning_rate": 4.823e-05,
"loss": 10.735,
"step": 109
},
{
"epoch": 0.04084663943557371,
"grad_norm": 0.397513747215271,
"learning_rate": 4.7699999999999994e-05,
"loss": 10.7304,
"step": 110
},
{
"epoch": 0.041217972521351655,
"grad_norm": 0.4068267047405243,
"learning_rate": 4.717e-05,
"loss": 10.7297,
"step": 111
},
{
"epoch": 0.0415893056071296,
"grad_norm": 0.38471511006355286,
"learning_rate": 4.6639999999999994e-05,
"loss": 10.754,
"step": 112
},
{
"epoch": 0.04196063869290754,
"grad_norm": 0.3469901978969574,
"learning_rate": 4.611e-05,
"loss": 10.7255,
"step": 113
},
{
"epoch": 0.04233197177868548,
"grad_norm": 0.3917114734649658,
"learning_rate": 4.558e-05,
"loss": 10.72,
"step": 114
},
{
"epoch": 0.04270330486446342,
"grad_norm": 0.3135025203227997,
"learning_rate": 4.505e-05,
"loss": 10.7204,
"step": 115
},
{
"epoch": 0.043074637950241364,
"grad_norm": 0.32310202717781067,
"learning_rate": 4.4519999999999994e-05,
"loss": 10.7334,
"step": 116
},
{
"epoch": 0.04344597103601931,
"grad_norm": 0.316651314496994,
"learning_rate": 4.399e-05,
"loss": 10.7357,
"step": 117
},
{
"epoch": 0.043817304121797254,
"grad_norm": 0.3423415422439575,
"learning_rate": 4.346e-05,
"loss": 10.7195,
"step": 118
},
{
"epoch": 0.044188637207575196,
"grad_norm": 0.3235597610473633,
"learning_rate": 4.293e-05,
"loss": 10.7321,
"step": 119
},
{
"epoch": 0.04455997029335314,
"grad_norm": 0.33004501461982727,
"learning_rate": 4.2399999999999994e-05,
"loss": 10.7148,
"step": 120
},
{
"epoch": 0.04493130337913108,
"grad_norm": 0.3902532756328583,
"learning_rate": 4.187e-05,
"loss": 10.7344,
"step": 121
},
{
"epoch": 0.04530263646490902,
"grad_norm": 0.30236899852752686,
"learning_rate": 4.134e-05,
"loss": 10.7194,
"step": 122
},
{
"epoch": 0.04567396955068696,
"grad_norm": 0.38607534766197205,
"learning_rate": 4.081e-05,
"loss": 10.7046,
"step": 123
},
{
"epoch": 0.04604530263646491,
"grad_norm": 0.3628384470939636,
"learning_rate": 4.028e-05,
"loss": 10.7198,
"step": 124
},
{
"epoch": 0.046416635722242854,
"grad_norm": 0.36593976616859436,
"learning_rate": 3.975e-05,
"loss": 10.7281,
"step": 125
},
{
"epoch": 0.046787968808020795,
"grad_norm": 0.3433733880519867,
"learning_rate": 3.9219999999999994e-05,
"loss": 10.7148,
"step": 126
},
{
"epoch": 0.04715930189379874,
"grad_norm": 0.37441202998161316,
"learning_rate": 3.869e-05,
"loss": 10.7132,
"step": 127
},
{
"epoch": 0.04753063497957668,
"grad_norm": 0.3574247658252716,
"learning_rate": 3.816e-05,
"loss": 10.7207,
"step": 128
},
{
"epoch": 0.04790196806535462,
"grad_norm": 0.3976617157459259,
"learning_rate": 3.763e-05,
"loss": 10.7027,
"step": 129
},
{
"epoch": 0.04827330115113257,
"grad_norm": 0.4094492495059967,
"learning_rate": 3.7099999999999994e-05,
"loss": 10.7168,
"step": 130
},
{
"epoch": 0.04864463423691051,
"grad_norm": 0.3910907506942749,
"learning_rate": 3.657e-05,
"loss": 10.6894,
"step": 131
},
{
"epoch": 0.04901596732268845,
"grad_norm": 0.347520112991333,
"learning_rate": 3.604e-05,
"loss": 10.7075,
"step": 132
},
{
"epoch": 0.049387300408466395,
"grad_norm": 0.33360084891319275,
"learning_rate": 3.551e-05,
"loss": 10.7175,
"step": 133
},
{
"epoch": 0.049758633494244336,
"grad_norm": 0.3098675608634949,
"learning_rate": 3.498e-05,
"loss": 10.6943,
"step": 134
},
{
"epoch": 0.05012996658002228,
"grad_norm": 0.417607843875885,
"learning_rate": 3.445e-05,
"loss": 10.696,
"step": 135
},
{
"epoch": 0.05050129966580022,
"grad_norm": 0.37847384810447693,
"learning_rate": 3.392e-05,
"loss": 10.7031,
"step": 136
},
{
"epoch": 0.05087263275157817,
"grad_norm": 0.34361228346824646,
"learning_rate": 3.339e-05,
"loss": 10.714,
"step": 137
},
{
"epoch": 0.05124396583735611,
"grad_norm": 0.4933507740497589,
"learning_rate": 3.286e-05,
"loss": 10.6758,
"step": 138
},
{
"epoch": 0.05161529892313405,
"grad_norm": 0.412986695766449,
"learning_rate": 3.233e-05,
"loss": 10.6963,
"step": 139
},
{
"epoch": 0.051986632008911994,
"grad_norm": 0.43772485852241516,
"learning_rate": 3.1799999999999994e-05,
"loss": 10.7149,
"step": 140
},
{
"epoch": 0.052357965094689936,
"grad_norm": 0.37518948316574097,
"learning_rate": 3.1270000000000004e-05,
"loss": 10.6951,
"step": 141
},
{
"epoch": 0.05272929818046788,
"grad_norm": 0.36650022864341736,
"learning_rate": 3.074e-05,
"loss": 10.6925,
"step": 142
},
{
"epoch": 0.05310063126624582,
"grad_norm": 0.4543534219264984,
"learning_rate": 3.0209999999999997e-05,
"loss": 10.683,
"step": 143
},
{
"epoch": 0.05347196435202377,
"grad_norm": 0.47826236486434937,
"learning_rate": 2.9679999999999997e-05,
"loss": 10.7077,
"step": 144
},
{
"epoch": 0.05384329743780171,
"grad_norm": 0.3722932040691376,
"learning_rate": 2.915e-05,
"loss": 10.6916,
"step": 145
},
{
"epoch": 0.05421463052357965,
"grad_norm": 0.5447331666946411,
"learning_rate": 2.8619999999999997e-05,
"loss": 10.7065,
"step": 146
},
{
"epoch": 0.05458596360935759,
"grad_norm": 0.42991727590560913,
"learning_rate": 2.8089999999999997e-05,
"loss": 10.6734,
"step": 147
},
{
"epoch": 0.054957296695135535,
"grad_norm": 0.5424541234970093,
"learning_rate": 2.756e-05,
"loss": 10.6609,
"step": 148
},
{
"epoch": 0.05532862978091348,
"grad_norm": 0.6233406066894531,
"learning_rate": 2.703e-05,
"loss": 10.6865,
"step": 149
},
{
"epoch": 0.055699962866691426,
"grad_norm": 0.635688066482544,
"learning_rate": 2.6499999999999997e-05,
"loss": 10.6352,
"step": 150
},
{
"epoch": 0.055699962866691426,
"eval_loss": 10.709113121032715,
"eval_runtime": 8.5126,
"eval_samples_per_second": 133.215,
"eval_steps_per_second": 33.362,
"step": 150
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 25616292249600.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}