{ "best_metric": 0.6912915706634521, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.026439288783131735, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013219644391565866, "grad_norm": 0.19907163083553314, "learning_rate": 1e-05, "loss": 0.7056, "step": 1 }, { "epoch": 0.00013219644391565866, "eval_loss": 1.0835129022598267, "eval_runtime": 1288.5293, "eval_samples_per_second": 9.887, "eval_steps_per_second": 2.472, "step": 1 }, { "epoch": 0.0002643928878313173, "grad_norm": 0.24988684058189392, "learning_rate": 2e-05, "loss": 0.782, "step": 2 }, { "epoch": 0.000396589331746976, "grad_norm": 0.27034634351730347, "learning_rate": 3e-05, "loss": 0.8054, "step": 3 }, { "epoch": 0.0005287857756626346, "grad_norm": 0.31130942702293396, "learning_rate": 4e-05, "loss": 0.8834, "step": 4 }, { "epoch": 0.0006609822195782933, "grad_norm": 0.2468542903661728, "learning_rate": 5e-05, "loss": 0.8018, "step": 5 }, { "epoch": 0.000793178663493952, "grad_norm": 0.2454385757446289, "learning_rate": 6e-05, "loss": 0.8879, "step": 6 }, { "epoch": 0.0009253751074096107, "grad_norm": 0.25996339321136475, "learning_rate": 7e-05, "loss": 0.874, "step": 7 }, { "epoch": 0.0010575715513252693, "grad_norm": 0.2632141709327698, "learning_rate": 8e-05, "loss": 0.9047, "step": 8 }, { "epoch": 0.001189767995240928, "grad_norm": 0.27899569272994995, "learning_rate": 9e-05, "loss": 0.9292, "step": 9 }, { "epoch": 0.0013219644391565867, "grad_norm": 0.2870829701423645, "learning_rate": 0.0001, "loss": 0.839, "step": 10 }, { "epoch": 0.0014541608830722455, "grad_norm": 0.3339110016822815, "learning_rate": 9.999316524962345e-05, "loss": 0.7648, "step": 11 }, { "epoch": 0.001586357326987904, "grad_norm": 0.3318046033382416, "learning_rate": 9.997266286704631e-05, "loss": 0.9358, "step": 12 }, { "epoch": 0.0017185537709035626, "grad_norm": 0.2891906499862671, "learning_rate": 9.993849845741524e-05, "loss": 0.816, "step": 13 }, { "epoch": 0.0018507502148192214, "grad_norm": 0.33678507804870605, "learning_rate": 9.989068136093873e-05, "loss": 0.9115, "step": 14 }, { "epoch": 0.00198294665873488, "grad_norm": 0.29576581716537476, "learning_rate": 9.98292246503335e-05, "loss": 0.9148, "step": 15 }, { "epoch": 0.0021151431026505386, "grad_norm": 0.30421528220176697, "learning_rate": 9.975414512725057e-05, "loss": 0.7969, "step": 16 }, { "epoch": 0.0022473395465661974, "grad_norm": 0.28432103991508484, "learning_rate": 9.966546331768191e-05, "loss": 0.8462, "step": 17 }, { "epoch": 0.002379535990481856, "grad_norm": 0.31790468096733093, "learning_rate": 9.956320346634876e-05, "loss": 0.8638, "step": 18 }, { "epoch": 0.0025117324343975145, "grad_norm": 0.2695980668067932, "learning_rate": 9.944739353007344e-05, "loss": 0.8375, "step": 19 }, { "epoch": 0.0026439288783131733, "grad_norm": 0.2560156285762787, "learning_rate": 9.931806517013612e-05, "loss": 0.844, "step": 20 }, { "epoch": 0.002776125322228832, "grad_norm": 0.2801140248775482, "learning_rate": 9.917525374361912e-05, "loss": 0.8196, "step": 21 }, { "epoch": 0.002908321766144491, "grad_norm": 0.2795511782169342, "learning_rate": 9.901899829374047e-05, "loss": 0.8558, "step": 22 }, { "epoch": 0.0030405182100601493, "grad_norm": 0.28868985176086426, "learning_rate": 9.884934153917997e-05, "loss": 0.8086, "step": 23 }, { "epoch": 0.003172714653975808, "grad_norm": 0.2757827043533325, "learning_rate": 9.86663298624003e-05, "loss": 0.7993, "step": 24 }, { "epoch": 0.003304911097891467, "grad_norm": 0.3054489195346832, "learning_rate": 9.847001329696653e-05, "loss": 0.9127, "step": 25 }, { "epoch": 0.003437107541807125, "grad_norm": 0.3149811327457428, "learning_rate": 9.826044551386744e-05, "loss": 0.9135, "step": 26 }, { "epoch": 0.003569303985722784, "grad_norm": 0.3033928871154785, "learning_rate": 9.803768380684242e-05, "loss": 0.8242, "step": 27 }, { "epoch": 0.003701500429638443, "grad_norm": 0.3094732463359833, "learning_rate": 9.780178907671789e-05, "loss": 0.931, "step": 28 }, { "epoch": 0.0038336968735541016, "grad_norm": 0.3037339746952057, "learning_rate": 9.755282581475769e-05, "loss": 0.9231, "step": 29 }, { "epoch": 0.00396589331746976, "grad_norm": 0.3039802610874176, "learning_rate": 9.729086208503174e-05, "loss": 0.8578, "step": 30 }, { "epoch": 0.004098089761385418, "grad_norm": 0.32226693630218506, "learning_rate": 9.701596950580806e-05, "loss": 0.852, "step": 31 }, { "epoch": 0.004230286205301077, "grad_norm": 0.3024626076221466, "learning_rate": 9.672822322997305e-05, "loss": 0.9268, "step": 32 }, { "epoch": 0.004362482649216736, "grad_norm": 0.3129371106624603, "learning_rate": 9.642770192448536e-05, "loss": 0.8091, "step": 33 }, { "epoch": 0.004494679093132395, "grad_norm": 0.3346866965293884, "learning_rate": 9.611448774886924e-05, "loss": 0.8698, "step": 34 }, { "epoch": 0.0046268755370480535, "grad_norm": 0.3507196307182312, "learning_rate": 9.578866633275288e-05, "loss": 0.8524, "step": 35 }, { "epoch": 0.004759071980963712, "grad_norm": 0.35556769371032715, "learning_rate": 9.545032675245813e-05, "loss": 0.8095, "step": 36 }, { "epoch": 0.004891268424879371, "grad_norm": 0.3588707745075226, "learning_rate": 9.509956150664796e-05, "loss": 0.8525, "step": 37 }, { "epoch": 0.005023464868795029, "grad_norm": 0.35559579730033875, "learning_rate": 9.473646649103818e-05, "loss": 0.8739, "step": 38 }, { "epoch": 0.005155661312710688, "grad_norm": 0.35974085330963135, "learning_rate": 9.43611409721806e-05, "loss": 0.8515, "step": 39 }, { "epoch": 0.005287857756626347, "grad_norm": 0.3806246817111969, "learning_rate": 9.397368756032445e-05, "loss": 0.7688, "step": 40 }, { "epoch": 0.005420054200542005, "grad_norm": 0.34556135535240173, "learning_rate": 9.357421218136386e-05, "loss": 0.8072, "step": 41 }, { "epoch": 0.005552250644457664, "grad_norm": 0.37214338779449463, "learning_rate": 9.316282404787871e-05, "loss": 0.8728, "step": 42 }, { "epoch": 0.005684447088373323, "grad_norm": 0.37648141384124756, "learning_rate": 9.273963562927695e-05, "loss": 0.7623, "step": 43 }, { "epoch": 0.005816643532288982, "grad_norm": 0.33934396505355835, "learning_rate": 9.230476262104677e-05, "loss": 0.7707, "step": 44 }, { "epoch": 0.00594883997620464, "grad_norm": 0.383687287569046, "learning_rate": 9.185832391312644e-05, "loss": 0.7691, "step": 45 }, { "epoch": 0.0060810364201202985, "grad_norm": 0.3688667416572571, "learning_rate": 9.140044155740101e-05, "loss": 0.7647, "step": 46 }, { "epoch": 0.006213232864035957, "grad_norm": 0.4019969403743744, "learning_rate": 9.093124073433463e-05, "loss": 0.7542, "step": 47 }, { "epoch": 0.006345429307951616, "grad_norm": 0.38373300433158875, "learning_rate": 9.045084971874738e-05, "loss": 0.7444, "step": 48 }, { "epoch": 0.006477625751867275, "grad_norm": 0.40427687764167786, "learning_rate": 8.995939984474624e-05, "loss": 0.7061, "step": 49 }, { "epoch": 0.006609822195782934, "grad_norm": 0.586796224117279, "learning_rate": 8.945702546981969e-05, "loss": 0.6336, "step": 50 }, { "epoch": 0.006609822195782934, "eval_loss": 0.7983591556549072, "eval_runtime": 1297.6383, "eval_samples_per_second": 9.818, "eval_steps_per_second": 2.454, "step": 50 }, { "epoch": 0.0067420186396985925, "grad_norm": 0.28511500358581543, "learning_rate": 8.894386393810563e-05, "loss": 0.651, "step": 51 }, { "epoch": 0.00687421508361425, "grad_norm": 0.27332431077957153, "learning_rate": 8.842005554284296e-05, "loss": 0.6603, "step": 52 }, { "epoch": 0.007006411527529909, "grad_norm": 0.2159067839384079, "learning_rate": 8.788574348801675e-05, "loss": 0.6719, "step": 53 }, { "epoch": 0.007138607971445568, "grad_norm": 0.20144110918045044, "learning_rate": 8.73410738492077e-05, "loss": 0.7026, "step": 54 }, { "epoch": 0.007270804415361227, "grad_norm": 0.19617719948291779, "learning_rate": 8.678619553365659e-05, "loss": 0.7491, "step": 55 }, { "epoch": 0.007403000859276886, "grad_norm": 0.20323942601680756, "learning_rate": 8.622126023955446e-05, "loss": 0.6962, "step": 56 }, { "epoch": 0.007535197303192544, "grad_norm": 0.22517581284046173, "learning_rate": 8.564642241456986e-05, "loss": 0.7197, "step": 57 }, { "epoch": 0.007667393747108203, "grad_norm": 0.19670049846172333, "learning_rate": 8.506183921362443e-05, "loss": 0.7349, "step": 58 }, { "epoch": 0.007799590191023861, "grad_norm": 0.22828136384487152, "learning_rate": 8.44676704559283e-05, "loss": 0.7543, "step": 59 }, { "epoch": 0.00793178663493952, "grad_norm": 0.22368234395980835, "learning_rate": 8.386407858128706e-05, "loss": 0.776, "step": 60 }, { "epoch": 0.00806398307885518, "grad_norm": 0.22974425554275513, "learning_rate": 8.32512286056924e-05, "loss": 0.7536, "step": 61 }, { "epoch": 0.008196179522770837, "grad_norm": 0.2255934476852417, "learning_rate": 8.262928807620843e-05, "loss": 0.8714, "step": 62 }, { "epoch": 0.008328375966686495, "grad_norm": 0.24428120255470276, "learning_rate": 8.199842702516583e-05, "loss": 0.8089, "step": 63 }, { "epoch": 0.008460572410602154, "grad_norm": 0.2589269280433655, "learning_rate": 8.135881792367686e-05, "loss": 0.6915, "step": 64 }, { "epoch": 0.008592768854517813, "grad_norm": 0.2671506702899933, "learning_rate": 8.07106356344834e-05, "loss": 0.836, "step": 65 }, { "epoch": 0.008724965298433472, "grad_norm": 0.290968656539917, "learning_rate": 8.005405736415126e-05, "loss": 0.8378, "step": 66 }, { "epoch": 0.00885716174234913, "grad_norm": 0.28429681062698364, "learning_rate": 7.938926261462366e-05, "loss": 0.8242, "step": 67 }, { "epoch": 0.00898935818626479, "grad_norm": 0.2940610349178314, "learning_rate": 7.871643313414718e-05, "loss": 0.9007, "step": 68 }, { "epoch": 0.009121554630180448, "grad_norm": 0.26045677065849304, "learning_rate": 7.803575286758364e-05, "loss": 0.7044, "step": 69 }, { "epoch": 0.009253751074096107, "grad_norm": 0.2554974853992462, "learning_rate": 7.734740790612136e-05, "loss": 0.7129, "step": 70 }, { "epoch": 0.009385947518011766, "grad_norm": 0.24801850318908691, "learning_rate": 7.66515864363997e-05, "loss": 0.7456, "step": 71 }, { "epoch": 0.009518143961927425, "grad_norm": 0.26676586270332336, "learning_rate": 7.594847868906076e-05, "loss": 0.8307, "step": 72 }, { "epoch": 0.009650340405843083, "grad_norm": 0.28508061170578003, "learning_rate": 7.52382768867422e-05, "loss": 0.7025, "step": 73 }, { "epoch": 0.009782536849758742, "grad_norm": 0.2627425789833069, "learning_rate": 7.452117519152542e-05, "loss": 0.8818, "step": 74 }, { "epoch": 0.009914733293674401, "grad_norm": 0.27217817306518555, "learning_rate": 7.379736965185368e-05, "loss": 0.8648, "step": 75 }, { "epoch": 0.010046929737590058, "grad_norm": 0.26670771837234497, "learning_rate": 7.30670581489344e-05, "loss": 0.7436, "step": 76 }, { "epoch": 0.010179126181505717, "grad_norm": 0.2777654826641083, "learning_rate": 7.233044034264034e-05, "loss": 0.8184, "step": 77 }, { "epoch": 0.010311322625421376, "grad_norm": 0.2620057761669159, "learning_rate": 7.158771761692464e-05, "loss": 0.8208, "step": 78 }, { "epoch": 0.010443519069337034, "grad_norm": 0.2977304756641388, "learning_rate": 7.083909302476453e-05, "loss": 0.8203, "step": 79 }, { "epoch": 0.010575715513252693, "grad_norm": 0.29825136065483093, "learning_rate": 7.008477123264848e-05, "loss": 0.7629, "step": 80 }, { "epoch": 0.010707911957168352, "grad_norm": 0.30665692687034607, "learning_rate": 6.932495846462261e-05, "loss": 0.8057, "step": 81 }, { "epoch": 0.01084010840108401, "grad_norm": 0.304373562335968, "learning_rate": 6.855986244591104e-05, "loss": 0.851, "step": 82 }, { "epoch": 0.01097230484499967, "grad_norm": 0.30313679575920105, "learning_rate": 6.778969234612584e-05, "loss": 0.6742, "step": 83 }, { "epoch": 0.011104501288915328, "grad_norm": 0.3030165731906891, "learning_rate": 6.701465872208216e-05, "loss": 0.7918, "step": 84 }, { "epoch": 0.011236697732830987, "grad_norm": 0.32320520281791687, "learning_rate": 6.623497346023418e-05, "loss": 0.8337, "step": 85 }, { "epoch": 0.011368894176746646, "grad_norm": 0.3468799591064453, "learning_rate": 6.545084971874738e-05, "loss": 0.7626, "step": 86 }, { "epoch": 0.011501090620662305, "grad_norm": 0.32202163338661194, "learning_rate": 6.466250186922325e-05, "loss": 0.6401, "step": 87 }, { "epoch": 0.011633287064577964, "grad_norm": 0.3291841745376587, "learning_rate": 6.387014543809223e-05, "loss": 0.848, "step": 88 }, { "epoch": 0.01176548350849362, "grad_norm": 0.33629122376441956, "learning_rate": 6.307399704769099e-05, "loss": 0.7528, "step": 89 }, { "epoch": 0.01189767995240928, "grad_norm": 0.3629985749721527, "learning_rate": 6.227427435703997e-05, "loss": 0.7181, "step": 90 }, { "epoch": 0.012029876396324938, "grad_norm": 0.3643951714038849, "learning_rate": 6.147119600233758e-05, "loss": 0.6931, "step": 91 }, { "epoch": 0.012162072840240597, "grad_norm": 0.3484621047973633, "learning_rate": 6.066498153718735e-05, "loss": 0.7821, "step": 92 }, { "epoch": 0.012294269284156256, "grad_norm": 0.3612138330936432, "learning_rate": 5.985585137257401e-05, "loss": 0.7201, "step": 93 }, { "epoch": 0.012426465728071915, "grad_norm": 0.4237396717071533, "learning_rate": 5.90440267166055e-05, "loss": 0.7003, "step": 94 }, { "epoch": 0.012558662171987573, "grad_norm": 0.4192473888397217, "learning_rate": 5.8229729514036705e-05, "loss": 0.7339, "step": 95 }, { "epoch": 0.012690858615903232, "grad_norm": 0.40529993176460266, "learning_rate": 5.74131823855921e-05, "loss": 0.723, "step": 96 }, { "epoch": 0.012823055059818891, "grad_norm": 0.4157446622848511, "learning_rate": 5.6594608567103456e-05, "loss": 0.6994, "step": 97 }, { "epoch": 0.01295525150373455, "grad_norm": 0.426584392786026, "learning_rate": 5.577423184847932e-05, "loss": 0.6503, "step": 98 }, { "epoch": 0.013087447947650209, "grad_norm": 0.4566839337348938, "learning_rate": 5.495227651252315e-05, "loss": 0.623, "step": 99 }, { "epoch": 0.013219644391565867, "grad_norm": 0.5874345302581787, "learning_rate": 5.4128967273616625e-05, "loss": 0.6912, "step": 100 }, { "epoch": 0.013219644391565867, "eval_loss": 0.7339485883712769, "eval_runtime": 1295.8455, "eval_samples_per_second": 9.831, "eval_steps_per_second": 2.458, "step": 100 }, { "epoch": 0.013351840835481526, "grad_norm": 0.2449018806219101, "learning_rate": 5.330452921628497e-05, "loss": 0.5968, "step": 101 }, { "epoch": 0.013484037279397185, "grad_norm": 0.2391372174024582, "learning_rate": 5.247918773366112e-05, "loss": 0.5804, "step": 102 }, { "epoch": 0.013616233723312842, "grad_norm": 0.259091854095459, "learning_rate": 5.165316846586541e-05, "loss": 0.5899, "step": 103 }, { "epoch": 0.0137484301672285, "grad_norm": 0.21162453293800354, "learning_rate": 5.0826697238317935e-05, "loss": 0.5955, "step": 104 }, { "epoch": 0.01388062661114416, "grad_norm": 0.2207092046737671, "learning_rate": 5e-05, "loss": 0.7089, "step": 105 }, { "epoch": 0.014012823055059818, "grad_norm": 0.21915991604328156, "learning_rate": 4.917330276168208e-05, "loss": 0.6625, "step": 106 }, { "epoch": 0.014145019498975477, "grad_norm": 0.21760006248950958, "learning_rate": 4.834683153413459e-05, "loss": 0.6857, "step": 107 }, { "epoch": 0.014277215942891136, "grad_norm": 0.21286870539188385, "learning_rate": 4.7520812266338885e-05, "loss": 0.5821, "step": 108 }, { "epoch": 0.014409412386806795, "grad_norm": 0.2752554416656494, "learning_rate": 4.669547078371504e-05, "loss": 0.8374, "step": 109 }, { "epoch": 0.014541608830722454, "grad_norm": 0.2630784213542938, "learning_rate": 4.5871032726383386e-05, "loss": 0.7673, "step": 110 }, { "epoch": 0.014673805274638112, "grad_norm": 0.2437119334936142, "learning_rate": 4.504772348747687e-05, "loss": 0.6531, "step": 111 }, { "epoch": 0.014806001718553771, "grad_norm": 0.2728565037250519, "learning_rate": 4.4225768151520694e-05, "loss": 0.6917, "step": 112 }, { "epoch": 0.01493819816246943, "grad_norm": 0.2970212399959564, "learning_rate": 4.3405391432896555e-05, "loss": 0.802, "step": 113 }, { "epoch": 0.015070394606385089, "grad_norm": 0.2753089666366577, "learning_rate": 4.2586817614407895e-05, "loss": 0.7221, "step": 114 }, { "epoch": 0.015202591050300748, "grad_norm": 0.3019776940345764, "learning_rate": 4.17702704859633e-05, "loss": 0.7218, "step": 115 }, { "epoch": 0.015334787494216406, "grad_norm": 0.3079054057598114, "learning_rate": 4.095597328339452e-05, "loss": 0.7368, "step": 116 }, { "epoch": 0.015466983938132063, "grad_norm": 0.34811916947364807, "learning_rate": 4.0144148627425993e-05, "loss": 0.8815, "step": 117 }, { "epoch": 0.015599180382047722, "grad_norm": 0.2802199721336365, "learning_rate": 3.933501846281267e-05, "loss": 0.7978, "step": 118 }, { "epoch": 0.015731376825963383, "grad_norm": 0.2739661633968353, "learning_rate": 3.852880399766243e-05, "loss": 0.7422, "step": 119 }, { "epoch": 0.01586357326987904, "grad_norm": 0.2667839229106903, "learning_rate": 3.772572564296005e-05, "loss": 0.7184, "step": 120 }, { "epoch": 0.0159957697137947, "grad_norm": 0.27689000964164734, "learning_rate": 3.6926002952309016e-05, "loss": 0.6337, "step": 121 }, { "epoch": 0.01612796615771036, "grad_norm": 0.26580536365509033, "learning_rate": 3.612985456190778e-05, "loss": 0.6738, "step": 122 }, { "epoch": 0.016260162601626018, "grad_norm": 0.2640027105808258, "learning_rate": 3.533749813077677e-05, "loss": 0.6694, "step": 123 }, { "epoch": 0.016392359045541673, "grad_norm": 0.2729874849319458, "learning_rate": 3.4549150281252636e-05, "loss": 0.7466, "step": 124 }, { "epoch": 0.016524555489457332, "grad_norm": 0.32576844096183777, "learning_rate": 3.3765026539765834e-05, "loss": 0.8741, "step": 125 }, { "epoch": 0.01665675193337299, "grad_norm": 0.27072277665138245, "learning_rate": 3.298534127791785e-05, "loss": 0.6398, "step": 126 }, { "epoch": 0.01678894837728865, "grad_norm": 0.2898540496826172, "learning_rate": 3.221030765387417e-05, "loss": 0.7319, "step": 127 }, { "epoch": 0.01692114482120431, "grad_norm": 0.28591421246528625, "learning_rate": 3.144013755408895e-05, "loss": 0.6388, "step": 128 }, { "epoch": 0.017053341265119967, "grad_norm": 0.32040935754776, "learning_rate": 3.0675041535377405e-05, "loss": 0.8053, "step": 129 }, { "epoch": 0.017185537709035626, "grad_norm": 0.2715124487876892, "learning_rate": 2.991522876735154e-05, "loss": 0.6613, "step": 130 }, { "epoch": 0.017317734152951285, "grad_norm": 0.32151684165000916, "learning_rate": 2.916090697523549e-05, "loss": 0.7721, "step": 131 }, { "epoch": 0.017449930596866944, "grad_norm": 0.3153347074985504, "learning_rate": 2.8412282383075363e-05, "loss": 0.778, "step": 132 }, { "epoch": 0.017582127040782602, "grad_norm": 0.27657490968704224, "learning_rate": 2.766955965735968e-05, "loss": 0.6472, "step": 133 }, { "epoch": 0.01771432348469826, "grad_norm": 0.29782021045684814, "learning_rate": 2.693294185106562e-05, "loss": 0.6402, "step": 134 }, { "epoch": 0.01784651992861392, "grad_norm": 0.36264482140541077, "learning_rate": 2.6202630348146324e-05, "loss": 0.8151, "step": 135 }, { "epoch": 0.01797871637252958, "grad_norm": 0.3695935904979706, "learning_rate": 2.547882480847461e-05, "loss": 0.7485, "step": 136 }, { "epoch": 0.018110912816445238, "grad_norm": 0.376255601644516, "learning_rate": 2.476172311325783e-05, "loss": 0.8114, "step": 137 }, { "epoch": 0.018243109260360896, "grad_norm": 0.3442426919937134, "learning_rate": 2.405152131093926e-05, "loss": 0.7121, "step": 138 }, { "epoch": 0.018375305704276555, "grad_norm": 0.3616703748703003, "learning_rate": 2.3348413563600325e-05, "loss": 0.747, "step": 139 }, { "epoch": 0.018507502148192214, "grad_norm": 0.3357325494289398, "learning_rate": 2.2652592093878666e-05, "loss": 0.6589, "step": 140 }, { "epoch": 0.018639698592107873, "grad_norm": 0.44670650362968445, "learning_rate": 2.196424713241637e-05, "loss": 0.7165, "step": 141 }, { "epoch": 0.01877189503602353, "grad_norm": 0.3949689567089081, "learning_rate": 2.128356686585282e-05, "loss": 0.7339, "step": 142 }, { "epoch": 0.01890409147993919, "grad_norm": 0.3814798891544342, "learning_rate": 2.061073738537635e-05, "loss": 0.7213, "step": 143 }, { "epoch": 0.01903628792385485, "grad_norm": 0.40541478991508484, "learning_rate": 1.9945942635848748e-05, "loss": 0.744, "step": 144 }, { "epoch": 0.019168484367770508, "grad_norm": 0.3879254460334778, "learning_rate": 1.928936436551661e-05, "loss": 0.7091, "step": 145 }, { "epoch": 0.019300680811686167, "grad_norm": 0.39314571022987366, "learning_rate": 1.8641182076323148e-05, "loss": 0.632, "step": 146 }, { "epoch": 0.019432877255601826, "grad_norm": 0.401945561170578, "learning_rate": 1.800157297483417e-05, "loss": 0.6097, "step": 147 }, { "epoch": 0.019565073699517484, "grad_norm": 0.40109848976135254, "learning_rate": 1.7370711923791567e-05, "loss": 0.5687, "step": 148 }, { "epoch": 0.019697270143433143, "grad_norm": 0.4402889311313629, "learning_rate": 1.6748771394307585e-05, "loss": 0.5392, "step": 149 }, { "epoch": 0.019829466587348802, "grad_norm": 0.6230471730232239, "learning_rate": 1.6135921418712956e-05, "loss": 0.579, "step": 150 }, { "epoch": 0.019829466587348802, "eval_loss": 0.7001804709434509, "eval_runtime": 1297.795, "eval_samples_per_second": 9.817, "eval_steps_per_second": 2.454, "step": 150 }, { "epoch": 0.019961663031264457, "grad_norm": 0.22197431325912476, "learning_rate": 1.553232954407171e-05, "loss": 0.5478, "step": 151 }, { "epoch": 0.020093859475180116, "grad_norm": 0.21502351760864258, "learning_rate": 1.4938160786375572e-05, "loss": 0.6139, "step": 152 }, { "epoch": 0.020226055919095775, "grad_norm": 0.24084487557411194, "learning_rate": 1.435357758543015e-05, "loss": 0.6676, "step": 153 }, { "epoch": 0.020358252363011434, "grad_norm": 0.2299962192773819, "learning_rate": 1.3778739760445552e-05, "loss": 0.6047, "step": 154 }, { "epoch": 0.020490448806927093, "grad_norm": 0.23507559299468994, "learning_rate": 1.3213804466343421e-05, "loss": 0.6331, "step": 155 }, { "epoch": 0.02062264525084275, "grad_norm": 0.2816745936870575, "learning_rate": 1.2658926150792322e-05, "loss": 0.6266, "step": 156 }, { "epoch": 0.02075484169475841, "grad_norm": 0.242003932595253, "learning_rate": 1.2114256511983274e-05, "loss": 0.6411, "step": 157 }, { "epoch": 0.02088703813867407, "grad_norm": 0.2502986192703247, "learning_rate": 1.157994445715706e-05, "loss": 0.6425, "step": 158 }, { "epoch": 0.021019234582589728, "grad_norm": 0.26039353013038635, "learning_rate": 1.1056136061894384e-05, "loss": 0.6891, "step": 159 }, { "epoch": 0.021151431026505386, "grad_norm": 0.2787843644618988, "learning_rate": 1.0542974530180327e-05, "loss": 0.7302, "step": 160 }, { "epoch": 0.021283627470421045, "grad_norm": 0.25993210077285767, "learning_rate": 1.0040600155253765e-05, "loss": 0.7215, "step": 161 }, { "epoch": 0.021415823914336704, "grad_norm": 0.25290802121162415, "learning_rate": 9.549150281252633e-06, "loss": 0.6854, "step": 162 }, { "epoch": 0.021548020358252363, "grad_norm": 0.30008333921432495, "learning_rate": 9.068759265665384e-06, "loss": 0.652, "step": 163 }, { "epoch": 0.02168021680216802, "grad_norm": 0.3080776333808899, "learning_rate": 8.599558442598998e-06, "loss": 0.7674, "step": 164 }, { "epoch": 0.02181241324608368, "grad_norm": 0.28364238142967224, "learning_rate": 8.141676086873572e-06, "loss": 0.6617, "step": 165 }, { "epoch": 0.02194460968999934, "grad_norm": 0.3111041188240051, "learning_rate": 7.695237378953223e-06, "loss": 0.7853, "step": 166 }, { "epoch": 0.022076806133914998, "grad_norm": 0.32185930013656616, "learning_rate": 7.260364370723044e-06, "loss": 0.7072, "step": 167 }, { "epoch": 0.022209002577830657, "grad_norm": 0.2824515402317047, "learning_rate": 6.837175952121306e-06, "loss": 0.6561, "step": 168 }, { "epoch": 0.022341199021746316, "grad_norm": 0.34787365794181824, "learning_rate": 6.425787818636131e-06, "loss": 0.7325, "step": 169 }, { "epoch": 0.022473395465661974, "grad_norm": 0.2667436897754669, "learning_rate": 6.026312439675552e-06, "loss": 0.6375, "step": 170 }, { "epoch": 0.022605591909577633, "grad_norm": 0.2661244869232178, "learning_rate": 5.6388590278194096e-06, "loss": 0.7065, "step": 171 }, { "epoch": 0.022737788353493292, "grad_norm": 0.27021366357803345, "learning_rate": 5.263533508961827e-06, "loss": 0.6792, "step": 172 }, { "epoch": 0.02286998479740895, "grad_norm": 0.2824627161026001, "learning_rate": 4.900438493352055e-06, "loss": 0.7068, "step": 173 }, { "epoch": 0.02300218124132461, "grad_norm": 0.29387539625167847, "learning_rate": 4.549673247541875e-06, "loss": 0.739, "step": 174 }, { "epoch": 0.02313437768524027, "grad_norm": 0.2964057922363281, "learning_rate": 4.2113336672471245e-06, "loss": 0.6702, "step": 175 }, { "epoch": 0.023266574129155927, "grad_norm": 0.29672032594680786, "learning_rate": 3.885512251130763e-06, "loss": 0.6956, "step": 176 }, { "epoch": 0.023398770573071586, "grad_norm": 0.3046170771121979, "learning_rate": 3.5722980755146517e-06, "loss": 0.7628, "step": 177 }, { "epoch": 0.02353096701698724, "grad_norm": 0.3279201090335846, "learning_rate": 3.271776770026963e-06, "loss": 0.6563, "step": 178 }, { "epoch": 0.0236631634609029, "grad_norm": 0.28089141845703125, "learning_rate": 2.9840304941919415e-06, "loss": 0.6747, "step": 179 }, { "epoch": 0.02379535990481856, "grad_norm": 0.3343091309070587, "learning_rate": 2.7091379149682685e-06, "loss": 0.8329, "step": 180 }, { "epoch": 0.023927556348734218, "grad_norm": 0.3131818473339081, "learning_rate": 2.4471741852423237e-06, "loss": 0.6915, "step": 181 }, { "epoch": 0.024059752792649877, "grad_norm": 0.35552290081977844, "learning_rate": 2.1982109232821178e-06, "loss": 0.7611, "step": 182 }, { "epoch": 0.024191949236565535, "grad_norm": 0.30909493565559387, "learning_rate": 1.962316193157593e-06, "loss": 0.661, "step": 183 }, { "epoch": 0.024324145680481194, "grad_norm": 0.34362322092056274, "learning_rate": 1.7395544861325718e-06, "loss": 0.7195, "step": 184 }, { "epoch": 0.024456342124396853, "grad_norm": 0.33635377883911133, "learning_rate": 1.5299867030334814e-06, "loss": 0.7171, "step": 185 }, { "epoch": 0.02458853856831251, "grad_norm": 0.36710497736930847, "learning_rate": 1.333670137599713e-06, "loss": 0.7877, "step": 186 }, { "epoch": 0.02472073501222817, "grad_norm": 0.3873934745788574, "learning_rate": 1.1506584608200367e-06, "loss": 0.7987, "step": 187 }, { "epoch": 0.02485293145614383, "grad_norm": 0.3363410532474518, "learning_rate": 9.810017062595322e-07, "loss": 0.6828, "step": 188 }, { "epoch": 0.024985127900059488, "grad_norm": 0.3702673316001892, "learning_rate": 8.247462563808817e-07, "loss": 0.6746, "step": 189 }, { "epoch": 0.025117324343975147, "grad_norm": 0.4157086908817291, "learning_rate": 6.819348298638839e-07, "loss": 0.8022, "step": 190 }, { "epoch": 0.025249520787890806, "grad_norm": 0.3965856432914734, "learning_rate": 5.526064699265753e-07, "loss": 0.744, "step": 191 }, { "epoch": 0.025381717231806464, "grad_norm": 0.41364598274230957, "learning_rate": 4.367965336512403e-07, "loss": 0.7628, "step": 192 }, { "epoch": 0.025513913675722123, "grad_norm": 0.36574241518974304, "learning_rate": 3.3453668231809286e-07, "loss": 0.5987, "step": 193 }, { "epoch": 0.025646110119637782, "grad_norm": 0.39211803674697876, "learning_rate": 2.458548727494292e-07, "loss": 0.6974, "step": 194 }, { "epoch": 0.02577830656355344, "grad_norm": 0.4119443893432617, "learning_rate": 1.7077534966650766e-07, "loss": 0.7052, "step": 195 }, { "epoch": 0.0259105030074691, "grad_norm": 0.4326936900615692, "learning_rate": 1.0931863906127327e-07, "loss": 0.7098, "step": 196 }, { "epoch": 0.02604269945138476, "grad_norm": 0.4216960370540619, "learning_rate": 6.150154258476315e-08, "loss": 0.6271, "step": 197 }, { "epoch": 0.026174895895300417, "grad_norm": 0.4399435818195343, "learning_rate": 2.7337132953697554e-08, "loss": 0.6338, "step": 198 }, { "epoch": 0.026307092339216076, "grad_norm": 0.436718612909317, "learning_rate": 6.834750376549792e-09, "loss": 0.6532, "step": 199 }, { "epoch": 0.026439288783131735, "grad_norm": 0.5619792938232422, "learning_rate": 0.0, "loss": 0.604, "step": 200 }, { "epoch": 0.026439288783131735, "eval_loss": 0.6912915706634521, "eval_runtime": 1295.737, "eval_samples_per_second": 9.832, "eval_steps_per_second": 2.458, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.7586505622880256e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }