{ "best_metric": 0.9818174242973328, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.020891001201232568, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010445500600616284, "grad_norm": 1.0160599946975708, "learning_rate": 1.007e-05, "loss": 0.9026, "step": 1 }, { "epoch": 0.00010445500600616284, "eval_loss": 1.3163843154907227, "eval_runtime": 118.417, "eval_samples_per_second": 34.041, "eval_steps_per_second": 8.512, "step": 1 }, { "epoch": 0.00020891001201232568, "grad_norm": 1.103493571281433, "learning_rate": 2.014e-05, "loss": 0.9891, "step": 2 }, { "epoch": 0.0003133650180184885, "grad_norm": 1.031664490699768, "learning_rate": 3.0209999999999997e-05, "loss": 1.0485, "step": 3 }, { "epoch": 0.00041782002402465136, "grad_norm": 0.985599160194397, "learning_rate": 4.028e-05, "loss": 1.1374, "step": 4 }, { "epoch": 0.0005222750300308142, "grad_norm": 0.8242583274841309, "learning_rate": 5.035e-05, "loss": 1.0288, "step": 5 }, { "epoch": 0.000626730036036977, "grad_norm": 0.8407261967658997, "learning_rate": 6.0419999999999994e-05, "loss": 1.0153, "step": 6 }, { "epoch": 0.0007311850420431399, "grad_norm": 1.0834583044052124, "learning_rate": 7.049e-05, "loss": 0.9715, "step": 7 }, { "epoch": 0.0008356400480493027, "grad_norm": 1.1035923957824707, "learning_rate": 8.056e-05, "loss": 1.0755, "step": 8 }, { "epoch": 0.0009400950540554656, "grad_norm": 0.9618456363677979, "learning_rate": 9.062999999999999e-05, "loss": 0.9921, "step": 9 }, { "epoch": 0.0010445500600616284, "grad_norm": 1.0877008438110352, "learning_rate": 0.0001007, "loss": 1.1217, "step": 10 }, { "epoch": 0.0011490050660677912, "grad_norm": 0.9684674739837646, "learning_rate": 0.00010017, "loss": 1.0438, "step": 11 }, { "epoch": 0.001253460072073954, "grad_norm": 0.8440430164337158, "learning_rate": 9.963999999999999e-05, "loss": 1.0471, "step": 12 }, { "epoch": 0.001357915078080117, "grad_norm": 0.7783815860748291, "learning_rate": 9.910999999999999e-05, "loss": 0.9841, "step": 13 }, { "epoch": 0.0014623700840862798, "grad_norm": 0.8125823140144348, "learning_rate": 9.858e-05, "loss": 1.0053, "step": 14 }, { "epoch": 0.0015668250900924426, "grad_norm": 0.8336841464042664, "learning_rate": 9.805e-05, "loss": 0.9672, "step": 15 }, { "epoch": 0.0016712800960986054, "grad_norm": 0.9024210572242737, "learning_rate": 9.752e-05, "loss": 1.1344, "step": 16 }, { "epoch": 0.0017757351021047683, "grad_norm": 0.9698878526687622, "learning_rate": 9.698999999999999e-05, "loss": 1.1192, "step": 17 }, { "epoch": 0.0018801901081109311, "grad_norm": 0.9430877566337585, "learning_rate": 9.646e-05, "loss": 1.0096, "step": 18 }, { "epoch": 0.001984645114117094, "grad_norm": 0.9249778985977173, "learning_rate": 9.593e-05, "loss": 1.0335, "step": 19 }, { "epoch": 0.002089100120123257, "grad_norm": 1.2025758028030396, "learning_rate": 9.539999999999999e-05, "loss": 1.2275, "step": 20 }, { "epoch": 0.0021935551261294197, "grad_norm": 1.0294830799102783, "learning_rate": 9.487e-05, "loss": 1.1516, "step": 21 }, { "epoch": 0.0022980101321355825, "grad_norm": 1.0079749822616577, "learning_rate": 9.434e-05, "loss": 1.0904, "step": 22 }, { "epoch": 0.0024024651381417453, "grad_norm": 1.0860754251480103, "learning_rate": 9.381e-05, "loss": 1.1063, "step": 23 }, { "epoch": 0.002506920144147908, "grad_norm": 1.0929911136627197, "learning_rate": 9.327999999999999e-05, "loss": 1.213, "step": 24 }, { "epoch": 0.002611375150154071, "grad_norm": 1.0362168550491333, "learning_rate": 9.274999999999999e-05, "loss": 1.105, "step": 25 }, { "epoch": 0.002715830156160234, "grad_norm": 1.035015344619751, "learning_rate": 9.222e-05, "loss": 1.0202, "step": 26 }, { "epoch": 0.0028202851621663967, "grad_norm": 1.1152434349060059, "learning_rate": 9.169e-05, "loss": 1.0801, "step": 27 }, { "epoch": 0.0029247401681725595, "grad_norm": 1.1516571044921875, "learning_rate": 9.116e-05, "loss": 1.0891, "step": 28 }, { "epoch": 0.0030291951741787224, "grad_norm": 1.0675947666168213, "learning_rate": 9.062999999999999e-05, "loss": 1.0438, "step": 29 }, { "epoch": 0.0031336501801848852, "grad_norm": 1.0566611289978027, "learning_rate": 9.01e-05, "loss": 1.0598, "step": 30 }, { "epoch": 0.003238105186191048, "grad_norm": 1.0381075143814087, "learning_rate": 8.957e-05, "loss": 0.9869, "step": 31 }, { "epoch": 0.003342560192197211, "grad_norm": 1.0372414588928223, "learning_rate": 8.903999999999999e-05, "loss": 1.0221, "step": 32 }, { "epoch": 0.0034470151982033737, "grad_norm": 1.1146482229232788, "learning_rate": 8.850999999999999e-05, "loss": 1.1111, "step": 33 }, { "epoch": 0.0035514702042095366, "grad_norm": 1.117113471031189, "learning_rate": 8.798e-05, "loss": 1.0337, "step": 34 }, { "epoch": 0.0036559252102156994, "grad_norm": 1.1380937099456787, "learning_rate": 8.745e-05, "loss": 1.0539, "step": 35 }, { "epoch": 0.0037603802162218623, "grad_norm": 1.125671148300171, "learning_rate": 8.692e-05, "loss": 1.2324, "step": 36 }, { "epoch": 0.003864835222228025, "grad_norm": 1.178640604019165, "learning_rate": 8.638999999999999e-05, "loss": 1.0558, "step": 37 }, { "epoch": 0.003969290228234188, "grad_norm": 1.1600550413131714, "learning_rate": 8.586e-05, "loss": 1.1477, "step": 38 }, { "epoch": 0.004073745234240351, "grad_norm": 1.1267294883728027, "learning_rate": 8.533e-05, "loss": 1.1102, "step": 39 }, { "epoch": 0.004178200240246514, "grad_norm": 1.148314118385315, "learning_rate": 8.479999999999999e-05, "loss": 1.1249, "step": 40 }, { "epoch": 0.004282655246252677, "grad_norm": 1.7030447721481323, "learning_rate": 8.427e-05, "loss": 1.3342, "step": 41 }, { "epoch": 0.004387110252258839, "grad_norm": 1.1302878856658936, "learning_rate": 8.374e-05, "loss": 1.0549, "step": 42 }, { "epoch": 0.004491565258265003, "grad_norm": 1.2263422012329102, "learning_rate": 8.321e-05, "loss": 1.049, "step": 43 }, { "epoch": 0.004596020264271165, "grad_norm": 1.1995285749435425, "learning_rate": 8.268e-05, "loss": 1.0055, "step": 44 }, { "epoch": 0.004700475270277328, "grad_norm": 1.2773244380950928, "learning_rate": 8.214999999999999e-05, "loss": 1.0983, "step": 45 }, { "epoch": 0.004804930276283491, "grad_norm": 1.3492332696914673, "learning_rate": 8.162e-05, "loss": 1.181, "step": 46 }, { "epoch": 0.004909385282289654, "grad_norm": 1.35885751247406, "learning_rate": 8.108999999999998e-05, "loss": 1.0918, "step": 47 }, { "epoch": 0.005013840288295816, "grad_norm": 1.250424861907959, "learning_rate": 8.056e-05, "loss": 0.9498, "step": 48 }, { "epoch": 0.00511829529430198, "grad_norm": 1.5273371934890747, "learning_rate": 8.003e-05, "loss": 1.0708, "step": 49 }, { "epoch": 0.005222750300308142, "grad_norm": 1.8503930568695068, "learning_rate": 7.95e-05, "loss": 1.3104, "step": 50 }, { "epoch": 0.005222750300308142, "eval_loss": 1.0883654356002808, "eval_runtime": 120.0706, "eval_samples_per_second": 33.572, "eval_steps_per_second": 8.395, "step": 50 }, { "epoch": 0.005327205306314305, "grad_norm": 0.9896413087844849, "learning_rate": 7.897e-05, "loss": 0.9202, "step": 51 }, { "epoch": 0.005431660312320468, "grad_norm": 0.840713381767273, "learning_rate": 7.843999999999999e-05, "loss": 0.8256, "step": 52 }, { "epoch": 0.005536115318326631, "grad_norm": 0.7416518330574036, "learning_rate": 7.790999999999999e-05, "loss": 0.9071, "step": 53 }, { "epoch": 0.005640570324332793, "grad_norm": 0.7955224514007568, "learning_rate": 7.738e-05, "loss": 0.9046, "step": 54 }, { "epoch": 0.005745025330338957, "grad_norm": 0.7123813629150391, "learning_rate": 7.685e-05, "loss": 1.0314, "step": 55 }, { "epoch": 0.005849480336345119, "grad_norm": 0.683822751045227, "learning_rate": 7.632e-05, "loss": 0.9354, "step": 56 }, { "epoch": 0.005953935342351282, "grad_norm": 0.6209269165992737, "learning_rate": 7.578999999999999e-05, "loss": 0.8914, "step": 57 }, { "epoch": 0.006058390348357445, "grad_norm": 0.6532514691352844, "learning_rate": 7.526e-05, "loss": 1.0181, "step": 58 }, { "epoch": 0.006162845354363608, "grad_norm": 0.6706631183624268, "learning_rate": 7.473e-05, "loss": 0.9697, "step": 59 }, { "epoch": 0.0062673003603697704, "grad_norm": 0.6528756022453308, "learning_rate": 7.419999999999999e-05, "loss": 0.9479, "step": 60 }, { "epoch": 0.006371755366375934, "grad_norm": 0.7368625998497009, "learning_rate": 7.367e-05, "loss": 0.9429, "step": 61 }, { "epoch": 0.006476210372382096, "grad_norm": 0.7886870503425598, "learning_rate": 7.314e-05, "loss": 1.0517, "step": 62 }, { "epoch": 0.006580665378388259, "grad_norm": 0.7552511692047119, "learning_rate": 7.261e-05, "loss": 0.997, "step": 63 }, { "epoch": 0.006685120384394422, "grad_norm": 0.7769532799720764, "learning_rate": 7.208e-05, "loss": 0.9554, "step": 64 }, { "epoch": 0.006789575390400585, "grad_norm": 0.8453531265258789, "learning_rate": 7.154999999999999e-05, "loss": 1.0108, "step": 65 }, { "epoch": 0.0068940303964067475, "grad_norm": 0.8387408256530762, "learning_rate": 7.102e-05, "loss": 0.9538, "step": 66 }, { "epoch": 0.006998485402412911, "grad_norm": 0.8454548120498657, "learning_rate": 7.049e-05, "loss": 0.9305, "step": 67 }, { "epoch": 0.007102940408419073, "grad_norm": 0.9299591779708862, "learning_rate": 6.996e-05, "loss": 1.1564, "step": 68 }, { "epoch": 0.0072073954144252364, "grad_norm": 0.863427460193634, "learning_rate": 6.943e-05, "loss": 0.9635, "step": 69 }, { "epoch": 0.007311850420431399, "grad_norm": 0.9572794437408447, "learning_rate": 6.89e-05, "loss": 1.1278, "step": 70 }, { "epoch": 0.007416305426437562, "grad_norm": 0.9274687767028809, "learning_rate": 6.837e-05, "loss": 1.0153, "step": 71 }, { "epoch": 0.0075207604324437245, "grad_norm": 0.8995688557624817, "learning_rate": 6.784e-05, "loss": 1.0095, "step": 72 }, { "epoch": 0.007625215438449888, "grad_norm": 0.9216225743293762, "learning_rate": 6.730999999999999e-05, "loss": 1.004, "step": 73 }, { "epoch": 0.00772967044445605, "grad_norm": 0.8909146785736084, "learning_rate": 6.678e-05, "loss": 0.9789, "step": 74 }, { "epoch": 0.007834125450462213, "grad_norm": 0.8936184048652649, "learning_rate": 6.625e-05, "loss": 0.9622, "step": 75 }, { "epoch": 0.007938580456468376, "grad_norm": 0.9004867672920227, "learning_rate": 6.572e-05, "loss": 0.9201, "step": 76 }, { "epoch": 0.008043035462474538, "grad_norm": 1.025423288345337, "learning_rate": 6.519e-05, "loss": 1.1964, "step": 77 }, { "epoch": 0.008147490468480702, "grad_norm": 1.002456784248352, "learning_rate": 6.466e-05, "loss": 1.1274, "step": 78 }, { "epoch": 0.008251945474486865, "grad_norm": 0.967106819152832, "learning_rate": 6.413e-05, "loss": 0.9247, "step": 79 }, { "epoch": 0.008356400480493027, "grad_norm": 1.0033572912216187, "learning_rate": 6.359999999999999e-05, "loss": 1.0381, "step": 80 }, { "epoch": 0.00846085548649919, "grad_norm": 0.9540228843688965, "learning_rate": 6.306999999999999e-05, "loss": 0.8963, "step": 81 }, { "epoch": 0.008565310492505354, "grad_norm": 1.1677919626235962, "learning_rate": 6.254000000000001e-05, "loss": 1.1372, "step": 82 }, { "epoch": 0.008669765498511516, "grad_norm": 1.0950039625167847, "learning_rate": 6.201e-05, "loss": 1.052, "step": 83 }, { "epoch": 0.008774220504517679, "grad_norm": 1.028153657913208, "learning_rate": 6.148e-05, "loss": 0.9451, "step": 84 }, { "epoch": 0.008878675510523841, "grad_norm": 1.1274486780166626, "learning_rate": 6.095e-05, "loss": 1.0042, "step": 85 }, { "epoch": 0.008983130516530005, "grad_norm": 1.1423695087432861, "learning_rate": 6.0419999999999994e-05, "loss": 1.094, "step": 86 }, { "epoch": 0.009087585522536168, "grad_norm": 1.1429065465927124, "learning_rate": 5.988999999999999e-05, "loss": 0.9644, "step": 87 }, { "epoch": 0.00919204052854233, "grad_norm": 1.2021771669387817, "learning_rate": 5.9359999999999994e-05, "loss": 1.1807, "step": 88 }, { "epoch": 0.009296495534548492, "grad_norm": 1.1174052953720093, "learning_rate": 5.8830000000000004e-05, "loss": 1.029, "step": 89 }, { "epoch": 0.009400950540554657, "grad_norm": 1.2131744623184204, "learning_rate": 5.83e-05, "loss": 1.2473, "step": 90 }, { "epoch": 0.009505405546560819, "grad_norm": 1.1659351587295532, "learning_rate": 5.777e-05, "loss": 1.1075, "step": 91 }, { "epoch": 0.009609860552566981, "grad_norm": 1.155617594718933, "learning_rate": 5.7239999999999994e-05, "loss": 0.9338, "step": 92 }, { "epoch": 0.009714315558573145, "grad_norm": 1.1732633113861084, "learning_rate": 5.671e-05, "loss": 1.1125, "step": 93 }, { "epoch": 0.009818770564579308, "grad_norm": 1.1406437158584595, "learning_rate": 5.6179999999999994e-05, "loss": 1.0323, "step": 94 }, { "epoch": 0.00992322557058547, "grad_norm": 1.25766122341156, "learning_rate": 5.5650000000000004e-05, "loss": 1.153, "step": 95 }, { "epoch": 0.010027680576591633, "grad_norm": 1.3154778480529785, "learning_rate": 5.512e-05, "loss": 1.1242, "step": 96 }, { "epoch": 0.010132135582597797, "grad_norm": 1.355385184288025, "learning_rate": 5.459e-05, "loss": 1.1835, "step": 97 }, { "epoch": 0.01023659058860396, "grad_norm": 1.3438916206359863, "learning_rate": 5.406e-05, "loss": 1.0795, "step": 98 }, { "epoch": 0.010341045594610122, "grad_norm": 1.2769006490707397, "learning_rate": 5.353e-05, "loss": 0.9322, "step": 99 }, { "epoch": 0.010445500600616284, "grad_norm": 1.896607518196106, "learning_rate": 5.2999999999999994e-05, "loss": 1.1321, "step": 100 }, { "epoch": 0.010445500600616284, "eval_loss": 1.0444438457489014, "eval_runtime": 118.2357, "eval_samples_per_second": 34.093, "eval_steps_per_second": 8.525, "step": 100 }, { "epoch": 0.010549955606622448, "grad_norm": 0.6772998571395874, "learning_rate": 5.246999999999999e-05, "loss": 0.8867, "step": 101 }, { "epoch": 0.01065441061262861, "grad_norm": 0.6309265494346619, "learning_rate": 5.194e-05, "loss": 0.9269, "step": 102 }, { "epoch": 0.010758865618634773, "grad_norm": 0.6723343729972839, "learning_rate": 5.141e-05, "loss": 0.9639, "step": 103 }, { "epoch": 0.010863320624640935, "grad_norm": 0.6599306464195251, "learning_rate": 5.088e-05, "loss": 0.9483, "step": 104 }, { "epoch": 0.0109677756306471, "grad_norm": 0.5985355973243713, "learning_rate": 5.035e-05, "loss": 0.9826, "step": 105 }, { "epoch": 0.011072230636653262, "grad_norm": 0.6056426763534546, "learning_rate": 4.9819999999999994e-05, "loss": 0.87, "step": 106 }, { "epoch": 0.011176685642659424, "grad_norm": 0.6577640771865845, "learning_rate": 4.929e-05, "loss": 0.8896, "step": 107 }, { "epoch": 0.011281140648665587, "grad_norm": 0.6197834014892578, "learning_rate": 4.876e-05, "loss": 0.9857, "step": 108 }, { "epoch": 0.011385595654671751, "grad_norm": 0.6561485528945923, "learning_rate": 4.823e-05, "loss": 1.0036, "step": 109 }, { "epoch": 0.011490050660677913, "grad_norm": 0.6277485489845276, "learning_rate": 4.7699999999999994e-05, "loss": 0.9196, "step": 110 }, { "epoch": 0.011594505666684076, "grad_norm": 0.6193849444389343, "learning_rate": 4.717e-05, "loss": 0.8803, "step": 111 }, { "epoch": 0.011698960672690238, "grad_norm": 0.64503014087677, "learning_rate": 4.6639999999999994e-05, "loss": 1.0019, "step": 112 }, { "epoch": 0.011803415678696402, "grad_norm": 0.686529278755188, "learning_rate": 4.611e-05, "loss": 0.9412, "step": 113 }, { "epoch": 0.011907870684702565, "grad_norm": 0.7062692642211914, "learning_rate": 4.558e-05, "loss": 1.0139, "step": 114 }, { "epoch": 0.012012325690708727, "grad_norm": 0.7408269643783569, "learning_rate": 4.505e-05, "loss": 0.9398, "step": 115 }, { "epoch": 0.01211678069671489, "grad_norm": 0.8046457767486572, "learning_rate": 4.4519999999999994e-05, "loss": 1.0817, "step": 116 }, { "epoch": 0.012221235702721054, "grad_norm": 0.8560929894447327, "learning_rate": 4.399e-05, "loss": 0.9393, "step": 117 }, { "epoch": 0.012325690708727216, "grad_norm": 0.8270806074142456, "learning_rate": 4.346e-05, "loss": 1.029, "step": 118 }, { "epoch": 0.012430145714733378, "grad_norm": 0.8439892530441284, "learning_rate": 4.293e-05, "loss": 1.0061, "step": 119 }, { "epoch": 0.012534600720739541, "grad_norm": 0.9163686037063599, "learning_rate": 4.2399999999999994e-05, "loss": 1.1759, "step": 120 }, { "epoch": 0.012639055726745705, "grad_norm": 0.9552029371261597, "learning_rate": 4.187e-05, "loss": 0.9827, "step": 121 }, { "epoch": 0.012743510732751867, "grad_norm": 0.9216101169586182, "learning_rate": 4.134e-05, "loss": 1.0798, "step": 122 }, { "epoch": 0.01284796573875803, "grad_norm": 0.9589611887931824, "learning_rate": 4.081e-05, "loss": 1.077, "step": 123 }, { "epoch": 0.012952420744764192, "grad_norm": 0.9211677312850952, "learning_rate": 4.028e-05, "loss": 1.0484, "step": 124 }, { "epoch": 0.013056875750770356, "grad_norm": 0.8966543078422546, "learning_rate": 3.975e-05, "loss": 0.9896, "step": 125 }, { "epoch": 0.013161330756776519, "grad_norm": 0.9282961487770081, "learning_rate": 3.9219999999999994e-05, "loss": 1.0094, "step": 126 }, { "epoch": 0.013265785762782681, "grad_norm": 1.004485011100769, "learning_rate": 3.869e-05, "loss": 1.1737, "step": 127 }, { "epoch": 0.013370240768788844, "grad_norm": 0.9591395854949951, "learning_rate": 3.816e-05, "loss": 1.0858, "step": 128 }, { "epoch": 0.013474695774795008, "grad_norm": 0.9005763530731201, "learning_rate": 3.763e-05, "loss": 1.0078, "step": 129 }, { "epoch": 0.01357915078080117, "grad_norm": 0.9479995965957642, "learning_rate": 3.7099999999999994e-05, "loss": 1.0498, "step": 130 }, { "epoch": 0.013683605786807333, "grad_norm": 1.0200867652893066, "learning_rate": 3.657e-05, "loss": 1.0824, "step": 131 }, { "epoch": 0.013788060792813495, "grad_norm": 0.9186935424804688, "learning_rate": 3.604e-05, "loss": 0.9936, "step": 132 }, { "epoch": 0.013892515798819659, "grad_norm": 0.9905325770378113, "learning_rate": 3.551e-05, "loss": 1.0225, "step": 133 }, { "epoch": 0.013996970804825822, "grad_norm": 1.0167120695114136, "learning_rate": 3.498e-05, "loss": 1.1188, "step": 134 }, { "epoch": 0.014101425810831984, "grad_norm": 0.9497846961021423, "learning_rate": 3.445e-05, "loss": 0.9271, "step": 135 }, { "epoch": 0.014205880816838146, "grad_norm": 1.0277209281921387, "learning_rate": 3.392e-05, "loss": 1.0421, "step": 136 }, { "epoch": 0.01431033582284431, "grad_norm": 0.9843363761901855, "learning_rate": 3.339e-05, "loss": 0.982, "step": 137 }, { "epoch": 0.014414790828850473, "grad_norm": 1.0494071245193481, "learning_rate": 3.286e-05, "loss": 0.9804, "step": 138 }, { "epoch": 0.014519245834856635, "grad_norm": 1.0694974660873413, "learning_rate": 3.233e-05, "loss": 0.9624, "step": 139 }, { "epoch": 0.014623700840862798, "grad_norm": 1.0880765914916992, "learning_rate": 3.1799999999999994e-05, "loss": 0.9901, "step": 140 }, { "epoch": 0.014728155846868962, "grad_norm": 1.053983211517334, "learning_rate": 3.1270000000000004e-05, "loss": 0.9555, "step": 141 }, { "epoch": 0.014832610852875124, "grad_norm": 1.0926487445831299, "learning_rate": 3.074e-05, "loss": 1.03, "step": 142 }, { "epoch": 0.014937065858881287, "grad_norm": 1.1903960704803467, "learning_rate": 3.0209999999999997e-05, "loss": 1.0765, "step": 143 }, { "epoch": 0.015041520864887449, "grad_norm": 1.2311145067214966, "learning_rate": 2.9679999999999997e-05, "loss": 1.0678, "step": 144 }, { "epoch": 0.015145975870893613, "grad_norm": 1.1940836906433105, "learning_rate": 2.915e-05, "loss": 1.0461, "step": 145 }, { "epoch": 0.015250430876899776, "grad_norm": 1.228232979774475, "learning_rate": 2.8619999999999997e-05, "loss": 0.9819, "step": 146 }, { "epoch": 0.015354885882905938, "grad_norm": 1.2038990259170532, "learning_rate": 2.8089999999999997e-05, "loss": 0.9445, "step": 147 }, { "epoch": 0.0154593408889121, "grad_norm": 1.2821253538131714, "learning_rate": 2.756e-05, "loss": 1.1162, "step": 148 }, { "epoch": 0.015563795894918265, "grad_norm": 1.437116265296936, "learning_rate": 2.703e-05, "loss": 1.0603, "step": 149 }, { "epoch": 0.015668250900924427, "grad_norm": 1.6678568124771118, "learning_rate": 2.6499999999999997e-05, "loss": 1.0682, "step": 150 }, { "epoch": 0.015668250900924427, "eval_loss": 0.9961364269256592, "eval_runtime": 118.6077, "eval_samples_per_second": 33.986, "eval_steps_per_second": 8.499, "step": 150 }, { "epoch": 0.01577270590693059, "grad_norm": 0.5151348114013672, "learning_rate": 2.597e-05, "loss": 0.7635, "step": 151 }, { "epoch": 0.015877160912936752, "grad_norm": 0.5203879475593567, "learning_rate": 2.544e-05, "loss": 0.7112, "step": 152 }, { "epoch": 0.015981615918942916, "grad_norm": 0.5102455019950867, "learning_rate": 2.4909999999999997e-05, "loss": 0.8134, "step": 153 }, { "epoch": 0.016086070924949077, "grad_norm": 0.5462666153907776, "learning_rate": 2.438e-05, "loss": 0.925, "step": 154 }, { "epoch": 0.01619052593095524, "grad_norm": 0.5957190990447998, "learning_rate": 2.3849999999999997e-05, "loss": 0.9079, "step": 155 }, { "epoch": 0.016294980936961405, "grad_norm": 0.6015512347221375, "learning_rate": 2.3319999999999997e-05, "loss": 0.9956, "step": 156 }, { "epoch": 0.016399435942967566, "grad_norm": 0.5997916460037231, "learning_rate": 2.279e-05, "loss": 0.9413, "step": 157 }, { "epoch": 0.01650389094897373, "grad_norm": 0.5999729037284851, "learning_rate": 2.2259999999999997e-05, "loss": 0.8335, "step": 158 }, { "epoch": 0.016608345954979894, "grad_norm": 0.6232542991638184, "learning_rate": 2.173e-05, "loss": 0.9134, "step": 159 }, { "epoch": 0.016712800960986054, "grad_norm": 0.607313334941864, "learning_rate": 2.1199999999999997e-05, "loss": 0.874, "step": 160 }, { "epoch": 0.01681725596699222, "grad_norm": 0.6412212252616882, "learning_rate": 2.067e-05, "loss": 0.9721, "step": 161 }, { "epoch": 0.01692171097299838, "grad_norm": 0.650705099105835, "learning_rate": 2.014e-05, "loss": 0.9523, "step": 162 }, { "epoch": 0.017026165979004543, "grad_norm": 0.6729899644851685, "learning_rate": 1.9609999999999997e-05, "loss": 0.9684, "step": 163 }, { "epoch": 0.017130620985010708, "grad_norm": 0.6449539065361023, "learning_rate": 1.908e-05, "loss": 0.808, "step": 164 }, { "epoch": 0.01723507599101687, "grad_norm": 0.6991842985153198, "learning_rate": 1.8549999999999997e-05, "loss": 0.9929, "step": 165 }, { "epoch": 0.017339530997023032, "grad_norm": 0.7484295964241028, "learning_rate": 1.802e-05, "loss": 0.9746, "step": 166 }, { "epoch": 0.017443986003029197, "grad_norm": 0.7161227464675903, "learning_rate": 1.749e-05, "loss": 0.9454, "step": 167 }, { "epoch": 0.017548441009035357, "grad_norm": 0.7815462946891785, "learning_rate": 1.696e-05, "loss": 1.0301, "step": 168 }, { "epoch": 0.01765289601504152, "grad_norm": 0.8647356033325195, "learning_rate": 1.643e-05, "loss": 1.0621, "step": 169 }, { "epoch": 0.017757351021047682, "grad_norm": 0.9504815340042114, "learning_rate": 1.5899999999999997e-05, "loss": 1.0426, "step": 170 }, { "epoch": 0.017861806027053846, "grad_norm": 0.8482909202575684, "learning_rate": 1.537e-05, "loss": 0.9898, "step": 171 }, { "epoch": 0.01796626103306001, "grad_norm": 0.8360997438430786, "learning_rate": 1.4839999999999999e-05, "loss": 0.9783, "step": 172 }, { "epoch": 0.01807071603906617, "grad_norm": 0.9085504412651062, "learning_rate": 1.4309999999999999e-05, "loss": 0.9865, "step": 173 }, { "epoch": 0.018175171045072335, "grad_norm": 0.8988630771636963, "learning_rate": 1.378e-05, "loss": 1.0591, "step": 174 }, { "epoch": 0.0182796260510785, "grad_norm": 0.8486796617507935, "learning_rate": 1.3249999999999999e-05, "loss": 0.9894, "step": 175 }, { "epoch": 0.01838408105708466, "grad_norm": 0.8764381408691406, "learning_rate": 1.272e-05, "loss": 0.9253, "step": 176 }, { "epoch": 0.018488536063090824, "grad_norm": 0.9448692798614502, "learning_rate": 1.219e-05, "loss": 1.0425, "step": 177 }, { "epoch": 0.018592991069096985, "grad_norm": 0.9180240631103516, "learning_rate": 1.1659999999999998e-05, "loss": 0.9328, "step": 178 }, { "epoch": 0.01869744607510315, "grad_norm": 0.9340706467628479, "learning_rate": 1.1129999999999998e-05, "loss": 1.0015, "step": 179 }, { "epoch": 0.018801901081109313, "grad_norm": 0.8770861029624939, "learning_rate": 1.0599999999999998e-05, "loss": 0.9812, "step": 180 }, { "epoch": 0.018906356087115474, "grad_norm": 0.9975367188453674, "learning_rate": 1.007e-05, "loss": 1.0984, "step": 181 }, { "epoch": 0.019010811093121638, "grad_norm": 0.9696022868156433, "learning_rate": 9.54e-06, "loss": 0.9888, "step": 182 }, { "epoch": 0.019115266099127802, "grad_norm": 0.8801543116569519, "learning_rate": 9.01e-06, "loss": 0.8672, "step": 183 }, { "epoch": 0.019219721105133963, "grad_norm": 0.9794437885284424, "learning_rate": 8.48e-06, "loss": 1.0545, "step": 184 }, { "epoch": 0.019324176111140127, "grad_norm": 0.9684680700302124, "learning_rate": 7.949999999999998e-06, "loss": 0.9275, "step": 185 }, { "epoch": 0.01942863111714629, "grad_norm": 0.956508219242096, "learning_rate": 7.419999999999999e-06, "loss": 0.9679, "step": 186 }, { "epoch": 0.01953308612315245, "grad_norm": 1.0241084098815918, "learning_rate": 6.89e-06, "loss": 1.0333, "step": 187 }, { "epoch": 0.019637541129158616, "grad_norm": 1.13876211643219, "learning_rate": 6.36e-06, "loss": 1.189, "step": 188 }, { "epoch": 0.019741996135164776, "grad_norm": 1.0502783060073853, "learning_rate": 5.829999999999999e-06, "loss": 1.0062, "step": 189 }, { "epoch": 0.01984645114117094, "grad_norm": 1.0701584815979004, "learning_rate": 5.299999999999999e-06, "loss": 0.9934, "step": 190 }, { "epoch": 0.019950906147177105, "grad_norm": 1.1496695280075073, "learning_rate": 4.77e-06, "loss": 1.0933, "step": 191 }, { "epoch": 0.020055361153183265, "grad_norm": 1.1266313791275024, "learning_rate": 4.24e-06, "loss": 1.0909, "step": 192 }, { "epoch": 0.02015981615918943, "grad_norm": 1.1178048849105835, "learning_rate": 3.7099999999999996e-06, "loss": 0.9263, "step": 193 }, { "epoch": 0.020264271165195594, "grad_norm": 1.1649036407470703, "learning_rate": 3.18e-06, "loss": 1.0362, "step": 194 }, { "epoch": 0.020368726171201754, "grad_norm": 1.1672587394714355, "learning_rate": 2.6499999999999996e-06, "loss": 1.0083, "step": 195 }, { "epoch": 0.02047318117720792, "grad_norm": 1.1676815748214722, "learning_rate": 2.12e-06, "loss": 0.9924, "step": 196 }, { "epoch": 0.02057763618321408, "grad_norm": 1.3110767602920532, "learning_rate": 1.59e-06, "loss": 0.9344, "step": 197 }, { "epoch": 0.020682091189220243, "grad_norm": 1.4102957248687744, "learning_rate": 1.06e-06, "loss": 1.1094, "step": 198 }, { "epoch": 0.020786546195226407, "grad_norm": 1.5473552942276, "learning_rate": 5.3e-07, "loss": 1.0472, "step": 199 }, { "epoch": 0.020891001201232568, "grad_norm": 2.232775926589966, "learning_rate": 0.0, "loss": 1.3862, "step": 200 }, { "epoch": 0.020891001201232568, "eval_loss": 0.9818174242973328, "eval_runtime": 118.3422, "eval_samples_per_second": 34.062, "eval_steps_per_second": 8.518, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.06657392623616e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }