{
  "best_metric": 0.9818174242973328,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.020891001201232568,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00010445500600616284,
      "grad_norm": 1.0160599946975708,
      "learning_rate": 1.007e-05,
      "loss": 0.9026,
      "step": 1
    },
    {
      "epoch": 0.00010445500600616284,
      "eval_loss": 1.3163843154907227,
      "eval_runtime": 118.417,
      "eval_samples_per_second": 34.041,
      "eval_steps_per_second": 8.512,
      "step": 1
    },
    {
      "epoch": 0.00020891001201232568,
      "grad_norm": 1.103493571281433,
      "learning_rate": 2.014e-05,
      "loss": 0.9891,
      "step": 2
    },
    {
      "epoch": 0.0003133650180184885,
      "grad_norm": 1.031664490699768,
      "learning_rate": 3.0209999999999997e-05,
      "loss": 1.0485,
      "step": 3
    },
    {
      "epoch": 0.00041782002402465136,
      "grad_norm": 0.985599160194397,
      "learning_rate": 4.028e-05,
      "loss": 1.1374,
      "step": 4
    },
    {
      "epoch": 0.0005222750300308142,
      "grad_norm": 0.8242583274841309,
      "learning_rate": 5.035e-05,
      "loss": 1.0288,
      "step": 5
    },
    {
      "epoch": 0.000626730036036977,
      "grad_norm": 0.8407261967658997,
      "learning_rate": 6.0419999999999994e-05,
      "loss": 1.0153,
      "step": 6
    },
    {
      "epoch": 0.0007311850420431399,
      "grad_norm": 1.0834583044052124,
      "learning_rate": 7.049e-05,
      "loss": 0.9715,
      "step": 7
    },
    {
      "epoch": 0.0008356400480493027,
      "grad_norm": 1.1035923957824707,
      "learning_rate": 8.056e-05,
      "loss": 1.0755,
      "step": 8
    },
    {
      "epoch": 0.0009400950540554656,
      "grad_norm": 0.9618456363677979,
      "learning_rate": 9.062999999999999e-05,
      "loss": 0.9921,
      "step": 9
    },
    {
      "epoch": 0.0010445500600616284,
      "grad_norm": 1.0877008438110352,
      "learning_rate": 0.0001007,
      "loss": 1.1217,
      "step": 10
    },
    {
      "epoch": 0.0011490050660677912,
      "grad_norm": 0.9684674739837646,
      "learning_rate": 0.00010017,
      "loss": 1.0438,
      "step": 11
    },
    {
      "epoch": 0.001253460072073954,
      "grad_norm": 0.8440430164337158,
      "learning_rate": 9.963999999999999e-05,
      "loss": 1.0471,
      "step": 12
    },
    {
      "epoch": 0.001357915078080117,
      "grad_norm": 0.7783815860748291,
      "learning_rate": 9.910999999999999e-05,
      "loss": 0.9841,
      "step": 13
    },
    {
      "epoch": 0.0014623700840862798,
      "grad_norm": 0.8125823140144348,
      "learning_rate": 9.858e-05,
      "loss": 1.0053,
      "step": 14
    },
    {
      "epoch": 0.0015668250900924426,
      "grad_norm": 0.8336841464042664,
      "learning_rate": 9.805e-05,
      "loss": 0.9672,
      "step": 15
    },
    {
      "epoch": 0.0016712800960986054,
      "grad_norm": 0.9024210572242737,
      "learning_rate": 9.752e-05,
      "loss": 1.1344,
      "step": 16
    },
    {
      "epoch": 0.0017757351021047683,
      "grad_norm": 0.9698878526687622,
      "learning_rate": 9.698999999999999e-05,
      "loss": 1.1192,
      "step": 17
    },
    {
      "epoch": 0.0018801901081109311,
      "grad_norm": 0.9430877566337585,
      "learning_rate": 9.646e-05,
      "loss": 1.0096,
      "step": 18
    },
    {
      "epoch": 0.001984645114117094,
      "grad_norm": 0.9249778985977173,
      "learning_rate": 9.593e-05,
      "loss": 1.0335,
      "step": 19
    },
    {
      "epoch": 0.002089100120123257,
      "grad_norm": 1.2025758028030396,
      "learning_rate": 9.539999999999999e-05,
      "loss": 1.2275,
      "step": 20
    },
    {
      "epoch": 0.0021935551261294197,
      "grad_norm": 1.0294830799102783,
      "learning_rate": 9.487e-05,
      "loss": 1.1516,
      "step": 21
    },
    {
      "epoch": 0.0022980101321355825,
      "grad_norm": 1.0079749822616577,
      "learning_rate": 9.434e-05,
      "loss": 1.0904,
      "step": 22
    },
    {
      "epoch": 0.0024024651381417453,
      "grad_norm": 1.0860754251480103,
      "learning_rate": 9.381e-05,
      "loss": 1.1063,
      "step": 23
    },
    {
      "epoch": 0.002506920144147908,
      "grad_norm": 1.0929911136627197,
      "learning_rate": 9.327999999999999e-05,
      "loss": 1.213,
      "step": 24
    },
    {
      "epoch": 0.002611375150154071,
      "grad_norm": 1.0362168550491333,
      "learning_rate": 9.274999999999999e-05,
      "loss": 1.105,
      "step": 25
    },
    {
      "epoch": 0.002715830156160234,
      "grad_norm": 1.035015344619751,
      "learning_rate": 9.222e-05,
      "loss": 1.0202,
      "step": 26
    },
    {
      "epoch": 0.0028202851621663967,
      "grad_norm": 1.1152434349060059,
      "learning_rate": 9.169e-05,
      "loss": 1.0801,
      "step": 27
    },
    {
      "epoch": 0.0029247401681725595,
      "grad_norm": 1.1516571044921875,
      "learning_rate": 9.116e-05,
      "loss": 1.0891,
      "step": 28
    },
    {
      "epoch": 0.0030291951741787224,
      "grad_norm": 1.0675947666168213,
      "learning_rate": 9.062999999999999e-05,
      "loss": 1.0438,
      "step": 29
    },
    {
      "epoch": 0.0031336501801848852,
      "grad_norm": 1.0566611289978027,
      "learning_rate": 9.01e-05,
      "loss": 1.0598,
      "step": 30
    },
    {
      "epoch": 0.003238105186191048,
      "grad_norm": 1.0381075143814087,
      "learning_rate": 8.957e-05,
      "loss": 0.9869,
      "step": 31
    },
    {
      "epoch": 0.003342560192197211,
      "grad_norm": 1.0372414588928223,
      "learning_rate": 8.903999999999999e-05,
      "loss": 1.0221,
      "step": 32
    },
    {
      "epoch": 0.0034470151982033737,
      "grad_norm": 1.1146482229232788,
      "learning_rate": 8.850999999999999e-05,
      "loss": 1.1111,
      "step": 33
    },
    {
      "epoch": 0.0035514702042095366,
      "grad_norm": 1.117113471031189,
      "learning_rate": 8.798e-05,
      "loss": 1.0337,
      "step": 34
    },
    {
      "epoch": 0.0036559252102156994,
      "grad_norm": 1.1380937099456787,
      "learning_rate": 8.745e-05,
      "loss": 1.0539,
      "step": 35
    },
    {
      "epoch": 0.0037603802162218623,
      "grad_norm": 1.125671148300171,
      "learning_rate": 8.692e-05,
      "loss": 1.2324,
      "step": 36
    },
    {
      "epoch": 0.003864835222228025,
      "grad_norm": 1.178640604019165,
      "learning_rate": 8.638999999999999e-05,
      "loss": 1.0558,
      "step": 37
    },
    {
      "epoch": 0.003969290228234188,
      "grad_norm": 1.1600550413131714,
      "learning_rate": 8.586e-05,
      "loss": 1.1477,
      "step": 38
    },
    {
      "epoch": 0.004073745234240351,
      "grad_norm": 1.1267294883728027,
      "learning_rate": 8.533e-05,
      "loss": 1.1102,
      "step": 39
    },
    {
      "epoch": 0.004178200240246514,
      "grad_norm": 1.148314118385315,
      "learning_rate": 8.479999999999999e-05,
      "loss": 1.1249,
      "step": 40
    },
    {
      "epoch": 0.004282655246252677,
      "grad_norm": 1.7030447721481323,
      "learning_rate": 8.427e-05,
      "loss": 1.3342,
      "step": 41
    },
    {
      "epoch": 0.004387110252258839,
      "grad_norm": 1.1302878856658936,
      "learning_rate": 8.374e-05,
      "loss": 1.0549,
      "step": 42
    },
    {
      "epoch": 0.004491565258265003,
      "grad_norm": 1.2263422012329102,
      "learning_rate": 8.321e-05,
      "loss": 1.049,
      "step": 43
    },
    {
      "epoch": 0.004596020264271165,
      "grad_norm": 1.1995285749435425,
      "learning_rate": 8.268e-05,
      "loss": 1.0055,
      "step": 44
    },
    {
      "epoch": 0.004700475270277328,
      "grad_norm": 1.2773244380950928,
      "learning_rate": 8.214999999999999e-05,
      "loss": 1.0983,
      "step": 45
    },
    {
      "epoch": 0.004804930276283491,
      "grad_norm": 1.3492332696914673,
      "learning_rate": 8.162e-05,
      "loss": 1.181,
      "step": 46
    },
    {
      "epoch": 0.004909385282289654,
      "grad_norm": 1.35885751247406,
      "learning_rate": 8.108999999999998e-05,
      "loss": 1.0918,
      "step": 47
    },
    {
      "epoch": 0.005013840288295816,
      "grad_norm": 1.250424861907959,
      "learning_rate": 8.056e-05,
      "loss": 0.9498,
      "step": 48
    },
    {
      "epoch": 0.00511829529430198,
      "grad_norm": 1.5273371934890747,
      "learning_rate": 8.003e-05,
      "loss": 1.0708,
      "step": 49
    },
    {
      "epoch": 0.005222750300308142,
      "grad_norm": 1.8503930568695068,
      "learning_rate": 7.95e-05,
      "loss": 1.3104,
      "step": 50
    },
    {
      "epoch": 0.005222750300308142,
      "eval_loss": 1.0883654356002808,
      "eval_runtime": 120.0706,
      "eval_samples_per_second": 33.572,
      "eval_steps_per_second": 8.395,
      "step": 50
    },
    {
      "epoch": 0.005327205306314305,
      "grad_norm": 0.9896413087844849,
      "learning_rate": 7.897e-05,
      "loss": 0.9202,
      "step": 51
    },
    {
      "epoch": 0.005431660312320468,
      "grad_norm": 0.840713381767273,
      "learning_rate": 7.843999999999999e-05,
      "loss": 0.8256,
      "step": 52
    },
    {
      "epoch": 0.005536115318326631,
      "grad_norm": 0.7416518330574036,
      "learning_rate": 7.790999999999999e-05,
      "loss": 0.9071,
      "step": 53
    },
    {
      "epoch": 0.005640570324332793,
      "grad_norm": 0.7955224514007568,
      "learning_rate": 7.738e-05,
      "loss": 0.9046,
      "step": 54
    },
    {
      "epoch": 0.005745025330338957,
      "grad_norm": 0.7123813629150391,
      "learning_rate": 7.685e-05,
      "loss": 1.0314,
      "step": 55
    },
    {
      "epoch": 0.005849480336345119,
      "grad_norm": 0.683822751045227,
      "learning_rate": 7.632e-05,
      "loss": 0.9354,
      "step": 56
    },
    {
      "epoch": 0.005953935342351282,
      "grad_norm": 0.6209269165992737,
      "learning_rate": 7.578999999999999e-05,
      "loss": 0.8914,
      "step": 57
    },
    {
      "epoch": 0.006058390348357445,
      "grad_norm": 0.6532514691352844,
      "learning_rate": 7.526e-05,
      "loss": 1.0181,
      "step": 58
    },
    {
      "epoch": 0.006162845354363608,
      "grad_norm": 0.6706631183624268,
      "learning_rate": 7.473e-05,
      "loss": 0.9697,
      "step": 59
    },
    {
      "epoch": 0.0062673003603697704,
      "grad_norm": 0.6528756022453308,
      "learning_rate": 7.419999999999999e-05,
      "loss": 0.9479,
      "step": 60
    },
    {
      "epoch": 0.006371755366375934,
      "grad_norm": 0.7368625998497009,
      "learning_rate": 7.367e-05,
      "loss": 0.9429,
      "step": 61
    },
    {
      "epoch": 0.006476210372382096,
      "grad_norm": 0.7886870503425598,
      "learning_rate": 7.314e-05,
      "loss": 1.0517,
      "step": 62
    },
    {
      "epoch": 0.006580665378388259,
      "grad_norm": 0.7552511692047119,
      "learning_rate": 7.261e-05,
      "loss": 0.997,
      "step": 63
    },
    {
      "epoch": 0.006685120384394422,
      "grad_norm": 0.7769532799720764,
      "learning_rate": 7.208e-05,
      "loss": 0.9554,
      "step": 64
    },
    {
      "epoch": 0.006789575390400585,
      "grad_norm": 0.8453531265258789,
      "learning_rate": 7.154999999999999e-05,
      "loss": 1.0108,
      "step": 65
    },
    {
      "epoch": 0.0068940303964067475,
      "grad_norm": 0.8387408256530762,
      "learning_rate": 7.102e-05,
      "loss": 0.9538,
      "step": 66
    },
    {
      "epoch": 0.006998485402412911,
      "grad_norm": 0.8454548120498657,
      "learning_rate": 7.049e-05,
      "loss": 0.9305,
      "step": 67
    },
    {
      "epoch": 0.007102940408419073,
      "grad_norm": 0.9299591779708862,
      "learning_rate": 6.996e-05,
      "loss": 1.1564,
      "step": 68
    },
    {
      "epoch": 0.0072073954144252364,
      "grad_norm": 0.863427460193634,
      "learning_rate": 6.943e-05,
      "loss": 0.9635,
      "step": 69
    },
    {
      "epoch": 0.007311850420431399,
      "grad_norm": 0.9572794437408447,
      "learning_rate": 6.89e-05,
      "loss": 1.1278,
      "step": 70
    },
    {
      "epoch": 0.007416305426437562,
      "grad_norm": 0.9274687767028809,
      "learning_rate": 6.837e-05,
      "loss": 1.0153,
      "step": 71
    },
    {
      "epoch": 0.0075207604324437245,
      "grad_norm": 0.8995688557624817,
      "learning_rate": 6.784e-05,
      "loss": 1.0095,
      "step": 72
    },
    {
      "epoch": 0.007625215438449888,
      "grad_norm": 0.9216225743293762,
      "learning_rate": 6.730999999999999e-05,
      "loss": 1.004,
      "step": 73
    },
    {
      "epoch": 0.00772967044445605,
      "grad_norm": 0.8909146785736084,
      "learning_rate": 6.678e-05,
      "loss": 0.9789,
      "step": 74
    },
    {
      "epoch": 0.007834125450462213,
      "grad_norm": 0.8936184048652649,
      "learning_rate": 6.625e-05,
      "loss": 0.9622,
      "step": 75
    },
    {
      "epoch": 0.007938580456468376,
      "grad_norm": 0.9004867672920227,
      "learning_rate": 6.572e-05,
      "loss": 0.9201,
      "step": 76
    },
    {
      "epoch": 0.008043035462474538,
      "grad_norm": 1.025423288345337,
      "learning_rate": 6.519e-05,
      "loss": 1.1964,
      "step": 77
    },
    {
      "epoch": 0.008147490468480702,
      "grad_norm": 1.002456784248352,
      "learning_rate": 6.466e-05,
      "loss": 1.1274,
      "step": 78
    },
    {
      "epoch": 0.008251945474486865,
      "grad_norm": 0.967106819152832,
      "learning_rate": 6.413e-05,
      "loss": 0.9247,
      "step": 79
    },
    {
      "epoch": 0.008356400480493027,
      "grad_norm": 1.0033572912216187,
      "learning_rate": 6.359999999999999e-05,
      "loss": 1.0381,
      "step": 80
    },
    {
      "epoch": 0.00846085548649919,
      "grad_norm": 0.9540228843688965,
      "learning_rate": 6.306999999999999e-05,
      "loss": 0.8963,
      "step": 81
    },
    {
      "epoch": 0.008565310492505354,
      "grad_norm": 1.1677919626235962,
      "learning_rate": 6.254000000000001e-05,
      "loss": 1.1372,
      "step": 82
    },
    {
      "epoch": 0.008669765498511516,
      "grad_norm": 1.0950039625167847,
      "learning_rate": 6.201e-05,
      "loss": 1.052,
      "step": 83
    },
    {
      "epoch": 0.008774220504517679,
      "grad_norm": 1.028153657913208,
      "learning_rate": 6.148e-05,
      "loss": 0.9451,
      "step": 84
    },
    {
      "epoch": 0.008878675510523841,
      "grad_norm": 1.1274486780166626,
      "learning_rate": 6.095e-05,
      "loss": 1.0042,
      "step": 85
    },
    {
      "epoch": 0.008983130516530005,
      "grad_norm": 1.1423695087432861,
      "learning_rate": 6.0419999999999994e-05,
      "loss": 1.094,
      "step": 86
    },
    {
      "epoch": 0.009087585522536168,
      "grad_norm": 1.1429065465927124,
      "learning_rate": 5.988999999999999e-05,
      "loss": 0.9644,
      "step": 87
    },
    {
      "epoch": 0.00919204052854233,
      "grad_norm": 1.2021771669387817,
      "learning_rate": 5.9359999999999994e-05,
      "loss": 1.1807,
      "step": 88
    },
    {
      "epoch": 0.009296495534548492,
      "grad_norm": 1.1174052953720093,
      "learning_rate": 5.8830000000000004e-05,
      "loss": 1.029,
      "step": 89
    },
    {
      "epoch": 0.009400950540554657,
      "grad_norm": 1.2131744623184204,
      "learning_rate": 5.83e-05,
      "loss": 1.2473,
      "step": 90
    },
    {
      "epoch": 0.009505405546560819,
      "grad_norm": 1.1659351587295532,
      "learning_rate": 5.777e-05,
      "loss": 1.1075,
      "step": 91
    },
    {
      "epoch": 0.009609860552566981,
      "grad_norm": 1.155617594718933,
      "learning_rate": 5.7239999999999994e-05,
      "loss": 0.9338,
      "step": 92
    },
    {
      "epoch": 0.009714315558573145,
      "grad_norm": 1.1732633113861084,
      "learning_rate": 5.671e-05,
      "loss": 1.1125,
      "step": 93
    },
    {
      "epoch": 0.009818770564579308,
      "grad_norm": 1.1406437158584595,
      "learning_rate": 5.6179999999999994e-05,
      "loss": 1.0323,
      "step": 94
    },
    {
      "epoch": 0.00992322557058547,
      "grad_norm": 1.25766122341156,
      "learning_rate": 5.5650000000000004e-05,
      "loss": 1.153,
      "step": 95
    },
    {
      "epoch": 0.010027680576591633,
      "grad_norm": 1.3154778480529785,
      "learning_rate": 5.512e-05,
      "loss": 1.1242,
      "step": 96
    },
    {
      "epoch": 0.010132135582597797,
      "grad_norm": 1.355385184288025,
      "learning_rate": 5.459e-05,
      "loss": 1.1835,
      "step": 97
    },
    {
      "epoch": 0.01023659058860396,
      "grad_norm": 1.3438916206359863,
      "learning_rate": 5.406e-05,
      "loss": 1.0795,
      "step": 98
    },
    {
      "epoch": 0.010341045594610122,
      "grad_norm": 1.2769006490707397,
      "learning_rate": 5.353e-05,
      "loss": 0.9322,
      "step": 99
    },
    {
      "epoch": 0.010445500600616284,
      "grad_norm": 1.896607518196106,
      "learning_rate": 5.2999999999999994e-05,
      "loss": 1.1321,
      "step": 100
    },
    {
      "epoch": 0.010445500600616284,
      "eval_loss": 1.0444438457489014,
      "eval_runtime": 118.2357,
      "eval_samples_per_second": 34.093,
      "eval_steps_per_second": 8.525,
      "step": 100
    },
    {
      "epoch": 0.010549955606622448,
      "grad_norm": 0.6772998571395874,
      "learning_rate": 5.246999999999999e-05,
      "loss": 0.8867,
      "step": 101
    },
    {
      "epoch": 0.01065441061262861,
      "grad_norm": 0.6309265494346619,
      "learning_rate": 5.194e-05,
      "loss": 0.9269,
      "step": 102
    },
    {
      "epoch": 0.010758865618634773,
      "grad_norm": 0.6723343729972839,
      "learning_rate": 5.141e-05,
      "loss": 0.9639,
      "step": 103
    },
    {
      "epoch": 0.010863320624640935,
      "grad_norm": 0.6599306464195251,
      "learning_rate": 5.088e-05,
      "loss": 0.9483,
      "step": 104
    },
    {
      "epoch": 0.0109677756306471,
      "grad_norm": 0.5985355973243713,
      "learning_rate": 5.035e-05,
      "loss": 0.9826,
      "step": 105
    },
    {
      "epoch": 0.011072230636653262,
      "grad_norm": 0.6056426763534546,
      "learning_rate": 4.9819999999999994e-05,
      "loss": 0.87,
      "step": 106
    },
    {
      "epoch": 0.011176685642659424,
      "grad_norm": 0.6577640771865845,
      "learning_rate": 4.929e-05,
      "loss": 0.8896,
      "step": 107
    },
    {
      "epoch": 0.011281140648665587,
      "grad_norm": 0.6197834014892578,
      "learning_rate": 4.876e-05,
      "loss": 0.9857,
      "step": 108
    },
    {
      "epoch": 0.011385595654671751,
      "grad_norm": 0.6561485528945923,
      "learning_rate": 4.823e-05,
      "loss": 1.0036,
      "step": 109
    },
    {
      "epoch": 0.011490050660677913,
      "grad_norm": 0.6277485489845276,
      "learning_rate": 4.7699999999999994e-05,
      "loss": 0.9196,
      "step": 110
    },
    {
      "epoch": 0.011594505666684076,
      "grad_norm": 0.6193849444389343,
      "learning_rate": 4.717e-05,
      "loss": 0.8803,
      "step": 111
    },
    {
      "epoch": 0.011698960672690238,
      "grad_norm": 0.64503014087677,
      "learning_rate": 4.6639999999999994e-05,
      "loss": 1.0019,
      "step": 112
    },
    {
      "epoch": 0.011803415678696402,
      "grad_norm": 0.686529278755188,
      "learning_rate": 4.611e-05,
      "loss": 0.9412,
      "step": 113
    },
    {
      "epoch": 0.011907870684702565,
      "grad_norm": 0.7062692642211914,
      "learning_rate": 4.558e-05,
      "loss": 1.0139,
      "step": 114
    },
    {
      "epoch": 0.012012325690708727,
      "grad_norm": 0.7408269643783569,
      "learning_rate": 4.505e-05,
      "loss": 0.9398,
      "step": 115
    },
    {
      "epoch": 0.01211678069671489,
      "grad_norm": 0.8046457767486572,
      "learning_rate": 4.4519999999999994e-05,
      "loss": 1.0817,
      "step": 116
    },
    {
      "epoch": 0.012221235702721054,
      "grad_norm": 0.8560929894447327,
      "learning_rate": 4.399e-05,
      "loss": 0.9393,
      "step": 117
    },
    {
      "epoch": 0.012325690708727216,
      "grad_norm": 0.8270806074142456,
      "learning_rate": 4.346e-05,
      "loss": 1.029,
      "step": 118
    },
    {
      "epoch": 0.012430145714733378,
      "grad_norm": 0.8439892530441284,
      "learning_rate": 4.293e-05,
      "loss": 1.0061,
      "step": 119
    },
    {
      "epoch": 0.012534600720739541,
      "grad_norm": 0.9163686037063599,
      "learning_rate": 4.2399999999999994e-05,
      "loss": 1.1759,
      "step": 120
    },
    {
      "epoch": 0.012639055726745705,
      "grad_norm": 0.9552029371261597,
      "learning_rate": 4.187e-05,
      "loss": 0.9827,
      "step": 121
    },
    {
      "epoch": 0.012743510732751867,
      "grad_norm": 0.9216101169586182,
      "learning_rate": 4.134e-05,
      "loss": 1.0798,
      "step": 122
    },
    {
      "epoch": 0.01284796573875803,
      "grad_norm": 0.9589611887931824,
      "learning_rate": 4.081e-05,
      "loss": 1.077,
      "step": 123
    },
    {
      "epoch": 0.012952420744764192,
      "grad_norm": 0.9211677312850952,
      "learning_rate": 4.028e-05,
      "loss": 1.0484,
      "step": 124
    },
    {
      "epoch": 0.013056875750770356,
      "grad_norm": 0.8966543078422546,
      "learning_rate": 3.975e-05,
      "loss": 0.9896,
      "step": 125
    },
    {
      "epoch": 0.013161330756776519,
      "grad_norm": 0.9282961487770081,
      "learning_rate": 3.9219999999999994e-05,
      "loss": 1.0094,
      "step": 126
    },
    {
      "epoch": 0.013265785762782681,
      "grad_norm": 1.004485011100769,
      "learning_rate": 3.869e-05,
      "loss": 1.1737,
      "step": 127
    },
    {
      "epoch": 0.013370240768788844,
      "grad_norm": 0.9591395854949951,
      "learning_rate": 3.816e-05,
      "loss": 1.0858,
      "step": 128
    },
    {
      "epoch": 0.013474695774795008,
      "grad_norm": 0.9005763530731201,
      "learning_rate": 3.763e-05,
      "loss": 1.0078,
      "step": 129
    },
    {
      "epoch": 0.01357915078080117,
      "grad_norm": 0.9479995965957642,
      "learning_rate": 3.7099999999999994e-05,
      "loss": 1.0498,
      "step": 130
    },
    {
      "epoch": 0.013683605786807333,
      "grad_norm": 1.0200867652893066,
      "learning_rate": 3.657e-05,
      "loss": 1.0824,
      "step": 131
    },
    {
      "epoch": 0.013788060792813495,
      "grad_norm": 0.9186935424804688,
      "learning_rate": 3.604e-05,
      "loss": 0.9936,
      "step": 132
    },
    {
      "epoch": 0.013892515798819659,
      "grad_norm": 0.9905325770378113,
      "learning_rate": 3.551e-05,
      "loss": 1.0225,
      "step": 133
    },
    {
      "epoch": 0.013996970804825822,
      "grad_norm": 1.0167120695114136,
      "learning_rate": 3.498e-05,
      "loss": 1.1188,
      "step": 134
    },
    {
      "epoch": 0.014101425810831984,
      "grad_norm": 0.9497846961021423,
      "learning_rate": 3.445e-05,
      "loss": 0.9271,
      "step": 135
    },
    {
      "epoch": 0.014205880816838146,
      "grad_norm": 1.0277209281921387,
      "learning_rate": 3.392e-05,
      "loss": 1.0421,
      "step": 136
    },
    {
      "epoch": 0.01431033582284431,
      "grad_norm": 0.9843363761901855,
      "learning_rate": 3.339e-05,
      "loss": 0.982,
      "step": 137
    },
    {
      "epoch": 0.014414790828850473,
      "grad_norm": 1.0494071245193481,
      "learning_rate": 3.286e-05,
      "loss": 0.9804,
      "step": 138
    },
    {
      "epoch": 0.014519245834856635,
      "grad_norm": 1.0694974660873413,
      "learning_rate": 3.233e-05,
      "loss": 0.9624,
      "step": 139
    },
    {
      "epoch": 0.014623700840862798,
      "grad_norm": 1.0880765914916992,
      "learning_rate": 3.1799999999999994e-05,
      "loss": 0.9901,
      "step": 140
    },
    {
      "epoch": 0.014728155846868962,
      "grad_norm": 1.053983211517334,
      "learning_rate": 3.1270000000000004e-05,
      "loss": 0.9555,
      "step": 141
    },
    {
      "epoch": 0.014832610852875124,
      "grad_norm": 1.0926487445831299,
      "learning_rate": 3.074e-05,
      "loss": 1.03,
      "step": 142
    },
    {
      "epoch": 0.014937065858881287,
      "grad_norm": 1.1903960704803467,
      "learning_rate": 3.0209999999999997e-05,
      "loss": 1.0765,
      "step": 143
    },
    {
      "epoch": 0.015041520864887449,
      "grad_norm": 1.2311145067214966,
      "learning_rate": 2.9679999999999997e-05,
      "loss": 1.0678,
      "step": 144
    },
    {
      "epoch": 0.015145975870893613,
      "grad_norm": 1.1940836906433105,
      "learning_rate": 2.915e-05,
      "loss": 1.0461,
      "step": 145
    },
    {
      "epoch": 0.015250430876899776,
      "grad_norm": 1.228232979774475,
      "learning_rate": 2.8619999999999997e-05,
      "loss": 0.9819,
      "step": 146
    },
    {
      "epoch": 0.015354885882905938,
      "grad_norm": 1.2038990259170532,
      "learning_rate": 2.8089999999999997e-05,
      "loss": 0.9445,
      "step": 147
    },
    {
      "epoch": 0.0154593408889121,
      "grad_norm": 1.2821253538131714,
      "learning_rate": 2.756e-05,
      "loss": 1.1162,
      "step": 148
    },
    {
      "epoch": 0.015563795894918265,
      "grad_norm": 1.437116265296936,
      "learning_rate": 2.703e-05,
      "loss": 1.0603,
      "step": 149
    },
    {
      "epoch": 0.015668250900924427,
      "grad_norm": 1.6678568124771118,
      "learning_rate": 2.6499999999999997e-05,
      "loss": 1.0682,
      "step": 150
    },
    {
      "epoch": 0.015668250900924427,
      "eval_loss": 0.9961364269256592,
      "eval_runtime": 118.6077,
      "eval_samples_per_second": 33.986,
      "eval_steps_per_second": 8.499,
      "step": 150
    },
    {
      "epoch": 0.01577270590693059,
      "grad_norm": 0.5151348114013672,
      "learning_rate": 2.597e-05,
      "loss": 0.7635,
      "step": 151
    },
    {
      "epoch": 0.015877160912936752,
      "grad_norm": 0.5203879475593567,
      "learning_rate": 2.544e-05,
      "loss": 0.7112,
      "step": 152
    },
    {
      "epoch": 0.015981615918942916,
      "grad_norm": 0.5102455019950867,
      "learning_rate": 2.4909999999999997e-05,
      "loss": 0.8134,
      "step": 153
    },
    {
      "epoch": 0.016086070924949077,
      "grad_norm": 0.5462666153907776,
      "learning_rate": 2.438e-05,
      "loss": 0.925,
      "step": 154
    },
    {
      "epoch": 0.01619052593095524,
      "grad_norm": 0.5957190990447998,
      "learning_rate": 2.3849999999999997e-05,
      "loss": 0.9079,
      "step": 155
    },
    {
      "epoch": 0.016294980936961405,
      "grad_norm": 0.6015512347221375,
      "learning_rate": 2.3319999999999997e-05,
      "loss": 0.9956,
      "step": 156
    },
    {
      "epoch": 0.016399435942967566,
      "grad_norm": 0.5997916460037231,
      "learning_rate": 2.279e-05,
      "loss": 0.9413,
      "step": 157
    },
    {
      "epoch": 0.01650389094897373,
      "grad_norm": 0.5999729037284851,
      "learning_rate": 2.2259999999999997e-05,
      "loss": 0.8335,
      "step": 158
    },
    {
      "epoch": 0.016608345954979894,
      "grad_norm": 0.6232542991638184,
      "learning_rate": 2.173e-05,
      "loss": 0.9134,
      "step": 159
    },
    {
      "epoch": 0.016712800960986054,
      "grad_norm": 0.607313334941864,
      "learning_rate": 2.1199999999999997e-05,
      "loss": 0.874,
      "step": 160
    },
    {
      "epoch": 0.01681725596699222,
      "grad_norm": 0.6412212252616882,
      "learning_rate": 2.067e-05,
      "loss": 0.9721,
      "step": 161
    },
    {
      "epoch": 0.01692171097299838,
      "grad_norm": 0.650705099105835,
      "learning_rate": 2.014e-05,
      "loss": 0.9523,
      "step": 162
    },
    {
      "epoch": 0.017026165979004543,
      "grad_norm": 0.6729899644851685,
      "learning_rate": 1.9609999999999997e-05,
      "loss": 0.9684,
      "step": 163
    },
    {
      "epoch": 0.017130620985010708,
      "grad_norm": 0.6449539065361023,
      "learning_rate": 1.908e-05,
      "loss": 0.808,
      "step": 164
    },
    {
      "epoch": 0.01723507599101687,
      "grad_norm": 0.6991842985153198,
      "learning_rate": 1.8549999999999997e-05,
      "loss": 0.9929,
      "step": 165
    },
    {
      "epoch": 0.017339530997023032,
      "grad_norm": 0.7484295964241028,
      "learning_rate": 1.802e-05,
      "loss": 0.9746,
      "step": 166
    },
    {
      "epoch": 0.017443986003029197,
      "grad_norm": 0.7161227464675903,
      "learning_rate": 1.749e-05,
      "loss": 0.9454,
      "step": 167
    },
    {
      "epoch": 0.017548441009035357,
      "grad_norm": 0.7815462946891785,
      "learning_rate": 1.696e-05,
      "loss": 1.0301,
      "step": 168
    },
    {
      "epoch": 0.01765289601504152,
      "grad_norm": 0.8647356033325195,
      "learning_rate": 1.643e-05,
      "loss": 1.0621,
      "step": 169
    },
    {
      "epoch": 0.017757351021047682,
      "grad_norm": 0.9504815340042114,
      "learning_rate": 1.5899999999999997e-05,
      "loss": 1.0426,
      "step": 170
    },
    {
      "epoch": 0.017861806027053846,
      "grad_norm": 0.8482909202575684,
      "learning_rate": 1.537e-05,
      "loss": 0.9898,
      "step": 171
    },
    {
      "epoch": 0.01796626103306001,
      "grad_norm": 0.8360997438430786,
      "learning_rate": 1.4839999999999999e-05,
      "loss": 0.9783,
      "step": 172
    },
    {
      "epoch": 0.01807071603906617,
      "grad_norm": 0.9085504412651062,
      "learning_rate": 1.4309999999999999e-05,
      "loss": 0.9865,
      "step": 173
    },
    {
      "epoch": 0.018175171045072335,
      "grad_norm": 0.8988630771636963,
      "learning_rate": 1.378e-05,
      "loss": 1.0591,
      "step": 174
    },
    {
      "epoch": 0.0182796260510785,
      "grad_norm": 0.8486796617507935,
      "learning_rate": 1.3249999999999999e-05,
      "loss": 0.9894,
      "step": 175
    },
    {
      "epoch": 0.01838408105708466,
      "grad_norm": 0.8764381408691406,
      "learning_rate": 1.272e-05,
      "loss": 0.9253,
      "step": 176
    },
    {
      "epoch": 0.018488536063090824,
      "grad_norm": 0.9448692798614502,
      "learning_rate": 1.219e-05,
      "loss": 1.0425,
      "step": 177
    },
    {
      "epoch": 0.018592991069096985,
      "grad_norm": 0.9180240631103516,
      "learning_rate": 1.1659999999999998e-05,
      "loss": 0.9328,
      "step": 178
    },
    {
      "epoch": 0.01869744607510315,
      "grad_norm": 0.9340706467628479,
      "learning_rate": 1.1129999999999998e-05,
      "loss": 1.0015,
      "step": 179
    },
    {
      "epoch": 0.018801901081109313,
      "grad_norm": 0.8770861029624939,
      "learning_rate": 1.0599999999999998e-05,
      "loss": 0.9812,
      "step": 180
    },
    {
      "epoch": 0.018906356087115474,
      "grad_norm": 0.9975367188453674,
      "learning_rate": 1.007e-05,
      "loss": 1.0984,
      "step": 181
    },
    {
      "epoch": 0.019010811093121638,
      "grad_norm": 0.9696022868156433,
      "learning_rate": 9.54e-06,
      "loss": 0.9888,
      "step": 182
    },
    {
      "epoch": 0.019115266099127802,
      "grad_norm": 0.8801543116569519,
      "learning_rate": 9.01e-06,
      "loss": 0.8672,
      "step": 183
    },
    {
      "epoch": 0.019219721105133963,
      "grad_norm": 0.9794437885284424,
      "learning_rate": 8.48e-06,
      "loss": 1.0545,
      "step": 184
    },
    {
      "epoch": 0.019324176111140127,
      "grad_norm": 0.9684680700302124,
      "learning_rate": 7.949999999999998e-06,
      "loss": 0.9275,
      "step": 185
    },
    {
      "epoch": 0.01942863111714629,
      "grad_norm": 0.956508219242096,
      "learning_rate": 7.419999999999999e-06,
      "loss": 0.9679,
      "step": 186
    },
    {
      "epoch": 0.01953308612315245,
      "grad_norm": 1.0241084098815918,
      "learning_rate": 6.89e-06,
      "loss": 1.0333,
      "step": 187
    },
    {
      "epoch": 0.019637541129158616,
      "grad_norm": 1.13876211643219,
      "learning_rate": 6.36e-06,
      "loss": 1.189,
      "step": 188
    },
    {
      "epoch": 0.019741996135164776,
      "grad_norm": 1.0502783060073853,
      "learning_rate": 5.829999999999999e-06,
      "loss": 1.0062,
      "step": 189
    },
    {
      "epoch": 0.01984645114117094,
      "grad_norm": 1.0701584815979004,
      "learning_rate": 5.299999999999999e-06,
      "loss": 0.9934,
      "step": 190
    },
    {
      "epoch": 0.019950906147177105,
      "grad_norm": 1.1496695280075073,
      "learning_rate": 4.77e-06,
      "loss": 1.0933,
      "step": 191
    },
    {
      "epoch": 0.020055361153183265,
      "grad_norm": 1.1266313791275024,
      "learning_rate": 4.24e-06,
      "loss": 1.0909,
      "step": 192
    },
    {
      "epoch": 0.02015981615918943,
      "grad_norm": 1.1178048849105835,
      "learning_rate": 3.7099999999999996e-06,
      "loss": 0.9263,
      "step": 193
    },
    {
      "epoch": 0.020264271165195594,
      "grad_norm": 1.1649036407470703,
      "learning_rate": 3.18e-06,
      "loss": 1.0362,
      "step": 194
    },
    {
      "epoch": 0.020368726171201754,
      "grad_norm": 1.1672587394714355,
      "learning_rate": 2.6499999999999996e-06,
      "loss": 1.0083,
      "step": 195
    },
    {
      "epoch": 0.02047318117720792,
      "grad_norm": 1.1676815748214722,
      "learning_rate": 2.12e-06,
      "loss": 0.9924,
      "step": 196
    },
    {
      "epoch": 0.02057763618321408,
      "grad_norm": 1.3110767602920532,
      "learning_rate": 1.59e-06,
      "loss": 0.9344,
      "step": 197
    },
    {
      "epoch": 0.020682091189220243,
      "grad_norm": 1.4102957248687744,
      "learning_rate": 1.06e-06,
      "loss": 1.1094,
      "step": 198
    },
    {
      "epoch": 0.020786546195226407,
      "grad_norm": 1.5473552942276,
      "learning_rate": 5.3e-07,
      "loss": 1.0472,
      "step": 199
    },
    {
      "epoch": 0.020891001201232568,
      "grad_norm": 2.232775926589966,
      "learning_rate": 0.0,
      "loss": 1.3862,
      "step": 200
    },
    {
      "epoch": 0.020891001201232568,
      "eval_loss": 0.9818174242973328,
      "eval_runtime": 118.3422,
      "eval_samples_per_second": 34.062,
      "eval_steps_per_second": 8.518,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.06657392623616e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}