{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999679969277051, "eval_steps": 500, "global_step": 15623, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 33.57031222772094, "learning_rate": 2.1321961620469085e-08, "loss": 1.805, "step": 1 }, { "epoch": 0.0, "grad_norm": 30.62354289175189, "learning_rate": 4.264392324093817e-08, "loss": 1.9924, "step": 2 }, { "epoch": 0.0, "grad_norm": 33.432584220852284, "learning_rate": 6.396588486140725e-08, "loss": 1.8947, "step": 3 }, { "epoch": 0.0, "grad_norm": 30.99339404605826, "learning_rate": 8.528784648187634e-08, "loss": 1.8676, "step": 4 }, { "epoch": 0.0, "grad_norm": 37.29054836754739, "learning_rate": 1.0660980810234542e-07, "loss": 1.7941, "step": 5 }, { "epoch": 0.0, "grad_norm": 31.41660533747613, "learning_rate": 1.279317697228145e-07, "loss": 1.9214, "step": 6 }, { "epoch": 0.0, "grad_norm": 42.071869869927916, "learning_rate": 1.4925373134328358e-07, "loss": 1.8996, "step": 7 }, { "epoch": 0.0, "grad_norm": 44.76683983143669, "learning_rate": 1.7057569296375268e-07, "loss": 1.9124, "step": 8 }, { "epoch": 0.0, "grad_norm": 23.935831473887372, "learning_rate": 1.918976545842218e-07, "loss": 1.9045, "step": 9 }, { "epoch": 0.0, "grad_norm": 32.35377444478138, "learning_rate": 2.1321961620469084e-07, "loss": 2.0825, "step": 10 }, { "epoch": 0.0, "grad_norm": 27.549788609712735, "learning_rate": 2.3454157782515995e-07, "loss": 1.8677, "step": 11 }, { "epoch": 0.0, "grad_norm": 41.494449894320645, "learning_rate": 2.55863539445629e-07, "loss": 1.9774, "step": 12 }, { "epoch": 0.0, "grad_norm": 14.49889800759277, "learning_rate": 2.771855010660981e-07, "loss": 0.9054, "step": 13 }, { "epoch": 0.0, "grad_norm": 46.96340929371742, "learning_rate": 2.9850746268656716e-07, "loss": 1.8873, "step": 14 }, { "epoch": 0.0, "grad_norm": 83.08097496644541, "learning_rate": 3.1982942430703626e-07, "loss": 2.0233, "step": 15 }, { "epoch": 0.0, "grad_norm": 46.674965169654804, "learning_rate": 3.4115138592750537e-07, "loss": 1.7128, "step": 16 }, { "epoch": 0.0, "grad_norm": 7.963177286173548, "learning_rate": 3.624733475479744e-07, "loss": 0.8177, "step": 17 }, { "epoch": 0.0, "grad_norm": 31.647676380515726, "learning_rate": 3.837953091684436e-07, "loss": 1.9083, "step": 18 }, { "epoch": 0.0, "grad_norm": 86.68430088479528, "learning_rate": 4.0511727078891263e-07, "loss": 1.9992, "step": 19 }, { "epoch": 0.0, "grad_norm": 119.36920680946395, "learning_rate": 4.264392324093817e-07, "loss": 1.7906, "step": 20 }, { "epoch": 0.0, "grad_norm": 73.89450340052323, "learning_rate": 4.4776119402985074e-07, "loss": 1.8989, "step": 21 }, { "epoch": 0.0, "grad_norm": 84.40302543517517, "learning_rate": 4.690831556503199e-07, "loss": 1.7613, "step": 22 }, { "epoch": 0.0, "grad_norm": 28.71909775783994, "learning_rate": 4.904051172707889e-07, "loss": 1.8868, "step": 23 }, { "epoch": 0.0, "grad_norm": 143.36572249753948, "learning_rate": 5.11727078891258e-07, "loss": 1.8532, "step": 24 }, { "epoch": 0.0, "grad_norm": 42.15679276133985, "learning_rate": 5.33049040511727e-07, "loss": 1.7422, "step": 25 }, { "epoch": 0.0, "grad_norm": 24.393638988458775, "learning_rate": 5.543710021321962e-07, "loss": 1.7664, "step": 26 }, { "epoch": 0.0, "grad_norm": 27.137910986888166, "learning_rate": 5.756929637526653e-07, "loss": 1.768, "step": 27 }, { "epoch": 0.0, "grad_norm": 62.268227202733684, "learning_rate": 5.970149253731343e-07, "loss": 1.8131, "step": 28 }, { "epoch": 0.0, "grad_norm": 23.99007267682505, "learning_rate": 6.183368869936035e-07, "loss": 1.7067, "step": 29 }, { "epoch": 0.0, "grad_norm": 32.20685496942396, "learning_rate": 6.396588486140725e-07, "loss": 1.7394, "step": 30 }, { "epoch": 0.0, "grad_norm": 10.672517411970658, "learning_rate": 6.609808102345417e-07, "loss": 0.8006, "step": 31 }, { "epoch": 0.0, "grad_norm": 42.09682234256541, "learning_rate": 6.823027718550107e-07, "loss": 1.8982, "step": 32 }, { "epoch": 0.0, "grad_norm": 8.803050409398635, "learning_rate": 7.036247334754798e-07, "loss": 0.8037, "step": 33 }, { "epoch": 0.0, "grad_norm": 43.391088050402516, "learning_rate": 7.249466950959488e-07, "loss": 1.7651, "step": 34 }, { "epoch": 0.0, "grad_norm": 20.480046046279416, "learning_rate": 7.462686567164179e-07, "loss": 1.6076, "step": 35 }, { "epoch": 0.0, "grad_norm": 11.808209480331833, "learning_rate": 7.675906183368872e-07, "loss": 0.9305, "step": 36 }, { "epoch": 0.0, "grad_norm": 35.0491734835604, "learning_rate": 7.889125799573562e-07, "loss": 1.6179, "step": 37 }, { "epoch": 0.0, "grad_norm": 8.28829432744623, "learning_rate": 8.102345415778253e-07, "loss": 0.8084, "step": 38 }, { "epoch": 0.0, "grad_norm": 163.06635539364507, "learning_rate": 8.315565031982943e-07, "loss": 1.6661, "step": 39 }, { "epoch": 0.0, "grad_norm": 66.06087598981034, "learning_rate": 8.528784648187634e-07, "loss": 1.7462, "step": 40 }, { "epoch": 0.0, "grad_norm": 45.33299600755559, "learning_rate": 8.742004264392324e-07, "loss": 1.5141, "step": 41 }, { "epoch": 0.0, "grad_norm": 52.071223971678386, "learning_rate": 8.955223880597015e-07, "loss": 1.5245, "step": 42 }, { "epoch": 0.0, "grad_norm": 46.66415103131062, "learning_rate": 9.168443496801707e-07, "loss": 1.4713, "step": 43 }, { "epoch": 0.0, "grad_norm": 55.34239506337831, "learning_rate": 9.381663113006398e-07, "loss": 1.8445, "step": 44 }, { "epoch": 0.0, "grad_norm": 33.528470479777596, "learning_rate": 9.594882729211088e-07, "loss": 1.5886, "step": 45 }, { "epoch": 0.0, "grad_norm": 78.29648611520904, "learning_rate": 9.808102345415779e-07, "loss": 1.4611, "step": 46 }, { "epoch": 0.0, "grad_norm": 39.797317433805844, "learning_rate": 1.002132196162047e-06, "loss": 1.4394, "step": 47 }, { "epoch": 0.0, "grad_norm": 9.601664158326901, "learning_rate": 1.023454157782516e-06, "loss": 1.3576, "step": 48 }, { "epoch": 0.0, "grad_norm": 3.2342754480489746, "learning_rate": 1.044776119402985e-06, "loss": 0.7919, "step": 49 }, { "epoch": 0.0, "grad_norm": 18.826647113769447, "learning_rate": 1.066098081023454e-06, "loss": 1.3449, "step": 50 }, { "epoch": 0.0, "grad_norm": 60.47560808213527, "learning_rate": 1.0874200426439234e-06, "loss": 1.3123, "step": 51 }, { "epoch": 0.0, "grad_norm": 125.1126598672909, "learning_rate": 1.1087420042643924e-06, "loss": 1.3735, "step": 52 }, { "epoch": 0.0, "grad_norm": 9.621433967944519, "learning_rate": 1.1300639658848615e-06, "loss": 1.3189, "step": 53 }, { "epoch": 0.0, "grad_norm": 40.742214771723276, "learning_rate": 1.1513859275053305e-06, "loss": 1.3316, "step": 54 }, { "epoch": 0.0, "grad_norm": 10.093928187425508, "learning_rate": 1.1727078891257996e-06, "loss": 1.3702, "step": 55 }, { "epoch": 0.0, "grad_norm": 22.162798757714327, "learning_rate": 1.1940298507462686e-06, "loss": 1.2687, "step": 56 }, { "epoch": 0.0, "grad_norm": 33.9712016624994, "learning_rate": 1.2153518123667379e-06, "loss": 1.3363, "step": 57 }, { "epoch": 0.0, "grad_norm": 24.722424870411125, "learning_rate": 1.236673773987207e-06, "loss": 1.3114, "step": 58 }, { "epoch": 0.0, "grad_norm": 10.45427195386564, "learning_rate": 1.257995735607676e-06, "loss": 1.2263, "step": 59 }, { "epoch": 0.0, "grad_norm": 8.059132263195709, "learning_rate": 1.279317697228145e-06, "loss": 1.3256, "step": 60 }, { "epoch": 0.0, "grad_norm": 5.422399898128523, "learning_rate": 1.300639658848614e-06, "loss": 1.2673, "step": 61 }, { "epoch": 0.0, "grad_norm": 10.751032701572303, "learning_rate": 1.3219616204690834e-06, "loss": 1.3478, "step": 62 }, { "epoch": 0.0, "grad_norm": 4.154123886718362, "learning_rate": 1.3432835820895524e-06, "loss": 1.2127, "step": 63 }, { "epoch": 0.0, "grad_norm": 8.556736855986422, "learning_rate": 1.3646055437100215e-06, "loss": 1.2898, "step": 64 }, { "epoch": 0.0, "grad_norm": 9.7531663871978, "learning_rate": 1.3859275053304905e-06, "loss": 1.2966, "step": 65 }, { "epoch": 0.0, "grad_norm": 1.9329963082164845, "learning_rate": 1.4072494669509596e-06, "loss": 0.7164, "step": 66 }, { "epoch": 0.0, "grad_norm": 46.20894365527159, "learning_rate": 1.4285714285714286e-06, "loss": 1.2471, "step": 67 }, { "epoch": 0.0, "grad_norm": 18.991803908341307, "learning_rate": 1.4498933901918977e-06, "loss": 1.2932, "step": 68 }, { "epoch": 0.0, "grad_norm": 1.4279623130952688, "learning_rate": 1.4712153518123667e-06, "loss": 0.7121, "step": 69 }, { "epoch": 0.0, "grad_norm": 6.2302355941240535, "learning_rate": 1.4925373134328358e-06, "loss": 1.2024, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.6716512456033181, "learning_rate": 1.5138592750533053e-06, "loss": 0.7417, "step": 71 }, { "epoch": 0.0, "grad_norm": 9.812109179345212, "learning_rate": 1.5351812366737743e-06, "loss": 1.1817, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.7156909262009918, "learning_rate": 1.5565031982942434e-06, "loss": 0.6646, "step": 73 }, { "epoch": 0.0, "grad_norm": 7.772430061010195, "learning_rate": 1.5778251599147124e-06, "loss": 1.1508, "step": 74 }, { "epoch": 0.0, "grad_norm": 3.6086413556040364, "learning_rate": 1.5991471215351815e-06, "loss": 1.1354, "step": 75 }, { "epoch": 0.0, "grad_norm": 12.118014204531017, "learning_rate": 1.6204690831556505e-06, "loss": 1.2556, "step": 76 }, { "epoch": 0.0, "grad_norm": 42.882971372297966, "learning_rate": 1.6417910447761196e-06, "loss": 1.1246, "step": 77 }, { "epoch": 0.0, "grad_norm": 2.2494907878520487, "learning_rate": 1.6631130063965886e-06, "loss": 0.7026, "step": 78 }, { "epoch": 0.01, "grad_norm": 11.949445934349265, "learning_rate": 1.6844349680170577e-06, "loss": 1.129, "step": 79 }, { "epoch": 0.01, "grad_norm": 5.347183427392955, "learning_rate": 1.7057569296375267e-06, "loss": 1.2547, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.0163249546035202, "learning_rate": 1.7270788912579958e-06, "loss": 0.7077, "step": 81 }, { "epoch": 0.01, "grad_norm": 3.5551470944946737, "learning_rate": 1.7484008528784648e-06, "loss": 1.0974, "step": 82 }, { "epoch": 0.01, "grad_norm": 39.57091950356692, "learning_rate": 1.7697228144989339e-06, "loss": 1.2124, "step": 83 }, { "epoch": 0.01, "grad_norm": 6.766670411502214, "learning_rate": 1.791044776119403e-06, "loss": 1.0325, "step": 84 }, { "epoch": 0.01, "grad_norm": 17.91855597672552, "learning_rate": 1.812366737739872e-06, "loss": 1.0065, "step": 85 }, { "epoch": 0.01, "grad_norm": 17.545547852298842, "learning_rate": 1.8336886993603415e-06, "loss": 1.0728, "step": 86 }, { "epoch": 0.01, "grad_norm": 5.966201233957537, "learning_rate": 1.8550106609808105e-06, "loss": 1.0767, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.3809529479763731, "learning_rate": 1.8763326226012796e-06, "loss": 0.6249, "step": 88 }, { "epoch": 0.01, "grad_norm": 16.003249533053722, "learning_rate": 1.8976545842217486e-06, "loss": 1.2136, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.5093130671879955, "learning_rate": 1.9189765458422177e-06, "loss": 0.6062, "step": 90 }, { "epoch": 0.01, "grad_norm": 4.261089724383951, "learning_rate": 1.9402985074626867e-06, "loss": 1.0442, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.4539368659101555, "learning_rate": 1.9616204690831558e-06, "loss": 0.6978, "step": 92 }, { "epoch": 0.01, "grad_norm": 6.454266380907195, "learning_rate": 1.982942430703625e-06, "loss": 1.196, "step": 93 }, { "epoch": 0.01, "grad_norm": 4.077890024009569, "learning_rate": 2.004264392324094e-06, "loss": 1.2232, "step": 94 }, { "epoch": 0.01, "grad_norm": 10.797908617519246, "learning_rate": 2.025586353944563e-06, "loss": 1.0373, "step": 95 }, { "epoch": 0.01, "grad_norm": 11.404047282900343, "learning_rate": 2.046908315565032e-06, "loss": 1.1436, "step": 96 }, { "epoch": 0.01, "grad_norm": 17.768037255816708, "learning_rate": 2.068230277185501e-06, "loss": 1.0612, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.3162547671351341, "learning_rate": 2.08955223880597e-06, "loss": 0.6875, "step": 98 }, { "epoch": 0.01, "grad_norm": 6.130279920926572, "learning_rate": 2.110874200426439e-06, "loss": 1.1109, "step": 99 }, { "epoch": 0.01, "grad_norm": 10.465047821811352, "learning_rate": 2.132196162046908e-06, "loss": 1.0296, "step": 100 }, { "epoch": 0.01, "grad_norm": 9.581903862451773, "learning_rate": 2.1535181236673773e-06, "loss": 1.1832, "step": 101 }, { "epoch": 0.01, "grad_norm": 8.985973029398703, "learning_rate": 2.1748400852878467e-06, "loss": 1.0649, "step": 102 }, { "epoch": 0.01, "grad_norm": 25.14005166573558, "learning_rate": 2.1961620469083158e-06, "loss": 1.0155, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.3629092297204144, "learning_rate": 2.217484008528785e-06, "loss": 0.6642, "step": 104 }, { "epoch": 0.01, "grad_norm": 4.424432084683403, "learning_rate": 2.238805970149254e-06, "loss": 1.2572, "step": 105 }, { "epoch": 0.01, "grad_norm": 10.557041002051665, "learning_rate": 2.260127931769723e-06, "loss": 1.073, "step": 106 }, { "epoch": 0.01, "grad_norm": 4.384147868370333, "learning_rate": 2.281449893390192e-06, "loss": 1.1058, "step": 107 }, { "epoch": 0.01, "grad_norm": 13.377031426888262, "learning_rate": 2.302771855010661e-06, "loss": 1.0984, "step": 108 }, { "epoch": 0.01, "grad_norm": 5.618983510081214, "learning_rate": 2.32409381663113e-06, "loss": 1.0837, "step": 109 }, { "epoch": 0.01, "grad_norm": 13.89030865466635, "learning_rate": 2.345415778251599e-06, "loss": 1.0802, "step": 110 }, { "epoch": 0.01, "grad_norm": 6.391224273580091, "learning_rate": 2.366737739872068e-06, "loss": 1.209, "step": 111 }, { "epoch": 0.01, "grad_norm": 4.655894485709834, "learning_rate": 2.3880597014925373e-06, "loss": 1.1447, "step": 112 }, { "epoch": 0.01, "grad_norm": 6.864744955259223, "learning_rate": 2.4093816631130067e-06, "loss": 1.1942, "step": 113 }, { "epoch": 0.01, "grad_norm": 2.9287660356107446, "learning_rate": 2.4307036247334758e-06, "loss": 1.0625, "step": 114 }, { "epoch": 0.01, "grad_norm": 11.094528571936557, "learning_rate": 2.452025586353945e-06, "loss": 1.0663, "step": 115 }, { "epoch": 0.01, "grad_norm": 7.853554285933759, "learning_rate": 2.473347547974414e-06, "loss": 1.192, "step": 116 }, { "epoch": 0.01, "grad_norm": 12.62208487502526, "learning_rate": 2.494669509594883e-06, "loss": 1.0612, "step": 117 }, { "epoch": 0.01, "grad_norm": 9.09072829933597, "learning_rate": 2.515991471215352e-06, "loss": 0.9242, "step": 118 }, { "epoch": 0.01, "grad_norm": 5.323365336580177, "learning_rate": 2.537313432835821e-06, "loss": 1.1497, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.5756267591591306, "learning_rate": 2.55863539445629e-06, "loss": 0.7101, "step": 120 }, { "epoch": 0.01, "grad_norm": 16.712958755252647, "learning_rate": 2.579957356076759e-06, "loss": 1.1263, "step": 121 }, { "epoch": 0.01, "grad_norm": 5.268349344873996, "learning_rate": 2.601279317697228e-06, "loss": 1.073, "step": 122 }, { "epoch": 0.01, "grad_norm": 6.72413166241232, "learning_rate": 2.6226012793176977e-06, "loss": 1.0575, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.353130649606368, "learning_rate": 2.6439232409381667e-06, "loss": 0.7054, "step": 124 }, { "epoch": 0.01, "grad_norm": 8.74705666128302, "learning_rate": 2.6652452025586358e-06, "loss": 0.8975, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.5316058054307893, "learning_rate": 2.686567164179105e-06, "loss": 0.556, "step": 126 }, { "epoch": 0.01, "grad_norm": 4.087785137909016, "learning_rate": 2.707889125799574e-06, "loss": 0.9884, "step": 127 }, { "epoch": 0.01, "grad_norm": 6.69504526659009, "learning_rate": 2.729211087420043e-06, "loss": 1.0525, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.6024436082101035, "learning_rate": 2.750533049040512e-06, "loss": 0.7367, "step": 129 }, { "epoch": 0.01, "grad_norm": 5.825905888041982, "learning_rate": 2.771855010660981e-06, "loss": 1.098, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.3274942734846504, "learning_rate": 2.79317697228145e-06, "loss": 0.7872, "step": 131 }, { "epoch": 0.01, "grad_norm": 3.1267293945051073, "learning_rate": 2.814498933901919e-06, "loss": 1.069, "step": 132 }, { "epoch": 0.01, "grad_norm": 5.682116572526708, "learning_rate": 2.835820895522388e-06, "loss": 1.1293, "step": 133 }, { "epoch": 0.01, "grad_norm": 4.421027570333572, "learning_rate": 2.8571428571428573e-06, "loss": 1.0781, "step": 134 }, { "epoch": 0.01, "grad_norm": 7.551260588493375, "learning_rate": 2.8784648187633263e-06, "loss": 1.0753, "step": 135 }, { "epoch": 0.01, "grad_norm": 10.443132222542385, "learning_rate": 2.8997867803837954e-06, "loss": 1.1326, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.3151432129400715, "learning_rate": 2.9211087420042644e-06, "loss": 0.7269, "step": 137 }, { "epoch": 0.01, "grad_norm": 2.809236769655527, "learning_rate": 2.9424307036247335e-06, "loss": 1.0343, "step": 138 }, { "epoch": 0.01, "grad_norm": 4.9070962154969076, "learning_rate": 2.9637526652452025e-06, "loss": 1.0549, "step": 139 }, { "epoch": 0.01, "grad_norm": 3.606841554806748, "learning_rate": 2.9850746268656716e-06, "loss": 0.8862, "step": 140 }, { "epoch": 0.01, "grad_norm": 5.496369112553437, "learning_rate": 3.006396588486141e-06, "loss": 0.9157, "step": 141 }, { "epoch": 0.01, "grad_norm": 4.964014989308158, "learning_rate": 3.0277185501066105e-06, "loss": 1.0798, "step": 142 }, { "epoch": 0.01, "grad_norm": 2.5003179293239035, "learning_rate": 3.0490405117270796e-06, "loss": 1.1214, "step": 143 }, { "epoch": 0.01, "grad_norm": 4.38131305193012, "learning_rate": 3.0703624733475486e-06, "loss": 1.1578, "step": 144 }, { "epoch": 0.01, "grad_norm": 4.468381636693351, "learning_rate": 3.0916844349680177e-06, "loss": 1.1821, "step": 145 }, { "epoch": 0.01, "grad_norm": 12.785460802874274, "learning_rate": 3.1130063965884867e-06, "loss": 1.1118, "step": 146 }, { "epoch": 0.01, "grad_norm": 4.526362300973852, "learning_rate": 3.1343283582089558e-06, "loss": 1.0662, "step": 147 }, { "epoch": 0.01, "grad_norm": 4.2704782302414115, "learning_rate": 3.155650319829425e-06, "loss": 1.0003, "step": 148 }, { "epoch": 0.01, "grad_norm": 13.86881918059684, "learning_rate": 3.176972281449894e-06, "loss": 1.0173, "step": 149 }, { "epoch": 0.01, "grad_norm": 3.671765651012676, "learning_rate": 3.198294243070363e-06, "loss": 1.038, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.1853633750680788, "learning_rate": 3.219616204690832e-06, "loss": 0.7898, "step": 151 }, { "epoch": 0.01, "grad_norm": 6.044183616026581, "learning_rate": 3.240938166311301e-06, "loss": 1.0183, "step": 152 }, { "epoch": 0.01, "grad_norm": 14.60303798162056, "learning_rate": 3.26226012793177e-06, "loss": 0.9329, "step": 153 }, { "epoch": 0.01, "grad_norm": 19.177958266456063, "learning_rate": 3.283582089552239e-06, "loss": 0.9704, "step": 154 }, { "epoch": 0.01, "grad_norm": 4.610572657548652, "learning_rate": 3.304904051172708e-06, "loss": 1.0827, "step": 155 }, { "epoch": 0.01, "grad_norm": 4.428041644732342, "learning_rate": 3.3262260127931773e-06, "loss": 0.9321, "step": 156 }, { "epoch": 0.01, "grad_norm": 3.1886091798505958, "learning_rate": 3.3475479744136463e-06, "loss": 1.0574, "step": 157 }, { "epoch": 0.01, "grad_norm": 13.024769860824613, "learning_rate": 3.3688699360341154e-06, "loss": 1.0498, "step": 158 }, { "epoch": 0.01, "grad_norm": 2.891716856864044, "learning_rate": 3.3901918976545844e-06, "loss": 1.0512, "step": 159 }, { "epoch": 0.01, "grad_norm": 5.573492347457078, "learning_rate": 3.4115138592750535e-06, "loss": 1.0314, "step": 160 }, { "epoch": 0.01, "grad_norm": 7.398282814635445, "learning_rate": 3.4328358208955225e-06, "loss": 0.9653, "step": 161 }, { "epoch": 0.01, "grad_norm": 9.526078151517195, "learning_rate": 3.4541577825159916e-06, "loss": 1.2824, "step": 162 }, { "epoch": 0.01, "grad_norm": 15.286398380524673, "learning_rate": 3.4754797441364606e-06, "loss": 0.9649, "step": 163 }, { "epoch": 0.01, "grad_norm": 5.331496717499158, "learning_rate": 3.4968017057569297e-06, "loss": 1.0209, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.6700929279695362, "learning_rate": 3.5181236673773987e-06, "loss": 0.8304, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.5570207154183802, "learning_rate": 3.5394456289978678e-06, "loss": 0.6876, "step": 166 }, { "epoch": 0.01, "grad_norm": 3.100968204611906, "learning_rate": 3.560767590618337e-06, "loss": 1.1019, "step": 167 }, { "epoch": 0.01, "grad_norm": 4.494068278156651, "learning_rate": 3.582089552238806e-06, "loss": 1.0077, "step": 168 }, { "epoch": 0.01, "grad_norm": 4.308380678391349, "learning_rate": 3.603411513859275e-06, "loss": 1.033, "step": 169 }, { "epoch": 0.01, "grad_norm": 2.2976859159705816, "learning_rate": 3.624733475479744e-06, "loss": 1.1956, "step": 170 }, { "epoch": 0.01, "grad_norm": 3.497898354617572, "learning_rate": 3.6460554371002135e-06, "loss": 1.1742, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.1528034011589465, "learning_rate": 3.667377398720683e-06, "loss": 0.5638, "step": 172 }, { "epoch": 0.01, "grad_norm": 10.851596539486176, "learning_rate": 3.688699360341152e-06, "loss": 1.0012, "step": 173 }, { "epoch": 0.01, "grad_norm": 5.668807978380108, "learning_rate": 3.710021321961621e-06, "loss": 0.9531, "step": 174 }, { "epoch": 0.01, "grad_norm": 3.873608348104675, "learning_rate": 3.73134328358209e-06, "loss": 1.0952, "step": 175 }, { "epoch": 0.01, "grad_norm": 3.079499731544709, "learning_rate": 3.752665245202559e-06, "loss": 1.0113, "step": 176 }, { "epoch": 0.01, "grad_norm": 4.896492684997542, "learning_rate": 3.773987206823028e-06, "loss": 1.0694, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.8743203171073246, "learning_rate": 3.7953091684434973e-06, "loss": 0.6666, "step": 178 }, { "epoch": 0.01, "grad_norm": 3.2037348522999594, "learning_rate": 3.816631130063966e-06, "loss": 1.1356, "step": 179 }, { "epoch": 0.01, "grad_norm": 3.3043815736828286, "learning_rate": 3.837953091684435e-06, "loss": 1.028, "step": 180 }, { "epoch": 0.01, "grad_norm": 2.588440270129462, "learning_rate": 3.859275053304904e-06, "loss": 0.9376, "step": 181 }, { "epoch": 0.01, "grad_norm": 2.2331321618791167, "learning_rate": 3.8805970149253735e-06, "loss": 1.0606, "step": 182 }, { "epoch": 0.01, "grad_norm": 20.041692729782078, "learning_rate": 3.9019189765458425e-06, "loss": 1.1942, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.9712706204950603, "learning_rate": 3.9232409381663116e-06, "loss": 1.0485, "step": 184 }, { "epoch": 0.01, "grad_norm": 15.338711672441951, "learning_rate": 3.944562899786781e-06, "loss": 0.9327, "step": 185 }, { "epoch": 0.01, "grad_norm": 11.991468674592422, "learning_rate": 3.96588486140725e-06, "loss": 1.0008, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.7253774679975131, "learning_rate": 3.987206823027719e-06, "loss": 0.6322, "step": 187 }, { "epoch": 0.01, "grad_norm": 2.622911443489667, "learning_rate": 4.008528784648188e-06, "loss": 1.0566, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.4229597360336614, "learning_rate": 4.029850746268657e-06, "loss": 0.6933, "step": 189 }, { "epoch": 0.01, "grad_norm": 2.8590602169144024, "learning_rate": 4.051172707889126e-06, "loss": 1.0186, "step": 190 }, { "epoch": 0.01, "grad_norm": 5.158828273798686, "learning_rate": 4.072494669509595e-06, "loss": 0.9573, "step": 191 }, { "epoch": 0.01, "grad_norm": 2.375641129330687, "learning_rate": 4.093816631130064e-06, "loss": 1.0638, "step": 192 }, { "epoch": 0.01, "grad_norm": 1.3671500288615275, "learning_rate": 4.115138592750533e-06, "loss": 0.702, "step": 193 }, { "epoch": 0.01, "grad_norm": 5.81915095668887, "learning_rate": 4.136460554371002e-06, "loss": 0.9561, "step": 194 }, { "epoch": 0.01, "grad_norm": 4.134158944724188, "learning_rate": 4.157782515991471e-06, "loss": 0.9811, "step": 195 }, { "epoch": 0.01, "grad_norm": 12.330828977658001, "learning_rate": 4.17910447761194e-06, "loss": 1.1751, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.297186166162684, "learning_rate": 4.200426439232409e-06, "loss": 0.5828, "step": 197 }, { "epoch": 0.01, "grad_norm": 8.024777913124096, "learning_rate": 4.221748400852878e-06, "loss": 0.9054, "step": 198 }, { "epoch": 0.01, "grad_norm": 4.9290790646691915, "learning_rate": 4.243070362473347e-06, "loss": 1.0784, "step": 199 }, { "epoch": 0.01, "grad_norm": 4.003161010129966, "learning_rate": 4.264392324093816e-06, "loss": 1.0304, "step": 200 }, { "epoch": 0.01, "grad_norm": 4.353143634805772, "learning_rate": 4.2857142857142855e-06, "loss": 1.0711, "step": 201 }, { "epoch": 0.01, "grad_norm": 3.904427918221677, "learning_rate": 4.3070362473347545e-06, "loss": 1.0402, "step": 202 }, { "epoch": 0.01, "grad_norm": 4.866807614242119, "learning_rate": 4.3283582089552236e-06, "loss": 1.0411, "step": 203 }, { "epoch": 0.01, "grad_norm": 8.958628225109223, "learning_rate": 4.3496801705756935e-06, "loss": 1.0246, "step": 204 }, { "epoch": 0.01, "grad_norm": 7.631111107821585, "learning_rate": 4.3710021321961625e-06, "loss": 1.0851, "step": 205 }, { "epoch": 0.01, "grad_norm": 2.8962141890107684, "learning_rate": 4.3923240938166316e-06, "loss": 0.9332, "step": 206 }, { "epoch": 0.01, "grad_norm": 1.4957392750326042, "learning_rate": 4.413646055437101e-06, "loss": 0.6802, "step": 207 }, { "epoch": 0.01, "grad_norm": 1.2125092007194385, "learning_rate": 4.43496801705757e-06, "loss": 0.618, "step": 208 }, { "epoch": 0.01, "grad_norm": 2.854163414786598, "learning_rate": 4.456289978678039e-06, "loss": 1.0574, "step": 209 }, { "epoch": 0.01, "grad_norm": 3.9574314697452584, "learning_rate": 4.477611940298508e-06, "loss": 1.1872, "step": 210 }, { "epoch": 0.01, "grad_norm": 2.6202329098046198, "learning_rate": 4.498933901918977e-06, "loss": 1.0943, "step": 211 }, { "epoch": 0.01, "grad_norm": 3.2972524350164134, "learning_rate": 4.520255863539446e-06, "loss": 1.0407, "step": 212 }, { "epoch": 0.01, "grad_norm": 3.2183143771760148, "learning_rate": 4.541577825159915e-06, "loss": 1.038, "step": 213 }, { "epoch": 0.01, "grad_norm": 7.737242417363471, "learning_rate": 4.562899786780384e-06, "loss": 0.8911, "step": 214 }, { "epoch": 0.01, "grad_norm": 3.6941077305335086, "learning_rate": 4.584221748400853e-06, "loss": 1.0498, "step": 215 }, { "epoch": 0.01, "grad_norm": 2.511701506529865, "learning_rate": 4.605543710021322e-06, "loss": 0.824, "step": 216 }, { "epoch": 0.01, "grad_norm": 3.603691317508072, "learning_rate": 4.626865671641791e-06, "loss": 0.9402, "step": 217 }, { "epoch": 0.01, "grad_norm": 2.4669320753308757, "learning_rate": 4.64818763326226e-06, "loss": 1.1755, "step": 218 }, { "epoch": 0.01, "grad_norm": 1.4914502048842062, "learning_rate": 4.669509594882729e-06, "loss": 0.7058, "step": 219 }, { "epoch": 0.01, "grad_norm": 3.1152634781159425, "learning_rate": 4.690831556503198e-06, "loss": 1.0161, "step": 220 }, { "epoch": 0.01, "grad_norm": 4.777020749650226, "learning_rate": 4.712153518123667e-06, "loss": 0.9708, "step": 221 }, { "epoch": 0.01, "grad_norm": 8.81788025429857, "learning_rate": 4.733475479744136e-06, "loss": 1.0191, "step": 222 }, { "epoch": 0.01, "grad_norm": 3.1838863208814177, "learning_rate": 4.7547974413646055e-06, "loss": 0.9832, "step": 223 }, { "epoch": 0.01, "grad_norm": 15.925755776222712, "learning_rate": 4.7761194029850745e-06, "loss": 1.0214, "step": 224 }, { "epoch": 0.01, "grad_norm": 1.2372285251144048, "learning_rate": 4.797441364605544e-06, "loss": 0.686, "step": 225 }, { "epoch": 0.01, "grad_norm": 13.792578077328917, "learning_rate": 4.8187633262260135e-06, "loss": 1.0305, "step": 226 }, { "epoch": 0.01, "grad_norm": 2.9230171105364082, "learning_rate": 4.8400852878464825e-06, "loss": 1.0557, "step": 227 }, { "epoch": 0.01, "grad_norm": 2.033034405828652, "learning_rate": 4.8614072494669516e-06, "loss": 0.8959, "step": 228 }, { "epoch": 0.01, "grad_norm": 6.282870931850638, "learning_rate": 4.882729211087421e-06, "loss": 0.9698, "step": 229 }, { "epoch": 0.01, "grad_norm": 3.04292773565994, "learning_rate": 4.90405117270789e-06, "loss": 0.9506, "step": 230 }, { "epoch": 0.01, "grad_norm": 3.0274717709947727, "learning_rate": 4.925373134328359e-06, "loss": 0.9464, "step": 231 }, { "epoch": 0.01, "grad_norm": 3.6342816241635623, "learning_rate": 4.946695095948828e-06, "loss": 0.9133, "step": 232 }, { "epoch": 0.01, "grad_norm": 2.4817923644894577, "learning_rate": 4.968017057569297e-06, "loss": 1.013, "step": 233 }, { "epoch": 0.01, "grad_norm": 4.868046441726476, "learning_rate": 4.989339019189766e-06, "loss": 1.0103, "step": 234 }, { "epoch": 0.02, "grad_norm": 3.33246085652587, "learning_rate": 5.010660980810235e-06, "loss": 0.9658, "step": 235 }, { "epoch": 0.02, "grad_norm": 2.239619524397853, "learning_rate": 5.031982942430704e-06, "loss": 0.9354, "step": 236 }, { "epoch": 0.02, "grad_norm": 2.2755972292830284, "learning_rate": 5.053304904051173e-06, "loss": 0.9672, "step": 237 }, { "epoch": 0.02, "grad_norm": 2.9482171644062594, "learning_rate": 5.074626865671642e-06, "loss": 0.9769, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.372161161884233, "learning_rate": 5.095948827292111e-06, "loss": 0.6503, "step": 239 }, { "epoch": 0.02, "grad_norm": 2.4673206266622705, "learning_rate": 5.11727078891258e-06, "loss": 0.9784, "step": 240 }, { "epoch": 0.02, "grad_norm": 2.713860807515261, "learning_rate": 5.138592750533049e-06, "loss": 0.9272, "step": 241 }, { "epoch": 0.02, "grad_norm": 2.0196553230165373, "learning_rate": 5.159914712153518e-06, "loss": 0.9468, "step": 242 }, { "epoch": 0.02, "grad_norm": 2.0254850085580207, "learning_rate": 5.181236673773987e-06, "loss": 1.0172, "step": 243 }, { "epoch": 0.02, "grad_norm": 7.241072107149399, "learning_rate": 5.202558635394456e-06, "loss": 1.0281, "step": 244 }, { "epoch": 0.02, "grad_norm": 2.682232223560338, "learning_rate": 5.2238805970149255e-06, "loss": 1.0433, "step": 245 }, { "epoch": 0.02, "grad_norm": 2.1983914505081494, "learning_rate": 5.245202558635395e-06, "loss": 0.9729, "step": 246 }, { "epoch": 0.02, "grad_norm": 3.1126249395894203, "learning_rate": 5.2665245202558636e-06, "loss": 0.9992, "step": 247 }, { "epoch": 0.02, "grad_norm": 10.114252066634727, "learning_rate": 5.2878464818763335e-06, "loss": 1.0238, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.3578036836448573, "learning_rate": 5.309168443496802e-06, "loss": 0.6669, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.9511139936418493, "learning_rate": 5.3304904051172716e-06, "loss": 1.0532, "step": 250 }, { "epoch": 0.02, "grad_norm": 2.769318290133275, "learning_rate": 5.351812366737741e-06, "loss": 1.0544, "step": 251 }, { "epoch": 0.02, "grad_norm": 2.4927313094740544, "learning_rate": 5.37313432835821e-06, "loss": 1.0286, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.899228057734884, "learning_rate": 5.394456289978679e-06, "loss": 0.9779, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.815695239403599, "learning_rate": 5.415778251599148e-06, "loss": 0.988, "step": 254 }, { "epoch": 0.02, "grad_norm": 2.3285479471603994, "learning_rate": 5.437100213219617e-06, "loss": 1.0166, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.2272759408126137, "learning_rate": 5.458422174840086e-06, "loss": 0.6912, "step": 256 }, { "epoch": 0.02, "grad_norm": 2.559779001256721, "learning_rate": 5.479744136460555e-06, "loss": 1.0179, "step": 257 }, { "epoch": 0.02, "grad_norm": 2.2898638434742966, "learning_rate": 5.501066098081024e-06, "loss": 0.9634, "step": 258 }, { "epoch": 0.02, "grad_norm": 2.156259184023171, "learning_rate": 5.522388059701493e-06, "loss": 0.9827, "step": 259 }, { "epoch": 0.02, "grad_norm": 2.032118221871673, "learning_rate": 5.543710021321962e-06, "loss": 0.803, "step": 260 }, { "epoch": 0.02, "grad_norm": 2.2277157435182255, "learning_rate": 5.565031982942431e-06, "loss": 0.9908, "step": 261 }, { "epoch": 0.02, "grad_norm": 2.6037622489529633, "learning_rate": 5.5863539445629e-06, "loss": 0.9971, "step": 262 }, { "epoch": 0.02, "grad_norm": 2.0079039380476833, "learning_rate": 5.607675906183369e-06, "loss": 0.9604, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.2861924530424933, "learning_rate": 5.628997867803838e-06, "loss": 0.6569, "step": 264 }, { "epoch": 0.02, "grad_norm": 2.147318155437169, "learning_rate": 5.650319829424308e-06, "loss": 0.9503, "step": 265 }, { "epoch": 0.02, "grad_norm": 2.065841730844835, "learning_rate": 5.671641791044776e-06, "loss": 0.9678, "step": 266 }, { "epoch": 0.02, "grad_norm": 2.235524021453829, "learning_rate": 5.692963752665246e-06, "loss": 1.1182, "step": 267 }, { "epoch": 0.02, "grad_norm": 2.2848694288014277, "learning_rate": 5.7142857142857145e-06, "loss": 1.0427, "step": 268 }, { "epoch": 0.02, "grad_norm": 2.004665332778247, "learning_rate": 5.735607675906184e-06, "loss": 1.1171, "step": 269 }, { "epoch": 0.02, "grad_norm": 2.530481001748687, "learning_rate": 5.756929637526653e-06, "loss": 1.0741, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.8897140747943535, "learning_rate": 5.7782515991471225e-06, "loss": 0.9954, "step": 271 }, { "epoch": 0.02, "grad_norm": 2.069016211348991, "learning_rate": 5.799573560767591e-06, "loss": 1.0207, "step": 272 }, { "epoch": 0.02, "grad_norm": 2.670894180786186, "learning_rate": 5.820895522388061e-06, "loss": 1.0231, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.9849214684095862, "learning_rate": 5.842217484008529e-06, "loss": 0.9042, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.7289995675665664, "learning_rate": 5.863539445628999e-06, "loss": 0.9067, "step": 275 }, { "epoch": 0.02, "grad_norm": 2.2209329576920287, "learning_rate": 5.884861407249467e-06, "loss": 0.9167, "step": 276 }, { "epoch": 0.02, "grad_norm": 2.475064334212574, "learning_rate": 5.906183368869937e-06, "loss": 0.9902, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.3960090844418447, "learning_rate": 5.927505330490405e-06, "loss": 0.6398, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.1657841567392258, "learning_rate": 5.948827292110875e-06, "loss": 0.5591, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.3419602207135044, "learning_rate": 5.970149253731343e-06, "loss": 0.6273, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.9491149858404604, "learning_rate": 5.991471215351813e-06, "loss": 0.9814, "step": 281 }, { "epoch": 0.02, "grad_norm": 2.205410569497368, "learning_rate": 6.012793176972282e-06, "loss": 0.9568, "step": 282 }, { "epoch": 0.02, "grad_norm": 2.0347009403709726, "learning_rate": 6.034115138592751e-06, "loss": 1.0418, "step": 283 }, { "epoch": 0.02, "grad_norm": 2.2992320343560793, "learning_rate": 6.055437100213221e-06, "loss": 0.8938, "step": 284 }, { "epoch": 0.02, "grad_norm": 3.4658637525823375, "learning_rate": 6.076759061833689e-06, "loss": 0.9778, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.862835503932866, "learning_rate": 6.098081023454159e-06, "loss": 1.0455, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.671648424649258, "learning_rate": 6.119402985074627e-06, "loss": 0.7763, "step": 287 }, { "epoch": 0.02, "grad_norm": 2.236431699934269, "learning_rate": 6.140724946695097e-06, "loss": 1.2726, "step": 288 }, { "epoch": 0.02, "grad_norm": 2.3640265296698013, "learning_rate": 6.1620469083155655e-06, "loss": 0.8485, "step": 289 }, { "epoch": 0.02, "grad_norm": 2.368269678880365, "learning_rate": 6.183368869936035e-06, "loss": 0.9522, "step": 290 }, { "epoch": 0.02, "grad_norm": 1.749081492212689, "learning_rate": 6.2046908315565036e-06, "loss": 0.8011, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.9601791690598476, "learning_rate": 6.2260127931769735e-06, "loss": 0.9241, "step": 292 }, { "epoch": 0.02, "grad_norm": 2.57411179033064, "learning_rate": 6.247334754797442e-06, "loss": 0.9576, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.9442273338587173, "learning_rate": 6.2686567164179116e-06, "loss": 0.8266, "step": 294 }, { "epoch": 0.02, "grad_norm": 2.2602993864162073, "learning_rate": 6.28997867803838e-06, "loss": 0.8881, "step": 295 }, { "epoch": 0.02, "grad_norm": 2.0570773405370915, "learning_rate": 6.31130063965885e-06, "loss": 0.9222, "step": 296 }, { "epoch": 0.02, "grad_norm": 1.816100739262284, "learning_rate": 6.332622601279318e-06, "loss": 0.9963, "step": 297 }, { "epoch": 0.02, "grad_norm": 5.4156007893877725, "learning_rate": 6.353944562899788e-06, "loss": 1.0295, "step": 298 }, { "epoch": 0.02, "grad_norm": 2.8953077024649287, "learning_rate": 6.375266524520256e-06, "loss": 0.9969, "step": 299 }, { "epoch": 0.02, "grad_norm": 1.5354519609445787, "learning_rate": 6.396588486140726e-06, "loss": 0.6952, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.5994290817432941, "learning_rate": 6.417910447761194e-06, "loss": 0.8966, "step": 301 }, { "epoch": 0.02, "grad_norm": 2.216558402752407, "learning_rate": 6.439232409381664e-06, "loss": 0.9175, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.2695772843399653, "learning_rate": 6.460554371002132e-06, "loss": 0.7021, "step": 303 }, { "epoch": 0.02, "grad_norm": 2.265853506304056, "learning_rate": 6.481876332622602e-06, "loss": 0.9657, "step": 304 }, { "epoch": 0.02, "grad_norm": 2.3779331500801217, "learning_rate": 6.50319829424307e-06, "loss": 0.8821, "step": 305 }, { "epoch": 0.02, "grad_norm": 2.0474416477139528, "learning_rate": 6.52452025586354e-06, "loss": 0.8632, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.9336970430965577, "learning_rate": 6.545842217484008e-06, "loss": 1.0873, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.859021819702461, "learning_rate": 6.567164179104478e-06, "loss": 1.0273, "step": 308 }, { "epoch": 0.02, "grad_norm": 2.277115137161471, "learning_rate": 6.5884861407249465e-06, "loss": 0.9523, "step": 309 }, { "epoch": 0.02, "grad_norm": 2.138681369495456, "learning_rate": 6.609808102345416e-06, "loss": 1.1001, "step": 310 }, { "epoch": 0.02, "grad_norm": 2.2096296640801683, "learning_rate": 6.631130063965885e-06, "loss": 0.9937, "step": 311 }, { "epoch": 0.02, "grad_norm": 3.468775195743775, "learning_rate": 6.6524520255863545e-06, "loss": 0.9246, "step": 312 }, { "epoch": 0.02, "grad_norm": 1.6475585111019118, "learning_rate": 6.673773987206824e-06, "loss": 0.8856, "step": 313 }, { "epoch": 0.02, "grad_norm": 4.869583538196838, "learning_rate": 6.695095948827293e-06, "loss": 0.9323, "step": 314 }, { "epoch": 0.02, "grad_norm": 2.6165355925646923, "learning_rate": 6.7164179104477625e-06, "loss": 0.9438, "step": 315 }, { "epoch": 0.02, "grad_norm": 1.9128963519881943, "learning_rate": 6.737739872068231e-06, "loss": 0.8515, "step": 316 }, { "epoch": 0.02, "grad_norm": 2.011617214017061, "learning_rate": 6.759061833688701e-06, "loss": 1.0441, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.8563072823411444, "learning_rate": 6.780383795309169e-06, "loss": 1.001, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.6668871093246236, "learning_rate": 6.801705756929639e-06, "loss": 0.6075, "step": 319 }, { "epoch": 0.02, "grad_norm": 2.2763976335701277, "learning_rate": 6.823027718550107e-06, "loss": 0.9357, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.8460693689938203, "learning_rate": 6.844349680170577e-06, "loss": 0.9146, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.7690529793093341, "learning_rate": 6.865671641791045e-06, "loss": 0.917, "step": 322 }, { "epoch": 0.02, "grad_norm": 1.90467184633066, "learning_rate": 6.886993603411515e-06, "loss": 1.0238, "step": 323 }, { "epoch": 0.02, "grad_norm": 2.6770003673073512, "learning_rate": 6.908315565031983e-06, "loss": 0.9208, "step": 324 }, { "epoch": 0.02, "grad_norm": 2.0578142309536123, "learning_rate": 6.929637526652453e-06, "loss": 1.0116, "step": 325 }, { "epoch": 0.02, "grad_norm": 2.6600675088085537, "learning_rate": 6.950959488272921e-06, "loss": 0.9537, "step": 326 }, { "epoch": 0.02, "grad_norm": 2.0831217022348762, "learning_rate": 6.972281449893391e-06, "loss": 1.0184, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.755713187189212, "learning_rate": 6.993603411513859e-06, "loss": 0.9169, "step": 328 }, { "epoch": 0.02, "grad_norm": 2.1100904684444948, "learning_rate": 7.014925373134329e-06, "loss": 1.0926, "step": 329 }, { "epoch": 0.02, "grad_norm": 2.6029335264292532, "learning_rate": 7.0362473347547975e-06, "loss": 0.9263, "step": 330 }, { "epoch": 0.02, "grad_norm": 1.552877390382623, "learning_rate": 7.057569296375267e-06, "loss": 0.7468, "step": 331 }, { "epoch": 0.02, "grad_norm": 2.0447717091227218, "learning_rate": 7.0788912579957356e-06, "loss": 1.0168, "step": 332 }, { "epoch": 0.02, "grad_norm": 2.4901608802037303, "learning_rate": 7.1002132196162055e-06, "loss": 0.8805, "step": 333 }, { "epoch": 0.02, "grad_norm": 2.193417839991823, "learning_rate": 7.121535181236674e-06, "loss": 1.0763, "step": 334 }, { "epoch": 0.02, "grad_norm": 1.9296740824086427, "learning_rate": 7.1428571428571436e-06, "loss": 0.9518, "step": 335 }, { "epoch": 0.02, "grad_norm": 2.5944851526859707, "learning_rate": 7.164179104477612e-06, "loss": 0.9035, "step": 336 }, { "epoch": 0.02, "grad_norm": 2.797786215427403, "learning_rate": 7.185501066098082e-06, "loss": 0.8822, "step": 337 }, { "epoch": 0.02, "grad_norm": 2.158291358305976, "learning_rate": 7.20682302771855e-06, "loss": 1.0847, "step": 338 }, { "epoch": 0.02, "grad_norm": 2.4068470112239715, "learning_rate": 7.22814498933902e-06, "loss": 0.9771, "step": 339 }, { "epoch": 0.02, "grad_norm": 2.232898560109873, "learning_rate": 7.249466950959488e-06, "loss": 0.9513, "step": 340 }, { "epoch": 0.02, "grad_norm": 2.5325425089728832, "learning_rate": 7.270788912579958e-06, "loss": 0.8864, "step": 341 }, { "epoch": 0.02, "grad_norm": 1.2273097780286324, "learning_rate": 7.292110874200427e-06, "loss": 0.787, "step": 342 }, { "epoch": 0.02, "grad_norm": 3.0215081518979683, "learning_rate": 7.313432835820896e-06, "loss": 0.9724, "step": 343 }, { "epoch": 0.02, "grad_norm": 2.2508128374378726, "learning_rate": 7.334754797441366e-06, "loss": 0.9899, "step": 344 }, { "epoch": 0.02, "grad_norm": 2.2158099058280802, "learning_rate": 7.356076759061834e-06, "loss": 0.9007, "step": 345 }, { "epoch": 0.02, "grad_norm": 1.9140779217630555, "learning_rate": 7.377398720682304e-06, "loss": 0.9759, "step": 346 }, { "epoch": 0.02, "grad_norm": 1.8984521052141152, "learning_rate": 7.398720682302772e-06, "loss": 0.989, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.278261649238127, "learning_rate": 7.420042643923242e-06, "loss": 0.7343, "step": 348 }, { "epoch": 0.02, "grad_norm": 2.085502813760348, "learning_rate": 7.44136460554371e-06, "loss": 1.1479, "step": 349 }, { "epoch": 0.02, "grad_norm": 1.9249441559846776, "learning_rate": 7.46268656716418e-06, "loss": 0.9048, "step": 350 }, { "epoch": 0.02, "grad_norm": 2.675173572933447, "learning_rate": 7.484008528784648e-06, "loss": 0.9688, "step": 351 }, { "epoch": 0.02, "grad_norm": 1.7250646440439825, "learning_rate": 7.505330490405118e-06, "loss": 0.8299, "step": 352 }, { "epoch": 0.02, "grad_norm": 1.2105129241087929, "learning_rate": 7.5266524520255865e-06, "loss": 0.6754, "step": 353 }, { "epoch": 0.02, "grad_norm": 2.1775414356750504, "learning_rate": 7.547974413646056e-06, "loss": 0.9387, "step": 354 }, { "epoch": 0.02, "grad_norm": 1.8399774721002975, "learning_rate": 7.569296375266525e-06, "loss": 0.9105, "step": 355 }, { "epoch": 0.02, "grad_norm": 2.1254476014502512, "learning_rate": 7.5906183368869945e-06, "loss": 1.117, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.1774505521222316, "learning_rate": 7.611940298507463e-06, "loss": 0.7128, "step": 357 }, { "epoch": 0.02, "grad_norm": 1.9793095234457845, "learning_rate": 7.633262260127933e-06, "loss": 0.888, "step": 358 }, { "epoch": 0.02, "grad_norm": 2.0612469541528204, "learning_rate": 7.654584221748402e-06, "loss": 0.926, "step": 359 }, { "epoch": 0.02, "grad_norm": 2.1071498298259, "learning_rate": 7.67590618336887e-06, "loss": 0.9884, "step": 360 }, { "epoch": 0.02, "grad_norm": 3.0959171658951754, "learning_rate": 7.69722814498934e-06, "loss": 0.9501, "step": 361 }, { "epoch": 0.02, "grad_norm": 2.3961514686423904, "learning_rate": 7.718550106609809e-06, "loss": 0.8609, "step": 362 }, { "epoch": 0.02, "grad_norm": 1.8891541400725014, "learning_rate": 7.739872068230278e-06, "loss": 0.9319, "step": 363 }, { "epoch": 0.02, "grad_norm": 1.4167788278656916, "learning_rate": 7.761194029850747e-06, "loss": 0.6716, "step": 364 }, { "epoch": 0.02, "grad_norm": 2.020991765600121, "learning_rate": 7.782515991471216e-06, "loss": 0.8547, "step": 365 }, { "epoch": 0.02, "grad_norm": 1.9474445856866205, "learning_rate": 7.803837953091685e-06, "loss": 0.9588, "step": 366 }, { "epoch": 0.02, "grad_norm": 2.097051613795387, "learning_rate": 7.825159914712154e-06, "loss": 0.8412, "step": 367 }, { "epoch": 0.02, "grad_norm": 2.1930027942035006, "learning_rate": 7.846481876332623e-06, "loss": 0.8936, "step": 368 }, { "epoch": 0.02, "grad_norm": 1.7942766680770112, "learning_rate": 7.867803837953092e-06, "loss": 0.9724, "step": 369 }, { "epoch": 0.02, "grad_norm": 1.428568660058276, "learning_rate": 7.889125799573561e-06, "loss": 0.6297, "step": 370 }, { "epoch": 0.02, "grad_norm": 1.8876414035113143, "learning_rate": 7.91044776119403e-06, "loss": 0.8484, "step": 371 }, { "epoch": 0.02, "grad_norm": 1.987692043253051, "learning_rate": 7.9317697228145e-06, "loss": 0.9569, "step": 372 }, { "epoch": 0.02, "grad_norm": 1.821596571013835, "learning_rate": 7.953091684434968e-06, "loss": 0.7581, "step": 373 }, { "epoch": 0.02, "grad_norm": 2.1413389458095224, "learning_rate": 7.974413646055437e-06, "loss": 0.9416, "step": 374 }, { "epoch": 0.02, "grad_norm": 1.9797445761322756, "learning_rate": 7.995735607675907e-06, "loss": 0.9052, "step": 375 }, { "epoch": 0.02, "grad_norm": 1.8660976757363255, "learning_rate": 8.017057569296376e-06, "loss": 0.8751, "step": 376 }, { "epoch": 0.02, "grad_norm": 2.010142024048656, "learning_rate": 8.038379530916846e-06, "loss": 0.918, "step": 377 }, { "epoch": 0.02, "grad_norm": 1.7981167855824551, "learning_rate": 8.059701492537314e-06, "loss": 0.6539, "step": 378 }, { "epoch": 0.02, "grad_norm": 1.904233382758225, "learning_rate": 8.081023454157784e-06, "loss": 0.9786, "step": 379 }, { "epoch": 0.02, "grad_norm": 1.0832545765728991, "learning_rate": 8.102345415778252e-06, "loss": 0.6725, "step": 380 }, { "epoch": 0.02, "grad_norm": 2.263236904108079, "learning_rate": 8.123667377398723e-06, "loss": 0.9517, "step": 381 }, { "epoch": 0.02, "grad_norm": 1.8325487017547992, "learning_rate": 8.14498933901919e-06, "loss": 0.9906, "step": 382 }, { "epoch": 0.02, "grad_norm": 2.955567582957792, "learning_rate": 8.16631130063966e-06, "loss": 0.9669, "step": 383 }, { "epoch": 0.02, "grad_norm": 1.67076345976421, "learning_rate": 8.187633262260128e-06, "loss": 0.6517, "step": 384 }, { "epoch": 0.02, "grad_norm": 1.956005184314237, "learning_rate": 8.208955223880599e-06, "loss": 0.7966, "step": 385 }, { "epoch": 0.02, "grad_norm": 2.8058146560015413, "learning_rate": 8.230277185501066e-06, "loss": 1.0437, "step": 386 }, { "epoch": 0.02, "grad_norm": 2.580367951458935, "learning_rate": 8.251599147121537e-06, "loss": 0.9155, "step": 387 }, { "epoch": 0.02, "grad_norm": 1.91929241387352, "learning_rate": 8.272921108742004e-06, "loss": 0.8838, "step": 388 }, { "epoch": 0.02, "grad_norm": 2.847116784772828, "learning_rate": 8.294243070362475e-06, "loss": 0.9071, "step": 389 }, { "epoch": 0.02, "grad_norm": 1.895725238359246, "learning_rate": 8.315565031982942e-06, "loss": 1.0226, "step": 390 }, { "epoch": 0.03, "grad_norm": 1.0826434190601435, "learning_rate": 8.336886993603413e-06, "loss": 0.6184, "step": 391 }, { "epoch": 0.03, "grad_norm": 2.0331281770399166, "learning_rate": 8.35820895522388e-06, "loss": 0.8483, "step": 392 }, { "epoch": 0.03, "grad_norm": 2.1469677065916444, "learning_rate": 8.379530916844351e-06, "loss": 0.8159, "step": 393 }, { "epoch": 0.03, "grad_norm": 2.002674911741559, "learning_rate": 8.400852878464819e-06, "loss": 1.0094, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.5240571055446688, "learning_rate": 8.42217484008529e-06, "loss": 0.6759, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.2131634465039154, "learning_rate": 8.443496801705757e-06, "loss": 0.6859, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.8301900556965391, "learning_rate": 8.464818763326227e-06, "loss": 0.8771, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.8285845089867463, "learning_rate": 8.486140724946695e-06, "loss": 0.5769, "step": 398 }, { "epoch": 0.03, "grad_norm": 1.9084408431291275, "learning_rate": 8.507462686567165e-06, "loss": 0.8864, "step": 399 }, { "epoch": 0.03, "grad_norm": 2.1670547130580875, "learning_rate": 8.528784648187633e-06, "loss": 0.9985, "step": 400 }, { "epoch": 0.03, "grad_norm": 2.5390286344199837, "learning_rate": 8.550106609808104e-06, "loss": 0.9714, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.9752207933307206, "learning_rate": 8.571428571428571e-06, "loss": 0.8593, "step": 402 }, { "epoch": 0.03, "grad_norm": 2.0057217146800803, "learning_rate": 8.592750533049042e-06, "loss": 0.9762, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.2068312209203693, "learning_rate": 8.614072494669509e-06, "loss": 0.6947, "step": 404 }, { "epoch": 0.03, "grad_norm": 2.5543084355453547, "learning_rate": 8.63539445628998e-06, "loss": 1.0427, "step": 405 }, { "epoch": 0.03, "grad_norm": 3.0718950510548044, "learning_rate": 8.656716417910447e-06, "loss": 0.94, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.9471148398370899, "learning_rate": 8.678038379530918e-06, "loss": 0.9792, "step": 407 }, { "epoch": 0.03, "grad_norm": 2.07934701223506, "learning_rate": 8.699360341151387e-06, "loss": 0.8412, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.8327583993238954, "learning_rate": 8.720682302771856e-06, "loss": 0.9671, "step": 409 }, { "epoch": 0.03, "grad_norm": 2.2535006214952, "learning_rate": 8.742004264392325e-06, "loss": 0.8972, "step": 410 }, { "epoch": 0.03, "grad_norm": 2.544927877014545, "learning_rate": 8.763326226012794e-06, "loss": 0.8273, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.9132207972001591, "learning_rate": 8.784648187633263e-06, "loss": 0.9837, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.460729356618233, "learning_rate": 8.805970149253732e-06, "loss": 0.7588, "step": 413 }, { "epoch": 0.03, "grad_norm": 2.7929768646840083, "learning_rate": 8.827292110874201e-06, "loss": 0.9311, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.5050343012171368, "learning_rate": 8.84861407249467e-06, "loss": 0.6977, "step": 415 }, { "epoch": 0.03, "grad_norm": 2.3903271427322386, "learning_rate": 8.86993603411514e-06, "loss": 1.0578, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.8819772956304892, "learning_rate": 8.891257995735608e-06, "loss": 0.9138, "step": 417 }, { "epoch": 0.03, "grad_norm": 2.2531465539634192, "learning_rate": 8.912579957356077e-06, "loss": 0.9123, "step": 418 }, { "epoch": 0.03, "grad_norm": 2.0047333398268217, "learning_rate": 8.933901918976547e-06, "loss": 0.8743, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.207185784392184, "learning_rate": 8.955223880597016e-06, "loss": 0.7834, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.887354429611839, "learning_rate": 8.976545842217485e-06, "loss": 1.045, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.6379697927661738, "learning_rate": 8.997867803837954e-06, "loss": 0.8543, "step": 422 }, { "epoch": 0.03, "grad_norm": 2.076360654579007, "learning_rate": 9.019189765458423e-06, "loss": 0.8921, "step": 423 }, { "epoch": 0.03, "grad_norm": 2.617635574696473, "learning_rate": 9.040511727078892e-06, "loss": 1.0062, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.4540404408502852, "learning_rate": 9.06183368869936e-06, "loss": 0.6812, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.8573181421398717, "learning_rate": 9.08315565031983e-06, "loss": 0.9201, "step": 426 }, { "epoch": 0.03, "grad_norm": 2.625782809842242, "learning_rate": 9.104477611940299e-06, "loss": 0.9123, "step": 427 }, { "epoch": 0.03, "grad_norm": 2.8461724874594103, "learning_rate": 9.125799573560768e-06, "loss": 0.9546, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.8376872214784896, "learning_rate": 9.147121535181237e-06, "loss": 0.9637, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.9564869804471432, "learning_rate": 9.168443496801706e-06, "loss": 0.9188, "step": 430 }, { "epoch": 0.03, "grad_norm": 1.8221584990271755, "learning_rate": 9.189765458422175e-06, "loss": 0.9821, "step": 431 }, { "epoch": 0.03, "grad_norm": 2.0634636424043347, "learning_rate": 9.211087420042644e-06, "loss": 0.9347, "step": 432 }, { "epoch": 0.03, "grad_norm": 2.2339530568385872, "learning_rate": 9.232409381663113e-06, "loss": 0.9635, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.9339907636885598, "learning_rate": 9.253731343283582e-06, "loss": 0.8529, "step": 434 }, { "epoch": 0.03, "grad_norm": 4.920825997685458, "learning_rate": 9.275053304904051e-06, "loss": 0.9396, "step": 435 }, { "epoch": 0.03, "grad_norm": 2.4474625403787544, "learning_rate": 9.29637526652452e-06, "loss": 0.8637, "step": 436 }, { "epoch": 0.03, "grad_norm": 2.109905358786886, "learning_rate": 9.31769722814499e-06, "loss": 0.9729, "step": 437 }, { "epoch": 0.03, "grad_norm": 1.1725934226474748, "learning_rate": 9.339019189765458e-06, "loss": 0.6743, "step": 438 }, { "epoch": 0.03, "grad_norm": 2.218826650554231, "learning_rate": 9.36034115138593e-06, "loss": 0.9479, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.9736716583257836, "learning_rate": 9.381663113006397e-06, "loss": 0.9956, "step": 440 }, { "epoch": 0.03, "grad_norm": 2.018653278576134, "learning_rate": 9.402985074626867e-06, "loss": 0.8589, "step": 441 }, { "epoch": 0.03, "grad_norm": 2.516081175469299, "learning_rate": 9.424307036247335e-06, "loss": 0.9513, "step": 442 }, { "epoch": 0.03, "grad_norm": 1.78026054115916, "learning_rate": 9.445628997867805e-06, "loss": 0.9518, "step": 443 }, { "epoch": 0.03, "grad_norm": 1.8690864457032421, "learning_rate": 9.466950959488273e-06, "loss": 0.9579, "step": 444 }, { "epoch": 0.03, "grad_norm": 2.2098465541865453, "learning_rate": 9.488272921108744e-06, "loss": 0.9379, "step": 445 }, { "epoch": 0.03, "grad_norm": 1.748381105811784, "learning_rate": 9.509594882729211e-06, "loss": 0.8346, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.8946951213050347, "learning_rate": 9.530916844349682e-06, "loss": 0.9604, "step": 447 }, { "epoch": 0.03, "grad_norm": 3.302052364147268, "learning_rate": 9.552238805970149e-06, "loss": 0.9169, "step": 448 }, { "epoch": 0.03, "grad_norm": 2.094839939961966, "learning_rate": 9.57356076759062e-06, "loss": 0.838, "step": 449 }, { "epoch": 0.03, "grad_norm": 2.193190897176595, "learning_rate": 9.594882729211089e-06, "loss": 0.8908, "step": 450 }, { "epoch": 0.03, "grad_norm": 1.9695871919786416, "learning_rate": 9.616204690831558e-06, "loss": 0.8752, "step": 451 }, { "epoch": 0.03, "grad_norm": 1.7683493148126013, "learning_rate": 9.637526652452027e-06, "loss": 0.994, "step": 452 }, { "epoch": 0.03, "grad_norm": 2.4001454328371365, "learning_rate": 9.658848614072496e-06, "loss": 0.929, "step": 453 }, { "epoch": 0.03, "grad_norm": 2.069920123138006, "learning_rate": 9.680170575692965e-06, "loss": 0.9561, "step": 454 }, { "epoch": 0.03, "grad_norm": 1.6718507932228739, "learning_rate": 9.701492537313434e-06, "loss": 0.9876, "step": 455 }, { "epoch": 0.03, "grad_norm": 1.8799410475559402, "learning_rate": 9.722814498933903e-06, "loss": 0.9974, "step": 456 }, { "epoch": 0.03, "grad_norm": 1.8349138280427888, "learning_rate": 9.744136460554372e-06, "loss": 0.9344, "step": 457 }, { "epoch": 0.03, "grad_norm": 2.2335137848622733, "learning_rate": 9.765458422174841e-06, "loss": 0.9019, "step": 458 }, { "epoch": 0.03, "grad_norm": 2.0665314375984933, "learning_rate": 9.78678038379531e-06, "loss": 0.9266, "step": 459 }, { "epoch": 0.03, "grad_norm": 2.018943415272218, "learning_rate": 9.80810234541578e-06, "loss": 0.9518, "step": 460 }, { "epoch": 0.03, "grad_norm": 1.966187139083669, "learning_rate": 9.829424307036248e-06, "loss": 0.8507, "step": 461 }, { "epoch": 0.03, "grad_norm": 1.5558496035601521, "learning_rate": 9.850746268656717e-06, "loss": 0.6165, "step": 462 }, { "epoch": 0.03, "grad_norm": 1.9356435193111146, "learning_rate": 9.872068230277187e-06, "loss": 0.9971, "step": 463 }, { "epoch": 0.03, "grad_norm": 1.496079203637662, "learning_rate": 9.893390191897656e-06, "loss": 0.6743, "step": 464 }, { "epoch": 0.03, "grad_norm": 2.5301803599191266, "learning_rate": 9.914712153518125e-06, "loss": 0.9146, "step": 465 }, { "epoch": 0.03, "grad_norm": 2.102287331204199, "learning_rate": 9.936034115138594e-06, "loss": 0.9985, "step": 466 }, { "epoch": 0.03, "grad_norm": 1.892980694579424, "learning_rate": 9.957356076759063e-06, "loss": 0.8458, "step": 467 }, { "epoch": 0.03, "grad_norm": 1.877319193014805, "learning_rate": 9.978678038379532e-06, "loss": 0.8811, "step": 468 }, { "epoch": 0.03, "grad_norm": 2.0407057081738036, "learning_rate": 1e-05, "loss": 0.8425, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.8345032780043284, "learning_rate": 9.999999892555254e-06, "loss": 1.0087, "step": 470 }, { "epoch": 0.03, "grad_norm": 1.7604328771820064, "learning_rate": 9.999999570221018e-06, "loss": 0.9097, "step": 471 }, { "epoch": 0.03, "grad_norm": 1.9624351169914807, "learning_rate": 9.999999032997307e-06, "loss": 0.8729, "step": 472 }, { "epoch": 0.03, "grad_norm": 2.1293883557611997, "learning_rate": 9.999998280884144e-06, "loss": 0.8763, "step": 473 }, { "epoch": 0.03, "grad_norm": 2.2413675298325804, "learning_rate": 9.999997313881561e-06, "loss": 0.8425, "step": 474 }, { "epoch": 0.03, "grad_norm": 1.3066627387372536, "learning_rate": 9.999996131989602e-06, "loss": 0.7432, "step": 475 }, { "epoch": 0.03, "grad_norm": 1.338561380320222, "learning_rate": 9.999994735208314e-06, "loss": 0.6157, "step": 476 }, { "epoch": 0.03, "grad_norm": 2.6512369594949416, "learning_rate": 9.99999312353776e-06, "loss": 0.7835, "step": 477 }, { "epoch": 0.03, "grad_norm": 1.3359708974257336, "learning_rate": 9.999991296978006e-06, "loss": 0.7334, "step": 478 }, { "epoch": 0.03, "grad_norm": 2.6257456480713253, "learning_rate": 9.999989255529133e-06, "loss": 1.116, "step": 479 }, { "epoch": 0.03, "grad_norm": 1.8935032562143579, "learning_rate": 9.99998699919123e-06, "loss": 0.9604, "step": 480 }, { "epoch": 0.03, "grad_norm": 2.0185219908315, "learning_rate": 9.99998452796439e-06, "loss": 0.9651, "step": 481 }, { "epoch": 0.03, "grad_norm": 1.9795709970119253, "learning_rate": 9.99998184184872e-06, "loss": 0.9422, "step": 482 }, { "epoch": 0.03, "grad_norm": 1.8505323240516935, "learning_rate": 9.99997894084434e-06, "loss": 0.8641, "step": 483 }, { "epoch": 0.03, "grad_norm": 1.9685195711128138, "learning_rate": 9.999975824951372e-06, "loss": 0.8299, "step": 484 }, { "epoch": 0.03, "grad_norm": 1.8053805489792016, "learning_rate": 9.999972494169947e-06, "loss": 0.7235, "step": 485 }, { "epoch": 0.03, "grad_norm": 2.4529492493935074, "learning_rate": 9.999968948500211e-06, "loss": 0.8943, "step": 486 }, { "epoch": 0.03, "grad_norm": 2.146084854278972, "learning_rate": 9.999965187942317e-06, "loss": 0.9488, "step": 487 }, { "epoch": 0.03, "grad_norm": 1.4638757446089985, "learning_rate": 9.999961212496425e-06, "loss": 0.7497, "step": 488 }, { "epoch": 0.03, "grad_norm": 1.3014336349609745, "learning_rate": 9.999957022162707e-06, "loss": 0.5951, "step": 489 }, { "epoch": 0.03, "grad_norm": 1.293180087409597, "learning_rate": 9.999952616941342e-06, "loss": 0.6169, "step": 490 }, { "epoch": 0.03, "grad_norm": 1.911681910331497, "learning_rate": 9.99994799683252e-06, "loss": 0.8876, "step": 491 }, { "epoch": 0.03, "grad_norm": 1.9421226816711785, "learning_rate": 9.999943161836439e-06, "loss": 0.8934, "step": 492 }, { "epoch": 0.03, "grad_norm": 2.539369464627973, "learning_rate": 9.999938111953306e-06, "loss": 0.906, "step": 493 }, { "epoch": 0.03, "grad_norm": 2.1011709625705532, "learning_rate": 9.999932847183343e-06, "loss": 0.9314, "step": 494 }, { "epoch": 0.03, "grad_norm": 1.6918795472130692, "learning_rate": 9.99992736752677e-06, "loss": 0.8766, "step": 495 }, { "epoch": 0.03, "grad_norm": 1.2463686580927325, "learning_rate": 9.999921672983826e-06, "loss": 0.6653, "step": 496 }, { "epoch": 0.03, "grad_norm": 1.7574502073790939, "learning_rate": 9.999915763554754e-06, "loss": 0.8357, "step": 497 }, { "epoch": 0.03, "grad_norm": 1.7882945042787335, "learning_rate": 9.999909639239809e-06, "loss": 0.8425, "step": 498 }, { "epoch": 0.03, "grad_norm": 2.1013645827810223, "learning_rate": 9.999903300039253e-06, "loss": 1.0129, "step": 499 }, { "epoch": 0.03, "grad_norm": 1.9163667583252386, "learning_rate": 9.999896745953361e-06, "loss": 0.9078, "step": 500 }, { "epoch": 0.03, "grad_norm": 2.34609800968326, "learning_rate": 9.999889976982413e-06, "loss": 0.9683, "step": 501 }, { "epoch": 0.03, "grad_norm": 1.7740684447427606, "learning_rate": 9.9998829931267e-06, "loss": 1.0378, "step": 502 }, { "epoch": 0.03, "grad_norm": 1.7168381305898035, "learning_rate": 9.99987579438652e-06, "loss": 0.9278, "step": 503 }, { "epoch": 0.03, "grad_norm": 1.8095032411899645, "learning_rate": 9.999868380762187e-06, "loss": 0.9572, "step": 504 }, { "epoch": 0.03, "grad_norm": 2.4674187546846635, "learning_rate": 9.999860752254016e-06, "loss": 0.9294, "step": 505 }, { "epoch": 0.03, "grad_norm": 1.937638819284656, "learning_rate": 9.999852908862337e-06, "loss": 0.9122, "step": 506 }, { "epoch": 0.03, "grad_norm": 1.8832548389532549, "learning_rate": 9.999844850587486e-06, "loss": 0.988, "step": 507 }, { "epoch": 0.03, "grad_norm": 1.2466427461070053, "learning_rate": 9.999836577429808e-06, "loss": 0.7307, "step": 508 }, { "epoch": 0.03, "grad_norm": 1.5047505505283074, "learning_rate": 9.99982808938966e-06, "loss": 0.6016, "step": 509 }, { "epoch": 0.03, "grad_norm": 2.367859977579129, "learning_rate": 9.999819386467409e-06, "loss": 0.9756, "step": 510 }, { "epoch": 0.03, "grad_norm": 1.986225076083911, "learning_rate": 9.999810468663424e-06, "loss": 0.8594, "step": 511 }, { "epoch": 0.03, "grad_norm": 2.166325141356672, "learning_rate": 9.999801335978095e-06, "loss": 0.9193, "step": 512 }, { "epoch": 0.03, "grad_norm": 2.0234594555751464, "learning_rate": 9.999791988411807e-06, "loss": 0.8988, "step": 513 }, { "epoch": 0.03, "grad_norm": 1.9344484411781204, "learning_rate": 9.999782425964968e-06, "loss": 1.0206, "step": 514 }, { "epoch": 0.03, "grad_norm": 1.469213268495774, "learning_rate": 9.999772648637984e-06, "loss": 0.6838, "step": 515 }, { "epoch": 0.03, "grad_norm": 9.212391638035292, "learning_rate": 9.999762656431277e-06, "loss": 0.8612, "step": 516 }, { "epoch": 0.03, "grad_norm": 2.132675825946832, "learning_rate": 9.999752449345279e-06, "loss": 0.9512, "step": 517 }, { "epoch": 0.03, "grad_norm": 1.7786981779837516, "learning_rate": 9.999742027380426e-06, "loss": 0.974, "step": 518 }, { "epoch": 0.03, "grad_norm": 2.5019157199869304, "learning_rate": 9.999731390537168e-06, "loss": 0.9465, "step": 519 }, { "epoch": 0.03, "grad_norm": 1.8497652890083018, "learning_rate": 9.999720538815959e-06, "loss": 0.8593, "step": 520 }, { "epoch": 0.03, "grad_norm": 1.122944543407031, "learning_rate": 9.999709472217268e-06, "loss": 0.712, "step": 521 }, { "epoch": 0.03, "grad_norm": 1.8770791991063052, "learning_rate": 9.999698190741569e-06, "loss": 0.8781, "step": 522 }, { "epoch": 0.03, "grad_norm": 1.9017741444316454, "learning_rate": 9.999686694389348e-06, "loss": 0.9423, "step": 523 }, { "epoch": 0.03, "grad_norm": 1.9486128846774062, "learning_rate": 9.999674983161099e-06, "loss": 0.9694, "step": 524 }, { "epoch": 0.03, "grad_norm": 2.046376935688126, "learning_rate": 9.999663057057324e-06, "loss": 1.0076, "step": 525 }, { "epoch": 0.03, "grad_norm": 2.261216936569029, "learning_rate": 9.999650916078536e-06, "loss": 0.9512, "step": 526 }, { "epoch": 0.03, "grad_norm": 1.8135815524044094, "learning_rate": 9.999638560225259e-06, "loss": 0.9165, "step": 527 }, { "epoch": 0.03, "grad_norm": 1.9414264790465352, "learning_rate": 9.999625989498022e-06, "loss": 0.8225, "step": 528 }, { "epoch": 0.03, "grad_norm": 5.297515667540505, "learning_rate": 9.999613203897365e-06, "loss": 0.8446, "step": 529 }, { "epoch": 0.03, "grad_norm": 1.6858632559778821, "learning_rate": 9.999600203423837e-06, "loss": 0.8168, "step": 530 }, { "epoch": 0.03, "grad_norm": 1.8274747678227818, "learning_rate": 9.999586988078e-06, "loss": 1.1077, "step": 531 }, { "epoch": 0.03, "grad_norm": 1.9366416419498855, "learning_rate": 9.99957355786042e-06, "loss": 0.9857, "step": 532 }, { "epoch": 0.03, "grad_norm": 2.0499415976318276, "learning_rate": 9.999559912771673e-06, "loss": 0.8819, "step": 533 }, { "epoch": 0.03, "grad_norm": 2.8241689401757952, "learning_rate": 9.999546052812347e-06, "loss": 0.8265, "step": 534 }, { "epoch": 0.03, "grad_norm": 2.2878846047567785, "learning_rate": 9.999531977983038e-06, "loss": 0.9645, "step": 535 }, { "epoch": 0.03, "grad_norm": 1.4210472256672273, "learning_rate": 9.999517688284348e-06, "loss": 0.6779, "step": 536 }, { "epoch": 0.03, "grad_norm": 1.7337741462589147, "learning_rate": 9.999503183716894e-06, "loss": 0.8363, "step": 537 }, { "epoch": 0.03, "grad_norm": 1.6471123682284958, "learning_rate": 9.999488464281298e-06, "loss": 0.7707, "step": 538 }, { "epoch": 0.03, "grad_norm": 1.7831383151429345, "learning_rate": 9.999473529978194e-06, "loss": 0.8689, "step": 539 }, { "epoch": 0.03, "grad_norm": 2.1250090981249308, "learning_rate": 9.999458380808222e-06, "loss": 1.0912, "step": 540 }, { "epoch": 0.03, "grad_norm": 1.693756832997873, "learning_rate": 9.999443016772037e-06, "loss": 0.8196, "step": 541 }, { "epoch": 0.03, "grad_norm": 1.7536869848532244, "learning_rate": 9.999427437870292e-06, "loss": 0.8555, "step": 542 }, { "epoch": 0.03, "grad_norm": 1.1267279202041824, "learning_rate": 9.999411644103665e-06, "loss": 0.6371, "step": 543 }, { "epoch": 0.03, "grad_norm": 2.008911409155175, "learning_rate": 9.999395635472829e-06, "loss": 0.9414, "step": 544 }, { "epoch": 0.03, "grad_norm": 2.497976583935364, "learning_rate": 9.999379411978474e-06, "loss": 0.9962, "step": 545 }, { "epoch": 0.03, "grad_norm": 1.609157611443356, "learning_rate": 9.999362973621297e-06, "loss": 0.9138, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.8679809266393017, "learning_rate": 9.999346320402003e-06, "loss": 0.8567, "step": 547 }, { "epoch": 0.04, "grad_norm": 3.5424934743573613, "learning_rate": 9.999329452321312e-06, "loss": 0.9037, "step": 548 }, { "epoch": 0.04, "grad_norm": 2.1569343160008607, "learning_rate": 9.999312369379944e-06, "loss": 1.0079, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.867471927728961, "learning_rate": 9.999295071578637e-06, "loss": 0.8772, "step": 550 }, { "epoch": 0.04, "grad_norm": 5.0374126993222665, "learning_rate": 9.99927755891813e-06, "loss": 0.9971, "step": 551 }, { "epoch": 0.04, "grad_norm": 2.4003503482444315, "learning_rate": 9.999259831399181e-06, "loss": 0.9466, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.1861235841999025, "learning_rate": 9.99924188902255e-06, "loss": 0.7192, "step": 553 }, { "epoch": 0.04, "grad_norm": 2.1744097622427963, "learning_rate": 9.999223731789006e-06, "loss": 0.9248, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.1475918042870834, "learning_rate": 9.99920535969933e-06, "loss": 0.6906, "step": 555 }, { "epoch": 0.04, "grad_norm": 2.637362259016382, "learning_rate": 9.999186772754315e-06, "loss": 0.9719, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.935009964785856, "learning_rate": 9.999167970954756e-06, "loss": 0.9022, "step": 557 }, { "epoch": 0.04, "grad_norm": 2.0310304158432744, "learning_rate": 9.99914895430146e-06, "loss": 1.0383, "step": 558 }, { "epoch": 0.04, "grad_norm": 2.178497787201231, "learning_rate": 9.999129722795248e-06, "loss": 0.8674, "step": 559 }, { "epoch": 0.04, "grad_norm": 2.308882055123405, "learning_rate": 9.999110276436947e-06, "loss": 0.9178, "step": 560 }, { "epoch": 0.04, "grad_norm": 1.946540827121913, "learning_rate": 9.999090615227389e-06, "loss": 0.8724, "step": 561 }, { "epoch": 0.04, "grad_norm": 2.493874849612961, "learning_rate": 9.999070739167423e-06, "loss": 0.929, "step": 562 }, { "epoch": 0.04, "grad_norm": 1.7812121290419776, "learning_rate": 9.999050648257898e-06, "loss": 0.8271, "step": 563 }, { "epoch": 0.04, "grad_norm": 1.950593895460384, "learning_rate": 9.999030342499682e-06, "loss": 0.9717, "step": 564 }, { "epoch": 0.04, "grad_norm": 1.8072027138315165, "learning_rate": 9.999009821893648e-06, "loss": 0.7835, "step": 565 }, { "epoch": 0.04, "grad_norm": 1.8189599602420197, "learning_rate": 9.998989086440673e-06, "loss": 1.0041, "step": 566 }, { "epoch": 0.04, "grad_norm": 1.6470880109836012, "learning_rate": 9.998968136141655e-06, "loss": 0.8676, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.0771452951823954, "learning_rate": 9.998946970997489e-06, "loss": 0.5867, "step": 568 }, { "epoch": 0.04, "grad_norm": 1.4070643919402717, "learning_rate": 9.998925591009086e-06, "loss": 0.7719, "step": 569 }, { "epoch": 0.04, "grad_norm": 1.8456107673110593, "learning_rate": 9.998903996177365e-06, "loss": 0.8712, "step": 570 }, { "epoch": 0.04, "grad_norm": 1.861023760906575, "learning_rate": 9.998882186503256e-06, "loss": 0.832, "step": 571 }, { "epoch": 0.04, "grad_norm": 1.7005197389408688, "learning_rate": 9.998860161987693e-06, "loss": 0.9268, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.3524955715095237, "learning_rate": 9.998837922631625e-06, "loss": 0.7319, "step": 573 }, { "epoch": 0.04, "grad_norm": 1.854058192999542, "learning_rate": 9.998815468436007e-06, "loss": 0.9515, "step": 574 }, { "epoch": 0.04, "grad_norm": 1.9863272315641018, "learning_rate": 9.998792799401804e-06, "loss": 0.89, "step": 575 }, { "epoch": 0.04, "grad_norm": 2.0135620223139052, "learning_rate": 9.998769915529991e-06, "loss": 0.8222, "step": 576 }, { "epoch": 0.04, "grad_norm": 2.5073999504814615, "learning_rate": 9.998746816821551e-06, "loss": 0.865, "step": 577 }, { "epoch": 0.04, "grad_norm": 1.873488240517025, "learning_rate": 9.998723503277476e-06, "loss": 0.9605, "step": 578 }, { "epoch": 0.04, "grad_norm": 1.1862262991269943, "learning_rate": 9.99869997489877e-06, "loss": 0.6351, "step": 579 }, { "epoch": 0.04, "grad_norm": 1.9553182517532186, "learning_rate": 9.99867623168644e-06, "loss": 0.9356, "step": 580 }, { "epoch": 0.04, "grad_norm": 1.6971077264584207, "learning_rate": 9.99865227364151e-06, "loss": 0.9935, "step": 581 }, { "epoch": 0.04, "grad_norm": 1.9557041499205794, "learning_rate": 9.99862810076501e-06, "loss": 0.9197, "step": 582 }, { "epoch": 0.04, "grad_norm": 1.3998419606084183, "learning_rate": 9.998603713057977e-06, "loss": 0.6843, "step": 583 }, { "epoch": 0.04, "grad_norm": 2.0178263943529124, "learning_rate": 9.99857911052146e-06, "loss": 0.7551, "step": 584 }, { "epoch": 0.04, "grad_norm": 1.3576447465179091, "learning_rate": 9.998554293156518e-06, "loss": 0.7632, "step": 585 }, { "epoch": 0.04, "grad_norm": 1.8935008118944774, "learning_rate": 9.998529260964214e-06, "loss": 0.8583, "step": 586 }, { "epoch": 0.04, "grad_norm": 2.1380672423193854, "learning_rate": 9.998504013945627e-06, "loss": 0.8671, "step": 587 }, { "epoch": 0.04, "grad_norm": 2.4341696032139035, "learning_rate": 9.99847855210184e-06, "loss": 0.8747, "step": 588 }, { "epoch": 0.04, "grad_norm": 1.7439438673020469, "learning_rate": 9.998452875433948e-06, "loss": 0.8599, "step": 589 }, { "epoch": 0.04, "grad_norm": 1.838721578088193, "learning_rate": 9.998426983943055e-06, "loss": 0.8444, "step": 590 }, { "epoch": 0.04, "grad_norm": 2.21422226684355, "learning_rate": 9.998400877630272e-06, "loss": 0.8941, "step": 591 }, { "epoch": 0.04, "grad_norm": 2.009173938597275, "learning_rate": 9.998374556496724e-06, "loss": 0.9105, "step": 592 }, { "epoch": 0.04, "grad_norm": 2.093498802841387, "learning_rate": 9.99834802054354e-06, "loss": 0.8889, "step": 593 }, { "epoch": 0.04, "grad_norm": 1.9945841249547984, "learning_rate": 9.998321269771862e-06, "loss": 0.9049, "step": 594 }, { "epoch": 0.04, "grad_norm": 2.0815775210445846, "learning_rate": 9.998294304182837e-06, "loss": 0.7463, "step": 595 }, { "epoch": 0.04, "grad_norm": 1.8794789742696927, "learning_rate": 9.998267123777628e-06, "loss": 0.8302, "step": 596 }, { "epoch": 0.04, "grad_norm": 1.74091954667285, "learning_rate": 9.998239728557399e-06, "loss": 0.8399, "step": 597 }, { "epoch": 0.04, "grad_norm": 1.714653978161616, "learning_rate": 9.99821211852333e-06, "loss": 0.9171, "step": 598 }, { "epoch": 0.04, "grad_norm": 1.6148766412533553, "learning_rate": 9.998184293676606e-06, "loss": 0.7856, "step": 599 }, { "epoch": 0.04, "grad_norm": 1.684083278106475, "learning_rate": 9.998156254018423e-06, "loss": 0.8772, "step": 600 }, { "epoch": 0.04, "grad_norm": 1.5951468837980591, "learning_rate": 9.998127999549988e-06, "loss": 0.8576, "step": 601 }, { "epoch": 0.04, "grad_norm": 2.2493085684045564, "learning_rate": 9.998099530272514e-06, "loss": 0.9456, "step": 602 }, { "epoch": 0.04, "grad_norm": 1.8247234466047848, "learning_rate": 9.998070846187225e-06, "loss": 0.8747, "step": 603 }, { "epoch": 0.04, "grad_norm": 2.055291221978266, "learning_rate": 9.998041947295353e-06, "loss": 1.0297, "step": 604 }, { "epoch": 0.04, "grad_norm": 2.070219121097452, "learning_rate": 9.99801283359814e-06, "loss": 0.8769, "step": 605 }, { "epoch": 0.04, "grad_norm": 1.306467001020141, "learning_rate": 9.99798350509684e-06, "loss": 0.6293, "step": 606 }, { "epoch": 0.04, "grad_norm": 1.0728992143493152, "learning_rate": 9.997953961792708e-06, "loss": 0.5971, "step": 607 }, { "epoch": 0.04, "grad_norm": 2.090729663649634, "learning_rate": 9.997924203687018e-06, "loss": 0.8957, "step": 608 }, { "epoch": 0.04, "grad_norm": 2.375388075534504, "learning_rate": 9.997894230781048e-06, "loss": 0.9372, "step": 609 }, { "epoch": 0.04, "grad_norm": 2.1038484308473624, "learning_rate": 9.997864043076087e-06, "loss": 0.9373, "step": 610 }, { "epoch": 0.04, "grad_norm": 3.028478743611204, "learning_rate": 9.99783364057343e-06, "loss": 0.9657, "step": 611 }, { "epoch": 0.04, "grad_norm": 1.9788547782092922, "learning_rate": 9.997803023274384e-06, "loss": 0.9231, "step": 612 }, { "epoch": 0.04, "grad_norm": 1.8862592118798074, "learning_rate": 9.997772191180269e-06, "loss": 0.9079, "step": 613 }, { "epoch": 0.04, "grad_norm": 1.8162781154296608, "learning_rate": 9.997741144292406e-06, "loss": 0.8367, "step": 614 }, { "epoch": 0.04, "grad_norm": 1.7478844554486639, "learning_rate": 9.997709882612128e-06, "loss": 0.7941, "step": 615 }, { "epoch": 0.04, "grad_norm": 1.3964278736219145, "learning_rate": 9.997678406140783e-06, "loss": 0.7395, "step": 616 }, { "epoch": 0.04, "grad_norm": 1.9987116572607537, "learning_rate": 9.99764671487972e-06, "loss": 0.8556, "step": 617 }, { "epoch": 0.04, "grad_norm": 2.054460943318328, "learning_rate": 9.997614808830305e-06, "loss": 0.8822, "step": 618 }, { "epoch": 0.04, "grad_norm": 2.255383287586647, "learning_rate": 9.997582687993905e-06, "loss": 0.9203, "step": 619 }, { "epoch": 0.04, "grad_norm": 2.024301225545036, "learning_rate": 9.997550352371903e-06, "loss": 0.8409, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.9720111384964951, "learning_rate": 9.99751780196569e-06, "loss": 0.9958, "step": 621 }, { "epoch": 0.04, "grad_norm": 3.377037771496998, "learning_rate": 9.997485036776662e-06, "loss": 1.0003, "step": 622 }, { "epoch": 0.04, "grad_norm": 1.9323747848011505, "learning_rate": 9.997452056806226e-06, "loss": 0.9216, "step": 623 }, { "epoch": 0.04, "grad_norm": 2.11262024680662, "learning_rate": 9.997418862055804e-06, "loss": 0.9493, "step": 624 }, { "epoch": 0.04, "grad_norm": 2.1649418826814872, "learning_rate": 9.99738545252682e-06, "loss": 1.0347, "step": 625 }, { "epoch": 0.04, "grad_norm": 1.2440978592820247, "learning_rate": 9.997351828220711e-06, "loss": 0.6885, "step": 626 }, { "epoch": 0.04, "grad_norm": 2.1546627512096435, "learning_rate": 9.99731798913892e-06, "loss": 0.963, "step": 627 }, { "epoch": 0.04, "grad_norm": 1.299629750000112, "learning_rate": 9.997283935282903e-06, "loss": 0.7199, "step": 628 }, { "epoch": 0.04, "grad_norm": 2.3019638363106463, "learning_rate": 9.997249666654122e-06, "loss": 1.0391, "step": 629 }, { "epoch": 0.04, "grad_norm": 1.9828992511830463, "learning_rate": 9.997215183254053e-06, "loss": 0.8681, "step": 630 }, { "epoch": 0.04, "grad_norm": 1.458275474304409, "learning_rate": 9.997180485084175e-06, "loss": 0.6663, "step": 631 }, { "epoch": 0.04, "grad_norm": 1.2269994484405278, "learning_rate": 9.997145572145981e-06, "loss": 0.7157, "step": 632 }, { "epoch": 0.04, "grad_norm": 2.032880030861346, "learning_rate": 9.99711044444097e-06, "loss": 0.813, "step": 633 }, { "epoch": 0.04, "grad_norm": 2.1211100588501326, "learning_rate": 9.997075101970652e-06, "loss": 0.8815, "step": 634 }, { "epoch": 0.04, "grad_norm": 1.9497468966274338, "learning_rate": 9.997039544736547e-06, "loss": 1.0194, "step": 635 }, { "epoch": 0.04, "grad_norm": 2.2667769824020922, "learning_rate": 9.997003772740183e-06, "loss": 0.9551, "step": 636 }, { "epoch": 0.04, "grad_norm": 2.940833802796464, "learning_rate": 9.996967785983097e-06, "loss": 0.9029, "step": 637 }, { "epoch": 0.04, "grad_norm": 1.909239046368037, "learning_rate": 9.996931584466836e-06, "loss": 0.9981, "step": 638 }, { "epoch": 0.04, "grad_norm": 1.8439648008519907, "learning_rate": 9.996895168192954e-06, "loss": 0.7319, "step": 639 }, { "epoch": 0.04, "grad_norm": 4.8564618509941875, "learning_rate": 9.996858537163019e-06, "loss": 1.0727, "step": 640 }, { "epoch": 0.04, "grad_norm": 1.7420655350993166, "learning_rate": 9.996821691378603e-06, "loss": 0.8739, "step": 641 }, { "epoch": 0.04, "grad_norm": 2.5489195550243484, "learning_rate": 9.996784630841293e-06, "loss": 0.8928, "step": 642 }, { "epoch": 0.04, "grad_norm": 2.1542521363687275, "learning_rate": 9.996747355552675e-06, "loss": 0.8429, "step": 643 }, { "epoch": 0.04, "grad_norm": 2.063305290630099, "learning_rate": 9.996709865514357e-06, "loss": 0.8614, "step": 644 }, { "epoch": 0.04, "grad_norm": 1.8828386003525135, "learning_rate": 9.99667216072795e-06, "loss": 0.8999, "step": 645 }, { "epoch": 0.04, "grad_norm": 1.7726682849066497, "learning_rate": 9.996634241195071e-06, "loss": 0.9195, "step": 646 }, { "epoch": 0.04, "grad_norm": 2.235211887242947, "learning_rate": 9.996596106917353e-06, "loss": 0.9298, "step": 647 }, { "epoch": 0.04, "grad_norm": 1.4569295528736932, "learning_rate": 9.996557757896432e-06, "loss": 0.751, "step": 648 }, { "epoch": 0.04, "grad_norm": 1.756590155035569, "learning_rate": 9.99651919413396e-06, "loss": 0.7888, "step": 649 }, { "epoch": 0.04, "grad_norm": 2.0043798915121824, "learning_rate": 9.996480415631592e-06, "loss": 0.9993, "step": 650 }, { "epoch": 0.04, "grad_norm": 3.177462045415887, "learning_rate": 9.996441422390994e-06, "loss": 0.9156, "step": 651 }, { "epoch": 0.04, "grad_norm": 1.37190862204021, "learning_rate": 9.996402214413841e-06, "loss": 0.7083, "step": 652 }, { "epoch": 0.04, "grad_norm": 2.0201944608881095, "learning_rate": 9.996362791701822e-06, "loss": 0.8279, "step": 653 }, { "epoch": 0.04, "grad_norm": 2.077925966035502, "learning_rate": 9.996323154256628e-06, "loss": 0.8812, "step": 654 }, { "epoch": 0.04, "grad_norm": 1.7223799951410415, "learning_rate": 9.996283302079965e-06, "loss": 0.7786, "step": 655 }, { "epoch": 0.04, "grad_norm": 1.503968807505548, "learning_rate": 9.996243235173541e-06, "loss": 0.6811, "step": 656 }, { "epoch": 0.04, "grad_norm": 1.9315084088686838, "learning_rate": 9.996202953539085e-06, "loss": 0.9269, "step": 657 }, { "epoch": 0.04, "grad_norm": 2.2382202489558827, "learning_rate": 9.996162457178322e-06, "loss": 0.885, "step": 658 }, { "epoch": 0.04, "grad_norm": 1.826852337934393, "learning_rate": 9.996121746092996e-06, "loss": 0.9147, "step": 659 }, { "epoch": 0.04, "grad_norm": 1.8855097885157508, "learning_rate": 9.996080820284857e-06, "loss": 0.9579, "step": 660 }, { "epoch": 0.04, "grad_norm": 1.819130214448491, "learning_rate": 9.99603967975566e-06, "loss": 0.8385, "step": 661 }, { "epoch": 0.04, "grad_norm": 2.543976795044077, "learning_rate": 9.995998324507177e-06, "loss": 0.8681, "step": 662 }, { "epoch": 0.04, "grad_norm": 1.8985948358131206, "learning_rate": 9.995956754541185e-06, "loss": 0.8705, "step": 663 }, { "epoch": 0.04, "grad_norm": 1.8777171598552829, "learning_rate": 9.995914969859469e-06, "loss": 0.8548, "step": 664 }, { "epoch": 0.04, "grad_norm": 1.7975245742758807, "learning_rate": 9.995872970463824e-06, "loss": 0.9957, "step": 665 }, { "epoch": 0.04, "grad_norm": 1.7570955179087886, "learning_rate": 9.995830756356058e-06, "loss": 0.8544, "step": 666 }, { "epoch": 0.04, "grad_norm": 2.375521992469731, "learning_rate": 9.995788327537983e-06, "loss": 0.8029, "step": 667 }, { "epoch": 0.04, "grad_norm": 1.2072879020818128, "learning_rate": 9.995745684011424e-06, "loss": 0.6604, "step": 668 }, { "epoch": 0.04, "grad_norm": 2.0680329360417957, "learning_rate": 9.995702825778213e-06, "loss": 0.9041, "step": 669 }, { "epoch": 0.04, "grad_norm": 2.372768218982615, "learning_rate": 9.99565975284019e-06, "loss": 0.9026, "step": 670 }, { "epoch": 0.04, "grad_norm": 3.0686607733176854, "learning_rate": 9.995616465199209e-06, "loss": 0.9455, "step": 671 }, { "epoch": 0.04, "grad_norm": 2.002466075895866, "learning_rate": 9.995572962857132e-06, "loss": 0.9757, "step": 672 }, { "epoch": 0.04, "grad_norm": 2.0218432517470717, "learning_rate": 9.995529245815824e-06, "loss": 0.8847, "step": 673 }, { "epoch": 0.04, "grad_norm": 1.2487461047848507, "learning_rate": 9.995485314077167e-06, "loss": 0.6849, "step": 674 }, { "epoch": 0.04, "grad_norm": 1.3568678774898564, "learning_rate": 9.995441167643048e-06, "loss": 0.7098, "step": 675 }, { "epoch": 0.04, "grad_norm": 1.7044546049272282, "learning_rate": 9.995396806515363e-06, "loss": 0.7926, "step": 676 }, { "epoch": 0.04, "grad_norm": 1.9861578934876674, "learning_rate": 9.995352230696021e-06, "loss": 0.9342, "step": 677 }, { "epoch": 0.04, "grad_norm": 1.8716093150735433, "learning_rate": 9.995307440186937e-06, "loss": 0.9781, "step": 678 }, { "epoch": 0.04, "grad_norm": 1.7975434932859922, "learning_rate": 9.995262434990036e-06, "loss": 0.8439, "step": 679 }, { "epoch": 0.04, "grad_norm": 2.0857883138365683, "learning_rate": 9.995217215107251e-06, "loss": 0.9548, "step": 680 }, { "epoch": 0.04, "grad_norm": 1.8529779900014782, "learning_rate": 9.995171780540528e-06, "loss": 0.9662, "step": 681 }, { "epoch": 0.04, "grad_norm": 1.9855254040066224, "learning_rate": 9.995126131291818e-06, "loss": 1.0007, "step": 682 }, { "epoch": 0.04, "grad_norm": 1.908925017582164, "learning_rate": 9.995080267363082e-06, "loss": 0.8765, "step": 683 }, { "epoch": 0.04, "grad_norm": 2.1772994321371586, "learning_rate": 9.995034188756294e-06, "loss": 0.9079, "step": 684 }, { "epoch": 0.04, "grad_norm": 1.7929628469468484, "learning_rate": 9.994987895473431e-06, "loss": 0.9029, "step": 685 }, { "epoch": 0.04, "grad_norm": 1.552195101021823, "learning_rate": 9.994941387516484e-06, "loss": 1.0249, "step": 686 }, { "epoch": 0.04, "grad_norm": 2.343643091394082, "learning_rate": 9.994894664887453e-06, "loss": 0.7418, "step": 687 }, { "epoch": 0.04, "grad_norm": 1.9254670139406849, "learning_rate": 9.994847727588344e-06, "loss": 0.968, "step": 688 }, { "epoch": 0.04, "grad_norm": 1.7931059290061433, "learning_rate": 9.994800575621176e-06, "loss": 0.8948, "step": 689 }, { "epoch": 0.04, "grad_norm": 1.9719822298595588, "learning_rate": 9.994753208987974e-06, "loss": 0.849, "step": 690 }, { "epoch": 0.04, "grad_norm": 1.7227673354799304, "learning_rate": 9.994705627690777e-06, "loss": 0.8851, "step": 691 }, { "epoch": 0.04, "grad_norm": 2.0391270836598743, "learning_rate": 9.994657831731624e-06, "loss": 0.8356, "step": 692 }, { "epoch": 0.04, "grad_norm": 1.93323012057412, "learning_rate": 9.994609821112576e-06, "loss": 0.9381, "step": 693 }, { "epoch": 0.04, "grad_norm": 1.8024322752812825, "learning_rate": 9.99456159583569e-06, "loss": 0.8333, "step": 694 }, { "epoch": 0.04, "grad_norm": 1.7972151927058455, "learning_rate": 9.994513155903042e-06, "loss": 0.8809, "step": 695 }, { "epoch": 0.04, "grad_norm": 1.3218595728227285, "learning_rate": 9.994464501316715e-06, "loss": 0.6935, "step": 696 }, { "epoch": 0.04, "grad_norm": 1.7485836473756184, "learning_rate": 9.994415632078797e-06, "loss": 0.8284, "step": 697 }, { "epoch": 0.04, "grad_norm": 1.4068096514174657, "learning_rate": 9.994366548191393e-06, "loss": 0.8103, "step": 698 }, { "epoch": 0.04, "grad_norm": 1.2833398605630912, "learning_rate": 9.994317249656607e-06, "loss": 0.7115, "step": 699 }, { "epoch": 0.04, "grad_norm": 2.865341717671388, "learning_rate": 9.99426773647656e-06, "loss": 1.0085, "step": 700 }, { "epoch": 0.04, "grad_norm": 1.8183209304819274, "learning_rate": 9.994218008653381e-06, "loss": 0.9062, "step": 701 }, { "epoch": 0.04, "grad_norm": 1.9437963765637996, "learning_rate": 9.994168066189205e-06, "loss": 0.9835, "step": 702 }, { "epoch": 0.04, "grad_norm": 2.496331143566423, "learning_rate": 9.994117909086179e-06, "loss": 0.9703, "step": 703 }, { "epoch": 0.05, "grad_norm": 1.8396949449632982, "learning_rate": 9.994067537346461e-06, "loss": 1.0018, "step": 704 }, { "epoch": 0.05, "grad_norm": 1.0862885131537068, "learning_rate": 9.994016950972214e-06, "loss": 0.7166, "step": 705 }, { "epoch": 0.05, "grad_norm": 1.9395758563149255, "learning_rate": 9.993966149965613e-06, "loss": 0.877, "step": 706 }, { "epoch": 0.05, "grad_norm": 1.693250100271307, "learning_rate": 9.99391513432884e-06, "loss": 0.998, "step": 707 }, { "epoch": 0.05, "grad_norm": 2.0069109788385004, "learning_rate": 9.993863904064087e-06, "loss": 1.0081, "step": 708 }, { "epoch": 0.05, "grad_norm": 1.9075052511740127, "learning_rate": 9.993812459173557e-06, "loss": 0.8772, "step": 709 }, { "epoch": 0.05, "grad_norm": 1.6764140179418598, "learning_rate": 9.993760799659463e-06, "loss": 0.8349, "step": 710 }, { "epoch": 0.05, "grad_norm": 1.9333780444453257, "learning_rate": 9.993708925524022e-06, "loss": 0.9717, "step": 711 }, { "epoch": 0.05, "grad_norm": 2.0619940124978804, "learning_rate": 9.993656836769464e-06, "loss": 0.8584, "step": 712 }, { "epoch": 0.05, "grad_norm": 2.581569194698192, "learning_rate": 9.993604533398029e-06, "loss": 0.9298, "step": 713 }, { "epoch": 0.05, "grad_norm": 1.1456485636683102, "learning_rate": 9.993552015411965e-06, "loss": 0.6449, "step": 714 }, { "epoch": 0.05, "grad_norm": 1.5368497236094196, "learning_rate": 9.993499282813528e-06, "loss": 0.7333, "step": 715 }, { "epoch": 0.05, "grad_norm": 1.8339315060691601, "learning_rate": 9.993446335604983e-06, "loss": 0.9087, "step": 716 }, { "epoch": 0.05, "grad_norm": 1.8130063152638507, "learning_rate": 9.993393173788608e-06, "loss": 0.8394, "step": 717 }, { "epoch": 0.05, "grad_norm": 2.099209263346353, "learning_rate": 9.993339797366687e-06, "loss": 1.0552, "step": 718 }, { "epoch": 0.05, "grad_norm": 1.000474365189886, "learning_rate": 9.993286206341515e-06, "loss": 0.6918, "step": 719 }, { "epoch": 0.05, "grad_norm": 1.9174572412220756, "learning_rate": 9.993232400715394e-06, "loss": 0.836, "step": 720 }, { "epoch": 0.05, "grad_norm": 1.719440678162477, "learning_rate": 9.993178380490636e-06, "loss": 0.9644, "step": 721 }, { "epoch": 0.05, "grad_norm": 1.6110658096905057, "learning_rate": 9.993124145669563e-06, "loss": 0.8401, "step": 722 }, { "epoch": 0.05, "grad_norm": 1.5518254070725328, "learning_rate": 9.993069696254506e-06, "loss": 0.5477, "step": 723 }, { "epoch": 0.05, "grad_norm": 1.9060650148878602, "learning_rate": 9.993015032247806e-06, "loss": 0.9098, "step": 724 }, { "epoch": 0.05, "grad_norm": 2.3132911808657046, "learning_rate": 9.992960153651812e-06, "loss": 0.9216, "step": 725 }, { "epoch": 0.05, "grad_norm": 1.1791525713159117, "learning_rate": 9.992905060468882e-06, "loss": 0.7674, "step": 726 }, { "epoch": 0.05, "grad_norm": 1.7372854773326158, "learning_rate": 9.992849752701384e-06, "loss": 0.9307, "step": 727 }, { "epoch": 0.05, "grad_norm": 1.8539323524272169, "learning_rate": 9.992794230351695e-06, "loss": 0.9309, "step": 728 }, { "epoch": 0.05, "grad_norm": 1.2097362368512468, "learning_rate": 9.9927384934222e-06, "loss": 0.5835, "step": 729 }, { "epoch": 0.05, "grad_norm": 2.0913571596419627, "learning_rate": 9.992682541915297e-06, "loss": 0.9044, "step": 730 }, { "epoch": 0.05, "grad_norm": 1.8963275004411473, "learning_rate": 9.99262637583339e-06, "loss": 1.0111, "step": 731 }, { "epoch": 0.05, "grad_norm": 1.7789153803415139, "learning_rate": 9.992569995178891e-06, "loss": 0.9049, "step": 732 }, { "epoch": 0.05, "grad_norm": 2.1039172296445603, "learning_rate": 9.992513399954225e-06, "loss": 0.9493, "step": 733 }, { "epoch": 0.05, "grad_norm": 1.1967481008654202, "learning_rate": 9.992456590161825e-06, "loss": 0.639, "step": 734 }, { "epoch": 0.05, "grad_norm": 2.233054751312936, "learning_rate": 9.99239956580413e-06, "loss": 1.0182, "step": 735 }, { "epoch": 0.05, "grad_norm": 2.206033325182812, "learning_rate": 9.992342326883591e-06, "loss": 0.8689, "step": 736 }, { "epoch": 0.05, "grad_norm": 2.819610428173403, "learning_rate": 9.99228487340267e-06, "loss": 0.8248, "step": 737 }, { "epoch": 0.05, "grad_norm": 1.2840570752068634, "learning_rate": 9.992227205363837e-06, "loss": 0.6098, "step": 738 }, { "epoch": 0.05, "grad_norm": 1.8181489623910394, "learning_rate": 9.992169322769568e-06, "loss": 0.7972, "step": 739 }, { "epoch": 0.05, "grad_norm": 2.01544420740561, "learning_rate": 9.99211122562235e-06, "loss": 0.8586, "step": 740 }, { "epoch": 0.05, "grad_norm": 1.7302654496471583, "learning_rate": 9.992052913924683e-06, "loss": 0.8129, "step": 741 }, { "epoch": 0.05, "grad_norm": 1.7451284830525373, "learning_rate": 9.99199438767907e-06, "loss": 0.7803, "step": 742 }, { "epoch": 0.05, "grad_norm": 1.8680295491058325, "learning_rate": 9.991935646888031e-06, "loss": 0.8384, "step": 743 }, { "epoch": 0.05, "grad_norm": 2.508704476222301, "learning_rate": 9.991876691554086e-06, "loss": 0.7766, "step": 744 }, { "epoch": 0.05, "grad_norm": 2.245235147272344, "learning_rate": 9.991817521679769e-06, "loss": 0.9529, "step": 745 }, { "epoch": 0.05, "grad_norm": 2.0551642596585333, "learning_rate": 9.991758137267625e-06, "loss": 0.822, "step": 746 }, { "epoch": 0.05, "grad_norm": 1.4242435149114612, "learning_rate": 9.991698538320205e-06, "loss": 0.6982, "step": 747 }, { "epoch": 0.05, "grad_norm": 1.296457189849298, "learning_rate": 9.99163872484007e-06, "loss": 0.7744, "step": 748 }, { "epoch": 0.05, "grad_norm": 5.533863536111076, "learning_rate": 9.991578696829793e-06, "loss": 0.8107, "step": 749 }, { "epoch": 0.05, "grad_norm": 2.0235017413251626, "learning_rate": 9.99151845429195e-06, "loss": 0.9426, "step": 750 }, { "epoch": 0.05, "grad_norm": 1.1322343337521918, "learning_rate": 9.991457997229136e-06, "loss": 0.7012, "step": 751 }, { "epoch": 0.05, "grad_norm": 1.8156074222943137, "learning_rate": 9.991397325643943e-06, "loss": 0.8554, "step": 752 }, { "epoch": 0.05, "grad_norm": 2.0545895792111812, "learning_rate": 9.991336439538983e-06, "loss": 0.8522, "step": 753 }, { "epoch": 0.05, "grad_norm": 1.985133071574867, "learning_rate": 9.99127533891687e-06, "loss": 0.8561, "step": 754 }, { "epoch": 0.05, "grad_norm": 1.9170721133859696, "learning_rate": 9.991214023780232e-06, "loss": 0.7461, "step": 755 }, { "epoch": 0.05, "grad_norm": 1.7546025140437513, "learning_rate": 9.991152494131702e-06, "loss": 0.9452, "step": 756 }, { "epoch": 0.05, "grad_norm": 2.1005337722596065, "learning_rate": 9.991090749973926e-06, "loss": 0.9533, "step": 757 }, { "epoch": 0.05, "grad_norm": 2.1833901584307642, "learning_rate": 9.991028791309558e-06, "loss": 0.8661, "step": 758 }, { "epoch": 0.05, "grad_norm": 1.8955258956662666, "learning_rate": 9.99096661814126e-06, "loss": 0.8211, "step": 759 }, { "epoch": 0.05, "grad_norm": 2.1415446636371716, "learning_rate": 9.990904230471704e-06, "loss": 0.8965, "step": 760 }, { "epoch": 0.05, "grad_norm": 1.4662131702582653, "learning_rate": 9.990841628303571e-06, "loss": 0.6841, "step": 761 }, { "epoch": 0.05, "grad_norm": 1.7611788324385966, "learning_rate": 9.990778811639553e-06, "loss": 0.8299, "step": 762 }, { "epoch": 0.05, "grad_norm": 1.4321848137513398, "learning_rate": 9.990715780482348e-06, "loss": 0.6401, "step": 763 }, { "epoch": 0.05, "grad_norm": 1.824226345075787, "learning_rate": 9.990652534834666e-06, "loss": 0.9685, "step": 764 }, { "epoch": 0.05, "grad_norm": 3.404719399281497, "learning_rate": 9.990589074699225e-06, "loss": 1.0277, "step": 765 }, { "epoch": 0.05, "grad_norm": 1.744737598211103, "learning_rate": 9.990525400078752e-06, "loss": 0.909, "step": 766 }, { "epoch": 0.05, "grad_norm": 2.6089154015541465, "learning_rate": 9.990461510975983e-06, "loss": 0.852, "step": 767 }, { "epoch": 0.05, "grad_norm": 2.1876913521369232, "learning_rate": 9.990397407393668e-06, "loss": 0.9376, "step": 768 }, { "epoch": 0.05, "grad_norm": 1.8233343584947856, "learning_rate": 9.990333089334556e-06, "loss": 0.9518, "step": 769 }, { "epoch": 0.05, "grad_norm": 1.9493396305156825, "learning_rate": 9.990268556801413e-06, "loss": 0.9256, "step": 770 }, { "epoch": 0.05, "grad_norm": 2.0179752176424333, "learning_rate": 9.990203809797014e-06, "loss": 0.9098, "step": 771 }, { "epoch": 0.05, "grad_norm": 2.1378682142913865, "learning_rate": 9.990138848324142e-06, "loss": 0.9696, "step": 772 }, { "epoch": 0.05, "grad_norm": 1.108358304723358, "learning_rate": 9.990073672385588e-06, "loss": 0.7474, "step": 773 }, { "epoch": 0.05, "grad_norm": 1.2554966559490615, "learning_rate": 9.990008281984154e-06, "loss": 0.6619, "step": 774 }, { "epoch": 0.05, "grad_norm": 1.957383017426205, "learning_rate": 9.989942677122648e-06, "loss": 0.8995, "step": 775 }, { "epoch": 0.05, "grad_norm": 2.5204671812237973, "learning_rate": 9.989876857803891e-06, "loss": 0.9677, "step": 776 }, { "epoch": 0.05, "grad_norm": 1.8260047978307112, "learning_rate": 9.989810824030712e-06, "loss": 0.9816, "step": 777 }, { "epoch": 0.05, "grad_norm": 1.96513429299933, "learning_rate": 9.989744575805951e-06, "loss": 1.0198, "step": 778 }, { "epoch": 0.05, "grad_norm": 2.688631383849354, "learning_rate": 9.989678113132451e-06, "loss": 0.882, "step": 779 }, { "epoch": 0.05, "grad_norm": 1.8854543995767281, "learning_rate": 9.98961143601307e-06, "loss": 0.9842, "step": 780 }, { "epoch": 0.05, "grad_norm": 1.819278607184304, "learning_rate": 9.989544544450675e-06, "loss": 0.9801, "step": 781 }, { "epoch": 0.05, "grad_norm": 1.8221291583821322, "learning_rate": 9.989477438448138e-06, "loss": 0.8968, "step": 782 }, { "epoch": 0.05, "grad_norm": 1.259487421255768, "learning_rate": 9.989410118008348e-06, "loss": 0.6466, "step": 783 }, { "epoch": 0.05, "grad_norm": 1.2422320018141704, "learning_rate": 9.989342583134194e-06, "loss": 0.6006, "step": 784 }, { "epoch": 0.05, "grad_norm": 1.9172712075895095, "learning_rate": 9.98927483382858e-06, "loss": 0.8581, "step": 785 }, { "epoch": 0.05, "grad_norm": 2.032458674490977, "learning_rate": 9.989206870094416e-06, "loss": 1.0107, "step": 786 }, { "epoch": 0.05, "grad_norm": 2.264460247240953, "learning_rate": 9.989138691934628e-06, "loss": 1.2354, "step": 787 }, { "epoch": 0.05, "grad_norm": 1.199230768380042, "learning_rate": 9.98907029935214e-06, "loss": 0.6328, "step": 788 }, { "epoch": 0.05, "grad_norm": 2.0114522121640603, "learning_rate": 9.989001692349894e-06, "loss": 0.7802, "step": 789 }, { "epoch": 0.05, "grad_norm": 1.849139347350564, "learning_rate": 9.98893287093084e-06, "loss": 0.8026, "step": 790 }, { "epoch": 0.05, "grad_norm": 2.0307269662528222, "learning_rate": 9.988863835097934e-06, "loss": 0.9998, "step": 791 }, { "epoch": 0.05, "grad_norm": 1.255304437871241, "learning_rate": 9.988794584854143e-06, "loss": 0.7415, "step": 792 }, { "epoch": 0.05, "grad_norm": 2.125361016145657, "learning_rate": 9.988725120202442e-06, "loss": 1.017, "step": 793 }, { "epoch": 0.05, "grad_norm": 1.628566259577131, "learning_rate": 9.98865544114582e-06, "loss": 0.851, "step": 794 }, { "epoch": 0.05, "grad_norm": 1.7344982172981513, "learning_rate": 9.98858554768727e-06, "loss": 0.8784, "step": 795 }, { "epoch": 0.05, "grad_norm": 1.864541029946326, "learning_rate": 9.988515439829795e-06, "loss": 0.9263, "step": 796 }, { "epoch": 0.05, "grad_norm": 2.112682540056012, "learning_rate": 9.988445117576408e-06, "loss": 0.9678, "step": 797 }, { "epoch": 0.05, "grad_norm": 1.9685926259311788, "learning_rate": 9.988374580930133e-06, "loss": 0.8988, "step": 798 }, { "epoch": 0.05, "grad_norm": 1.856078584075137, "learning_rate": 9.988303829894002e-06, "loss": 0.9647, "step": 799 }, { "epoch": 0.05, "grad_norm": 1.9628643435181286, "learning_rate": 9.988232864471053e-06, "loss": 0.9093, "step": 800 }, { "epoch": 0.05, "grad_norm": 1.3391770800611702, "learning_rate": 9.988161684664336e-06, "loss": 0.6584, "step": 801 }, { "epoch": 0.05, "grad_norm": 1.6409601957127435, "learning_rate": 9.988090290476911e-06, "loss": 0.8242, "step": 802 }, { "epoch": 0.05, "grad_norm": 2.796129808836898, "learning_rate": 9.988018681911849e-06, "loss": 0.8298, "step": 803 }, { "epoch": 0.05, "grad_norm": 2.0945423932807503, "learning_rate": 9.987946858972224e-06, "loss": 0.8033, "step": 804 }, { "epoch": 0.05, "grad_norm": 1.9897791470539203, "learning_rate": 9.987874821661124e-06, "loss": 0.8187, "step": 805 }, { "epoch": 0.05, "grad_norm": 1.797426077720045, "learning_rate": 9.987802569981647e-06, "loss": 0.7701, "step": 806 }, { "epoch": 0.05, "grad_norm": 1.8276209545965074, "learning_rate": 9.987730103936895e-06, "loss": 0.9512, "step": 807 }, { "epoch": 0.05, "grad_norm": 1.254313667992607, "learning_rate": 9.987657423529982e-06, "loss": 0.7079, "step": 808 }, { "epoch": 0.05, "grad_norm": 2.0389354585982473, "learning_rate": 9.987584528764036e-06, "loss": 0.8257, "step": 809 }, { "epoch": 0.05, "grad_norm": 2.803770235170558, "learning_rate": 9.987511419642186e-06, "loss": 1.0411, "step": 810 }, { "epoch": 0.05, "grad_norm": 1.7533334003403436, "learning_rate": 9.987438096167577e-06, "loss": 0.9717, "step": 811 }, { "epoch": 0.05, "grad_norm": 1.5801389507220538, "learning_rate": 9.987364558343357e-06, "loss": 0.8636, "step": 812 }, { "epoch": 0.05, "grad_norm": 1.7117244744805025, "learning_rate": 9.987290806172689e-06, "loss": 0.93, "step": 813 }, { "epoch": 0.05, "grad_norm": 1.2686432196810782, "learning_rate": 9.98721683965874e-06, "loss": 0.6471, "step": 814 }, { "epoch": 0.05, "grad_norm": 1.1782025824157119, "learning_rate": 9.987142658804692e-06, "loss": 0.6664, "step": 815 }, { "epoch": 0.05, "grad_norm": 2.6926327805941073, "learning_rate": 9.987068263613733e-06, "loss": 0.926, "step": 816 }, { "epoch": 0.05, "grad_norm": 1.7276440639772161, "learning_rate": 9.986993654089059e-06, "loss": 0.8874, "step": 817 }, { "epoch": 0.05, "grad_norm": 1.798812245516414, "learning_rate": 9.986918830233877e-06, "loss": 0.9475, "step": 818 }, { "epoch": 0.05, "grad_norm": 2.0058709037012195, "learning_rate": 9.986843792051402e-06, "loss": 0.8302, "step": 819 }, { "epoch": 0.05, "grad_norm": 1.7708498802284502, "learning_rate": 9.98676853954486e-06, "loss": 0.9687, "step": 820 }, { "epoch": 0.05, "grad_norm": 1.3320076848164066, "learning_rate": 9.986693072717483e-06, "loss": 0.7144, "step": 821 }, { "epoch": 0.05, "grad_norm": 1.8782770133073343, "learning_rate": 9.98661739157252e-06, "loss": 0.8901, "step": 822 }, { "epoch": 0.05, "grad_norm": 1.1440545134806153, "learning_rate": 9.986541496113215e-06, "loss": 0.6649, "step": 823 }, { "epoch": 0.05, "grad_norm": 1.9173560637717273, "learning_rate": 9.986465386342838e-06, "loss": 0.9254, "step": 824 }, { "epoch": 0.05, "grad_norm": 2.024827661947124, "learning_rate": 9.986389062264656e-06, "loss": 0.9171, "step": 825 }, { "epoch": 0.05, "grad_norm": 1.6067948685380944, "learning_rate": 9.986312523881952e-06, "loss": 0.9523, "step": 826 }, { "epoch": 0.05, "grad_norm": 2.4189730004949186, "learning_rate": 9.98623577119801e-06, "loss": 0.8342, "step": 827 }, { "epoch": 0.05, "grad_norm": 1.8881329395777793, "learning_rate": 9.986158804216133e-06, "loss": 0.7924, "step": 828 }, { "epoch": 0.05, "grad_norm": 1.6208126083659207, "learning_rate": 9.98608162293963e-06, "loss": 0.8115, "step": 829 }, { "epoch": 0.05, "grad_norm": 2.0144161288922153, "learning_rate": 9.986004227371814e-06, "loss": 0.8898, "step": 830 }, { "epoch": 0.05, "grad_norm": 1.7419646478588604, "learning_rate": 9.985926617516012e-06, "loss": 0.7802, "step": 831 }, { "epoch": 0.05, "grad_norm": 1.6259018681423012, "learning_rate": 9.985848793375563e-06, "loss": 0.9091, "step": 832 }, { "epoch": 0.05, "grad_norm": 1.0849421617324835, "learning_rate": 9.98577075495381e-06, "loss": 0.6656, "step": 833 }, { "epoch": 0.05, "grad_norm": 1.6471919277929439, "learning_rate": 9.985692502254105e-06, "loss": 0.8589, "step": 834 }, { "epoch": 0.05, "grad_norm": 1.8360957252987014, "learning_rate": 9.985614035279813e-06, "loss": 0.8815, "step": 835 }, { "epoch": 0.05, "grad_norm": 1.8040297514923702, "learning_rate": 9.985535354034309e-06, "loss": 0.8804, "step": 836 }, { "epoch": 0.05, "grad_norm": 1.6174728402481822, "learning_rate": 9.985456458520968e-06, "loss": 0.8347, "step": 837 }, { "epoch": 0.05, "grad_norm": 1.5839193135933396, "learning_rate": 9.985377348743183e-06, "loss": 0.8245, "step": 838 }, { "epoch": 0.05, "grad_norm": 1.857832501406452, "learning_rate": 9.98529802470436e-06, "loss": 0.8139, "step": 839 }, { "epoch": 0.05, "grad_norm": 2.5207463424194816, "learning_rate": 9.985218486407899e-06, "loss": 0.903, "step": 840 }, { "epoch": 0.05, "grad_norm": 1.8389512208860535, "learning_rate": 9.985138733857225e-06, "loss": 0.8484, "step": 841 }, { "epoch": 0.05, "grad_norm": 2.27037619202481, "learning_rate": 9.985058767055765e-06, "loss": 0.7244, "step": 842 }, { "epoch": 0.05, "grad_norm": 2.518417018895201, "learning_rate": 9.984978586006951e-06, "loss": 0.8865, "step": 843 }, { "epoch": 0.05, "grad_norm": 1.8305926313369114, "learning_rate": 9.984898190714235e-06, "loss": 0.9162, "step": 844 }, { "epoch": 0.05, "grad_norm": 1.707245902917332, "learning_rate": 9.984817581181068e-06, "loss": 0.9378, "step": 845 }, { "epoch": 0.05, "grad_norm": 1.7759474181893682, "learning_rate": 9.984736757410915e-06, "loss": 0.9012, "step": 846 }, { "epoch": 0.05, "grad_norm": 1.7514321136163364, "learning_rate": 9.984655719407252e-06, "loss": 1.0319, "step": 847 }, { "epoch": 0.05, "grad_norm": 1.7276464953008441, "learning_rate": 9.98457446717356e-06, "loss": 0.796, "step": 848 }, { "epoch": 0.05, "grad_norm": 2.05768026893449, "learning_rate": 9.98449300071333e-06, "loss": 1.0094, "step": 849 }, { "epoch": 0.05, "grad_norm": 1.7584603317129548, "learning_rate": 9.984411320030068e-06, "loss": 0.9617, "step": 850 }, { "epoch": 0.05, "grad_norm": 1.5492150582159334, "learning_rate": 9.98432942512728e-06, "loss": 0.8665, "step": 851 }, { "epoch": 0.05, "grad_norm": 1.1919140439945972, "learning_rate": 9.984247316008484e-06, "loss": 0.7297, "step": 852 }, { "epoch": 0.05, "grad_norm": 1.1598290853032225, "learning_rate": 9.984164992677215e-06, "loss": 0.6245, "step": 853 }, { "epoch": 0.05, "grad_norm": 1.8325432489550026, "learning_rate": 9.984082455137007e-06, "loss": 1.1537, "step": 854 }, { "epoch": 0.05, "grad_norm": 2.1840635473922467, "learning_rate": 9.983999703391408e-06, "loss": 0.9025, "step": 855 }, { "epoch": 0.05, "grad_norm": 1.7739275533922376, "learning_rate": 9.983916737443973e-06, "loss": 0.7989, "step": 856 }, { "epoch": 0.05, "grad_norm": 1.6691319391354316, "learning_rate": 9.98383355729827e-06, "loss": 0.8986, "step": 857 }, { "epoch": 0.05, "grad_norm": 1.4030389302401847, "learning_rate": 9.983750162957874e-06, "loss": 0.6792, "step": 858 }, { "epoch": 0.05, "grad_norm": 1.946092138313616, "learning_rate": 9.983666554426367e-06, "loss": 0.9164, "step": 859 }, { "epoch": 0.06, "grad_norm": 1.8419956188263835, "learning_rate": 9.983582731707346e-06, "loss": 0.9714, "step": 860 }, { "epoch": 0.06, "grad_norm": 2.0954309595240592, "learning_rate": 9.98349869480441e-06, "loss": 0.9357, "step": 861 }, { "epoch": 0.06, "grad_norm": 2.0912528930887873, "learning_rate": 9.98341444372117e-06, "loss": 0.9498, "step": 862 }, { "epoch": 0.06, "grad_norm": 1.7763422184642106, "learning_rate": 9.983329978461252e-06, "loss": 0.9339, "step": 863 }, { "epoch": 0.06, "grad_norm": 2.1067859098812076, "learning_rate": 9.983245299028281e-06, "loss": 0.816, "step": 864 }, { "epoch": 0.06, "grad_norm": 1.8110837955455958, "learning_rate": 9.9831604054259e-06, "loss": 0.8904, "step": 865 }, { "epoch": 0.06, "grad_norm": 1.6715353598856615, "learning_rate": 9.983075297657753e-06, "loss": 0.8011, "step": 866 }, { "epoch": 0.06, "grad_norm": 1.8837518613310067, "learning_rate": 9.982989975727502e-06, "loss": 0.8673, "step": 867 }, { "epoch": 0.06, "grad_norm": 2.0369963425673414, "learning_rate": 9.982904439638814e-06, "loss": 1.0164, "step": 868 }, { "epoch": 0.06, "grad_norm": 1.613781745843069, "learning_rate": 9.982818689395362e-06, "loss": 0.8368, "step": 869 }, { "epoch": 0.06, "grad_norm": 1.1438091408946043, "learning_rate": 9.982732725000834e-06, "loss": 0.6062, "step": 870 }, { "epoch": 0.06, "grad_norm": 2.0434366440992933, "learning_rate": 9.982646546458922e-06, "loss": 0.9181, "step": 871 }, { "epoch": 0.06, "grad_norm": 4.65050667111611, "learning_rate": 9.982560153773333e-06, "loss": 0.8695, "step": 872 }, { "epoch": 0.06, "grad_norm": 1.8226865095588387, "learning_rate": 9.982473546947777e-06, "loss": 0.8283, "step": 873 }, { "epoch": 0.06, "grad_norm": 1.866478959576701, "learning_rate": 9.982386725985979e-06, "loss": 1.1083, "step": 874 }, { "epoch": 0.06, "grad_norm": 1.8092470648575885, "learning_rate": 9.982299690891668e-06, "loss": 0.8319, "step": 875 }, { "epoch": 0.06, "grad_norm": 1.1950609129402123, "learning_rate": 9.982212441668586e-06, "loss": 0.6203, "step": 876 }, { "epoch": 0.06, "grad_norm": 1.2973028340166881, "learning_rate": 9.982124978320482e-06, "loss": 0.6227, "step": 877 }, { "epoch": 0.06, "grad_norm": 2.5610919149436717, "learning_rate": 9.982037300851117e-06, "loss": 0.9848, "step": 878 }, { "epoch": 0.06, "grad_norm": 2.1925660521153874, "learning_rate": 9.981949409264256e-06, "loss": 1.0189, "step": 879 }, { "epoch": 0.06, "grad_norm": 2.178467478537844, "learning_rate": 9.981861303563678e-06, "loss": 0.8052, "step": 880 }, { "epoch": 0.06, "grad_norm": 1.5970719617365519, "learning_rate": 9.981772983753169e-06, "loss": 0.8981, "step": 881 }, { "epoch": 0.06, "grad_norm": 2.2747593637969636, "learning_rate": 9.981684449836526e-06, "loss": 0.9511, "step": 882 }, { "epoch": 0.06, "grad_norm": 1.7384499491756018, "learning_rate": 9.981595701817552e-06, "loss": 0.8147, "step": 883 }, { "epoch": 0.06, "grad_norm": 1.9736213892617, "learning_rate": 9.981506739700063e-06, "loss": 0.8742, "step": 884 }, { "epoch": 0.06, "grad_norm": 3.0062661622772544, "learning_rate": 9.981417563487884e-06, "loss": 0.8965, "step": 885 }, { "epoch": 0.06, "grad_norm": 1.7068979452376731, "learning_rate": 9.981328173184845e-06, "loss": 0.8418, "step": 886 }, { "epoch": 0.06, "grad_norm": 1.7151527293076307, "learning_rate": 9.981238568794787e-06, "loss": 0.8996, "step": 887 }, { "epoch": 0.06, "grad_norm": 1.830305983217042, "learning_rate": 9.981148750321563e-06, "loss": 0.8647, "step": 888 }, { "epoch": 0.06, "grad_norm": 1.98711436942181, "learning_rate": 9.981058717769031e-06, "loss": 1.1502, "step": 889 }, { "epoch": 0.06, "grad_norm": 2.1700082344978706, "learning_rate": 9.980968471141064e-06, "loss": 0.9022, "step": 890 }, { "epoch": 0.06, "grad_norm": 2.080029275722089, "learning_rate": 9.980878010441539e-06, "loss": 0.8629, "step": 891 }, { "epoch": 0.06, "grad_norm": 1.784572203446537, "learning_rate": 9.980787335674341e-06, "loss": 0.8982, "step": 892 }, { "epoch": 0.06, "grad_norm": 1.7945627598849923, "learning_rate": 9.980696446843371e-06, "loss": 0.8675, "step": 893 }, { "epoch": 0.06, "grad_norm": 1.794433647501701, "learning_rate": 9.980605343952534e-06, "loss": 1.0211, "step": 894 }, { "epoch": 0.06, "grad_norm": 1.3693151169231796, "learning_rate": 9.980514027005743e-06, "loss": 0.5713, "step": 895 }, { "epoch": 0.06, "grad_norm": 1.8192096704069411, "learning_rate": 9.980422496006926e-06, "loss": 0.9315, "step": 896 }, { "epoch": 0.06, "grad_norm": 2.4180635103126322, "learning_rate": 9.980330750960014e-06, "loss": 0.8553, "step": 897 }, { "epoch": 0.06, "grad_norm": 2.230095211220556, "learning_rate": 9.980238791868955e-06, "loss": 0.9141, "step": 898 }, { "epoch": 0.06, "grad_norm": 1.8426337929492764, "learning_rate": 9.980146618737694e-06, "loss": 0.8647, "step": 899 }, { "epoch": 0.06, "grad_norm": 2.1303551409764254, "learning_rate": 9.980054231570197e-06, "loss": 0.8552, "step": 900 }, { "epoch": 0.06, "grad_norm": 2.0670303035799447, "learning_rate": 9.979961630370435e-06, "loss": 0.5889, "step": 901 }, { "epoch": 0.06, "grad_norm": 1.8306776868082653, "learning_rate": 9.979868815142385e-06, "loss": 0.8806, "step": 902 }, { "epoch": 0.06, "grad_norm": 1.4095514989734734, "learning_rate": 9.979775785890037e-06, "loss": 0.6481, "step": 903 }, { "epoch": 0.06, "grad_norm": 2.0921406781500447, "learning_rate": 9.979682542617393e-06, "loss": 0.7733, "step": 904 }, { "epoch": 0.06, "grad_norm": 1.975575087444301, "learning_rate": 9.979589085328454e-06, "loss": 0.794, "step": 905 }, { "epoch": 0.06, "grad_norm": 1.9745371848639235, "learning_rate": 9.979495414027241e-06, "loss": 0.7997, "step": 906 }, { "epoch": 0.06, "grad_norm": 1.9521051622583578, "learning_rate": 9.979401528717777e-06, "loss": 0.7855, "step": 907 }, { "epoch": 0.06, "grad_norm": 1.3406751813662139, "learning_rate": 9.9793074294041e-06, "loss": 0.7618, "step": 908 }, { "epoch": 0.06, "grad_norm": 1.781023295730117, "learning_rate": 9.979213116090251e-06, "loss": 0.8422, "step": 909 }, { "epoch": 0.06, "grad_norm": 1.8475884340691349, "learning_rate": 9.979118588780286e-06, "loss": 0.883, "step": 910 }, { "epoch": 0.06, "grad_norm": 2.183958680831, "learning_rate": 9.979023847478268e-06, "loss": 0.8929, "step": 911 }, { "epoch": 0.06, "grad_norm": 1.8200805560261253, "learning_rate": 9.978928892188265e-06, "loss": 0.88, "step": 912 }, { "epoch": 0.06, "grad_norm": 2.165036729245558, "learning_rate": 9.978833722914362e-06, "loss": 0.8984, "step": 913 }, { "epoch": 0.06, "grad_norm": 2.46773893721887, "learning_rate": 9.978738339660648e-06, "loss": 0.8954, "step": 914 }, { "epoch": 0.06, "grad_norm": 1.7030564542470885, "learning_rate": 9.97864274243122e-06, "loss": 0.8775, "step": 915 }, { "epoch": 0.06, "grad_norm": 1.9417760990488593, "learning_rate": 9.97854693123019e-06, "loss": 0.8919, "step": 916 }, { "epoch": 0.06, "grad_norm": 1.8159118792981321, "learning_rate": 9.978450906061673e-06, "loss": 0.8985, "step": 917 }, { "epoch": 0.06, "grad_norm": 1.8508186894705982, "learning_rate": 9.978354666929797e-06, "loss": 0.8845, "step": 918 }, { "epoch": 0.06, "grad_norm": 1.1819977941074624, "learning_rate": 9.978258213838701e-06, "loss": 0.5555, "step": 919 }, { "epoch": 0.06, "grad_norm": 1.83925627220933, "learning_rate": 9.978161546792526e-06, "loss": 0.893, "step": 920 }, { "epoch": 0.06, "grad_norm": 1.8084663915651018, "learning_rate": 9.978064665795429e-06, "loss": 0.9645, "step": 921 }, { "epoch": 0.06, "grad_norm": 2.0584098265145583, "learning_rate": 9.977967570851571e-06, "loss": 0.8772, "step": 922 }, { "epoch": 0.06, "grad_norm": 1.9048552551576468, "learning_rate": 9.977870261965128e-06, "loss": 0.9239, "step": 923 }, { "epoch": 0.06, "grad_norm": 2.117820584711354, "learning_rate": 9.97777273914028e-06, "loss": 0.9359, "step": 924 }, { "epoch": 0.06, "grad_norm": 1.773878561840763, "learning_rate": 9.977675002381222e-06, "loss": 0.8993, "step": 925 }, { "epoch": 0.06, "grad_norm": 1.395369723141014, "learning_rate": 9.97757705169215e-06, "loss": 0.5252, "step": 926 }, { "epoch": 0.06, "grad_norm": 1.8676961340975935, "learning_rate": 9.977478887077277e-06, "loss": 0.8459, "step": 927 }, { "epoch": 0.06, "grad_norm": 1.2800748274560103, "learning_rate": 9.97738050854082e-06, "loss": 0.6818, "step": 928 }, { "epoch": 0.06, "grad_norm": 1.856356937463397, "learning_rate": 9.977281916087008e-06, "loss": 0.9222, "step": 929 }, { "epoch": 0.06, "grad_norm": 1.1389382205826464, "learning_rate": 9.977183109720078e-06, "loss": 0.6042, "step": 930 }, { "epoch": 0.06, "grad_norm": 1.8843915154324566, "learning_rate": 9.977084089444276e-06, "loss": 0.7833, "step": 931 }, { "epoch": 0.06, "grad_norm": 1.7549375320921219, "learning_rate": 9.976984855263859e-06, "loss": 0.8259, "step": 932 }, { "epoch": 0.06, "grad_norm": 1.160998349604635, "learning_rate": 9.976885407183088e-06, "loss": 0.7475, "step": 933 }, { "epoch": 0.06, "grad_norm": 1.8151181073795724, "learning_rate": 9.976785745206243e-06, "loss": 0.7693, "step": 934 }, { "epoch": 0.06, "grad_norm": 1.7573059979843486, "learning_rate": 9.976685869337603e-06, "loss": 0.7863, "step": 935 }, { "epoch": 0.06, "grad_norm": 1.3758490018853633, "learning_rate": 9.976585779581463e-06, "loss": 0.7409, "step": 936 }, { "epoch": 0.06, "grad_norm": 1.852177633316721, "learning_rate": 9.976485475942123e-06, "loss": 0.8256, "step": 937 }, { "epoch": 0.06, "grad_norm": 1.630682028969344, "learning_rate": 9.976384958423895e-06, "loss": 0.8699, "step": 938 }, { "epoch": 0.06, "grad_norm": 1.783659227280606, "learning_rate": 9.976284227031097e-06, "loss": 0.9983, "step": 939 }, { "epoch": 0.06, "grad_norm": 1.6220195908012716, "learning_rate": 9.976183281768059e-06, "loss": 0.947, "step": 940 }, { "epoch": 0.06, "grad_norm": 1.9544408631173673, "learning_rate": 9.976082122639122e-06, "loss": 0.8888, "step": 941 }, { "epoch": 0.06, "grad_norm": 2.254368734633022, "learning_rate": 9.97598074964863e-06, "loss": 0.8733, "step": 942 }, { "epoch": 0.06, "grad_norm": 1.6702282549078373, "learning_rate": 9.975879162800942e-06, "loss": 0.7183, "step": 943 }, { "epoch": 0.06, "grad_norm": 1.8322588767360242, "learning_rate": 9.975777362100423e-06, "loss": 0.9861, "step": 944 }, { "epoch": 0.06, "grad_norm": 1.3072093349120704, "learning_rate": 9.975675347551449e-06, "loss": 0.7231, "step": 945 }, { "epoch": 0.06, "grad_norm": 2.136765832309652, "learning_rate": 9.975573119158402e-06, "loss": 0.9577, "step": 946 }, { "epoch": 0.06, "grad_norm": 1.1711665473257489, "learning_rate": 9.975470676925681e-06, "loss": 0.6417, "step": 947 }, { "epoch": 0.06, "grad_norm": 3.2141278530385446, "learning_rate": 9.975368020857682e-06, "loss": 0.8841, "step": 948 }, { "epoch": 0.06, "grad_norm": 1.5814136075345542, "learning_rate": 9.975265150958822e-06, "loss": 0.8187, "step": 949 }, { "epoch": 0.06, "grad_norm": 1.9507848071092373, "learning_rate": 9.97516206723352e-06, "loss": 0.9352, "step": 950 }, { "epoch": 0.06, "grad_norm": 2.029903756609918, "learning_rate": 9.975058769686205e-06, "loss": 0.8878, "step": 951 }, { "epoch": 0.06, "grad_norm": 2.1641574232475755, "learning_rate": 9.974955258321319e-06, "loss": 0.937, "step": 952 }, { "epoch": 0.06, "grad_norm": 1.298697679519979, "learning_rate": 9.97485153314331e-06, "loss": 0.7067, "step": 953 }, { "epoch": 0.06, "grad_norm": 1.8389280927874445, "learning_rate": 9.974747594156638e-06, "loss": 0.852, "step": 954 }, { "epoch": 0.06, "grad_norm": 1.8700525000579782, "learning_rate": 9.974643441365765e-06, "loss": 0.9461, "step": 955 }, { "epoch": 0.06, "grad_norm": 1.8723591217084086, "learning_rate": 9.97453907477517e-06, "loss": 0.9045, "step": 956 }, { "epoch": 0.06, "grad_norm": 1.8406401592574853, "learning_rate": 9.97443449438934e-06, "loss": 0.804, "step": 957 }, { "epoch": 0.06, "grad_norm": 1.7223769126921193, "learning_rate": 9.974329700212767e-06, "loss": 0.8134, "step": 958 }, { "epoch": 0.06, "grad_norm": 2.0134501762383414, "learning_rate": 9.974224692249956e-06, "loss": 0.9112, "step": 959 }, { "epoch": 0.06, "grad_norm": 1.7931508095445898, "learning_rate": 9.974119470505422e-06, "loss": 0.9762, "step": 960 }, { "epoch": 0.06, "grad_norm": 2.140868526379472, "learning_rate": 9.974014034983684e-06, "loss": 0.8619, "step": 961 }, { "epoch": 0.06, "grad_norm": 1.1372153580348099, "learning_rate": 9.973908385689273e-06, "loss": 0.7022, "step": 962 }, { "epoch": 0.06, "grad_norm": 1.760578356175492, "learning_rate": 9.973802522626731e-06, "loss": 1.0064, "step": 963 }, { "epoch": 0.06, "grad_norm": 1.7412916499412006, "learning_rate": 9.973696445800611e-06, "loss": 0.9454, "step": 964 }, { "epoch": 0.06, "grad_norm": 1.807728298313726, "learning_rate": 9.973590155215467e-06, "loss": 0.7718, "step": 965 }, { "epoch": 0.06, "grad_norm": 1.923978819519636, "learning_rate": 9.97348365087587e-06, "loss": 0.9176, "step": 966 }, { "epoch": 0.06, "grad_norm": 2.254094511977967, "learning_rate": 9.973376932786396e-06, "loss": 0.9724, "step": 967 }, { "epoch": 0.06, "grad_norm": 1.8406156926206738, "learning_rate": 9.97327000095163e-06, "loss": 0.8054, "step": 968 }, { "epoch": 0.06, "grad_norm": 1.775141604200871, "learning_rate": 9.973162855376173e-06, "loss": 0.89, "step": 969 }, { "epoch": 0.06, "grad_norm": 1.148180294757204, "learning_rate": 9.973055496064625e-06, "loss": 0.6711, "step": 970 }, { "epoch": 0.06, "grad_norm": 2.1312827686094535, "learning_rate": 9.972947923021602e-06, "loss": 1.0319, "step": 971 }, { "epoch": 0.06, "grad_norm": 1.8932439870380282, "learning_rate": 9.972840136251728e-06, "loss": 0.8079, "step": 972 }, { "epoch": 0.06, "grad_norm": 1.8947064730571859, "learning_rate": 9.972732135759631e-06, "loss": 0.8888, "step": 973 }, { "epoch": 0.06, "grad_norm": 2.3274505628913107, "learning_rate": 9.972623921549957e-06, "loss": 0.8343, "step": 974 }, { "epoch": 0.06, "grad_norm": 1.8586870308183472, "learning_rate": 9.972515493627357e-06, "loss": 0.8396, "step": 975 }, { "epoch": 0.06, "grad_norm": 2.0729290776413114, "learning_rate": 9.97240685199649e-06, "loss": 0.8891, "step": 976 }, { "epoch": 0.06, "grad_norm": 1.8751377171960637, "learning_rate": 9.972297996662023e-06, "loss": 0.8283, "step": 977 }, { "epoch": 0.06, "grad_norm": 1.8287183855799904, "learning_rate": 9.972188927628638e-06, "loss": 0.9072, "step": 978 }, { "epoch": 0.06, "grad_norm": 1.784042345713816, "learning_rate": 9.972079644901021e-06, "loss": 0.8413, "step": 979 }, { "epoch": 0.06, "grad_norm": 1.8277175877826206, "learning_rate": 9.971970148483868e-06, "loss": 0.9607, "step": 980 }, { "epoch": 0.06, "grad_norm": 1.0435860808724986, "learning_rate": 9.971860438381886e-06, "loss": 0.5996, "step": 981 }, { "epoch": 0.06, "grad_norm": 2.9448784175088605, "learning_rate": 9.97175051459979e-06, "loss": 0.9784, "step": 982 }, { "epoch": 0.06, "grad_norm": 1.2645136884987012, "learning_rate": 9.971640377142301e-06, "loss": 0.7015, "step": 983 }, { "epoch": 0.06, "grad_norm": 1.604388859835439, "learning_rate": 9.97153002601416e-06, "loss": 0.8101, "step": 984 }, { "epoch": 0.06, "grad_norm": 1.7430173793499244, "learning_rate": 9.971419461220102e-06, "loss": 0.8091, "step": 985 }, { "epoch": 0.06, "grad_norm": 1.6870213982966125, "learning_rate": 9.971308682764884e-06, "loss": 0.9586, "step": 986 }, { "epoch": 0.06, "grad_norm": 1.8687064465427725, "learning_rate": 9.971197690653263e-06, "loss": 0.815, "step": 987 }, { "epoch": 0.06, "grad_norm": 3.420764356391413, "learning_rate": 9.971086484890013e-06, "loss": 0.9407, "step": 988 }, { "epoch": 0.06, "grad_norm": 1.579485391786988, "learning_rate": 9.970975065479909e-06, "loss": 0.7936, "step": 989 }, { "epoch": 0.06, "grad_norm": 1.891886628839919, "learning_rate": 9.970863432427745e-06, "loss": 0.7457, "step": 990 }, { "epoch": 0.06, "grad_norm": 1.7653676608311242, "learning_rate": 9.970751585738315e-06, "loss": 0.9175, "step": 991 }, { "epoch": 0.06, "grad_norm": 1.6847753944770052, "learning_rate": 9.970639525416425e-06, "loss": 0.8377, "step": 992 }, { "epoch": 0.06, "grad_norm": 1.9351291782410793, "learning_rate": 9.970527251466895e-06, "loss": 0.9058, "step": 993 }, { "epoch": 0.06, "grad_norm": 1.5195353471350552, "learning_rate": 9.970414763894548e-06, "loss": 0.6499, "step": 994 }, { "epoch": 0.06, "grad_norm": 1.9434257991479436, "learning_rate": 9.970302062704218e-06, "loss": 0.9532, "step": 995 }, { "epoch": 0.06, "grad_norm": 1.4448233548876348, "learning_rate": 9.970189147900751e-06, "loss": 0.7441, "step": 996 }, { "epoch": 0.06, "grad_norm": 1.7677952395262189, "learning_rate": 9.970076019488997e-06, "loss": 0.8585, "step": 997 }, { "epoch": 0.06, "grad_norm": 1.4341092560010023, "learning_rate": 9.96996267747382e-06, "loss": 0.683, "step": 998 }, { "epoch": 0.06, "grad_norm": 1.9439227299748891, "learning_rate": 9.969849121860089e-06, "loss": 0.847, "step": 999 }, { "epoch": 0.06, "grad_norm": 1.9031233962640721, "learning_rate": 9.969735352652685e-06, "loss": 0.8527, "step": 1000 }, { "epoch": 0.06, "grad_norm": 1.6241599363243002, "learning_rate": 9.969621369856502e-06, "loss": 0.89, "step": 1001 }, { "epoch": 0.06, "grad_norm": 1.710048060890382, "learning_rate": 9.969507173476431e-06, "loss": 0.7985, "step": 1002 }, { "epoch": 0.06, "grad_norm": 1.8996705385536343, "learning_rate": 9.969392763517387e-06, "loss": 0.7839, "step": 1003 }, { "epoch": 0.06, "grad_norm": 2.4133187820389206, "learning_rate": 9.969278139984283e-06, "loss": 0.8139, "step": 1004 }, { "epoch": 0.06, "grad_norm": 1.364823672114109, "learning_rate": 9.969163302882048e-06, "loss": 0.6599, "step": 1005 }, { "epoch": 0.06, "grad_norm": 1.129142460652482, "learning_rate": 9.969048252215614e-06, "loss": 0.6709, "step": 1006 }, { "epoch": 0.06, "grad_norm": 1.8399337319678297, "learning_rate": 9.968932987989927e-06, "loss": 0.9547, "step": 1007 }, { "epoch": 0.06, "grad_norm": 1.6996462765569504, "learning_rate": 9.968817510209943e-06, "loss": 0.7582, "step": 1008 }, { "epoch": 0.06, "grad_norm": 1.8910502649474075, "learning_rate": 9.968701818880624e-06, "loss": 0.8676, "step": 1009 }, { "epoch": 0.06, "grad_norm": 1.8132495460702422, "learning_rate": 9.96858591400694e-06, "loss": 0.8802, "step": 1010 }, { "epoch": 0.06, "grad_norm": 1.903979278058798, "learning_rate": 9.968469795593874e-06, "loss": 0.9297, "step": 1011 }, { "epoch": 0.06, "grad_norm": 1.6211612419207961, "learning_rate": 9.968353463646417e-06, "loss": 0.829, "step": 1012 }, { "epoch": 0.06, "grad_norm": 1.1633091227399939, "learning_rate": 9.968236918169567e-06, "loss": 0.7174, "step": 1013 }, { "epoch": 0.06, "grad_norm": 1.8940041437686845, "learning_rate": 9.968120159168335e-06, "loss": 0.8108, "step": 1014 }, { "epoch": 0.06, "grad_norm": 1.773087411056418, "learning_rate": 9.968003186647737e-06, "loss": 0.9754, "step": 1015 }, { "epoch": 0.07, "grad_norm": 1.7770125902928644, "learning_rate": 9.967886000612801e-06, "loss": 0.7817, "step": 1016 }, { "epoch": 0.07, "grad_norm": 2.3063036665125196, "learning_rate": 9.967768601068566e-06, "loss": 0.8927, "step": 1017 }, { "epoch": 0.07, "grad_norm": 2.0046331959126094, "learning_rate": 9.967650988020073e-06, "loss": 0.9598, "step": 1018 }, { "epoch": 0.07, "grad_norm": 1.7500108767223201, "learning_rate": 9.96753316147238e-06, "loss": 0.9349, "step": 1019 }, { "epoch": 0.07, "grad_norm": 1.7426728341036553, "learning_rate": 9.96741512143055e-06, "loss": 0.8086, "step": 1020 }, { "epoch": 0.07, "grad_norm": 1.4749356933615134, "learning_rate": 9.967296867899656e-06, "loss": 0.7839, "step": 1021 }, { "epoch": 0.07, "grad_norm": 1.6532313760450104, "learning_rate": 9.96717840088478e-06, "loss": 0.9477, "step": 1022 }, { "epoch": 0.07, "grad_norm": 1.9792292700052776, "learning_rate": 9.967059720391014e-06, "loss": 0.8387, "step": 1023 }, { "epoch": 0.07, "grad_norm": 1.6548939466773924, "learning_rate": 9.966940826423459e-06, "loss": 0.8529, "step": 1024 }, { "epoch": 0.07, "grad_norm": 1.7677255286097018, "learning_rate": 9.966821718987222e-06, "loss": 0.8319, "step": 1025 }, { "epoch": 0.07, "grad_norm": 2.165067875277482, "learning_rate": 9.966702398087426e-06, "loss": 0.8853, "step": 1026 }, { "epoch": 0.07, "grad_norm": 1.7587943233758025, "learning_rate": 9.966582863729198e-06, "loss": 0.7443, "step": 1027 }, { "epoch": 0.07, "grad_norm": 1.7326644528452946, "learning_rate": 9.966463115917676e-06, "loss": 0.8786, "step": 1028 }, { "epoch": 0.07, "grad_norm": 1.6685108407701417, "learning_rate": 9.966343154658002e-06, "loss": 0.9096, "step": 1029 }, { "epoch": 0.07, "grad_norm": 1.244645753473948, "learning_rate": 9.966222979955335e-06, "loss": 0.7064, "step": 1030 }, { "epoch": 0.07, "grad_norm": 5.556153497246327, "learning_rate": 9.966102591814842e-06, "loss": 0.8903, "step": 1031 }, { "epoch": 0.07, "grad_norm": 1.7210809960616633, "learning_rate": 9.965981990241694e-06, "loss": 0.8445, "step": 1032 }, { "epoch": 0.07, "grad_norm": 1.2145570391161795, "learning_rate": 9.965861175241075e-06, "loss": 0.8079, "step": 1033 }, { "epoch": 0.07, "grad_norm": 1.9886168936666562, "learning_rate": 9.965740146818177e-06, "loss": 0.8647, "step": 1034 }, { "epoch": 0.07, "grad_norm": 1.2040463026341415, "learning_rate": 9.965618904978203e-06, "loss": 0.7427, "step": 1035 }, { "epoch": 0.07, "grad_norm": 2.3601225325140636, "learning_rate": 9.965497449726363e-06, "loss": 0.8473, "step": 1036 }, { "epoch": 0.07, "grad_norm": 1.7371760762280664, "learning_rate": 9.965375781067874e-06, "loss": 0.8681, "step": 1037 }, { "epoch": 0.07, "grad_norm": 2.1396874439474716, "learning_rate": 9.965253899007969e-06, "loss": 1.1841, "step": 1038 }, { "epoch": 0.07, "grad_norm": 1.657903700598522, "learning_rate": 9.965131803551885e-06, "loss": 0.7419, "step": 1039 }, { "epoch": 0.07, "grad_norm": 1.7581929589460872, "learning_rate": 9.96500949470487e-06, "loss": 0.8772, "step": 1040 }, { "epoch": 0.07, "grad_norm": 1.2169782468077348, "learning_rate": 9.96488697247218e-06, "loss": 0.6709, "step": 1041 }, { "epoch": 0.07, "grad_norm": 2.233087724791037, "learning_rate": 9.964764236859079e-06, "loss": 1.0083, "step": 1042 }, { "epoch": 0.07, "grad_norm": 1.9686232825786036, "learning_rate": 9.964641287870844e-06, "loss": 0.7499, "step": 1043 }, { "epoch": 0.07, "grad_norm": 1.4398369123284145, "learning_rate": 9.96451812551276e-06, "loss": 0.7858, "step": 1044 }, { "epoch": 0.07, "grad_norm": 2.0068183703309703, "learning_rate": 9.964394749790117e-06, "loss": 0.9749, "step": 1045 }, { "epoch": 0.07, "grad_norm": 1.1841524401247447, "learning_rate": 9.96427116070822e-06, "loss": 0.6219, "step": 1046 }, { "epoch": 0.07, "grad_norm": 1.8254298297669143, "learning_rate": 9.964147358272379e-06, "loss": 0.8537, "step": 1047 }, { "epoch": 0.07, "grad_norm": 1.8501188448051333, "learning_rate": 9.964023342487916e-06, "loss": 0.8773, "step": 1048 }, { "epoch": 0.07, "grad_norm": 2.2442588360850215, "learning_rate": 9.963899113360161e-06, "loss": 1.0717, "step": 1049 }, { "epoch": 0.07, "grad_norm": 1.7502931284284218, "learning_rate": 9.963774670894453e-06, "loss": 0.818, "step": 1050 }, { "epoch": 0.07, "grad_norm": 1.5747697291205895, "learning_rate": 9.96365001509614e-06, "loss": 0.8362, "step": 1051 }, { "epoch": 0.07, "grad_norm": 2.2473769840489615, "learning_rate": 9.963525145970579e-06, "loss": 0.9074, "step": 1052 }, { "epoch": 0.07, "grad_norm": 1.7545346518733904, "learning_rate": 9.963400063523136e-06, "loss": 0.9302, "step": 1053 }, { "epoch": 0.07, "grad_norm": 1.7935291886945772, "learning_rate": 9.96327476775919e-06, "loss": 0.8376, "step": 1054 }, { "epoch": 0.07, "grad_norm": 1.644452537308519, "learning_rate": 9.963149258684122e-06, "loss": 0.9442, "step": 1055 }, { "epoch": 0.07, "grad_norm": 1.5970604168782325, "learning_rate": 9.96302353630333e-06, "loss": 0.8821, "step": 1056 }, { "epoch": 0.07, "grad_norm": 1.595217362102543, "learning_rate": 9.962897600622212e-06, "loss": 0.7612, "step": 1057 }, { "epoch": 0.07, "grad_norm": 1.911622340478663, "learning_rate": 9.962771451646187e-06, "loss": 1.0025, "step": 1058 }, { "epoch": 0.07, "grad_norm": 1.018465129603162, "learning_rate": 9.962645089380672e-06, "loss": 0.622, "step": 1059 }, { "epoch": 0.07, "grad_norm": 1.5987053512187896, "learning_rate": 9.962518513831096e-06, "loss": 0.8036, "step": 1060 }, { "epoch": 0.07, "grad_norm": 1.909023397420912, "learning_rate": 9.962391725002906e-06, "loss": 0.8368, "step": 1061 }, { "epoch": 0.07, "grad_norm": 1.9198516283861535, "learning_rate": 9.962264722901545e-06, "loss": 0.9888, "step": 1062 }, { "epoch": 0.07, "grad_norm": 1.683507574748586, "learning_rate": 9.962137507532474e-06, "loss": 0.7942, "step": 1063 }, { "epoch": 0.07, "grad_norm": 1.8108692714136563, "learning_rate": 9.962010078901161e-06, "loss": 0.8389, "step": 1064 }, { "epoch": 0.07, "grad_norm": 1.8277283950931291, "learning_rate": 9.961882437013079e-06, "loss": 0.9279, "step": 1065 }, { "epoch": 0.07, "grad_norm": 1.6714415916236063, "learning_rate": 9.961754581873717e-06, "loss": 0.8271, "step": 1066 }, { "epoch": 0.07, "grad_norm": 1.7788227927254787, "learning_rate": 9.961626513488572e-06, "loss": 0.8152, "step": 1067 }, { "epoch": 0.07, "grad_norm": 2.047386777703436, "learning_rate": 9.961498231863142e-06, "loss": 0.9131, "step": 1068 }, { "epoch": 0.07, "grad_norm": 1.8125078932890284, "learning_rate": 9.961369737002943e-06, "loss": 0.9038, "step": 1069 }, { "epoch": 0.07, "grad_norm": 2.16794090628463, "learning_rate": 9.9612410289135e-06, "loss": 0.8589, "step": 1070 }, { "epoch": 0.07, "grad_norm": 2.1680446310961665, "learning_rate": 9.961112107600343e-06, "loss": 0.8817, "step": 1071 }, { "epoch": 0.07, "grad_norm": 1.447309533323782, "learning_rate": 9.960982973069011e-06, "loss": 0.8124, "step": 1072 }, { "epoch": 0.07, "grad_norm": 2.013596451719237, "learning_rate": 9.960853625325056e-06, "loss": 0.9185, "step": 1073 }, { "epoch": 0.07, "grad_norm": 2.3799075994575634, "learning_rate": 9.960724064374035e-06, "loss": 0.8888, "step": 1074 }, { "epoch": 0.07, "grad_norm": 1.0680564633713217, "learning_rate": 9.960594290221519e-06, "loss": 0.6394, "step": 1075 }, { "epoch": 0.07, "grad_norm": 1.628805004258654, "learning_rate": 9.960464302873084e-06, "loss": 0.8459, "step": 1076 }, { "epoch": 0.07, "grad_norm": 1.3436757882963513, "learning_rate": 9.960334102334316e-06, "loss": 0.6372, "step": 1077 }, { "epoch": 0.07, "grad_norm": 1.624120120072572, "learning_rate": 9.960203688610813e-06, "loss": 0.765, "step": 1078 }, { "epoch": 0.07, "grad_norm": 1.6120199919359086, "learning_rate": 9.960073061708176e-06, "loss": 0.8823, "step": 1079 }, { "epoch": 0.07, "grad_norm": 1.6884476403073017, "learning_rate": 9.959942221632021e-06, "loss": 0.8148, "step": 1080 }, { "epoch": 0.07, "grad_norm": 1.6880928600440637, "learning_rate": 9.959811168387974e-06, "loss": 0.8871, "step": 1081 }, { "epoch": 0.07, "grad_norm": 2.0507514701676053, "learning_rate": 9.959679901981663e-06, "loss": 0.854, "step": 1082 }, { "epoch": 0.07, "grad_norm": 1.8101031705720099, "learning_rate": 9.959548422418734e-06, "loss": 0.8402, "step": 1083 }, { "epoch": 0.07, "grad_norm": 1.7042909202526217, "learning_rate": 9.959416729704832e-06, "loss": 0.9201, "step": 1084 }, { "epoch": 0.07, "grad_norm": 1.9645428598880132, "learning_rate": 9.959284823845623e-06, "loss": 0.7901, "step": 1085 }, { "epoch": 0.07, "grad_norm": 1.6384542617420734, "learning_rate": 9.959152704846771e-06, "loss": 0.928, "step": 1086 }, { "epoch": 0.07, "grad_norm": 2.0328237213668237, "learning_rate": 9.959020372713959e-06, "loss": 0.7899, "step": 1087 }, { "epoch": 0.07, "grad_norm": 1.9141859230427891, "learning_rate": 9.95888782745287e-06, "loss": 0.9521, "step": 1088 }, { "epoch": 0.07, "grad_norm": 1.5990259751361382, "learning_rate": 9.958755069069202e-06, "loss": 0.8783, "step": 1089 }, { "epoch": 0.07, "grad_norm": 1.860011112384882, "learning_rate": 9.95862209756866e-06, "loss": 1.0276, "step": 1090 }, { "epoch": 0.07, "grad_norm": 1.5997217617397195, "learning_rate": 9.958488912956961e-06, "loss": 0.8682, "step": 1091 }, { "epoch": 0.07, "grad_norm": 2.772697338634492, "learning_rate": 9.958355515239828e-06, "loss": 0.8778, "step": 1092 }, { "epoch": 0.07, "grad_norm": 1.6696225654258405, "learning_rate": 9.958221904422993e-06, "loss": 0.9432, "step": 1093 }, { "epoch": 0.07, "grad_norm": 1.7145049515356237, "learning_rate": 9.9580880805122e-06, "loss": 0.8323, "step": 1094 }, { "epoch": 0.07, "grad_norm": 1.6785773837021298, "learning_rate": 9.9579540435132e-06, "loss": 0.9147, "step": 1095 }, { "epoch": 0.07, "grad_norm": 1.6946952213013935, "learning_rate": 9.957819793431754e-06, "loss": 0.6334, "step": 1096 }, { "epoch": 0.07, "grad_norm": 1.7629300174916556, "learning_rate": 9.957685330273628e-06, "loss": 0.9106, "step": 1097 }, { "epoch": 0.07, "grad_norm": 1.9389064113805474, "learning_rate": 9.957550654044606e-06, "loss": 0.7974, "step": 1098 }, { "epoch": 0.07, "grad_norm": 1.6429148758151972, "learning_rate": 9.957415764750474e-06, "loss": 0.8664, "step": 1099 }, { "epoch": 0.07, "grad_norm": 1.555871745642821, "learning_rate": 9.957280662397031e-06, "loss": 0.6312, "step": 1100 }, { "epoch": 0.07, "grad_norm": 1.892789311450515, "learning_rate": 9.957145346990079e-06, "loss": 0.8079, "step": 1101 }, { "epoch": 0.07, "grad_norm": 1.6772251351291991, "learning_rate": 9.957009818535438e-06, "loss": 0.8606, "step": 1102 }, { "epoch": 0.07, "grad_norm": 1.7449996586622891, "learning_rate": 9.95687407703893e-06, "loss": 0.9148, "step": 1103 }, { "epoch": 0.07, "grad_norm": 1.604790997459788, "learning_rate": 9.95673812250639e-06, "loss": 0.8333, "step": 1104 }, { "epoch": 0.07, "grad_norm": 1.691233016224419, "learning_rate": 9.956601954943664e-06, "loss": 0.9143, "step": 1105 }, { "epoch": 0.07, "grad_norm": 0.9808698642548703, "learning_rate": 9.956465574356598e-06, "loss": 0.6724, "step": 1106 }, { "epoch": 0.07, "grad_norm": 1.6096364921755124, "learning_rate": 9.956328980751057e-06, "loss": 0.806, "step": 1107 }, { "epoch": 0.07, "grad_norm": 2.8055249608802963, "learning_rate": 9.956192174132912e-06, "loss": 1.0131, "step": 1108 }, { "epoch": 0.07, "grad_norm": 2.262861574982693, "learning_rate": 9.956055154508042e-06, "loss": 0.8632, "step": 1109 }, { "epoch": 0.07, "grad_norm": 1.7007833993173278, "learning_rate": 9.955917921882334e-06, "loss": 0.8582, "step": 1110 }, { "epoch": 0.07, "grad_norm": 1.9003533335798117, "learning_rate": 9.955780476261689e-06, "loss": 0.8816, "step": 1111 }, { "epoch": 0.07, "grad_norm": 1.7668555986750645, "learning_rate": 9.955642817652012e-06, "loss": 0.9264, "step": 1112 }, { "epoch": 0.07, "grad_norm": 2.1568689733560156, "learning_rate": 9.955504946059221e-06, "loss": 0.8369, "step": 1113 }, { "epoch": 0.07, "grad_norm": 1.1251603911274703, "learning_rate": 9.95536686148924e-06, "loss": 0.6961, "step": 1114 }, { "epoch": 0.07, "grad_norm": 1.3958712877796706, "learning_rate": 9.955228563948003e-06, "loss": 0.5777, "step": 1115 }, { "epoch": 0.07, "grad_norm": 2.034804588677997, "learning_rate": 9.955090053441455e-06, "loss": 0.8979, "step": 1116 }, { "epoch": 0.07, "grad_norm": 1.8298913231388476, "learning_rate": 9.954951329975549e-06, "loss": 0.9391, "step": 1117 }, { "epoch": 0.07, "grad_norm": 1.13891489621275, "learning_rate": 9.954812393556245e-06, "loss": 0.8031, "step": 1118 }, { "epoch": 0.07, "grad_norm": 1.686571717276944, "learning_rate": 9.954673244189518e-06, "loss": 0.877, "step": 1119 }, { "epoch": 0.07, "grad_norm": 1.3662609461466806, "learning_rate": 9.954533881881346e-06, "loss": 0.7548, "step": 1120 }, { "epoch": 0.07, "grad_norm": 1.760561537704463, "learning_rate": 9.954394306637719e-06, "loss": 0.9564, "step": 1121 }, { "epoch": 0.07, "grad_norm": 1.85747743253911, "learning_rate": 9.954254518464633e-06, "loss": 0.8478, "step": 1122 }, { "epoch": 0.07, "grad_norm": 1.6117709662979611, "learning_rate": 9.9541145173681e-06, "loss": 0.9485, "step": 1123 }, { "epoch": 0.07, "grad_norm": 1.8542611738355086, "learning_rate": 9.953974303354136e-06, "loss": 0.9443, "step": 1124 }, { "epoch": 0.07, "grad_norm": 1.732351012206619, "learning_rate": 9.953833876428763e-06, "loss": 0.8356, "step": 1125 }, { "epoch": 0.07, "grad_norm": 1.7573914923225062, "learning_rate": 9.95369323659802e-06, "loss": 0.8511, "step": 1126 }, { "epoch": 0.07, "grad_norm": 1.817284489416925, "learning_rate": 9.953552383867953e-06, "loss": 0.8926, "step": 1127 }, { "epoch": 0.07, "grad_norm": 1.726225600917723, "learning_rate": 9.953411318244612e-06, "loss": 0.8562, "step": 1128 }, { "epoch": 0.07, "grad_norm": 1.7418003585409065, "learning_rate": 9.953270039734063e-06, "loss": 0.8948, "step": 1129 }, { "epoch": 0.07, "grad_norm": 1.7370775388888877, "learning_rate": 9.953128548342372e-06, "loss": 0.8399, "step": 1130 }, { "epoch": 0.07, "grad_norm": 1.1852352837136948, "learning_rate": 9.952986844075628e-06, "loss": 0.6518, "step": 1131 }, { "epoch": 0.07, "grad_norm": 1.8785361624909864, "learning_rate": 9.952844926939916e-06, "loss": 0.9102, "step": 1132 }, { "epoch": 0.07, "grad_norm": 2.031603246674942, "learning_rate": 9.952702796941334e-06, "loss": 0.7628, "step": 1133 }, { "epoch": 0.07, "grad_norm": 2.0002239395736483, "learning_rate": 9.952560454085995e-06, "loss": 0.8043, "step": 1134 }, { "epoch": 0.07, "grad_norm": 1.911376955551564, "learning_rate": 9.952417898380015e-06, "loss": 1.0345, "step": 1135 }, { "epoch": 0.07, "grad_norm": 1.8696006689839455, "learning_rate": 9.952275129829518e-06, "loss": 0.8537, "step": 1136 }, { "epoch": 0.07, "grad_norm": 1.7462441680664034, "learning_rate": 9.952132148440644e-06, "loss": 0.8632, "step": 1137 }, { "epoch": 0.07, "grad_norm": 1.8133334633230467, "learning_rate": 9.951988954219535e-06, "loss": 0.9155, "step": 1138 }, { "epoch": 0.07, "grad_norm": 1.6970837255309317, "learning_rate": 9.951845547172347e-06, "loss": 1.0893, "step": 1139 }, { "epoch": 0.07, "grad_norm": 1.657722470094953, "learning_rate": 9.951701927305244e-06, "loss": 0.8698, "step": 1140 }, { "epoch": 0.07, "grad_norm": 1.743786100077396, "learning_rate": 9.951558094624395e-06, "loss": 0.8883, "step": 1141 }, { "epoch": 0.07, "grad_norm": 1.0703571620757355, "learning_rate": 9.951414049135984e-06, "loss": 0.6875, "step": 1142 }, { "epoch": 0.07, "grad_norm": 1.8693121907866501, "learning_rate": 9.951269790846204e-06, "loss": 0.942, "step": 1143 }, { "epoch": 0.07, "grad_norm": 1.1544521602431757, "learning_rate": 9.951125319761248e-06, "loss": 0.6567, "step": 1144 }, { "epoch": 0.07, "grad_norm": 1.7263844520298852, "learning_rate": 9.950980635887332e-06, "loss": 0.8973, "step": 1145 }, { "epoch": 0.07, "grad_norm": 1.8252508155754639, "learning_rate": 9.950835739230671e-06, "loss": 0.8267, "step": 1146 }, { "epoch": 0.07, "grad_norm": 2.8637713403618625, "learning_rate": 9.950690629797494e-06, "loss": 0.9791, "step": 1147 }, { "epoch": 0.07, "grad_norm": 1.2006110110667718, "learning_rate": 9.950545307594037e-06, "loss": 0.6537, "step": 1148 }, { "epoch": 0.07, "grad_norm": 2.0379432987594517, "learning_rate": 9.950399772626545e-06, "loss": 0.8012, "step": 1149 }, { "epoch": 0.07, "grad_norm": 2.1184477748275774, "learning_rate": 9.95025402490127e-06, "loss": 0.7686, "step": 1150 }, { "epoch": 0.07, "grad_norm": 1.8492382596997772, "learning_rate": 9.950108064424482e-06, "loss": 0.8752, "step": 1151 }, { "epoch": 0.07, "grad_norm": 1.7762200572418232, "learning_rate": 9.94996189120245e-06, "loss": 0.8717, "step": 1152 }, { "epoch": 0.07, "grad_norm": 1.8139696335701812, "learning_rate": 9.949815505241458e-06, "loss": 0.886, "step": 1153 }, { "epoch": 0.07, "grad_norm": 2.036448677755117, "learning_rate": 9.949668906547798e-06, "loss": 0.9474, "step": 1154 }, { "epoch": 0.07, "grad_norm": 1.0755473356291947, "learning_rate": 9.949522095127765e-06, "loss": 0.7566, "step": 1155 }, { "epoch": 0.07, "grad_norm": 1.029929802506312, "learning_rate": 9.949375070987676e-06, "loss": 0.6346, "step": 1156 }, { "epoch": 0.07, "grad_norm": 1.9865432233866953, "learning_rate": 9.949227834133845e-06, "loss": 0.8062, "step": 1157 }, { "epoch": 0.07, "grad_norm": 1.8158695994501792, "learning_rate": 9.949080384572602e-06, "loss": 0.8102, "step": 1158 }, { "epoch": 0.07, "grad_norm": 1.7304359464942562, "learning_rate": 9.948932722310283e-06, "loss": 0.8589, "step": 1159 }, { "epoch": 0.07, "grad_norm": 1.0937038202470797, "learning_rate": 9.948784847353237e-06, "loss": 0.7491, "step": 1160 }, { "epoch": 0.07, "grad_norm": 2.1985326555651254, "learning_rate": 9.948636759707815e-06, "loss": 0.8613, "step": 1161 }, { "epoch": 0.07, "grad_norm": 1.6301517575547695, "learning_rate": 9.948488459380382e-06, "loss": 0.8719, "step": 1162 }, { "epoch": 0.07, "grad_norm": 2.770068279039453, "learning_rate": 9.948339946377316e-06, "loss": 0.8272, "step": 1163 }, { "epoch": 0.07, "grad_norm": 1.8004995799384809, "learning_rate": 9.948191220704996e-06, "loss": 0.8789, "step": 1164 }, { "epoch": 0.07, "grad_norm": 1.1781290834940805, "learning_rate": 9.948042282369816e-06, "loss": 0.6567, "step": 1165 }, { "epoch": 0.07, "grad_norm": 1.6241265236952422, "learning_rate": 9.947893131378174e-06, "loss": 0.8746, "step": 1166 }, { "epoch": 0.07, "grad_norm": 1.9150969379797251, "learning_rate": 9.947743767736485e-06, "loss": 0.7778, "step": 1167 }, { "epoch": 0.07, "grad_norm": 1.9167293296282533, "learning_rate": 9.947594191451162e-06, "loss": 0.8993, "step": 1168 }, { "epoch": 0.07, "grad_norm": 3.8691982342564724, "learning_rate": 9.947444402528638e-06, "loss": 0.9219, "step": 1169 }, { "epoch": 0.07, "grad_norm": 1.5586804942471486, "learning_rate": 9.947294400975352e-06, "loss": 0.8852, "step": 1170 }, { "epoch": 0.07, "grad_norm": 1.7292194161475523, "learning_rate": 9.947144186797747e-06, "loss": 0.9048, "step": 1171 }, { "epoch": 0.08, "grad_norm": 1.900768326216793, "learning_rate": 9.946993760002278e-06, "loss": 1.0065, "step": 1172 }, { "epoch": 0.08, "grad_norm": 1.7705176629114288, "learning_rate": 9.946843120595415e-06, "loss": 0.929, "step": 1173 }, { "epoch": 0.08, "grad_norm": 1.5629657826446788, "learning_rate": 9.946692268583628e-06, "loss": 0.8475, "step": 1174 }, { "epoch": 0.08, "grad_norm": 1.8193921896797525, "learning_rate": 9.946541203973402e-06, "loss": 0.9775, "step": 1175 }, { "epoch": 0.08, "grad_norm": 1.6074979974866168, "learning_rate": 9.94638992677123e-06, "loss": 0.8897, "step": 1176 }, { "epoch": 0.08, "grad_norm": 1.8943903488263987, "learning_rate": 9.94623843698361e-06, "loss": 0.9498, "step": 1177 }, { "epoch": 0.08, "grad_norm": 1.8181296337022896, "learning_rate": 9.946086734617058e-06, "loss": 0.8583, "step": 1178 }, { "epoch": 0.08, "grad_norm": 1.8343411905639577, "learning_rate": 9.945934819678092e-06, "loss": 0.8463, "step": 1179 }, { "epoch": 0.08, "grad_norm": 1.8885379062801368, "learning_rate": 9.945782692173239e-06, "loss": 0.8242, "step": 1180 }, { "epoch": 0.08, "grad_norm": 1.697974894016267, "learning_rate": 9.945630352109039e-06, "loss": 0.7885, "step": 1181 }, { "epoch": 0.08, "grad_norm": 2.1811977869385206, "learning_rate": 9.945477799492038e-06, "loss": 0.966, "step": 1182 }, { "epoch": 0.08, "grad_norm": 1.3494089507240956, "learning_rate": 9.945325034328791e-06, "loss": 0.6698, "step": 1183 }, { "epoch": 0.08, "grad_norm": 2.088018603440814, "learning_rate": 9.94517205662587e-06, "loss": 0.8936, "step": 1184 }, { "epoch": 0.08, "grad_norm": 1.7457839539678188, "learning_rate": 9.945018866389844e-06, "loss": 0.8046, "step": 1185 }, { "epoch": 0.08, "grad_norm": 1.96400814737633, "learning_rate": 9.944865463627295e-06, "loss": 0.834, "step": 1186 }, { "epoch": 0.08, "grad_norm": 1.165541774674652, "learning_rate": 9.944711848344822e-06, "loss": 0.6871, "step": 1187 }, { "epoch": 0.08, "grad_norm": 1.777146245905601, "learning_rate": 9.944558020549024e-06, "loss": 0.9295, "step": 1188 }, { "epoch": 0.08, "grad_norm": 1.9393290800605374, "learning_rate": 9.94440398024651e-06, "loss": 0.9394, "step": 1189 }, { "epoch": 0.08, "grad_norm": 1.1568961683224293, "learning_rate": 9.944249727443904e-06, "loss": 0.6713, "step": 1190 }, { "epoch": 0.08, "grad_norm": 2.241890268653499, "learning_rate": 9.944095262147835e-06, "loss": 0.8697, "step": 1191 }, { "epoch": 0.08, "grad_norm": 1.788405810710824, "learning_rate": 9.94394058436494e-06, "loss": 0.9543, "step": 1192 }, { "epoch": 0.08, "grad_norm": 1.7864684964476847, "learning_rate": 9.943785694101865e-06, "loss": 0.8803, "step": 1193 }, { "epoch": 0.08, "grad_norm": 1.401493536348756, "learning_rate": 9.943630591365272e-06, "loss": 0.7875, "step": 1194 }, { "epoch": 0.08, "grad_norm": 1.3454202535757436, "learning_rate": 9.943475276161823e-06, "loss": 0.7282, "step": 1195 }, { "epoch": 0.08, "grad_norm": 1.4182666741495087, "learning_rate": 9.943319748498195e-06, "loss": 0.6373, "step": 1196 }, { "epoch": 0.08, "grad_norm": 1.8224248387223205, "learning_rate": 9.943164008381072e-06, "loss": 0.9001, "step": 1197 }, { "epoch": 0.08, "grad_norm": 2.151020187818216, "learning_rate": 9.943008055817145e-06, "loss": 0.8896, "step": 1198 }, { "epoch": 0.08, "grad_norm": 1.8577285737890235, "learning_rate": 9.94285189081312e-06, "loss": 0.8714, "step": 1199 }, { "epoch": 0.08, "grad_norm": 1.7926616452806319, "learning_rate": 9.942695513375707e-06, "loss": 0.8657, "step": 1200 }, { "epoch": 0.08, "grad_norm": 1.7768641308502442, "learning_rate": 9.942538923511627e-06, "loss": 0.9196, "step": 1201 }, { "epoch": 0.08, "grad_norm": 1.846343941877003, "learning_rate": 9.942382121227608e-06, "loss": 0.8084, "step": 1202 }, { "epoch": 0.08, "grad_norm": 1.8248632272800602, "learning_rate": 9.942225106530391e-06, "loss": 0.86, "step": 1203 }, { "epoch": 0.08, "grad_norm": 1.7876636965426256, "learning_rate": 9.942067879426727e-06, "loss": 0.9874, "step": 1204 }, { "epoch": 0.08, "grad_norm": 1.9759815445481221, "learning_rate": 9.941910439923367e-06, "loss": 0.8421, "step": 1205 }, { "epoch": 0.08, "grad_norm": 1.9527714800761873, "learning_rate": 9.941752788027083e-06, "loss": 1.025, "step": 1206 }, { "epoch": 0.08, "grad_norm": 2.1591878267106743, "learning_rate": 9.941594923744647e-06, "loss": 0.8948, "step": 1207 }, { "epoch": 0.08, "grad_norm": 1.637900172660432, "learning_rate": 9.941436847082845e-06, "loss": 0.7812, "step": 1208 }, { "epoch": 0.08, "grad_norm": 1.9376073078729936, "learning_rate": 9.941278558048471e-06, "loss": 0.8578, "step": 1209 }, { "epoch": 0.08, "grad_norm": 2.1456604154631327, "learning_rate": 9.941120056648329e-06, "loss": 0.9125, "step": 1210 }, { "epoch": 0.08, "grad_norm": 1.8653185169940938, "learning_rate": 9.940961342889225e-06, "loss": 0.86, "step": 1211 }, { "epoch": 0.08, "grad_norm": 1.4666393108456777, "learning_rate": 9.94080241677799e-06, "loss": 0.7714, "step": 1212 }, { "epoch": 0.08, "grad_norm": 1.515149476310531, "learning_rate": 9.940643278321447e-06, "loss": 0.7518, "step": 1213 }, { "epoch": 0.08, "grad_norm": 2.0903845583140326, "learning_rate": 9.940483927526438e-06, "loss": 0.9588, "step": 1214 }, { "epoch": 0.08, "grad_norm": 2.1468483950683215, "learning_rate": 9.94032436439981e-06, "loss": 0.8658, "step": 1215 }, { "epoch": 0.08, "grad_norm": 1.7602195571402386, "learning_rate": 9.940164588948424e-06, "loss": 0.8356, "step": 1216 }, { "epoch": 0.08, "grad_norm": 1.6256042342147305, "learning_rate": 9.940004601179143e-06, "loss": 0.9036, "step": 1217 }, { "epoch": 0.08, "grad_norm": 1.6471737011853986, "learning_rate": 9.939844401098845e-06, "loss": 0.8122, "step": 1218 }, { "epoch": 0.08, "grad_norm": 1.736522269973178, "learning_rate": 9.939683988714415e-06, "loss": 0.7769, "step": 1219 }, { "epoch": 0.08, "grad_norm": 1.8039040397429644, "learning_rate": 9.939523364032746e-06, "loss": 0.8762, "step": 1220 }, { "epoch": 0.08, "grad_norm": 1.6730217529655873, "learning_rate": 9.939362527060743e-06, "loss": 0.8288, "step": 1221 }, { "epoch": 0.08, "grad_norm": 1.8889700517348156, "learning_rate": 9.939201477805318e-06, "loss": 0.8345, "step": 1222 }, { "epoch": 0.08, "grad_norm": 1.7141504918135646, "learning_rate": 9.939040216273392e-06, "loss": 0.8268, "step": 1223 }, { "epoch": 0.08, "grad_norm": 1.119030638683063, "learning_rate": 9.938878742471896e-06, "loss": 0.5972, "step": 1224 }, { "epoch": 0.08, "grad_norm": 2.076657286622019, "learning_rate": 9.938717056407768e-06, "loss": 0.8876, "step": 1225 }, { "epoch": 0.08, "grad_norm": 2.003047531945825, "learning_rate": 9.93855515808796e-06, "loss": 0.7401, "step": 1226 }, { "epoch": 0.08, "grad_norm": 1.5390588603657775, "learning_rate": 9.938393047519428e-06, "loss": 0.7759, "step": 1227 }, { "epoch": 0.08, "grad_norm": 1.828310727876585, "learning_rate": 9.938230724709141e-06, "loss": 0.8073, "step": 1228 }, { "epoch": 0.08, "grad_norm": 2.5538942206940627, "learning_rate": 9.938068189664073e-06, "loss": 0.9088, "step": 1229 }, { "epoch": 0.08, "grad_norm": 1.7198741814145349, "learning_rate": 9.937905442391211e-06, "loss": 0.9211, "step": 1230 }, { "epoch": 0.08, "grad_norm": 1.7045172859141113, "learning_rate": 9.937742482897549e-06, "loss": 0.9222, "step": 1231 }, { "epoch": 0.08, "grad_norm": 1.572766930573158, "learning_rate": 9.937579311190092e-06, "loss": 0.8448, "step": 1232 }, { "epoch": 0.08, "grad_norm": 2.0745487727552, "learning_rate": 9.937415927275848e-06, "loss": 0.7007, "step": 1233 }, { "epoch": 0.08, "grad_norm": 1.5503513043769905, "learning_rate": 9.937252331161845e-06, "loss": 0.8231, "step": 1234 }, { "epoch": 0.08, "grad_norm": 1.33782367827833, "learning_rate": 9.937088522855111e-06, "loss": 0.7175, "step": 1235 }, { "epoch": 0.08, "grad_norm": 2.0491003226319333, "learning_rate": 9.936924502362687e-06, "loss": 0.9393, "step": 1236 }, { "epoch": 0.08, "grad_norm": 1.7906141385649383, "learning_rate": 9.936760269691621e-06, "loss": 0.7978, "step": 1237 }, { "epoch": 0.08, "grad_norm": 1.023154245461843, "learning_rate": 9.936595824848972e-06, "loss": 0.6168, "step": 1238 }, { "epoch": 0.08, "grad_norm": 1.5848002206314618, "learning_rate": 9.936431167841808e-06, "loss": 0.8343, "step": 1239 }, { "epoch": 0.08, "grad_norm": 1.7636141617478698, "learning_rate": 9.936266298677206e-06, "loss": 0.8417, "step": 1240 }, { "epoch": 0.08, "grad_norm": 1.525339573216191, "learning_rate": 9.93610121736225e-06, "loss": 0.7167, "step": 1241 }, { "epoch": 0.08, "grad_norm": 1.842543630079003, "learning_rate": 9.935935923904037e-06, "loss": 0.8871, "step": 1242 }, { "epoch": 0.08, "grad_norm": 1.604338847443962, "learning_rate": 9.935770418309667e-06, "loss": 0.798, "step": 1243 }, { "epoch": 0.08, "grad_norm": 1.784051597301534, "learning_rate": 9.93560470058626e-06, "loss": 0.8719, "step": 1244 }, { "epoch": 0.08, "grad_norm": 1.7199807789245967, "learning_rate": 9.93543877074093e-06, "loss": 0.7734, "step": 1245 }, { "epoch": 0.08, "grad_norm": 1.4654365049092446, "learning_rate": 9.935272628780815e-06, "loss": 0.7195, "step": 1246 }, { "epoch": 0.08, "grad_norm": 1.6469192025479382, "learning_rate": 9.935106274713053e-06, "loss": 0.7871, "step": 1247 }, { "epoch": 0.08, "grad_norm": 1.9787251959311118, "learning_rate": 9.934939708544792e-06, "loss": 0.8148, "step": 1248 }, { "epoch": 0.08, "grad_norm": 1.688033386733923, "learning_rate": 9.934772930283194e-06, "loss": 0.7795, "step": 1249 }, { "epoch": 0.08, "grad_norm": 2.632882174459937, "learning_rate": 9.934605939935423e-06, "loss": 0.9248, "step": 1250 }, { "epoch": 0.08, "grad_norm": 1.7468105104966776, "learning_rate": 9.934438737508658e-06, "loss": 0.8406, "step": 1251 }, { "epoch": 0.08, "grad_norm": 1.6433835421938794, "learning_rate": 9.934271323010085e-06, "loss": 0.9513, "step": 1252 }, { "epoch": 0.08, "grad_norm": 1.7763186593424225, "learning_rate": 9.9341036964469e-06, "loss": 0.8855, "step": 1253 }, { "epoch": 0.08, "grad_norm": 1.7099960649075807, "learning_rate": 9.933935857826304e-06, "loss": 0.8505, "step": 1254 }, { "epoch": 0.08, "grad_norm": 1.7423423705926417, "learning_rate": 9.933767807155515e-06, "loss": 0.8945, "step": 1255 }, { "epoch": 0.08, "grad_norm": 2.14750589664642, "learning_rate": 9.933599544441752e-06, "loss": 0.9566, "step": 1256 }, { "epoch": 0.08, "grad_norm": 2.0079839729308993, "learning_rate": 9.933431069692245e-06, "loss": 0.7514, "step": 1257 }, { "epoch": 0.08, "grad_norm": 2.0439246323293747, "learning_rate": 9.933262382914239e-06, "loss": 0.8013, "step": 1258 }, { "epoch": 0.08, "grad_norm": 1.664317111203543, "learning_rate": 9.933093484114983e-06, "loss": 0.8378, "step": 1259 }, { "epoch": 0.08, "grad_norm": 1.6668626153418147, "learning_rate": 9.932924373301735e-06, "loss": 0.9188, "step": 1260 }, { "epoch": 0.08, "grad_norm": 1.1762334618733983, "learning_rate": 9.932755050481762e-06, "loss": 0.8, "step": 1261 }, { "epoch": 0.08, "grad_norm": 1.6226355645475077, "learning_rate": 9.932585515662341e-06, "loss": 0.822, "step": 1262 }, { "epoch": 0.08, "grad_norm": 1.8913411594510245, "learning_rate": 9.93241576885076e-06, "loss": 0.8588, "step": 1263 }, { "epoch": 0.08, "grad_norm": 1.1917839715982927, "learning_rate": 9.932245810054315e-06, "loss": 0.6729, "step": 1264 }, { "epoch": 0.08, "grad_norm": 4.154303082956973, "learning_rate": 9.932075639280308e-06, "loss": 0.8371, "step": 1265 }, { "epoch": 0.08, "grad_norm": 1.096026164998411, "learning_rate": 9.931905256536053e-06, "loss": 0.7183, "step": 1266 }, { "epoch": 0.08, "grad_norm": 1.63448620825102, "learning_rate": 9.931734661828876e-06, "loss": 0.8676, "step": 1267 }, { "epoch": 0.08, "grad_norm": 2.1548082709937812, "learning_rate": 9.931563855166104e-06, "loss": 0.7554, "step": 1268 }, { "epoch": 0.08, "grad_norm": 1.9587766089126528, "learning_rate": 9.931392836555081e-06, "loss": 0.8455, "step": 1269 }, { "epoch": 0.08, "grad_norm": 1.2379980711243848, "learning_rate": 9.931221606003156e-06, "loss": 0.6937, "step": 1270 }, { "epoch": 0.08, "grad_norm": 1.7324756347244754, "learning_rate": 9.931050163517688e-06, "loss": 0.8805, "step": 1271 }, { "epoch": 0.08, "grad_norm": 1.766315163651542, "learning_rate": 9.930878509106046e-06, "loss": 0.7693, "step": 1272 }, { "epoch": 0.08, "grad_norm": 1.8950171794201789, "learning_rate": 9.930706642775607e-06, "loss": 0.7741, "step": 1273 }, { "epoch": 0.08, "grad_norm": 1.8053540390668794, "learning_rate": 9.930534564533757e-06, "loss": 0.8016, "step": 1274 }, { "epoch": 0.08, "grad_norm": 1.29143093912742, "learning_rate": 9.930362274387892e-06, "loss": 0.7253, "step": 1275 }, { "epoch": 0.08, "grad_norm": 1.8413843470492874, "learning_rate": 9.930189772345416e-06, "loss": 0.855, "step": 1276 }, { "epoch": 0.08, "grad_norm": 1.2549591004876846, "learning_rate": 9.930017058413745e-06, "loss": 0.7113, "step": 1277 }, { "epoch": 0.08, "grad_norm": 1.7113630842220238, "learning_rate": 9.929844132600299e-06, "loss": 0.9284, "step": 1278 }, { "epoch": 0.08, "grad_norm": 1.9059513038274887, "learning_rate": 9.92967099491251e-06, "loss": 0.7015, "step": 1279 }, { "epoch": 0.08, "grad_norm": 1.8567973763435237, "learning_rate": 9.929497645357822e-06, "loss": 0.9185, "step": 1280 }, { "epoch": 0.08, "grad_norm": 1.1914106000841722, "learning_rate": 9.929324083943683e-06, "loss": 0.7629, "step": 1281 }, { "epoch": 0.08, "grad_norm": 1.9318673515418119, "learning_rate": 9.929150310677553e-06, "loss": 0.9398, "step": 1282 }, { "epoch": 0.08, "grad_norm": 2.0956132495492543, "learning_rate": 9.9289763255669e-06, "loss": 0.8525, "step": 1283 }, { "epoch": 0.08, "grad_norm": 1.6409098301358342, "learning_rate": 9.928802128619201e-06, "loss": 0.8006, "step": 1284 }, { "epoch": 0.08, "grad_norm": 1.7486444181802465, "learning_rate": 9.928627719841945e-06, "loss": 0.8189, "step": 1285 }, { "epoch": 0.08, "grad_norm": 2.2116322770030052, "learning_rate": 9.928453099242625e-06, "loss": 0.7386, "step": 1286 }, { "epoch": 0.08, "grad_norm": 1.5388101709414617, "learning_rate": 9.928278266828747e-06, "loss": 0.8139, "step": 1287 }, { "epoch": 0.08, "grad_norm": 2.2229764485363765, "learning_rate": 9.928103222607825e-06, "loss": 0.9125, "step": 1288 }, { "epoch": 0.08, "grad_norm": 1.672149174017893, "learning_rate": 9.927927966587383e-06, "loss": 0.7828, "step": 1289 }, { "epoch": 0.08, "grad_norm": 1.8846882024539933, "learning_rate": 9.92775249877495e-06, "loss": 0.9318, "step": 1290 }, { "epoch": 0.08, "grad_norm": 1.9261968547904673, "learning_rate": 9.927576819178071e-06, "loss": 0.9565, "step": 1291 }, { "epoch": 0.08, "grad_norm": 1.6825319477945013, "learning_rate": 9.927400927804294e-06, "loss": 0.84, "step": 1292 }, { "epoch": 0.08, "grad_norm": 1.767930104487957, "learning_rate": 9.927224824661179e-06, "loss": 0.9208, "step": 1293 }, { "epoch": 0.08, "grad_norm": 1.9481312563416886, "learning_rate": 9.927048509756294e-06, "loss": 0.7921, "step": 1294 }, { "epoch": 0.08, "grad_norm": 2.6529595100304264, "learning_rate": 9.926871983097217e-06, "loss": 0.809, "step": 1295 }, { "epoch": 0.08, "grad_norm": 1.6861943803116894, "learning_rate": 9.926695244691536e-06, "loss": 0.7755, "step": 1296 }, { "epoch": 0.08, "grad_norm": 2.1512042421417092, "learning_rate": 9.926518294546846e-06, "loss": 0.8482, "step": 1297 }, { "epoch": 0.08, "grad_norm": 1.9936283727215498, "learning_rate": 9.92634113267075e-06, "loss": 0.802, "step": 1298 }, { "epoch": 0.08, "grad_norm": 2.2087198920306794, "learning_rate": 9.926163759070863e-06, "loss": 1.0424, "step": 1299 }, { "epoch": 0.08, "grad_norm": 1.6254897046448338, "learning_rate": 9.92598617375481e-06, "loss": 0.833, "step": 1300 }, { "epoch": 0.08, "grad_norm": 1.195603705303048, "learning_rate": 9.925808376730225e-06, "loss": 0.6458, "step": 1301 }, { "epoch": 0.08, "grad_norm": 1.6728396654819055, "learning_rate": 9.925630368004744e-06, "loss": 0.8517, "step": 1302 }, { "epoch": 0.08, "grad_norm": 1.7125989914168895, "learning_rate": 9.925452147586022e-06, "loss": 0.734, "step": 1303 }, { "epoch": 0.08, "grad_norm": 1.4990924516152513, "learning_rate": 9.925273715481713e-06, "loss": 0.8185, "step": 1304 }, { "epoch": 0.08, "grad_norm": 3.7477679274270823, "learning_rate": 9.925095071699491e-06, "loss": 0.8146, "step": 1305 }, { "epoch": 0.08, "grad_norm": 1.2287366529343289, "learning_rate": 9.924916216247033e-06, "loss": 0.7048, "step": 1306 }, { "epoch": 0.08, "grad_norm": 1.6378937850990238, "learning_rate": 9.924737149132022e-06, "loss": 1.008, "step": 1307 }, { "epoch": 0.08, "grad_norm": 2.6109145801589673, "learning_rate": 9.92455787036216e-06, "loss": 0.8067, "step": 1308 }, { "epoch": 0.08, "grad_norm": 1.7872154517871814, "learning_rate": 9.924378379945145e-06, "loss": 0.8184, "step": 1309 }, { "epoch": 0.08, "grad_norm": 1.610378135491679, "learning_rate": 9.924198677888696e-06, "loss": 0.6863, "step": 1310 }, { "epoch": 0.08, "grad_norm": 2.0055422337276956, "learning_rate": 9.924018764200538e-06, "loss": 0.8823, "step": 1311 }, { "epoch": 0.08, "grad_norm": 1.9630406872067594, "learning_rate": 9.923838638888397e-06, "loss": 0.8225, "step": 1312 }, { "epoch": 0.08, "grad_norm": 1.6604124349230147, "learning_rate": 9.923658301960018e-06, "loss": 0.774, "step": 1313 }, { "epoch": 0.08, "grad_norm": 1.1716563867172776, "learning_rate": 9.923477753423151e-06, "loss": 0.7245, "step": 1314 }, { "epoch": 0.08, "grad_norm": 1.7264386787015928, "learning_rate": 9.923296993285558e-06, "loss": 0.9756, "step": 1315 }, { "epoch": 0.08, "grad_norm": 1.7875760583118032, "learning_rate": 9.923116021555003e-06, "loss": 0.8038, "step": 1316 }, { "epoch": 0.08, "grad_norm": 1.8015795671087196, "learning_rate": 9.922934838239269e-06, "loss": 0.8813, "step": 1317 }, { "epoch": 0.08, "grad_norm": 1.772797248571571, "learning_rate": 9.922753443346137e-06, "loss": 0.7565, "step": 1318 }, { "epoch": 0.08, "grad_norm": 2.0171809689879328, "learning_rate": 9.922571836883408e-06, "loss": 0.9281, "step": 1319 }, { "epoch": 0.08, "grad_norm": 1.77699715063405, "learning_rate": 9.922390018858885e-06, "loss": 0.8497, "step": 1320 }, { "epoch": 0.08, "grad_norm": 1.728138655091492, "learning_rate": 9.922207989280383e-06, "loss": 0.8558, "step": 1321 }, { "epoch": 0.08, "grad_norm": 1.594183131668878, "learning_rate": 9.922025748155724e-06, "loss": 0.8558, "step": 1322 }, { "epoch": 0.08, "grad_norm": 2.3682283999301905, "learning_rate": 9.92184329549274e-06, "loss": 0.7997, "step": 1323 }, { "epoch": 0.08, "grad_norm": 1.568277930030655, "learning_rate": 9.921660631299274e-06, "loss": 0.804, "step": 1324 }, { "epoch": 0.08, "grad_norm": 1.8504651233259704, "learning_rate": 9.921477755583178e-06, "loss": 0.8861, "step": 1325 }, { "epoch": 0.08, "grad_norm": 1.3505603047819144, "learning_rate": 9.921294668352307e-06, "loss": 0.6726, "step": 1326 }, { "epoch": 0.08, "grad_norm": 1.752501079883423, "learning_rate": 9.921111369614533e-06, "loss": 0.9222, "step": 1327 }, { "epoch": 0.09, "grad_norm": 1.8415726250570332, "learning_rate": 9.920927859377733e-06, "loss": 0.7704, "step": 1328 }, { "epoch": 0.09, "grad_norm": 1.7220632579772934, "learning_rate": 9.920744137649793e-06, "loss": 0.878, "step": 1329 }, { "epoch": 0.09, "grad_norm": 1.7148021701469456, "learning_rate": 9.92056020443861e-06, "loss": 0.8748, "step": 1330 }, { "epoch": 0.09, "grad_norm": 1.634871045472798, "learning_rate": 9.920376059752091e-06, "loss": 0.7637, "step": 1331 }, { "epoch": 0.09, "grad_norm": 1.9053611456238158, "learning_rate": 9.920191703598145e-06, "loss": 1.0124, "step": 1332 }, { "epoch": 0.09, "grad_norm": 1.8674968355933048, "learning_rate": 9.920007135984701e-06, "loss": 0.8066, "step": 1333 }, { "epoch": 0.09, "grad_norm": 1.3389489069382314, "learning_rate": 9.919822356919689e-06, "loss": 0.8004, "step": 1334 }, { "epoch": 0.09, "grad_norm": 1.7490665828448817, "learning_rate": 9.919637366411047e-06, "loss": 0.7545, "step": 1335 }, { "epoch": 0.09, "grad_norm": 1.988712183404302, "learning_rate": 9.919452164466731e-06, "loss": 0.8413, "step": 1336 }, { "epoch": 0.09, "grad_norm": 1.5035736947588154, "learning_rate": 9.919266751094698e-06, "loss": 0.8195, "step": 1337 }, { "epoch": 0.09, "grad_norm": 1.677391863916202, "learning_rate": 9.919081126302915e-06, "loss": 0.8519, "step": 1338 }, { "epoch": 0.09, "grad_norm": 1.7313822718340108, "learning_rate": 9.918895290099364e-06, "loss": 0.8996, "step": 1339 }, { "epoch": 0.09, "grad_norm": 2.0172379779750784, "learning_rate": 9.918709242492028e-06, "loss": 0.8715, "step": 1340 }, { "epoch": 0.09, "grad_norm": 1.5696530086368983, "learning_rate": 9.918522983488905e-06, "loss": 0.8976, "step": 1341 }, { "epoch": 0.09, "grad_norm": 1.993350254325089, "learning_rate": 9.918336513097999e-06, "loss": 0.8345, "step": 1342 }, { "epoch": 0.09, "grad_norm": 2.0511613799724024, "learning_rate": 9.918149831327323e-06, "loss": 0.9094, "step": 1343 }, { "epoch": 0.09, "grad_norm": 1.6786418358489894, "learning_rate": 9.917962938184904e-06, "loss": 0.7943, "step": 1344 }, { "epoch": 0.09, "grad_norm": 2.107376142516131, "learning_rate": 9.917775833678772e-06, "loss": 0.883, "step": 1345 }, { "epoch": 0.09, "grad_norm": 1.9933484589702377, "learning_rate": 9.917588517816967e-06, "loss": 0.7799, "step": 1346 }, { "epoch": 0.09, "grad_norm": 2.0765683145960154, "learning_rate": 9.917400990607542e-06, "loss": 0.9931, "step": 1347 }, { "epoch": 0.09, "grad_norm": 1.8767304612005027, "learning_rate": 9.917213252058554e-06, "loss": 0.9106, "step": 1348 }, { "epoch": 0.09, "grad_norm": 1.7658242539473028, "learning_rate": 9.917025302178074e-06, "loss": 0.7787, "step": 1349 }, { "epoch": 0.09, "grad_norm": 1.8605501630633101, "learning_rate": 9.916837140974178e-06, "loss": 0.9328, "step": 1350 }, { "epoch": 0.09, "grad_norm": 1.626838809924624, "learning_rate": 9.916648768454955e-06, "loss": 0.7817, "step": 1351 }, { "epoch": 0.09, "grad_norm": 2.1563961645495584, "learning_rate": 9.916460184628498e-06, "loss": 0.8647, "step": 1352 }, { "epoch": 0.09, "grad_norm": 1.7288435563217845, "learning_rate": 9.916271389502911e-06, "loss": 0.8237, "step": 1353 }, { "epoch": 0.09, "grad_norm": 2.0637186305195114, "learning_rate": 9.916082383086314e-06, "loss": 0.8321, "step": 1354 }, { "epoch": 0.09, "grad_norm": 1.4051771261014996, "learning_rate": 9.915893165386824e-06, "loss": 0.7481, "step": 1355 }, { "epoch": 0.09, "grad_norm": 1.1449447446392043, "learning_rate": 9.915703736412576e-06, "loss": 0.6747, "step": 1356 }, { "epoch": 0.09, "grad_norm": 1.9170981064919257, "learning_rate": 9.915514096171711e-06, "loss": 0.8737, "step": 1357 }, { "epoch": 0.09, "grad_norm": 1.7645484566806406, "learning_rate": 9.915324244672379e-06, "loss": 0.9455, "step": 1358 }, { "epoch": 0.09, "grad_norm": 1.9478726330218314, "learning_rate": 9.915134181922739e-06, "loss": 0.8401, "step": 1359 }, { "epoch": 0.09, "grad_norm": 1.6041662422033776, "learning_rate": 9.91494390793096e-06, "loss": 0.8452, "step": 1360 }, { "epoch": 0.09, "grad_norm": 1.855959225764108, "learning_rate": 9.91475342270522e-06, "loss": 0.8639, "step": 1361 }, { "epoch": 0.09, "grad_norm": 2.043875412567798, "learning_rate": 9.914562726253705e-06, "loss": 0.9697, "step": 1362 }, { "epoch": 0.09, "grad_norm": 1.9682940109546583, "learning_rate": 9.914371818584612e-06, "loss": 0.8789, "step": 1363 }, { "epoch": 0.09, "grad_norm": 1.7612866295827658, "learning_rate": 9.914180699706143e-06, "loss": 0.7191, "step": 1364 }, { "epoch": 0.09, "grad_norm": 1.874378309301564, "learning_rate": 9.913989369626515e-06, "loss": 0.9599, "step": 1365 }, { "epoch": 0.09, "grad_norm": 1.826278631361912, "learning_rate": 9.913797828353948e-06, "loss": 0.8588, "step": 1366 }, { "epoch": 0.09, "grad_norm": 1.79080901774958, "learning_rate": 9.913606075896677e-06, "loss": 0.7873, "step": 1367 }, { "epoch": 0.09, "grad_norm": 1.749314164009876, "learning_rate": 9.913414112262942e-06, "loss": 0.9787, "step": 1368 }, { "epoch": 0.09, "grad_norm": 1.768512440475953, "learning_rate": 9.913221937460991e-06, "loss": 0.9498, "step": 1369 }, { "epoch": 0.09, "grad_norm": 2.1401854385107812, "learning_rate": 9.913029551499087e-06, "loss": 0.8768, "step": 1370 }, { "epoch": 0.09, "grad_norm": 2.0020431491423767, "learning_rate": 9.912836954385496e-06, "loss": 0.853, "step": 1371 }, { "epoch": 0.09, "grad_norm": 1.824765773991412, "learning_rate": 9.912644146128495e-06, "loss": 0.7645, "step": 1372 }, { "epoch": 0.09, "grad_norm": 1.6103074412376077, "learning_rate": 9.912451126736374e-06, "loss": 0.8301, "step": 1373 }, { "epoch": 0.09, "grad_norm": 1.7863477226782352, "learning_rate": 9.912257896217425e-06, "loss": 0.8316, "step": 1374 }, { "epoch": 0.09, "grad_norm": 1.1057113060407557, "learning_rate": 9.912064454579953e-06, "loss": 0.6681, "step": 1375 }, { "epoch": 0.09, "grad_norm": 1.687446757449711, "learning_rate": 9.911870801832273e-06, "loss": 0.8694, "step": 1376 }, { "epoch": 0.09, "grad_norm": 1.137861927101257, "learning_rate": 9.911676937982706e-06, "loss": 0.6341, "step": 1377 }, { "epoch": 0.09, "grad_norm": 1.1982852308322476, "learning_rate": 9.911482863039587e-06, "loss": 0.6202, "step": 1378 }, { "epoch": 0.09, "grad_norm": 1.766228901993186, "learning_rate": 9.911288577011254e-06, "loss": 0.9707, "step": 1379 }, { "epoch": 0.09, "grad_norm": 1.6213513565123054, "learning_rate": 9.911094079906059e-06, "loss": 0.9336, "step": 1380 }, { "epoch": 0.09, "grad_norm": 3.1445588839142675, "learning_rate": 9.910899371732358e-06, "loss": 0.9625, "step": 1381 }, { "epoch": 0.09, "grad_norm": 1.5908660213101118, "learning_rate": 9.910704452498523e-06, "loss": 0.7148, "step": 1382 }, { "epoch": 0.09, "grad_norm": 1.6780057058468174, "learning_rate": 9.91050932221293e-06, "loss": 0.8836, "step": 1383 }, { "epoch": 0.09, "grad_norm": 3.367781417421433, "learning_rate": 9.910313980883963e-06, "loss": 0.7535, "step": 1384 }, { "epoch": 0.09, "grad_norm": 1.7554614281953014, "learning_rate": 9.910118428520019e-06, "loss": 0.7228, "step": 1385 }, { "epoch": 0.09, "grad_norm": 1.9319526816924455, "learning_rate": 9.909922665129503e-06, "loss": 0.9565, "step": 1386 }, { "epoch": 0.09, "grad_norm": 2.1690922966443353, "learning_rate": 9.909726690720829e-06, "loss": 0.9446, "step": 1387 }, { "epoch": 0.09, "grad_norm": 1.735448592915104, "learning_rate": 9.909530505302417e-06, "loss": 0.8341, "step": 1388 }, { "epoch": 0.09, "grad_norm": 1.739914642660314, "learning_rate": 9.9093341088827e-06, "loss": 0.7658, "step": 1389 }, { "epoch": 0.09, "grad_norm": 1.6839068398106904, "learning_rate": 9.909137501470121e-06, "loss": 0.8609, "step": 1390 }, { "epoch": 0.09, "grad_norm": 1.7336283057048545, "learning_rate": 9.908940683073127e-06, "loss": 0.8473, "step": 1391 }, { "epoch": 0.09, "grad_norm": 1.897231132159755, "learning_rate": 9.908743653700177e-06, "loss": 0.8045, "step": 1392 }, { "epoch": 0.09, "grad_norm": 1.2222775030514892, "learning_rate": 9.90854641335974e-06, "loss": 0.6453, "step": 1393 }, { "epoch": 0.09, "grad_norm": 1.9427580766602468, "learning_rate": 9.908348962060292e-06, "loss": 0.8119, "step": 1394 }, { "epoch": 0.09, "grad_norm": 1.6367011514858523, "learning_rate": 9.908151299810319e-06, "loss": 0.7054, "step": 1395 }, { "epoch": 0.09, "grad_norm": 1.3080689483911754, "learning_rate": 9.907953426618317e-06, "loss": 0.7127, "step": 1396 }, { "epoch": 0.09, "grad_norm": 2.0842337760911263, "learning_rate": 9.90775534249279e-06, "loss": 0.9543, "step": 1397 }, { "epoch": 0.09, "grad_norm": 1.6737568007507009, "learning_rate": 9.907557047442252e-06, "loss": 0.7853, "step": 1398 }, { "epoch": 0.09, "grad_norm": 1.8141069549088171, "learning_rate": 9.907358541475223e-06, "loss": 0.8683, "step": 1399 }, { "epoch": 0.09, "grad_norm": 2.0961430816936524, "learning_rate": 9.907159824600235e-06, "loss": 0.8421, "step": 1400 }, { "epoch": 0.09, "grad_norm": 1.7584539334731646, "learning_rate": 9.90696089682583e-06, "loss": 0.9699, "step": 1401 }, { "epoch": 0.09, "grad_norm": 1.8707612049326379, "learning_rate": 9.906761758160556e-06, "loss": 0.8403, "step": 1402 }, { "epoch": 0.09, "grad_norm": 1.868447807559805, "learning_rate": 9.906562408612973e-06, "loss": 0.8857, "step": 1403 }, { "epoch": 0.09, "grad_norm": 2.097849139692282, "learning_rate": 9.90636284819165e-06, "loss": 0.8767, "step": 1404 }, { "epoch": 0.09, "grad_norm": 1.6917711987419357, "learning_rate": 9.906163076905158e-06, "loss": 0.8596, "step": 1405 }, { "epoch": 0.09, "grad_norm": 1.8871651639149, "learning_rate": 9.905963094762086e-06, "loss": 0.8998, "step": 1406 }, { "epoch": 0.09, "grad_norm": 1.6962860480922093, "learning_rate": 9.90576290177103e-06, "loss": 0.7674, "step": 1407 }, { "epoch": 0.09, "grad_norm": 1.7334780947309756, "learning_rate": 9.905562497940593e-06, "loss": 0.8334, "step": 1408 }, { "epoch": 0.09, "grad_norm": 1.6308723779777932, "learning_rate": 9.905361883279388e-06, "loss": 0.8808, "step": 1409 }, { "epoch": 0.09, "grad_norm": 2.1172200604312015, "learning_rate": 9.905161057796037e-06, "loss": 0.9396, "step": 1410 }, { "epoch": 0.09, "grad_norm": 1.468815640210205, "learning_rate": 9.90496002149917e-06, "loss": 0.9193, "step": 1411 }, { "epoch": 0.09, "grad_norm": 1.903482035854877, "learning_rate": 9.90475877439743e-06, "loss": 0.8904, "step": 1412 }, { "epoch": 0.09, "grad_norm": 1.6606556290405998, "learning_rate": 9.904557316499462e-06, "loss": 0.7487, "step": 1413 }, { "epoch": 0.09, "grad_norm": 1.5266920445406942, "learning_rate": 9.904355647813928e-06, "loss": 0.9137, "step": 1414 }, { "epoch": 0.09, "grad_norm": 1.7186480777189894, "learning_rate": 9.904153768349494e-06, "loss": 0.9315, "step": 1415 }, { "epoch": 0.09, "grad_norm": 1.0878993939253316, "learning_rate": 9.903951678114833e-06, "loss": 0.6985, "step": 1416 }, { "epoch": 0.09, "grad_norm": 1.8038407712783155, "learning_rate": 9.903749377118637e-06, "loss": 0.7464, "step": 1417 }, { "epoch": 0.09, "grad_norm": 1.9622869826522873, "learning_rate": 9.903546865369596e-06, "loss": 0.8097, "step": 1418 }, { "epoch": 0.09, "grad_norm": 1.7852985995989685, "learning_rate": 9.903344142876414e-06, "loss": 0.9198, "step": 1419 }, { "epoch": 0.09, "grad_norm": 1.6939937899864193, "learning_rate": 9.903141209647804e-06, "loss": 1.1095, "step": 1420 }, { "epoch": 0.09, "grad_norm": 1.8260733506424907, "learning_rate": 9.902938065692488e-06, "loss": 0.9223, "step": 1421 }, { "epoch": 0.09, "grad_norm": 1.6895304975800913, "learning_rate": 9.902734711019196e-06, "loss": 0.8697, "step": 1422 }, { "epoch": 0.09, "grad_norm": 1.7565065031970462, "learning_rate": 9.902531145636668e-06, "loss": 0.8082, "step": 1423 }, { "epoch": 0.09, "grad_norm": 1.9030348241570167, "learning_rate": 9.902327369553655e-06, "loss": 0.8555, "step": 1424 }, { "epoch": 0.09, "grad_norm": 1.8283824007156735, "learning_rate": 9.902123382778911e-06, "loss": 0.78, "step": 1425 }, { "epoch": 0.09, "grad_norm": 1.155494384115449, "learning_rate": 9.901919185321205e-06, "loss": 0.6549, "step": 1426 }, { "epoch": 0.09, "grad_norm": 1.47122034190339, "learning_rate": 9.901714777189313e-06, "loss": 0.8891, "step": 1427 }, { "epoch": 0.09, "grad_norm": 1.7227471399762084, "learning_rate": 9.901510158392021e-06, "loss": 0.849, "step": 1428 }, { "epoch": 0.09, "grad_norm": 1.8288737910034722, "learning_rate": 9.90130532893812e-06, "loss": 0.8345, "step": 1429 }, { "epoch": 0.09, "grad_norm": 1.8308266722294118, "learning_rate": 9.901100288836416e-06, "loss": 0.7413, "step": 1430 }, { "epoch": 0.09, "grad_norm": 1.7288562230011066, "learning_rate": 9.900895038095718e-06, "loss": 0.9699, "step": 1431 }, { "epoch": 0.09, "grad_norm": 1.2435162469302055, "learning_rate": 9.900689576724854e-06, "loss": 0.6931, "step": 1432 }, { "epoch": 0.09, "grad_norm": 1.8691970997898621, "learning_rate": 9.900483904732645e-06, "loss": 0.9889, "step": 1433 }, { "epoch": 0.09, "grad_norm": 1.7456819002344204, "learning_rate": 9.900278022127938e-06, "loss": 0.8932, "step": 1434 }, { "epoch": 0.09, "grad_norm": 3.7481223251658435, "learning_rate": 9.900071928919577e-06, "loss": 0.8113, "step": 1435 }, { "epoch": 0.09, "grad_norm": 1.653581929628424, "learning_rate": 9.899865625116423e-06, "loss": 0.8528, "step": 1436 }, { "epoch": 0.09, "grad_norm": 1.9205248048594081, "learning_rate": 9.899659110727339e-06, "loss": 0.8954, "step": 1437 }, { "epoch": 0.09, "grad_norm": 1.734500785652139, "learning_rate": 9.8994523857612e-06, "loss": 0.9109, "step": 1438 }, { "epoch": 0.09, "grad_norm": 1.7572217106305688, "learning_rate": 9.899245450226897e-06, "loss": 1.0831, "step": 1439 }, { "epoch": 0.09, "grad_norm": 1.984015486622488, "learning_rate": 9.899038304133318e-06, "loss": 0.8417, "step": 1440 }, { "epoch": 0.09, "grad_norm": 1.7171032800341994, "learning_rate": 9.898830947489365e-06, "loss": 0.8902, "step": 1441 }, { "epoch": 0.09, "grad_norm": 1.6766875971166482, "learning_rate": 9.898623380303953e-06, "loss": 0.8032, "step": 1442 }, { "epoch": 0.09, "grad_norm": 1.2841854460135027, "learning_rate": 9.898415602586e-06, "loss": 0.7814, "step": 1443 }, { "epoch": 0.09, "grad_norm": 1.8352382515618726, "learning_rate": 9.89820761434444e-06, "loss": 0.7857, "step": 1444 }, { "epoch": 0.09, "grad_norm": 1.6754587359700732, "learning_rate": 9.89799941558821e-06, "loss": 0.7237, "step": 1445 }, { "epoch": 0.09, "grad_norm": 1.7699664993868314, "learning_rate": 9.897791006326254e-06, "loss": 0.7585, "step": 1446 }, { "epoch": 0.09, "grad_norm": 1.613074804693358, "learning_rate": 9.897582386567532e-06, "loss": 0.7377, "step": 1447 }, { "epoch": 0.09, "grad_norm": 1.743023309629754, "learning_rate": 9.897373556321013e-06, "loss": 0.8119, "step": 1448 }, { "epoch": 0.09, "grad_norm": 1.7575902070394642, "learning_rate": 9.897164515595667e-06, "loss": 0.9167, "step": 1449 }, { "epoch": 0.09, "grad_norm": 1.625189138323323, "learning_rate": 9.896955264400483e-06, "loss": 0.8675, "step": 1450 }, { "epoch": 0.09, "grad_norm": 1.2105773881251511, "learning_rate": 9.89674580274445e-06, "loss": 0.7498, "step": 1451 }, { "epoch": 0.09, "grad_norm": 2.02448494336228, "learning_rate": 9.896536130636571e-06, "loss": 0.7763, "step": 1452 }, { "epoch": 0.09, "grad_norm": 1.563672645838986, "learning_rate": 9.896326248085862e-06, "loss": 0.8306, "step": 1453 }, { "epoch": 0.09, "grad_norm": 1.736145781943233, "learning_rate": 9.896116155101335e-06, "loss": 0.8941, "step": 1454 }, { "epoch": 0.09, "grad_norm": 1.963050861826125, "learning_rate": 9.895905851692026e-06, "loss": 0.8956, "step": 1455 }, { "epoch": 0.09, "grad_norm": 1.7265640857476579, "learning_rate": 9.89569533786697e-06, "loss": 0.8292, "step": 1456 }, { "epoch": 0.09, "grad_norm": 1.7533356032310385, "learning_rate": 9.895484613635217e-06, "loss": 0.8965, "step": 1457 }, { "epoch": 0.09, "grad_norm": 1.7046274067874172, "learning_rate": 9.895273679005822e-06, "loss": 0.9944, "step": 1458 }, { "epoch": 0.09, "grad_norm": 1.8328685297740324, "learning_rate": 9.89506253398785e-06, "loss": 0.944, "step": 1459 }, { "epoch": 0.09, "grad_norm": 1.7728529103241752, "learning_rate": 9.894851178590377e-06, "loss": 0.8844, "step": 1460 }, { "epoch": 0.09, "grad_norm": 1.334130878273355, "learning_rate": 9.894639612822486e-06, "loss": 0.722, "step": 1461 }, { "epoch": 0.09, "grad_norm": 1.90131831189434, "learning_rate": 9.894427836693267e-06, "loss": 0.8688, "step": 1462 }, { "epoch": 0.09, "grad_norm": 1.7762643239215203, "learning_rate": 9.894215850211825e-06, "loss": 0.846, "step": 1463 }, { "epoch": 0.09, "grad_norm": 1.5231866986037261, "learning_rate": 9.894003653387272e-06, "loss": 0.7755, "step": 1464 }, { "epoch": 0.09, "grad_norm": 1.068826099004125, "learning_rate": 9.893791246228726e-06, "loss": 0.6173, "step": 1465 }, { "epoch": 0.09, "grad_norm": 1.7371509975730495, "learning_rate": 9.893578628745312e-06, "loss": 0.8598, "step": 1466 }, { "epoch": 0.09, "grad_norm": 2.404068174292666, "learning_rate": 9.893365800946176e-06, "loss": 0.8087, "step": 1467 }, { "epoch": 0.09, "grad_norm": 1.8505279697052015, "learning_rate": 9.893152762840457e-06, "loss": 0.952, "step": 1468 }, { "epoch": 0.09, "grad_norm": 1.8131378198302084, "learning_rate": 9.892939514437314e-06, "loss": 0.8565, "step": 1469 }, { "epoch": 0.09, "grad_norm": 1.8164468898430426, "learning_rate": 9.892726055745914e-06, "loss": 0.8297, "step": 1470 }, { "epoch": 0.09, "grad_norm": 1.809003407135974, "learning_rate": 9.892512386775429e-06, "loss": 0.8298, "step": 1471 }, { "epoch": 0.09, "grad_norm": 2.2618489196426745, "learning_rate": 9.89229850753504e-06, "loss": 0.7597, "step": 1472 }, { "epoch": 0.09, "grad_norm": 1.8108426783520197, "learning_rate": 9.892084418033942e-06, "loss": 0.8162, "step": 1473 }, { "epoch": 0.09, "grad_norm": 1.2006067969299068, "learning_rate": 9.891870118281336e-06, "loss": 0.7283, "step": 1474 }, { "epoch": 0.09, "grad_norm": 1.676097813628958, "learning_rate": 9.891655608286432e-06, "loss": 0.9066, "step": 1475 }, { "epoch": 0.09, "grad_norm": 1.5051406270261973, "learning_rate": 9.891440888058449e-06, "loss": 0.761, "step": 1476 }, { "epoch": 0.09, "grad_norm": 1.591959775085894, "learning_rate": 9.891225957606613e-06, "loss": 0.7604, "step": 1477 }, { "epoch": 0.09, "grad_norm": 1.6408014312361368, "learning_rate": 9.891010816940165e-06, "loss": 0.8828, "step": 1478 }, { "epoch": 0.09, "grad_norm": 1.8016332615693338, "learning_rate": 9.890795466068351e-06, "loss": 0.8841, "step": 1479 }, { "epoch": 0.09, "grad_norm": 1.059143414673156, "learning_rate": 9.890579905000422e-06, "loss": 0.607, "step": 1480 }, { "epoch": 0.09, "grad_norm": 1.5118094263987982, "learning_rate": 9.890364133745646e-06, "loss": 0.8834, "step": 1481 }, { "epoch": 0.09, "grad_norm": 1.2634923779430254, "learning_rate": 9.890148152313295e-06, "loss": 0.7601, "step": 1482 }, { "epoch": 0.09, "grad_norm": 1.4726230484906, "learning_rate": 9.889931960712653e-06, "loss": 0.6618, "step": 1483 }, { "epoch": 0.09, "grad_norm": 1.7493877188073839, "learning_rate": 9.88971555895301e-06, "loss": 0.7526, "step": 1484 }, { "epoch": 0.1, "grad_norm": 1.69180248170689, "learning_rate": 9.88949894704367e-06, "loss": 0.8684, "step": 1485 }, { "epoch": 0.1, "grad_norm": 1.5311196917369678, "learning_rate": 9.889282124993936e-06, "loss": 0.9417, "step": 1486 }, { "epoch": 0.1, "grad_norm": 1.6107632644748675, "learning_rate": 9.889065092813131e-06, "loss": 0.7464, "step": 1487 }, { "epoch": 0.1, "grad_norm": 1.7658564805298924, "learning_rate": 9.888847850510581e-06, "loss": 0.9159, "step": 1488 }, { "epoch": 0.1, "grad_norm": 1.7008408163685789, "learning_rate": 9.888630398095623e-06, "loss": 0.7666, "step": 1489 }, { "epoch": 0.1, "grad_norm": 1.805920959288355, "learning_rate": 9.888412735577604e-06, "loss": 0.9149, "step": 1490 }, { "epoch": 0.1, "grad_norm": 1.5650345938573904, "learning_rate": 9.888194862965877e-06, "loss": 0.7474, "step": 1491 }, { "epoch": 0.1, "grad_norm": 1.7895880780326208, "learning_rate": 9.887976780269807e-06, "loss": 0.899, "step": 1492 }, { "epoch": 0.1, "grad_norm": 1.5166513921479234, "learning_rate": 9.887758487498765e-06, "loss": 0.7484, "step": 1493 }, { "epoch": 0.1, "grad_norm": 1.767031823360052, "learning_rate": 9.887539984662135e-06, "loss": 0.8082, "step": 1494 }, { "epoch": 0.1, "grad_norm": 1.591167203027485, "learning_rate": 9.887321271769305e-06, "loss": 0.7949, "step": 1495 }, { "epoch": 0.1, "grad_norm": 1.625966542303608, "learning_rate": 9.887102348829678e-06, "loss": 0.8121, "step": 1496 }, { "epoch": 0.1, "grad_norm": 1.4418240210211426, "learning_rate": 9.88688321585266e-06, "loss": 0.713, "step": 1497 }, { "epoch": 0.1, "grad_norm": 1.9173171344833095, "learning_rate": 9.886663872847672e-06, "loss": 0.8873, "step": 1498 }, { "epoch": 0.1, "grad_norm": 1.910837387609622, "learning_rate": 9.886444319824138e-06, "loss": 0.9394, "step": 1499 }, { "epoch": 0.1, "grad_norm": 1.614849371942557, "learning_rate": 9.886224556791495e-06, "loss": 0.8605, "step": 1500 }, { "epoch": 0.1, "grad_norm": 1.9681665065775915, "learning_rate": 9.886004583759187e-06, "loss": 0.8681, "step": 1501 }, { "epoch": 0.1, "grad_norm": 1.9639342438244956, "learning_rate": 9.885784400736672e-06, "loss": 0.7914, "step": 1502 }, { "epoch": 0.1, "grad_norm": 1.1551979088700612, "learning_rate": 9.885564007733406e-06, "loss": 0.6354, "step": 1503 }, { "epoch": 0.1, "grad_norm": 1.2241012848984483, "learning_rate": 9.885343404758867e-06, "loss": 0.735, "step": 1504 }, { "epoch": 0.1, "grad_norm": 1.6389809059365803, "learning_rate": 9.885122591822533e-06, "loss": 0.7616, "step": 1505 }, { "epoch": 0.1, "grad_norm": 2.389632454499553, "learning_rate": 9.884901568933896e-06, "loss": 0.8394, "step": 1506 }, { "epoch": 0.1, "grad_norm": 1.8793073940215042, "learning_rate": 9.884680336102452e-06, "loss": 0.8257, "step": 1507 }, { "epoch": 0.1, "grad_norm": 1.7527360900519084, "learning_rate": 9.884458893337714e-06, "loss": 0.825, "step": 1508 }, { "epoch": 0.1, "grad_norm": 1.6014723445248142, "learning_rate": 9.884237240649195e-06, "loss": 0.8247, "step": 1509 }, { "epoch": 0.1, "grad_norm": 1.783729896909034, "learning_rate": 9.884015378046424e-06, "loss": 0.7785, "step": 1510 }, { "epoch": 0.1, "grad_norm": 1.9055796802809393, "learning_rate": 9.883793305538934e-06, "loss": 0.7287, "step": 1511 }, { "epoch": 0.1, "grad_norm": 1.4895499020402496, "learning_rate": 9.88357102313627e-06, "loss": 0.9285, "step": 1512 }, { "epoch": 0.1, "grad_norm": 1.2554065117357631, "learning_rate": 9.883348530847985e-06, "loss": 0.7279, "step": 1513 }, { "epoch": 0.1, "grad_norm": 1.1564974665298604, "learning_rate": 9.88312582868364e-06, "loss": 0.7112, "step": 1514 }, { "epoch": 0.1, "grad_norm": 1.1037462273011138, "learning_rate": 9.88290291665281e-06, "loss": 0.6796, "step": 1515 }, { "epoch": 0.1, "grad_norm": 1.5081750888357492, "learning_rate": 9.882679794765072e-06, "loss": 0.7386, "step": 1516 }, { "epoch": 0.1, "grad_norm": 2.0248570080998194, "learning_rate": 9.882456463030017e-06, "loss": 0.869, "step": 1517 }, { "epoch": 0.1, "grad_norm": 1.7328011792912397, "learning_rate": 9.882232921457242e-06, "loss": 0.8175, "step": 1518 }, { "epoch": 0.1, "grad_norm": 1.661170325072875, "learning_rate": 9.882009170056354e-06, "loss": 1.1893, "step": 1519 }, { "epoch": 0.1, "grad_norm": 1.7422454957376545, "learning_rate": 9.881785208836974e-06, "loss": 0.8629, "step": 1520 }, { "epoch": 0.1, "grad_norm": 1.5318756004626577, "learning_rate": 9.88156103780872e-06, "loss": 0.7521, "step": 1521 }, { "epoch": 0.1, "grad_norm": 1.498928346344, "learning_rate": 9.881336656981232e-06, "loss": 0.8366, "step": 1522 }, { "epoch": 0.1, "grad_norm": 2.1458534946125907, "learning_rate": 9.881112066364151e-06, "loss": 0.7572, "step": 1523 }, { "epoch": 0.1, "grad_norm": 1.7508097921930885, "learning_rate": 9.88088726596713e-06, "loss": 0.8491, "step": 1524 }, { "epoch": 0.1, "grad_norm": 2.142946857736444, "learning_rate": 9.880662255799831e-06, "loss": 0.8671, "step": 1525 }, { "epoch": 0.1, "grad_norm": 1.7508495795199526, "learning_rate": 9.880437035871924e-06, "loss": 0.8563, "step": 1526 }, { "epoch": 0.1, "grad_norm": 1.1412947297951477, "learning_rate": 9.880211606193089e-06, "loss": 0.7204, "step": 1527 }, { "epoch": 0.1, "grad_norm": 1.7831693045818846, "learning_rate": 9.879985966773012e-06, "loss": 0.8828, "step": 1528 }, { "epoch": 0.1, "grad_norm": 1.689493600161696, "learning_rate": 9.879760117621393e-06, "loss": 0.8503, "step": 1529 }, { "epoch": 0.1, "grad_norm": 1.851519976035837, "learning_rate": 9.879534058747939e-06, "loss": 1.0378, "step": 1530 }, { "epoch": 0.1, "grad_norm": 1.6907282686021545, "learning_rate": 9.879307790162364e-06, "loss": 1.029, "step": 1531 }, { "epoch": 0.1, "grad_norm": 1.6465396822033593, "learning_rate": 9.879081311874392e-06, "loss": 0.7878, "step": 1532 }, { "epoch": 0.1, "grad_norm": 1.7454126337433011, "learning_rate": 9.87885462389376e-06, "loss": 0.8233, "step": 1533 }, { "epoch": 0.1, "grad_norm": 1.624693006135298, "learning_rate": 9.878627726230206e-06, "loss": 0.8247, "step": 1534 }, { "epoch": 0.1, "grad_norm": 1.9511743624349729, "learning_rate": 9.878400618893483e-06, "loss": 0.9569, "step": 1535 }, { "epoch": 0.1, "grad_norm": 1.9714398966261097, "learning_rate": 9.878173301893355e-06, "loss": 0.9527, "step": 1536 }, { "epoch": 0.1, "grad_norm": 1.8946508636553776, "learning_rate": 9.877945775239585e-06, "loss": 0.83, "step": 1537 }, { "epoch": 0.1, "grad_norm": 1.1514097169997615, "learning_rate": 9.87771803894196e-06, "loss": 0.6195, "step": 1538 }, { "epoch": 0.1, "grad_norm": 1.8949180981673854, "learning_rate": 9.87749009301026e-06, "loss": 0.737, "step": 1539 }, { "epoch": 0.1, "grad_norm": 1.8009258237441, "learning_rate": 9.877261937454286e-06, "loss": 0.8309, "step": 1540 }, { "epoch": 0.1, "grad_norm": 1.7121090452491479, "learning_rate": 9.877033572283842e-06, "loss": 0.9236, "step": 1541 }, { "epoch": 0.1, "grad_norm": 1.8880718917516937, "learning_rate": 9.876804997508744e-06, "loss": 0.9817, "step": 1542 }, { "epoch": 0.1, "grad_norm": 1.6834700086870342, "learning_rate": 9.876576213138815e-06, "loss": 0.9052, "step": 1543 }, { "epoch": 0.1, "grad_norm": 2.4812233596965654, "learning_rate": 9.876347219183888e-06, "loss": 0.7701, "step": 1544 }, { "epoch": 0.1, "grad_norm": 1.8487351275290203, "learning_rate": 9.876118015653804e-06, "loss": 0.8868, "step": 1545 }, { "epoch": 0.1, "grad_norm": 1.8273457897370522, "learning_rate": 9.87588860255841e-06, "loss": 0.8038, "step": 1546 }, { "epoch": 0.1, "grad_norm": 1.9843006173714155, "learning_rate": 9.875658979907574e-06, "loss": 0.8726, "step": 1547 }, { "epoch": 0.1, "grad_norm": 1.6075070702461294, "learning_rate": 9.875429147711158e-06, "loss": 0.8356, "step": 1548 }, { "epoch": 0.1, "grad_norm": 1.8190795293414426, "learning_rate": 9.875199105979043e-06, "loss": 0.8103, "step": 1549 }, { "epoch": 0.1, "grad_norm": 1.7066819746904733, "learning_rate": 9.874968854721115e-06, "loss": 0.8476, "step": 1550 }, { "epoch": 0.1, "grad_norm": 1.654162780559807, "learning_rate": 9.874738393947268e-06, "loss": 0.8681, "step": 1551 }, { "epoch": 0.1, "grad_norm": 1.30648404468082, "learning_rate": 9.874507723667409e-06, "loss": 0.6845, "step": 1552 }, { "epoch": 0.1, "grad_norm": 1.787106456755664, "learning_rate": 9.87427684389145e-06, "loss": 0.9609, "step": 1553 }, { "epoch": 0.1, "grad_norm": 1.7885902099847786, "learning_rate": 9.874045754629314e-06, "loss": 0.8429, "step": 1554 }, { "epoch": 0.1, "grad_norm": 1.7957215611596125, "learning_rate": 9.873814455890934e-06, "loss": 0.8465, "step": 1555 }, { "epoch": 0.1, "grad_norm": 1.1765629053217141, "learning_rate": 9.87358294768625e-06, "loss": 0.6993, "step": 1556 }, { "epoch": 0.1, "grad_norm": 1.3061307960008184, "learning_rate": 9.873351230025212e-06, "loss": 0.6714, "step": 1557 }, { "epoch": 0.1, "grad_norm": 1.4667766840167067, "learning_rate": 9.873119302917778e-06, "loss": 0.6838, "step": 1558 }, { "epoch": 0.1, "grad_norm": 2.0598725758215823, "learning_rate": 9.872887166373916e-06, "loss": 0.8808, "step": 1559 }, { "epoch": 0.1, "grad_norm": 1.7232568001073088, "learning_rate": 9.872654820403604e-06, "loss": 0.8327, "step": 1560 }, { "epoch": 0.1, "grad_norm": 1.1736489361743694, "learning_rate": 9.872422265016827e-06, "loss": 0.7402, "step": 1561 }, { "epoch": 0.1, "grad_norm": 1.7384758008118713, "learning_rate": 9.872189500223577e-06, "loss": 0.9277, "step": 1562 }, { "epoch": 0.1, "grad_norm": 1.7226987152189515, "learning_rate": 9.871956526033863e-06, "loss": 0.9184, "step": 1563 }, { "epoch": 0.1, "grad_norm": 1.8965004120426303, "learning_rate": 9.871723342457693e-06, "loss": 0.9552, "step": 1564 }, { "epoch": 0.1, "grad_norm": 1.8537143109525298, "learning_rate": 9.871489949505091e-06, "loss": 0.9025, "step": 1565 }, { "epoch": 0.1, "grad_norm": 1.6597338920930358, "learning_rate": 9.871256347186087e-06, "loss": 0.7197, "step": 1566 }, { "epoch": 0.1, "grad_norm": 1.361685013993366, "learning_rate": 9.871022535510722e-06, "loss": 0.5665, "step": 1567 }, { "epoch": 0.1, "grad_norm": 1.8585660091846672, "learning_rate": 9.870788514489045e-06, "loss": 0.934, "step": 1568 }, { "epoch": 0.1, "grad_norm": 1.6693953529825754, "learning_rate": 9.870554284131111e-06, "loss": 0.7793, "step": 1569 }, { "epoch": 0.1, "grad_norm": 2.0667517518458944, "learning_rate": 9.870319844446987e-06, "loss": 1.0584, "step": 1570 }, { "epoch": 0.1, "grad_norm": 2.159125158766166, "learning_rate": 9.870085195446752e-06, "loss": 0.7944, "step": 1571 }, { "epoch": 0.1, "grad_norm": 1.8054314648172065, "learning_rate": 9.869850337140489e-06, "loss": 0.7831, "step": 1572 }, { "epoch": 0.1, "grad_norm": 1.7674872727394735, "learning_rate": 9.86961526953829e-06, "loss": 0.894, "step": 1573 }, { "epoch": 0.1, "grad_norm": 1.52520918802063, "learning_rate": 9.86937999265026e-06, "loss": 0.847, "step": 1574 }, { "epoch": 0.1, "grad_norm": 1.7856722847555566, "learning_rate": 9.86914450648651e-06, "loss": 0.9316, "step": 1575 }, { "epoch": 0.1, "grad_norm": 2.437412256276723, "learning_rate": 9.86890881105716e-06, "loss": 1.0671, "step": 1576 }, { "epoch": 0.1, "grad_norm": 1.8325358433064167, "learning_rate": 9.868672906372341e-06, "loss": 0.7827, "step": 1577 }, { "epoch": 0.1, "grad_norm": 1.2830905371393369, "learning_rate": 9.868436792442191e-06, "loss": 0.7665, "step": 1578 }, { "epoch": 0.1, "grad_norm": 1.7498660492068188, "learning_rate": 9.868200469276858e-06, "loss": 0.9015, "step": 1579 }, { "epoch": 0.1, "grad_norm": 1.8332842504708524, "learning_rate": 9.867963936886497e-06, "loss": 0.8702, "step": 1580 }, { "epoch": 0.1, "grad_norm": 2.0968694149679776, "learning_rate": 9.867727195281275e-06, "loss": 0.8957, "step": 1581 }, { "epoch": 0.1, "grad_norm": 1.8378888696470046, "learning_rate": 9.867490244471367e-06, "loss": 0.8626, "step": 1582 }, { "epoch": 0.1, "grad_norm": 1.7114510161819343, "learning_rate": 9.867253084466957e-06, "loss": 0.7942, "step": 1583 }, { "epoch": 0.1, "grad_norm": 1.6086283844797695, "learning_rate": 9.867015715278236e-06, "loss": 0.8273, "step": 1584 }, { "epoch": 0.1, "grad_norm": 1.187039627559611, "learning_rate": 9.866778136915408e-06, "loss": 0.744, "step": 1585 }, { "epoch": 0.1, "grad_norm": 1.9779005498098134, "learning_rate": 9.86654034938868e-06, "loss": 0.7441, "step": 1586 }, { "epoch": 0.1, "grad_norm": 2.005652669423734, "learning_rate": 9.866302352708276e-06, "loss": 0.8267, "step": 1587 }, { "epoch": 0.1, "grad_norm": 1.5975975694473479, "learning_rate": 9.866064146884422e-06, "loss": 0.8524, "step": 1588 }, { "epoch": 0.1, "grad_norm": 1.850398703421842, "learning_rate": 9.865825731927356e-06, "loss": 0.8372, "step": 1589 }, { "epoch": 0.1, "grad_norm": 1.70832341473484, "learning_rate": 9.865587107847323e-06, "loss": 0.7862, "step": 1590 }, { "epoch": 0.1, "grad_norm": 1.1819962219574638, "learning_rate": 9.865348274654582e-06, "loss": 0.5801, "step": 1591 }, { "epoch": 0.1, "grad_norm": 1.7683179229686006, "learning_rate": 9.865109232359394e-06, "loss": 0.8203, "step": 1592 }, { "epoch": 0.1, "grad_norm": 2.4456386333977083, "learning_rate": 9.864869980972036e-06, "loss": 0.8996, "step": 1593 }, { "epoch": 0.1, "grad_norm": 1.792615772069909, "learning_rate": 9.864630520502787e-06, "loss": 0.9042, "step": 1594 }, { "epoch": 0.1, "grad_norm": 1.9119581926319116, "learning_rate": 9.86439085096194e-06, "loss": 0.7006, "step": 1595 }, { "epoch": 0.1, "grad_norm": 2.697947522545054, "learning_rate": 9.864150972359798e-06, "loss": 0.8026, "step": 1596 }, { "epoch": 0.1, "grad_norm": 1.649865199359102, "learning_rate": 9.863910884706667e-06, "loss": 0.7207, "step": 1597 }, { "epoch": 0.1, "grad_norm": 1.90362029772525, "learning_rate": 9.863670588012865e-06, "loss": 0.7831, "step": 1598 }, { "epoch": 0.1, "grad_norm": 1.6877944684807522, "learning_rate": 9.863430082288724e-06, "loss": 0.8837, "step": 1599 }, { "epoch": 0.1, "grad_norm": 1.3180819480317651, "learning_rate": 9.863189367544576e-06, "loss": 0.662, "step": 1600 }, { "epoch": 0.1, "grad_norm": 1.834463590461233, "learning_rate": 9.862948443790768e-06, "loss": 1.1432, "step": 1601 }, { "epoch": 0.1, "grad_norm": 1.657216702052957, "learning_rate": 9.862707311037654e-06, "loss": 0.7948, "step": 1602 }, { "epoch": 0.1, "grad_norm": 1.6714777822135372, "learning_rate": 9.862465969295599e-06, "loss": 0.8676, "step": 1603 }, { "epoch": 0.1, "grad_norm": 1.7952869832418574, "learning_rate": 9.862224418574972e-06, "loss": 0.846, "step": 1604 }, { "epoch": 0.1, "grad_norm": 1.612029743509732, "learning_rate": 9.861982658886158e-06, "loss": 0.9259, "step": 1605 }, { "epoch": 0.1, "grad_norm": 3.2558125137498575, "learning_rate": 9.861740690239546e-06, "loss": 0.8112, "step": 1606 }, { "epoch": 0.1, "grad_norm": 1.781274965053057, "learning_rate": 9.861498512645532e-06, "loss": 0.8397, "step": 1607 }, { "epoch": 0.1, "grad_norm": 1.0768341756393536, "learning_rate": 9.861256126114532e-06, "loss": 0.6661, "step": 1608 }, { "epoch": 0.1, "grad_norm": 1.4823546055423311, "learning_rate": 9.861013530656956e-06, "loss": 0.7558, "step": 1609 }, { "epoch": 0.1, "grad_norm": 1.0358175875525633, "learning_rate": 9.860770726283234e-06, "loss": 0.6795, "step": 1610 }, { "epoch": 0.1, "grad_norm": 1.2049603000538065, "learning_rate": 9.860527713003797e-06, "loss": 0.6717, "step": 1611 }, { "epoch": 0.1, "grad_norm": 2.170428105877154, "learning_rate": 9.860284490829097e-06, "loss": 0.8697, "step": 1612 }, { "epoch": 0.1, "grad_norm": 1.2801143104478412, "learning_rate": 9.86004105976958e-06, "loss": 0.7602, "step": 1613 }, { "epoch": 0.1, "grad_norm": 1.870875271161196, "learning_rate": 9.85979741983571e-06, "loss": 0.8446, "step": 1614 }, { "epoch": 0.1, "grad_norm": 1.5317038783813985, "learning_rate": 9.85955357103796e-06, "loss": 0.8127, "step": 1615 }, { "epoch": 0.1, "grad_norm": 1.1232841659654857, "learning_rate": 9.859309513386808e-06, "loss": 0.6008, "step": 1616 }, { "epoch": 0.1, "grad_norm": 1.6742435034692524, "learning_rate": 9.859065246892744e-06, "loss": 0.9687, "step": 1617 }, { "epoch": 0.1, "grad_norm": 1.7227774712710204, "learning_rate": 9.858820771566267e-06, "loss": 0.823, "step": 1618 }, { "epoch": 0.1, "grad_norm": 1.8106667880301526, "learning_rate": 9.858576087417881e-06, "loss": 0.7595, "step": 1619 }, { "epoch": 0.1, "grad_norm": 1.6658443242889047, "learning_rate": 9.858331194458105e-06, "loss": 0.8287, "step": 1620 }, { "epoch": 0.1, "grad_norm": 2.537757181002924, "learning_rate": 9.858086092697464e-06, "loss": 0.7863, "step": 1621 }, { "epoch": 0.1, "grad_norm": 1.6574986768822852, "learning_rate": 9.85784078214649e-06, "loss": 0.7462, "step": 1622 }, { "epoch": 0.1, "grad_norm": 1.0277147077320992, "learning_rate": 9.857595262815726e-06, "loss": 0.6884, "step": 1623 }, { "epoch": 0.1, "grad_norm": 1.3310491144232142, "learning_rate": 9.857349534715728e-06, "loss": 0.7483, "step": 1624 }, { "epoch": 0.1, "grad_norm": 1.7344528854016996, "learning_rate": 9.857103597857051e-06, "loss": 0.8406, "step": 1625 }, { "epoch": 0.1, "grad_norm": 1.6039543771583364, "learning_rate": 9.856857452250268e-06, "loss": 0.8119, "step": 1626 }, { "epoch": 0.1, "grad_norm": 2.5906551135043014, "learning_rate": 9.856611097905956e-06, "loss": 0.7947, "step": 1627 }, { "epoch": 0.1, "grad_norm": 2.12869552158047, "learning_rate": 9.856364534834708e-06, "loss": 0.7627, "step": 1628 }, { "epoch": 0.1, "grad_norm": 1.8943492877085102, "learning_rate": 9.856117763047115e-06, "loss": 0.7964, "step": 1629 }, { "epoch": 0.1, "grad_norm": 1.7844723283107864, "learning_rate": 9.855870782553783e-06, "loss": 0.7565, "step": 1630 }, { "epoch": 0.1, "grad_norm": 1.9058509179558945, "learning_rate": 9.855623593365332e-06, "loss": 0.8417, "step": 1631 }, { "epoch": 0.1, "grad_norm": 1.733304781381375, "learning_rate": 9.855376195492378e-06, "loss": 0.8185, "step": 1632 }, { "epoch": 0.1, "grad_norm": 1.100140109064738, "learning_rate": 9.855128588945559e-06, "loss": 0.621, "step": 1633 }, { "epoch": 0.1, "grad_norm": 1.8395323101482273, "learning_rate": 9.854880773735515e-06, "loss": 0.8153, "step": 1634 }, { "epoch": 0.1, "grad_norm": 1.8498242303708778, "learning_rate": 9.8546327498729e-06, "loss": 0.8861, "step": 1635 }, { "epoch": 0.1, "grad_norm": 1.7357992645852542, "learning_rate": 9.854384517368368e-06, "loss": 0.8163, "step": 1636 }, { "epoch": 0.1, "grad_norm": 1.6829984863239704, "learning_rate": 9.854136076232587e-06, "loss": 0.7885, "step": 1637 }, { "epoch": 0.1, "grad_norm": 1.519828494829376, "learning_rate": 9.85388742647624e-06, "loss": 0.7458, "step": 1638 }, { "epoch": 0.1, "grad_norm": 1.9222255080049777, "learning_rate": 9.853638568110013e-06, "loss": 0.7173, "step": 1639 }, { "epoch": 0.1, "grad_norm": 1.799781447235983, "learning_rate": 9.853389501144596e-06, "loss": 0.8849, "step": 1640 }, { "epoch": 0.11, "grad_norm": 1.132636281445171, "learning_rate": 9.853140225590698e-06, "loss": 0.6653, "step": 1641 }, { "epoch": 0.11, "grad_norm": 1.7601633396223362, "learning_rate": 9.852890741459031e-06, "loss": 0.759, "step": 1642 }, { "epoch": 0.11, "grad_norm": 2.3111159029988144, "learning_rate": 9.852641048760319e-06, "loss": 0.7093, "step": 1643 }, { "epoch": 0.11, "grad_norm": 1.7706824465837905, "learning_rate": 9.85239114750529e-06, "loss": 0.9043, "step": 1644 }, { "epoch": 0.11, "grad_norm": 1.5855359734217866, "learning_rate": 9.852141037704683e-06, "loss": 0.7682, "step": 1645 }, { "epoch": 0.11, "grad_norm": 1.7844772740059789, "learning_rate": 9.851890719369253e-06, "loss": 0.9042, "step": 1646 }, { "epoch": 0.11, "grad_norm": 1.824225675366576, "learning_rate": 9.851640192509755e-06, "loss": 0.8293, "step": 1647 }, { "epoch": 0.11, "grad_norm": 1.742448701744283, "learning_rate": 9.851389457136955e-06, "loss": 0.8127, "step": 1648 }, { "epoch": 0.11, "grad_norm": 1.9461223185031005, "learning_rate": 9.85113851326163e-06, "loss": 0.8303, "step": 1649 }, { "epoch": 0.11, "grad_norm": 1.0480137839620265, "learning_rate": 9.850887360894567e-06, "loss": 0.6291, "step": 1650 }, { "epoch": 0.11, "grad_norm": 2.2113258468128847, "learning_rate": 9.850636000046558e-06, "loss": 1.0118, "step": 1651 }, { "epoch": 0.11, "grad_norm": 1.855962240052863, "learning_rate": 9.850384430728406e-06, "loss": 0.8019, "step": 1652 }, { "epoch": 0.11, "grad_norm": 2.0063409842228763, "learning_rate": 9.850132652950923e-06, "loss": 0.7926, "step": 1653 }, { "epoch": 0.11, "grad_norm": 1.2893997276611175, "learning_rate": 9.849880666724931e-06, "loss": 0.5675, "step": 1654 }, { "epoch": 0.11, "grad_norm": 2.21890599828105, "learning_rate": 9.84962847206126e-06, "loss": 0.9982, "step": 1655 }, { "epoch": 0.11, "grad_norm": 1.8062730520066377, "learning_rate": 9.849376068970744e-06, "loss": 0.8624, "step": 1656 }, { "epoch": 0.11, "grad_norm": 1.6144430868230946, "learning_rate": 9.849123457464238e-06, "loss": 0.826, "step": 1657 }, { "epoch": 0.11, "grad_norm": 1.2839903930310705, "learning_rate": 9.848870637552592e-06, "loss": 0.6422, "step": 1658 }, { "epoch": 0.11, "grad_norm": 1.830380521946578, "learning_rate": 9.848617609246678e-06, "loss": 0.8748, "step": 1659 }, { "epoch": 0.11, "grad_norm": 1.0856657789498192, "learning_rate": 9.848364372557366e-06, "loss": 0.7548, "step": 1660 }, { "epoch": 0.11, "grad_norm": 1.6449677821594626, "learning_rate": 9.84811092749554e-06, "loss": 0.8282, "step": 1661 }, { "epoch": 0.11, "grad_norm": 1.8634607504588558, "learning_rate": 9.847857274072096e-06, "loss": 0.779, "step": 1662 }, { "epoch": 0.11, "grad_norm": 1.1114001465821732, "learning_rate": 9.84760341229793e-06, "loss": 0.6402, "step": 1663 }, { "epoch": 0.11, "grad_norm": 1.7685066491253267, "learning_rate": 9.847349342183959e-06, "loss": 0.7551, "step": 1664 }, { "epoch": 0.11, "grad_norm": 1.7563451686378078, "learning_rate": 9.847095063741097e-06, "loss": 0.8609, "step": 1665 }, { "epoch": 0.11, "grad_norm": 2.401747943496739, "learning_rate": 9.846840576980275e-06, "loss": 0.8435, "step": 1666 }, { "epoch": 0.11, "grad_norm": 1.9145398548913153, "learning_rate": 9.84658588191243e-06, "loss": 1.1083, "step": 1667 }, { "epoch": 0.11, "grad_norm": 2.0311760454091505, "learning_rate": 9.846330978548507e-06, "loss": 0.8589, "step": 1668 }, { "epoch": 0.11, "grad_norm": 1.7716487026606211, "learning_rate": 9.846075866899463e-06, "loss": 0.7361, "step": 1669 }, { "epoch": 0.11, "grad_norm": 1.8029219757209742, "learning_rate": 9.845820546976258e-06, "loss": 0.7707, "step": 1670 }, { "epoch": 0.11, "grad_norm": 1.788645565515629, "learning_rate": 9.845565018789873e-06, "loss": 0.8313, "step": 1671 }, { "epoch": 0.11, "grad_norm": 8.55692108377542, "learning_rate": 9.845309282351282e-06, "loss": 0.744, "step": 1672 }, { "epoch": 0.11, "grad_norm": 1.6624231103279146, "learning_rate": 9.845053337671482e-06, "loss": 0.7604, "step": 1673 }, { "epoch": 0.11, "grad_norm": 1.7517285749813305, "learning_rate": 9.84479718476147e-06, "loss": 0.9087, "step": 1674 }, { "epoch": 0.11, "grad_norm": 1.8302268082014075, "learning_rate": 9.844540823632254e-06, "loss": 0.8117, "step": 1675 }, { "epoch": 0.11, "grad_norm": 1.846055182784427, "learning_rate": 9.844284254294854e-06, "loss": 0.9353, "step": 1676 }, { "epoch": 0.11, "grad_norm": 1.7068923234456013, "learning_rate": 9.844027476760295e-06, "loss": 0.9203, "step": 1677 }, { "epoch": 0.11, "grad_norm": 1.701308754547418, "learning_rate": 9.843770491039614e-06, "loss": 0.7969, "step": 1678 }, { "epoch": 0.11, "grad_norm": 1.7480552698289027, "learning_rate": 9.843513297143856e-06, "loss": 0.867, "step": 1679 }, { "epoch": 0.11, "grad_norm": 3.3000688150257647, "learning_rate": 9.843255895084076e-06, "loss": 0.8768, "step": 1680 }, { "epoch": 0.11, "grad_norm": 1.9660613447553312, "learning_rate": 9.842998284871332e-06, "loss": 0.8874, "step": 1681 }, { "epoch": 0.11, "grad_norm": 2.0644608185554234, "learning_rate": 9.842740466516698e-06, "loss": 0.9275, "step": 1682 }, { "epoch": 0.11, "grad_norm": 1.6215250593443464, "learning_rate": 9.842482440031256e-06, "loss": 0.8805, "step": 1683 }, { "epoch": 0.11, "grad_norm": 1.777008420491213, "learning_rate": 9.842224205426094e-06, "loss": 0.9542, "step": 1684 }, { "epoch": 0.11, "grad_norm": 1.6179498324840433, "learning_rate": 9.84196576271231e-06, "loss": 0.8012, "step": 1685 }, { "epoch": 0.11, "grad_norm": 1.613929869727427, "learning_rate": 9.841707111901013e-06, "loss": 0.9317, "step": 1686 }, { "epoch": 0.11, "grad_norm": 1.2625173962664884, "learning_rate": 9.841448253003319e-06, "loss": 0.6625, "step": 1687 }, { "epoch": 0.11, "grad_norm": 1.7101741623062423, "learning_rate": 9.84118918603035e-06, "loss": 0.8089, "step": 1688 }, { "epoch": 0.11, "grad_norm": 1.7524577618985238, "learning_rate": 9.840929910993244e-06, "loss": 0.8465, "step": 1689 }, { "epoch": 0.11, "grad_norm": 1.7019737097354535, "learning_rate": 9.840670427903142e-06, "loss": 0.8219, "step": 1690 }, { "epoch": 0.11, "grad_norm": 1.8456942636802152, "learning_rate": 9.840410736771196e-06, "loss": 0.9058, "step": 1691 }, { "epoch": 0.11, "grad_norm": 1.6981542098583835, "learning_rate": 9.840150837608568e-06, "loss": 0.9539, "step": 1692 }, { "epoch": 0.11, "grad_norm": 1.6127131360796607, "learning_rate": 9.839890730426429e-06, "loss": 0.926, "step": 1693 }, { "epoch": 0.11, "grad_norm": 2.0605362661114346, "learning_rate": 9.839630415235954e-06, "loss": 0.957, "step": 1694 }, { "epoch": 0.11, "grad_norm": 1.7879897057202294, "learning_rate": 9.839369892048337e-06, "loss": 0.9285, "step": 1695 }, { "epoch": 0.11, "grad_norm": 1.5902161460276636, "learning_rate": 9.839109160874767e-06, "loss": 0.9752, "step": 1696 }, { "epoch": 0.11, "grad_norm": 1.801843397872425, "learning_rate": 9.838848221726455e-06, "loss": 0.86, "step": 1697 }, { "epoch": 0.11, "grad_norm": 1.295436873003033, "learning_rate": 9.838587074614614e-06, "loss": 0.7419, "step": 1698 }, { "epoch": 0.11, "grad_norm": 2.0884070178346557, "learning_rate": 9.838325719550469e-06, "loss": 0.8247, "step": 1699 }, { "epoch": 0.11, "grad_norm": 1.797857107383286, "learning_rate": 9.838064156545251e-06, "loss": 0.9289, "step": 1700 }, { "epoch": 0.11, "grad_norm": 2.100569674580715, "learning_rate": 9.8378023856102e-06, "loss": 0.8055, "step": 1701 }, { "epoch": 0.11, "grad_norm": 1.6428242501864174, "learning_rate": 9.83754040675657e-06, "loss": 0.8038, "step": 1702 }, { "epoch": 0.11, "grad_norm": 1.7908078075912017, "learning_rate": 9.837278219995618e-06, "loss": 0.862, "step": 1703 }, { "epoch": 0.11, "grad_norm": 1.6821370088227041, "learning_rate": 9.837015825338611e-06, "loss": 0.788, "step": 1704 }, { "epoch": 0.11, "grad_norm": 1.5714665031090935, "learning_rate": 9.836753222796831e-06, "loss": 0.8616, "step": 1705 }, { "epoch": 0.11, "grad_norm": 1.0440641766189442, "learning_rate": 9.836490412381559e-06, "loss": 0.692, "step": 1706 }, { "epoch": 0.11, "grad_norm": 2.255845339788649, "learning_rate": 9.836227394104093e-06, "loss": 0.8444, "step": 1707 }, { "epoch": 0.11, "grad_norm": 1.727415730471223, "learning_rate": 9.835964167975734e-06, "loss": 0.7249, "step": 1708 }, { "epoch": 0.11, "grad_norm": 1.643138741914165, "learning_rate": 9.835700734007798e-06, "loss": 0.8398, "step": 1709 }, { "epoch": 0.11, "grad_norm": 1.8433919652043667, "learning_rate": 9.835437092211605e-06, "loss": 0.9331, "step": 1710 }, { "epoch": 0.11, "grad_norm": 1.706143269734461, "learning_rate": 9.835173242598486e-06, "loss": 0.8698, "step": 1711 }, { "epoch": 0.11, "grad_norm": 1.6904969250021586, "learning_rate": 9.834909185179782e-06, "loss": 0.6389, "step": 1712 }, { "epoch": 0.11, "grad_norm": 2.016253629055064, "learning_rate": 9.834644919966842e-06, "loss": 0.7967, "step": 1713 }, { "epoch": 0.11, "grad_norm": 1.6059398310369255, "learning_rate": 9.83438044697102e-06, "loss": 0.7427, "step": 1714 }, { "epoch": 0.11, "grad_norm": 1.749550709663428, "learning_rate": 9.834115766203687e-06, "loss": 0.8279, "step": 1715 }, { "epoch": 0.11, "grad_norm": 1.893572059087186, "learning_rate": 9.833850877676215e-06, "loss": 0.7923, "step": 1716 }, { "epoch": 0.11, "grad_norm": 2.093596552377715, "learning_rate": 9.833585781399989e-06, "loss": 0.9707, "step": 1717 }, { "epoch": 0.11, "grad_norm": 1.8083447949579323, "learning_rate": 9.833320477386403e-06, "loss": 0.8193, "step": 1718 }, { "epoch": 0.11, "grad_norm": 1.57331783575691, "learning_rate": 9.83305496564686e-06, "loss": 0.888, "step": 1719 }, { "epoch": 0.11, "grad_norm": 1.7346212320658194, "learning_rate": 9.83278924619277e-06, "loss": 0.8096, "step": 1720 }, { "epoch": 0.11, "grad_norm": 1.5920429853972216, "learning_rate": 9.832523319035553e-06, "loss": 0.7876, "step": 1721 }, { "epoch": 0.11, "grad_norm": 1.6654274807843468, "learning_rate": 9.832257184186638e-06, "loss": 0.9193, "step": 1722 }, { "epoch": 0.11, "grad_norm": 1.5120033345222639, "learning_rate": 9.831990841657465e-06, "loss": 0.7558, "step": 1723 }, { "epoch": 0.11, "grad_norm": 1.8313666822331562, "learning_rate": 9.831724291459477e-06, "loss": 0.692, "step": 1724 }, { "epoch": 0.11, "grad_norm": 1.7375011925040695, "learning_rate": 9.831457533604133e-06, "loss": 0.928, "step": 1725 }, { "epoch": 0.11, "grad_norm": 1.8225995093193121, "learning_rate": 9.831190568102895e-06, "loss": 0.8467, "step": 1726 }, { "epoch": 0.11, "grad_norm": 1.3267368424296937, "learning_rate": 9.83092339496724e-06, "loss": 0.6716, "step": 1727 }, { "epoch": 0.11, "grad_norm": 1.6636591546389616, "learning_rate": 9.830656014208648e-06, "loss": 0.8504, "step": 1728 }, { "epoch": 0.11, "grad_norm": 1.4541714559014238, "learning_rate": 9.83038842583861e-06, "loss": 0.6591, "step": 1729 }, { "epoch": 0.11, "grad_norm": 1.1069728954495042, "learning_rate": 9.830120629868628e-06, "loss": 0.7415, "step": 1730 }, { "epoch": 0.11, "grad_norm": 1.7504407790167589, "learning_rate": 9.829852626310213e-06, "loss": 0.8164, "step": 1731 }, { "epoch": 0.11, "grad_norm": 1.6666124462447418, "learning_rate": 9.829584415174879e-06, "loss": 0.8135, "step": 1732 }, { "epoch": 0.11, "grad_norm": 2.809075576066954, "learning_rate": 9.829315996474156e-06, "loss": 0.6158, "step": 1733 }, { "epoch": 0.11, "grad_norm": 1.1166094453860904, "learning_rate": 9.829047370219577e-06, "loss": 0.5925, "step": 1734 }, { "epoch": 0.11, "grad_norm": 1.1209370922639066, "learning_rate": 9.828778536422692e-06, "loss": 0.701, "step": 1735 }, { "epoch": 0.11, "grad_norm": 2.012701360936985, "learning_rate": 9.828509495095051e-06, "loss": 0.8349, "step": 1736 }, { "epoch": 0.11, "grad_norm": 2.269463552597194, "learning_rate": 9.828240246248217e-06, "loss": 0.9235, "step": 1737 }, { "epoch": 0.11, "grad_norm": 1.3110677072374912, "learning_rate": 9.827970789893764e-06, "loss": 0.7266, "step": 1738 }, { "epoch": 0.11, "grad_norm": 1.5841012512365913, "learning_rate": 9.827701126043272e-06, "loss": 0.8029, "step": 1739 }, { "epoch": 0.11, "grad_norm": 1.6301094880327125, "learning_rate": 9.82743125470833e-06, "loss": 0.7138, "step": 1740 }, { "epoch": 0.11, "grad_norm": 2.493224883220222, "learning_rate": 9.827161175900535e-06, "loss": 0.7896, "step": 1741 }, { "epoch": 0.11, "grad_norm": 2.0283307836765707, "learning_rate": 9.826890889631498e-06, "loss": 0.9106, "step": 1742 }, { "epoch": 0.11, "grad_norm": 1.272568019444936, "learning_rate": 9.826620395912832e-06, "loss": 0.638, "step": 1743 }, { "epoch": 0.11, "grad_norm": 1.7923238832298907, "learning_rate": 9.826349694756163e-06, "loss": 0.9174, "step": 1744 }, { "epoch": 0.11, "grad_norm": 1.9486944201958307, "learning_rate": 9.826078786173126e-06, "loss": 0.8586, "step": 1745 }, { "epoch": 0.11, "grad_norm": 1.8858260613652738, "learning_rate": 9.825807670175364e-06, "loss": 0.9376, "step": 1746 }, { "epoch": 0.11, "grad_norm": 1.9016466579180442, "learning_rate": 9.82553634677453e-06, "loss": 0.9003, "step": 1747 }, { "epoch": 0.11, "grad_norm": 1.5346457474871686, "learning_rate": 9.82526481598228e-06, "loss": 0.7509, "step": 1748 }, { "epoch": 0.11, "grad_norm": 2.036419244595624, "learning_rate": 9.82499307781029e-06, "loss": 0.8141, "step": 1749 }, { "epoch": 0.11, "grad_norm": 1.6341661795521738, "learning_rate": 9.824721132270236e-06, "loss": 0.6597, "step": 1750 }, { "epoch": 0.11, "grad_norm": 1.7062059810824162, "learning_rate": 9.824448979373807e-06, "loss": 0.7355, "step": 1751 }, { "epoch": 0.11, "grad_norm": 1.8075403068628515, "learning_rate": 9.824176619132698e-06, "loss": 0.8262, "step": 1752 }, { "epoch": 0.11, "grad_norm": 2.0419123246380235, "learning_rate": 9.823904051558613e-06, "loss": 0.7863, "step": 1753 }, { "epoch": 0.11, "grad_norm": 1.2862740471468965, "learning_rate": 9.82363127666327e-06, "loss": 0.7107, "step": 1754 }, { "epoch": 0.11, "grad_norm": 1.5735965114631052, "learning_rate": 9.82335829445839e-06, "loss": 0.7447, "step": 1755 }, { "epoch": 0.11, "grad_norm": 1.6390164823724604, "learning_rate": 9.823085104955705e-06, "loss": 0.8956, "step": 1756 }, { "epoch": 0.11, "grad_norm": 1.9922096186214704, "learning_rate": 9.822811708166958e-06, "loss": 0.7145, "step": 1757 }, { "epoch": 0.11, "grad_norm": 1.9338832279719664, "learning_rate": 9.822538104103898e-06, "loss": 0.8457, "step": 1758 }, { "epoch": 0.11, "grad_norm": 1.9886990979205956, "learning_rate": 9.822264292778282e-06, "loss": 0.8611, "step": 1759 }, { "epoch": 0.11, "grad_norm": 1.7122242255460438, "learning_rate": 9.821990274201883e-06, "loss": 0.8239, "step": 1760 }, { "epoch": 0.11, "grad_norm": 1.7877366966102466, "learning_rate": 9.821716048386472e-06, "loss": 0.811, "step": 1761 }, { "epoch": 0.11, "grad_norm": 1.62572273709801, "learning_rate": 9.821441615343838e-06, "loss": 0.7991, "step": 1762 }, { "epoch": 0.11, "grad_norm": 1.7020559372973807, "learning_rate": 9.821166975085774e-06, "loss": 0.8969, "step": 1763 }, { "epoch": 0.11, "grad_norm": 2.071006356383922, "learning_rate": 9.820892127624085e-06, "loss": 0.7568, "step": 1764 }, { "epoch": 0.11, "grad_norm": 1.7279147475112098, "learning_rate": 9.820617072970583e-06, "loss": 0.7963, "step": 1765 }, { "epoch": 0.11, "grad_norm": 1.7109305568181241, "learning_rate": 9.820341811137085e-06, "loss": 0.7431, "step": 1766 }, { "epoch": 0.11, "grad_norm": 1.6357236890858442, "learning_rate": 9.820066342135428e-06, "loss": 0.7543, "step": 1767 }, { "epoch": 0.11, "grad_norm": 1.7210626655229557, "learning_rate": 9.819790665977447e-06, "loss": 0.8689, "step": 1768 }, { "epoch": 0.11, "grad_norm": 1.978044728251076, "learning_rate": 9.819514782674992e-06, "loss": 0.9408, "step": 1769 }, { "epoch": 0.11, "grad_norm": 1.3821467446480895, "learning_rate": 9.819238692239919e-06, "loss": 0.7408, "step": 1770 }, { "epoch": 0.11, "grad_norm": 1.3336503132743682, "learning_rate": 9.818962394684094e-06, "loss": 0.6908, "step": 1771 }, { "epoch": 0.11, "grad_norm": 1.1925101289700908, "learning_rate": 9.818685890019389e-06, "loss": 0.6683, "step": 1772 }, { "epoch": 0.11, "grad_norm": 1.985651251524018, "learning_rate": 9.818409178257691e-06, "loss": 0.8552, "step": 1773 }, { "epoch": 0.11, "grad_norm": 1.165808582608611, "learning_rate": 9.81813225941089e-06, "loss": 0.7755, "step": 1774 }, { "epoch": 0.11, "grad_norm": 1.8179180760614313, "learning_rate": 9.817855133490891e-06, "loss": 0.8241, "step": 1775 }, { "epoch": 0.11, "grad_norm": 1.9200829871230287, "learning_rate": 9.817577800509601e-06, "loss": 0.8546, "step": 1776 }, { "epoch": 0.11, "grad_norm": 1.2620941266864107, "learning_rate": 9.817300260478942e-06, "loss": 0.7133, "step": 1777 }, { "epoch": 0.11, "grad_norm": 1.2687193565727137, "learning_rate": 9.817022513410838e-06, "loss": 0.6284, "step": 1778 }, { "epoch": 0.11, "grad_norm": 1.8948455316315054, "learning_rate": 9.816744559317228e-06, "loss": 0.8279, "step": 1779 }, { "epoch": 0.11, "grad_norm": 1.744086207100921, "learning_rate": 9.81646639821006e-06, "loss": 0.9067, "step": 1780 }, { "epoch": 0.11, "grad_norm": 1.0857523405438216, "learning_rate": 9.816188030101287e-06, "loss": 0.6613, "step": 1781 }, { "epoch": 0.11, "grad_norm": 1.6021258349425, "learning_rate": 9.815909455002872e-06, "loss": 0.7132, "step": 1782 }, { "epoch": 0.11, "grad_norm": 1.5981061671065766, "learning_rate": 9.815630672926789e-06, "loss": 0.8758, "step": 1783 }, { "epoch": 0.11, "grad_norm": 1.6339750774602944, "learning_rate": 9.815351683885017e-06, "loss": 0.8129, "step": 1784 }, { "epoch": 0.11, "grad_norm": 2.0622282621758123, "learning_rate": 9.81507248788955e-06, "loss": 0.876, "step": 1785 }, { "epoch": 0.11, "grad_norm": 1.646259369895036, "learning_rate": 9.814793084952384e-06, "loss": 0.7994, "step": 1786 }, { "epoch": 0.11, "grad_norm": 1.759280336989431, "learning_rate": 9.814513475085528e-06, "loss": 0.8862, "step": 1787 }, { "epoch": 0.11, "grad_norm": 1.6233100942528225, "learning_rate": 9.814233658301002e-06, "loss": 0.7413, "step": 1788 }, { "epoch": 0.11, "grad_norm": 1.7712043233144437, "learning_rate": 9.813953634610827e-06, "loss": 0.8325, "step": 1789 }, { "epoch": 0.11, "grad_norm": 1.5640246730160856, "learning_rate": 9.813673404027042e-06, "loss": 0.8402, "step": 1790 }, { "epoch": 0.11, "grad_norm": 1.6479671841599763, "learning_rate": 9.813392966561688e-06, "loss": 0.7267, "step": 1791 }, { "epoch": 0.11, "grad_norm": 1.7148749222739783, "learning_rate": 9.813112322226819e-06, "loss": 0.7895, "step": 1792 }, { "epoch": 0.11, "grad_norm": 1.6796406353760407, "learning_rate": 9.812831471034495e-06, "loss": 0.8264, "step": 1793 }, { "epoch": 0.11, "grad_norm": 1.864967131420658, "learning_rate": 9.812550412996788e-06, "loss": 0.7506, "step": 1794 }, { "epoch": 0.11, "grad_norm": 2.1680510043596932, "learning_rate": 9.812269148125779e-06, "loss": 0.935, "step": 1795 }, { "epoch": 0.11, "grad_norm": 1.4152586721870373, "learning_rate": 9.811987676433552e-06, "loss": 0.6788, "step": 1796 }, { "epoch": 0.12, "grad_norm": 1.4056675760220771, "learning_rate": 9.811705997932206e-06, "loss": 0.6286, "step": 1797 }, { "epoch": 0.12, "grad_norm": 3.4149550867981096, "learning_rate": 9.811424112633847e-06, "loss": 0.8555, "step": 1798 }, { "epoch": 0.12, "grad_norm": 1.7442881271748798, "learning_rate": 9.81114202055059e-06, "loss": 0.7161, "step": 1799 }, { "epoch": 0.12, "grad_norm": 1.2561354154985893, "learning_rate": 9.810859721694559e-06, "loss": 0.597, "step": 1800 }, { "epoch": 0.12, "grad_norm": 1.784340955619335, "learning_rate": 9.810577216077886e-06, "loss": 0.7799, "step": 1801 }, { "epoch": 0.12, "grad_norm": 2.1028856483415246, "learning_rate": 9.810294503712711e-06, "loss": 0.8369, "step": 1802 }, { "epoch": 0.12, "grad_norm": 1.803105463694642, "learning_rate": 9.810011584611189e-06, "loss": 0.7931, "step": 1803 }, { "epoch": 0.12, "grad_norm": 1.6930498669242877, "learning_rate": 9.809728458785474e-06, "loss": 0.9216, "step": 1804 }, { "epoch": 0.12, "grad_norm": 1.0968301510003167, "learning_rate": 9.80944512624774e-06, "loss": 0.7844, "step": 1805 }, { "epoch": 0.12, "grad_norm": 1.7812045952398474, "learning_rate": 9.809161587010156e-06, "loss": 0.7668, "step": 1806 }, { "epoch": 0.12, "grad_norm": 1.1864691674054901, "learning_rate": 9.808877841084915e-06, "loss": 0.5845, "step": 1807 }, { "epoch": 0.12, "grad_norm": 1.726911295074272, "learning_rate": 9.808593888484207e-06, "loss": 0.7761, "step": 1808 }, { "epoch": 0.12, "grad_norm": 1.955832341150983, "learning_rate": 9.808309729220241e-06, "loss": 0.8782, "step": 1809 }, { "epoch": 0.12, "grad_norm": 1.0731200701103452, "learning_rate": 9.808025363305225e-06, "loss": 0.7139, "step": 1810 }, { "epoch": 0.12, "grad_norm": 1.0644307860751339, "learning_rate": 9.807740790751383e-06, "loss": 0.6492, "step": 1811 }, { "epoch": 0.12, "grad_norm": 2.1989427839071585, "learning_rate": 9.807456011570941e-06, "loss": 0.8479, "step": 1812 }, { "epoch": 0.12, "grad_norm": 1.9282835416105086, "learning_rate": 9.807171025776145e-06, "loss": 0.8875, "step": 1813 }, { "epoch": 0.12, "grad_norm": 2.837167423563308, "learning_rate": 9.806885833379238e-06, "loss": 0.7107, "step": 1814 }, { "epoch": 0.12, "grad_norm": 1.9847511682506076, "learning_rate": 9.806600434392479e-06, "loss": 0.7793, "step": 1815 }, { "epoch": 0.12, "grad_norm": 1.8827180080094468, "learning_rate": 9.806314828828134e-06, "loss": 0.7345, "step": 1816 }, { "epoch": 0.12, "grad_norm": 1.5149352417646154, "learning_rate": 9.806029016698475e-06, "loss": 0.7104, "step": 1817 }, { "epoch": 0.12, "grad_norm": 1.2352019214529535, "learning_rate": 9.80574299801579e-06, "loss": 0.6716, "step": 1818 }, { "epoch": 0.12, "grad_norm": 2.0271332429612623, "learning_rate": 9.805456772792367e-06, "loss": 0.7636, "step": 1819 }, { "epoch": 0.12, "grad_norm": 2.0068435550672787, "learning_rate": 9.805170341040512e-06, "loss": 0.8913, "step": 1820 }, { "epoch": 0.12, "grad_norm": 1.530199250060449, "learning_rate": 9.804883702772532e-06, "loss": 0.7073, "step": 1821 }, { "epoch": 0.12, "grad_norm": 1.9751196678480032, "learning_rate": 9.804596858000745e-06, "loss": 0.7369, "step": 1822 }, { "epoch": 0.12, "grad_norm": 1.6310143403399455, "learning_rate": 9.804309806737482e-06, "loss": 0.8687, "step": 1823 }, { "epoch": 0.12, "grad_norm": 1.7066603703695422, "learning_rate": 9.80402254899508e-06, "loss": 0.9198, "step": 1824 }, { "epoch": 0.12, "grad_norm": 1.646008602268603, "learning_rate": 9.803735084785884e-06, "loss": 0.8173, "step": 1825 }, { "epoch": 0.12, "grad_norm": 1.6230612252726646, "learning_rate": 9.803447414122245e-06, "loss": 0.8996, "step": 1826 }, { "epoch": 0.12, "grad_norm": 2.0171562006500747, "learning_rate": 9.803159537016533e-06, "loss": 0.8992, "step": 1827 }, { "epoch": 0.12, "grad_norm": 1.0782393813996565, "learning_rate": 9.802871453481114e-06, "loss": 0.6462, "step": 1828 }, { "epoch": 0.12, "grad_norm": 1.2400232259947588, "learning_rate": 9.802583163528374e-06, "loss": 0.6823, "step": 1829 }, { "epoch": 0.12, "grad_norm": 1.5136928409614787, "learning_rate": 9.8022946671707e-06, "loss": 0.8334, "step": 1830 }, { "epoch": 0.12, "grad_norm": 1.2541868806514926, "learning_rate": 9.802005964420493e-06, "loss": 0.6421, "step": 1831 }, { "epoch": 0.12, "grad_norm": 1.5619382240016118, "learning_rate": 9.801717055290162e-06, "loss": 0.7636, "step": 1832 }, { "epoch": 0.12, "grad_norm": 1.9323173517986008, "learning_rate": 9.801427939792118e-06, "loss": 0.9278, "step": 1833 }, { "epoch": 0.12, "grad_norm": 1.6554169370160075, "learning_rate": 9.801138617938793e-06, "loss": 0.7918, "step": 1834 }, { "epoch": 0.12, "grad_norm": 1.8582258283020965, "learning_rate": 9.800849089742617e-06, "loss": 0.9483, "step": 1835 }, { "epoch": 0.12, "grad_norm": 1.6184998766065233, "learning_rate": 9.800559355216035e-06, "loss": 0.7727, "step": 1836 }, { "epoch": 0.12, "grad_norm": 1.6595319350435669, "learning_rate": 9.800269414371501e-06, "loss": 0.8684, "step": 1837 }, { "epoch": 0.12, "grad_norm": 1.6184758486873965, "learning_rate": 9.799979267221473e-06, "loss": 0.7311, "step": 1838 }, { "epoch": 0.12, "grad_norm": 2.17888436048882, "learning_rate": 9.799688913778423e-06, "loss": 0.7587, "step": 1839 }, { "epoch": 0.12, "grad_norm": 1.2090805718956432, "learning_rate": 9.799398354054828e-06, "loss": 0.7037, "step": 1840 }, { "epoch": 0.12, "grad_norm": 1.647699899404749, "learning_rate": 9.799107588063178e-06, "loss": 0.8809, "step": 1841 }, { "epoch": 0.12, "grad_norm": 1.6253093066320388, "learning_rate": 9.798816615815967e-06, "loss": 0.7115, "step": 1842 }, { "epoch": 0.12, "grad_norm": 1.9208134255529472, "learning_rate": 9.798525437325704e-06, "loss": 0.7347, "step": 1843 }, { "epoch": 0.12, "grad_norm": 1.1256899437891388, "learning_rate": 9.798234052604898e-06, "loss": 0.661, "step": 1844 }, { "epoch": 0.12, "grad_norm": 1.6969588225097132, "learning_rate": 9.797942461666075e-06, "loss": 0.757, "step": 1845 }, { "epoch": 0.12, "grad_norm": 1.0899535392454955, "learning_rate": 9.797650664521768e-06, "loss": 0.6147, "step": 1846 }, { "epoch": 0.12, "grad_norm": 2.0292900123041826, "learning_rate": 9.797358661184517e-06, "loss": 0.7425, "step": 1847 }, { "epoch": 0.12, "grad_norm": 1.903431770332664, "learning_rate": 9.79706645166687e-06, "loss": 0.824, "step": 1848 }, { "epoch": 0.12, "grad_norm": 1.5282109305710783, "learning_rate": 9.796774035981388e-06, "loss": 0.8333, "step": 1849 }, { "epoch": 0.12, "grad_norm": 1.7880041290426996, "learning_rate": 9.796481414140637e-06, "loss": 0.8749, "step": 1850 }, { "epoch": 0.12, "grad_norm": 2.860885527392507, "learning_rate": 9.796188586157194e-06, "loss": 0.8342, "step": 1851 }, { "epoch": 0.12, "grad_norm": 2.6231575942773944, "learning_rate": 9.795895552043644e-06, "loss": 0.8623, "step": 1852 }, { "epoch": 0.12, "grad_norm": 1.5162499494228383, "learning_rate": 9.79560231181258e-06, "loss": 0.7592, "step": 1853 }, { "epoch": 0.12, "grad_norm": 1.839077789599076, "learning_rate": 9.795308865476605e-06, "loss": 0.7382, "step": 1854 }, { "epoch": 0.12, "grad_norm": 1.7887719272469618, "learning_rate": 9.795015213048334e-06, "loss": 0.9052, "step": 1855 }, { "epoch": 0.12, "grad_norm": 1.4415144066105912, "learning_rate": 9.794721354540382e-06, "loss": 0.6267, "step": 1856 }, { "epoch": 0.12, "grad_norm": 1.5602837996188599, "learning_rate": 9.794427289965383e-06, "loss": 0.7588, "step": 1857 }, { "epoch": 0.12, "grad_norm": 2.1365927042002975, "learning_rate": 9.794133019335972e-06, "loss": 0.9232, "step": 1858 }, { "epoch": 0.12, "grad_norm": 1.6857165458139192, "learning_rate": 9.793838542664797e-06, "loss": 0.8794, "step": 1859 }, { "epoch": 0.12, "grad_norm": 1.6736268402963859, "learning_rate": 9.793543859964518e-06, "loss": 0.8884, "step": 1860 }, { "epoch": 0.12, "grad_norm": 2.0888574901984507, "learning_rate": 9.793248971247792e-06, "loss": 0.8649, "step": 1861 }, { "epoch": 0.12, "grad_norm": 1.5855962784129436, "learning_rate": 9.792953876527301e-06, "loss": 0.7226, "step": 1862 }, { "epoch": 0.12, "grad_norm": 1.7252886469434345, "learning_rate": 9.792658575815724e-06, "loss": 0.7665, "step": 1863 }, { "epoch": 0.12, "grad_norm": 1.7450748663044473, "learning_rate": 9.792363069125749e-06, "loss": 0.874, "step": 1864 }, { "epoch": 0.12, "grad_norm": 1.8360257721990987, "learning_rate": 9.792067356470083e-06, "loss": 0.8201, "step": 1865 }, { "epoch": 0.12, "grad_norm": 1.833791481268103, "learning_rate": 9.79177143786143e-06, "loss": 0.8919, "step": 1866 }, { "epoch": 0.12, "grad_norm": 1.3993973870433865, "learning_rate": 9.791475313312508e-06, "loss": 0.6529, "step": 1867 }, { "epoch": 0.12, "grad_norm": 1.7308208303983548, "learning_rate": 9.791178982836048e-06, "loss": 0.8244, "step": 1868 }, { "epoch": 0.12, "grad_norm": 1.8347767945257734, "learning_rate": 9.79088244644478e-06, "loss": 0.9057, "step": 1869 }, { "epoch": 0.12, "grad_norm": 1.3323911388905167, "learning_rate": 9.790585704151453e-06, "loss": 0.6505, "step": 1870 }, { "epoch": 0.12, "grad_norm": 0.9939429200371198, "learning_rate": 9.79028875596882e-06, "loss": 0.6546, "step": 1871 }, { "epoch": 0.12, "grad_norm": 1.8178956735146565, "learning_rate": 9.78999160190964e-06, "loss": 0.8271, "step": 1872 }, { "epoch": 0.12, "grad_norm": 1.7561930483754833, "learning_rate": 9.789694241986687e-06, "loss": 0.7769, "step": 1873 }, { "epoch": 0.12, "grad_norm": 1.709106091309926, "learning_rate": 9.789396676212742e-06, "loss": 0.8781, "step": 1874 }, { "epoch": 0.12, "grad_norm": 1.6027518828403917, "learning_rate": 9.789098904600588e-06, "loss": 0.8022, "step": 1875 }, { "epoch": 0.12, "grad_norm": 1.8357575155848307, "learning_rate": 9.788800927163028e-06, "loss": 0.7893, "step": 1876 }, { "epoch": 0.12, "grad_norm": 2.3112714914234, "learning_rate": 9.788502743912866e-06, "loss": 0.9019, "step": 1877 }, { "epoch": 0.12, "grad_norm": 1.2629724134239286, "learning_rate": 9.78820435486292e-06, "loss": 0.7903, "step": 1878 }, { "epoch": 0.12, "grad_norm": 1.3602928360640123, "learning_rate": 9.787905760026011e-06, "loss": 0.7015, "step": 1879 }, { "epoch": 0.12, "grad_norm": 1.223557780081912, "learning_rate": 9.787606959414975e-06, "loss": 0.6559, "step": 1880 }, { "epoch": 0.12, "grad_norm": 1.8358815178077872, "learning_rate": 9.78730795304265e-06, "loss": 0.883, "step": 1881 }, { "epoch": 0.12, "grad_norm": 2.2146912658928204, "learning_rate": 9.787008740921889e-06, "loss": 0.825, "step": 1882 }, { "epoch": 0.12, "grad_norm": 1.8587718450833493, "learning_rate": 9.786709323065553e-06, "loss": 0.8347, "step": 1883 }, { "epoch": 0.12, "grad_norm": 1.6271641118701186, "learning_rate": 9.786409699486506e-06, "loss": 0.7597, "step": 1884 }, { "epoch": 0.12, "grad_norm": 1.960595036804724, "learning_rate": 9.786109870197629e-06, "loss": 0.8571, "step": 1885 }, { "epoch": 0.12, "grad_norm": 1.070694352081693, "learning_rate": 9.785809835211806e-06, "loss": 0.668, "step": 1886 }, { "epoch": 0.12, "grad_norm": 2.0089256851098263, "learning_rate": 9.785509594541934e-06, "loss": 0.8618, "step": 1887 }, { "epoch": 0.12, "grad_norm": 1.543549849279983, "learning_rate": 9.785209148200913e-06, "loss": 0.6744, "step": 1888 }, { "epoch": 0.12, "grad_norm": 1.6482315027130825, "learning_rate": 9.78490849620166e-06, "loss": 0.7425, "step": 1889 }, { "epoch": 0.12, "grad_norm": 1.7604434351018734, "learning_rate": 9.784607638557094e-06, "loss": 0.8009, "step": 1890 }, { "epoch": 0.12, "grad_norm": 2.0496372709076325, "learning_rate": 9.784306575280146e-06, "loss": 0.9453, "step": 1891 }, { "epoch": 0.12, "grad_norm": 1.3785395140512546, "learning_rate": 9.784005306383754e-06, "loss": 0.6456, "step": 1892 }, { "epoch": 0.12, "grad_norm": 1.8907698247374716, "learning_rate": 9.783703831880867e-06, "loss": 0.819, "step": 1893 }, { "epoch": 0.12, "grad_norm": 2.2899062125422707, "learning_rate": 9.78340215178444e-06, "loss": 0.881, "step": 1894 }, { "epoch": 0.12, "grad_norm": 1.7512734248727042, "learning_rate": 9.78310026610744e-06, "loss": 0.9712, "step": 1895 }, { "epoch": 0.12, "grad_norm": 2.0610072728648103, "learning_rate": 9.782798174862842e-06, "loss": 0.7423, "step": 1896 }, { "epoch": 0.12, "grad_norm": 1.3230690382495995, "learning_rate": 9.78249587806363e-06, "loss": 0.7169, "step": 1897 }, { "epoch": 0.12, "grad_norm": 1.6168518258217333, "learning_rate": 9.782193375722792e-06, "loss": 0.7659, "step": 1898 }, { "epoch": 0.12, "grad_norm": 1.3369003856869754, "learning_rate": 9.78189066785333e-06, "loss": 0.6345, "step": 1899 }, { "epoch": 0.12, "grad_norm": 1.731494811612202, "learning_rate": 9.78158775446826e-06, "loss": 0.8316, "step": 1900 }, { "epoch": 0.12, "grad_norm": 1.935792956411186, "learning_rate": 9.781284635580593e-06, "loss": 0.8391, "step": 1901 }, { "epoch": 0.12, "grad_norm": 1.647222675106805, "learning_rate": 9.780981311203359e-06, "loss": 0.9348, "step": 1902 }, { "epoch": 0.12, "grad_norm": 1.7674515200711347, "learning_rate": 9.780677781349594e-06, "loss": 0.7541, "step": 1903 }, { "epoch": 0.12, "grad_norm": 1.8303868837382775, "learning_rate": 9.780374046032345e-06, "loss": 0.6892, "step": 1904 }, { "epoch": 0.12, "grad_norm": 1.7224040229152011, "learning_rate": 9.780070105264665e-06, "loss": 0.8586, "step": 1905 }, { "epoch": 0.12, "grad_norm": 1.6502191449657235, "learning_rate": 9.779765959059616e-06, "loss": 0.7786, "step": 1906 }, { "epoch": 0.12, "grad_norm": 1.2042195550663755, "learning_rate": 9.779461607430268e-06, "loss": 0.6916, "step": 1907 }, { "epoch": 0.12, "grad_norm": 1.0119132035394593, "learning_rate": 9.779157050389706e-06, "loss": 0.6225, "step": 1908 }, { "epoch": 0.12, "grad_norm": 1.761121756556953, "learning_rate": 9.778852287951013e-06, "loss": 0.8565, "step": 1909 }, { "epoch": 0.12, "grad_norm": 1.9375651134463432, "learning_rate": 9.778547320127291e-06, "loss": 0.7257, "step": 1910 }, { "epoch": 0.12, "grad_norm": 1.7174723715217983, "learning_rate": 9.77824214693165e-06, "loss": 0.9154, "step": 1911 }, { "epoch": 0.12, "grad_norm": 1.7530499360050822, "learning_rate": 9.777936768377197e-06, "loss": 0.9027, "step": 1912 }, { "epoch": 0.12, "grad_norm": 1.6320082306034118, "learning_rate": 9.777631184477065e-06, "loss": 0.8538, "step": 1913 }, { "epoch": 0.12, "grad_norm": 1.8333404838979341, "learning_rate": 9.777325395244381e-06, "loss": 0.8218, "step": 1914 }, { "epoch": 0.12, "grad_norm": 1.6650664308653056, "learning_rate": 9.777019400692292e-06, "loss": 0.8591, "step": 1915 }, { "epoch": 0.12, "grad_norm": 1.6388090098274846, "learning_rate": 9.776713200833948e-06, "loss": 0.9385, "step": 1916 }, { "epoch": 0.12, "grad_norm": 1.649254012452513, "learning_rate": 9.776406795682506e-06, "loss": 0.888, "step": 1917 }, { "epoch": 0.12, "grad_norm": 1.830118933320493, "learning_rate": 9.776100185251138e-06, "loss": 0.906, "step": 1918 }, { "epoch": 0.12, "grad_norm": 1.7576180146951768, "learning_rate": 9.775793369553019e-06, "loss": 0.9527, "step": 1919 }, { "epoch": 0.12, "grad_norm": 1.813192098901177, "learning_rate": 9.775486348601336e-06, "loss": 0.7869, "step": 1920 }, { "epoch": 0.12, "grad_norm": 1.37921701728362, "learning_rate": 9.775179122409283e-06, "loss": 0.6427, "step": 1921 }, { "epoch": 0.12, "grad_norm": 1.669664242027252, "learning_rate": 9.774871690990069e-06, "loss": 0.7433, "step": 1922 }, { "epoch": 0.12, "grad_norm": 2.2314224522692823, "learning_rate": 9.7745640543569e-06, "loss": 0.9423, "step": 1923 }, { "epoch": 0.12, "grad_norm": 1.6858937878049205, "learning_rate": 9.774256212523003e-06, "loss": 0.7532, "step": 1924 }, { "epoch": 0.12, "grad_norm": 1.5955869165357959, "learning_rate": 9.773948165501606e-06, "loss": 0.7568, "step": 1925 }, { "epoch": 0.12, "grad_norm": 4.151779101395218, "learning_rate": 9.773639913305946e-06, "loss": 0.8251, "step": 1926 }, { "epoch": 0.12, "grad_norm": 2.1206945292845436, "learning_rate": 9.773331455949275e-06, "loss": 0.8417, "step": 1927 }, { "epoch": 0.12, "grad_norm": 1.5059831062169033, "learning_rate": 9.773022793444848e-06, "loss": 0.7303, "step": 1928 }, { "epoch": 0.12, "grad_norm": 1.871743009856899, "learning_rate": 9.772713925805932e-06, "loss": 0.8226, "step": 1929 }, { "epoch": 0.12, "grad_norm": 1.817997849384838, "learning_rate": 9.772404853045798e-06, "loss": 0.8103, "step": 1930 }, { "epoch": 0.12, "grad_norm": 1.7141981867220517, "learning_rate": 9.772095575177733e-06, "loss": 0.9041, "step": 1931 }, { "epoch": 0.12, "grad_norm": 1.910025007834506, "learning_rate": 9.771786092215027e-06, "loss": 0.8141, "step": 1932 }, { "epoch": 0.12, "grad_norm": 1.7296608148301382, "learning_rate": 9.771476404170984e-06, "loss": 0.8187, "step": 1933 }, { "epoch": 0.12, "grad_norm": 1.9668999097162325, "learning_rate": 9.771166511058908e-06, "loss": 0.7437, "step": 1934 }, { "epoch": 0.12, "grad_norm": 1.2675025838638034, "learning_rate": 9.770856412892122e-06, "loss": 0.7544, "step": 1935 }, { "epoch": 0.12, "grad_norm": 1.779105143096287, "learning_rate": 9.770546109683953e-06, "loss": 0.9716, "step": 1936 }, { "epoch": 0.12, "grad_norm": 1.7123380562717432, "learning_rate": 9.770235601447737e-06, "loss": 0.8452, "step": 1937 }, { "epoch": 0.12, "grad_norm": 1.5203788191332952, "learning_rate": 9.769924888196817e-06, "loss": 0.6417, "step": 1938 }, { "epoch": 0.12, "grad_norm": 1.571834514645963, "learning_rate": 9.769613969944549e-06, "loss": 0.8908, "step": 1939 }, { "epoch": 0.12, "grad_norm": 1.8945869178014665, "learning_rate": 9.769302846704295e-06, "loss": 0.8224, "step": 1940 }, { "epoch": 0.12, "grad_norm": 2.5972785064345327, "learning_rate": 9.768991518489427e-06, "loss": 0.9001, "step": 1941 }, { "epoch": 0.12, "grad_norm": 1.6844058839009322, "learning_rate": 9.768679985313323e-06, "loss": 0.7887, "step": 1942 }, { "epoch": 0.12, "grad_norm": 1.1984234254936903, "learning_rate": 9.768368247189375e-06, "loss": 0.6713, "step": 1943 }, { "epoch": 0.12, "grad_norm": 2.125797801758725, "learning_rate": 9.76805630413098e-06, "loss": 0.9716, "step": 1944 }, { "epoch": 0.12, "grad_norm": 1.6669919582416821, "learning_rate": 9.767744156151542e-06, "loss": 0.8491, "step": 1945 }, { "epoch": 0.12, "grad_norm": 1.8115739613505557, "learning_rate": 9.76743180326448e-06, "loss": 0.7985, "step": 1946 }, { "epoch": 0.12, "grad_norm": 1.669113957524938, "learning_rate": 9.767119245483217e-06, "loss": 0.7994, "step": 1947 }, { "epoch": 0.12, "grad_norm": 1.6826220206916092, "learning_rate": 9.766806482821185e-06, "loss": 0.8383, "step": 1948 }, { "epoch": 0.12, "grad_norm": 1.5068767954925044, "learning_rate": 9.766493515291827e-06, "loss": 0.8662, "step": 1949 }, { "epoch": 0.12, "grad_norm": 1.2080728348314564, "learning_rate": 9.766180342908593e-06, "loss": 0.6753, "step": 1950 }, { "epoch": 0.12, "grad_norm": 1.7917347508948525, "learning_rate": 9.765866965684944e-06, "loss": 0.7864, "step": 1951 }, { "epoch": 0.12, "grad_norm": 1.70558898566606, "learning_rate": 9.765553383634347e-06, "loss": 0.8283, "step": 1952 }, { "epoch": 0.13, "grad_norm": 2.181292983870311, "learning_rate": 9.76523959677028e-06, "loss": 0.9976, "step": 1953 }, { "epoch": 0.13, "grad_norm": 2.0309166546542583, "learning_rate": 9.764925605106228e-06, "loss": 0.845, "step": 1954 }, { "epoch": 0.13, "grad_norm": 1.159646246748942, "learning_rate": 9.764611408655687e-06, "loss": 0.608, "step": 1955 }, { "epoch": 0.13, "grad_norm": 1.6924100496925016, "learning_rate": 9.764297007432158e-06, "loss": 0.7403, "step": 1956 }, { "epoch": 0.13, "grad_norm": 1.724034384274952, "learning_rate": 9.763982401449154e-06, "loss": 0.8457, "step": 1957 }, { "epoch": 0.13, "grad_norm": 1.07122085177683, "learning_rate": 9.763667590720197e-06, "loss": 0.634, "step": 1958 }, { "epoch": 0.13, "grad_norm": 1.2285382428896066, "learning_rate": 9.763352575258817e-06, "loss": 0.633, "step": 1959 }, { "epoch": 0.13, "grad_norm": 1.4716209403493883, "learning_rate": 9.763037355078553e-06, "loss": 0.756, "step": 1960 }, { "epoch": 0.13, "grad_norm": 1.2404843208479301, "learning_rate": 9.762721930192953e-06, "loss": 0.6598, "step": 1961 }, { "epoch": 0.13, "grad_norm": 1.736435009395049, "learning_rate": 9.762406300615572e-06, "loss": 0.7982, "step": 1962 }, { "epoch": 0.13, "grad_norm": 2.647113944283198, "learning_rate": 9.762090466359975e-06, "loss": 0.7523, "step": 1963 }, { "epoch": 0.13, "grad_norm": 1.9768817038864033, "learning_rate": 9.761774427439736e-06, "loss": 0.7893, "step": 1964 }, { "epoch": 0.13, "grad_norm": 1.8113381589769217, "learning_rate": 9.761458183868437e-06, "loss": 0.791, "step": 1965 }, { "epoch": 0.13, "grad_norm": 1.5348139906203704, "learning_rate": 9.761141735659671e-06, "loss": 0.7698, "step": 1966 }, { "epoch": 0.13, "grad_norm": 1.3533695795993235, "learning_rate": 9.760825082827039e-06, "loss": 0.7053, "step": 1967 }, { "epoch": 0.13, "grad_norm": 1.49098532891538, "learning_rate": 9.760508225384149e-06, "loss": 0.8204, "step": 1968 }, { "epoch": 0.13, "grad_norm": 1.9072066189090122, "learning_rate": 9.760191163344617e-06, "loss": 0.9217, "step": 1969 }, { "epoch": 0.13, "grad_norm": 1.226359500237629, "learning_rate": 9.759873896722072e-06, "loss": 0.6832, "step": 1970 }, { "epoch": 0.13, "grad_norm": 1.2474802114625987, "learning_rate": 9.75955642553015e-06, "loss": 0.5203, "step": 1971 }, { "epoch": 0.13, "grad_norm": 1.7714199377622564, "learning_rate": 9.759238749782491e-06, "loss": 0.9637, "step": 1972 }, { "epoch": 0.13, "grad_norm": 1.6386706740500296, "learning_rate": 9.758920869492755e-06, "loss": 0.8799, "step": 1973 }, { "epoch": 0.13, "grad_norm": 2.385331438198258, "learning_rate": 9.758602784674597e-06, "loss": 0.8792, "step": 1974 }, { "epoch": 0.13, "grad_norm": 0.9970317428808199, "learning_rate": 9.758284495341692e-06, "loss": 0.6704, "step": 1975 }, { "epoch": 0.13, "grad_norm": 2.1580536694790697, "learning_rate": 9.757966001507718e-06, "loss": 0.894, "step": 1976 }, { "epoch": 0.13, "grad_norm": 1.774635992796311, "learning_rate": 9.757647303186361e-06, "loss": 0.9426, "step": 1977 }, { "epoch": 0.13, "grad_norm": 1.6787891022195087, "learning_rate": 9.757328400391321e-06, "loss": 0.8375, "step": 1978 }, { "epoch": 0.13, "grad_norm": 1.769668831029147, "learning_rate": 9.757009293136303e-06, "loss": 0.7434, "step": 1979 }, { "epoch": 0.13, "grad_norm": 1.3246960011849074, "learning_rate": 9.756689981435024e-06, "loss": 0.6773, "step": 1980 }, { "epoch": 0.13, "grad_norm": 1.6400147635719424, "learning_rate": 9.756370465301202e-06, "loss": 0.7971, "step": 1981 }, { "epoch": 0.13, "grad_norm": 4.320685636684676, "learning_rate": 9.756050744748574e-06, "loss": 0.8158, "step": 1982 }, { "epoch": 0.13, "grad_norm": 1.8930679386376714, "learning_rate": 9.755730819790876e-06, "loss": 0.8046, "step": 1983 }, { "epoch": 0.13, "grad_norm": 1.8330485880073273, "learning_rate": 9.755410690441862e-06, "loss": 0.8937, "step": 1984 }, { "epoch": 0.13, "grad_norm": 1.227527700416581, "learning_rate": 9.755090356715288e-06, "loss": 0.6603, "step": 1985 }, { "epoch": 0.13, "grad_norm": 1.6250336072125917, "learning_rate": 9.754769818624925e-06, "loss": 0.7988, "step": 1986 }, { "epoch": 0.13, "grad_norm": 1.3277224721265908, "learning_rate": 9.754449076184544e-06, "loss": 0.6725, "step": 1987 }, { "epoch": 0.13, "grad_norm": 1.6264172257376592, "learning_rate": 9.754128129407932e-06, "loss": 0.8137, "step": 1988 }, { "epoch": 0.13, "grad_norm": 1.7438714486703206, "learning_rate": 9.753806978308884e-06, "loss": 0.8372, "step": 1989 }, { "epoch": 0.13, "grad_norm": 1.7202583762012205, "learning_rate": 9.7534856229012e-06, "loss": 0.6933, "step": 1990 }, { "epoch": 0.13, "grad_norm": 1.8631485979081805, "learning_rate": 9.753164063198693e-06, "loss": 0.8741, "step": 1991 }, { "epoch": 0.13, "grad_norm": 1.775665026916873, "learning_rate": 9.752842299215184e-06, "loss": 0.7307, "step": 1992 }, { "epoch": 0.13, "grad_norm": 1.6639528106684143, "learning_rate": 9.752520330964497e-06, "loss": 0.7765, "step": 1993 }, { "epoch": 0.13, "grad_norm": 1.7236945787497018, "learning_rate": 9.752198158460475e-06, "loss": 0.8691, "step": 1994 }, { "epoch": 0.13, "grad_norm": 1.8051792751534101, "learning_rate": 9.75187578171696e-06, "loss": 0.909, "step": 1995 }, { "epoch": 0.13, "grad_norm": 1.6789052825592234, "learning_rate": 9.751553200747811e-06, "loss": 0.8627, "step": 1996 }, { "epoch": 0.13, "grad_norm": 1.8898495103818773, "learning_rate": 9.751230415566888e-06, "loss": 0.8845, "step": 1997 }, { "epoch": 0.13, "grad_norm": 1.803945650841158, "learning_rate": 9.750907426188067e-06, "loss": 0.9284, "step": 1998 }, { "epoch": 0.13, "grad_norm": 1.5965843510066802, "learning_rate": 9.750584232625226e-06, "loss": 0.7718, "step": 1999 }, { "epoch": 0.13, "grad_norm": 1.911524986895219, "learning_rate": 9.750260834892259e-06, "loss": 0.9236, "step": 2000 }, { "epoch": 0.13, "grad_norm": 1.1643113085329473, "learning_rate": 9.749937233003062e-06, "loss": 0.6307, "step": 2001 }, { "epoch": 0.13, "grad_norm": 2.049457950075909, "learning_rate": 9.749613426971544e-06, "loss": 0.8181, "step": 2002 }, { "epoch": 0.13, "grad_norm": 1.591931307857672, "learning_rate": 9.749289416811622e-06, "loss": 0.7204, "step": 2003 }, { "epoch": 0.13, "grad_norm": 1.9301305205924137, "learning_rate": 9.74896520253722e-06, "loss": 0.8268, "step": 2004 }, { "epoch": 0.13, "grad_norm": 3.6492945658124674, "learning_rate": 9.74864078416227e-06, "loss": 0.5446, "step": 2005 }, { "epoch": 0.13, "grad_norm": 1.6902721470216397, "learning_rate": 9.74831616170072e-06, "loss": 0.8156, "step": 2006 }, { "epoch": 0.13, "grad_norm": 1.5538544502318405, "learning_rate": 9.747991335166518e-06, "loss": 0.9121, "step": 2007 }, { "epoch": 0.13, "grad_norm": 2.2593606950924663, "learning_rate": 9.747666304573626e-06, "loss": 0.8749, "step": 2008 }, { "epoch": 0.13, "grad_norm": 1.2657512814680925, "learning_rate": 9.747341069936013e-06, "loss": 0.6413, "step": 2009 }, { "epoch": 0.13, "grad_norm": 1.7985040940043537, "learning_rate": 9.747015631267656e-06, "loss": 0.796, "step": 2010 }, { "epoch": 0.13, "grad_norm": 1.6559976409805146, "learning_rate": 9.74668998858254e-06, "loss": 0.7301, "step": 2011 }, { "epoch": 0.13, "grad_norm": 2.0248466781981644, "learning_rate": 9.746364141894665e-06, "loss": 0.7306, "step": 2012 }, { "epoch": 0.13, "grad_norm": 1.7313608047523517, "learning_rate": 9.746038091218032e-06, "loss": 0.8881, "step": 2013 }, { "epoch": 0.13, "grad_norm": 1.757632718171916, "learning_rate": 9.745711836566653e-06, "loss": 0.8781, "step": 2014 }, { "epoch": 0.13, "grad_norm": 1.5749267200726178, "learning_rate": 9.745385377954555e-06, "loss": 0.8431, "step": 2015 }, { "epoch": 0.13, "grad_norm": 2.7437934095355745, "learning_rate": 9.745058715395761e-06, "loss": 0.8232, "step": 2016 }, { "epoch": 0.13, "grad_norm": 1.9870489713060149, "learning_rate": 9.744731848904316e-06, "loss": 0.8768, "step": 2017 }, { "epoch": 0.13, "grad_norm": 2.019725378317635, "learning_rate": 9.744404778494267e-06, "loss": 0.8829, "step": 2018 }, { "epoch": 0.13, "grad_norm": 2.797723876791858, "learning_rate": 9.74407750417967e-06, "loss": 0.7549, "step": 2019 }, { "epoch": 0.13, "grad_norm": 1.8338146435305058, "learning_rate": 9.743750025974587e-06, "loss": 0.8243, "step": 2020 }, { "epoch": 0.13, "grad_norm": 1.7640699557883328, "learning_rate": 9.743422343893099e-06, "loss": 0.7659, "step": 2021 }, { "epoch": 0.13, "grad_norm": 1.2485777026320009, "learning_rate": 9.743094457949286e-06, "loss": 0.6014, "step": 2022 }, { "epoch": 0.13, "grad_norm": 2.1000701283940297, "learning_rate": 9.742766368157239e-06, "loss": 0.9101, "step": 2023 }, { "epoch": 0.13, "grad_norm": 2.0525369559953655, "learning_rate": 9.74243807453106e-06, "loss": 0.7617, "step": 2024 }, { "epoch": 0.13, "grad_norm": 1.756486282657339, "learning_rate": 9.742109577084857e-06, "loss": 0.9498, "step": 2025 }, { "epoch": 0.13, "grad_norm": 1.9532792711365918, "learning_rate": 9.74178087583275e-06, "loss": 0.8186, "step": 2026 }, { "epoch": 0.13, "grad_norm": 1.6785678899505747, "learning_rate": 9.741451970788863e-06, "loss": 0.7749, "step": 2027 }, { "epoch": 0.13, "grad_norm": 1.8143541574682238, "learning_rate": 9.741122861967335e-06, "loss": 0.834, "step": 2028 }, { "epoch": 0.13, "grad_norm": 1.5522846771591718, "learning_rate": 9.740793549382308e-06, "loss": 0.6782, "step": 2029 }, { "epoch": 0.13, "grad_norm": 1.741252864824638, "learning_rate": 9.740464033047937e-06, "loss": 0.8362, "step": 2030 }, { "epoch": 0.13, "grad_norm": 1.8157228083701673, "learning_rate": 9.740134312978382e-06, "loss": 0.9057, "step": 2031 }, { "epoch": 0.13, "grad_norm": 2.0844751919712285, "learning_rate": 9.739804389187814e-06, "loss": 0.7972, "step": 2032 }, { "epoch": 0.13, "grad_norm": 1.4943423860601108, "learning_rate": 9.739474261690415e-06, "loss": 0.7216, "step": 2033 }, { "epoch": 0.13, "grad_norm": 1.6965542265293778, "learning_rate": 9.73914393050037e-06, "loss": 0.6921, "step": 2034 }, { "epoch": 0.13, "grad_norm": 2.08639850137397, "learning_rate": 9.73881339563188e-06, "loss": 0.832, "step": 2035 }, { "epoch": 0.13, "grad_norm": 1.652401971769774, "learning_rate": 9.738482657099144e-06, "loss": 0.8858, "step": 2036 }, { "epoch": 0.13, "grad_norm": 1.359316086789431, "learning_rate": 9.738151714916382e-06, "loss": 0.6977, "step": 2037 }, { "epoch": 0.13, "grad_norm": 1.9492042874733786, "learning_rate": 9.737820569097815e-06, "loss": 0.9165, "step": 2038 }, { "epoch": 0.13, "grad_norm": 1.6758009676965029, "learning_rate": 9.737489219657676e-06, "loss": 0.9604, "step": 2039 }, { "epoch": 0.13, "grad_norm": 1.8067370200173505, "learning_rate": 9.737157666610204e-06, "loss": 0.7264, "step": 2040 }, { "epoch": 0.13, "grad_norm": 1.6936017369260936, "learning_rate": 9.73682590996965e-06, "loss": 0.8156, "step": 2041 }, { "epoch": 0.13, "grad_norm": 1.7325414338808292, "learning_rate": 9.736493949750273e-06, "loss": 0.8255, "step": 2042 }, { "epoch": 0.13, "grad_norm": 1.8427122932829827, "learning_rate": 9.736161785966339e-06, "loss": 0.8296, "step": 2043 }, { "epoch": 0.13, "grad_norm": 1.6312169263318794, "learning_rate": 9.735829418632121e-06, "loss": 0.791, "step": 2044 }, { "epoch": 0.13, "grad_norm": 1.6478416028328786, "learning_rate": 9.735496847761908e-06, "loss": 0.8067, "step": 2045 }, { "epoch": 0.13, "grad_norm": 1.6439612442175209, "learning_rate": 9.735164073369992e-06, "loss": 0.8059, "step": 2046 }, { "epoch": 0.13, "grad_norm": 1.837737590524252, "learning_rate": 9.734831095470671e-06, "loss": 0.8329, "step": 2047 }, { "epoch": 0.13, "grad_norm": 1.6836726060814202, "learning_rate": 9.734497914078261e-06, "loss": 0.8011, "step": 2048 }, { "epoch": 0.13, "grad_norm": 4.033681030212577, "learning_rate": 9.73416452920708e-06, "loss": 0.8109, "step": 2049 }, { "epoch": 0.13, "grad_norm": 1.7754139664673383, "learning_rate": 9.733830940871454e-06, "loss": 0.8761, "step": 2050 }, { "epoch": 0.13, "grad_norm": 2.0951868783484446, "learning_rate": 9.73349714908572e-06, "loss": 0.7626, "step": 2051 }, { "epoch": 0.13, "grad_norm": 1.5329898009111922, "learning_rate": 9.733163153864227e-06, "loss": 0.8134, "step": 2052 }, { "epoch": 0.13, "grad_norm": 1.9712037200401862, "learning_rate": 9.732828955221328e-06, "loss": 0.7753, "step": 2053 }, { "epoch": 0.13, "grad_norm": 1.7994071017242377, "learning_rate": 9.732494553171384e-06, "loss": 0.8675, "step": 2054 }, { "epoch": 0.13, "grad_norm": 1.8664489952986743, "learning_rate": 9.732159947728767e-06, "loss": 0.8481, "step": 2055 }, { "epoch": 0.13, "grad_norm": 1.6422750412490124, "learning_rate": 9.731825138907863e-06, "loss": 0.8162, "step": 2056 }, { "epoch": 0.13, "grad_norm": 1.5818758331714597, "learning_rate": 9.731490126723053e-06, "loss": 0.8375, "step": 2057 }, { "epoch": 0.13, "grad_norm": 1.6976338435497875, "learning_rate": 9.731154911188742e-06, "loss": 0.7458, "step": 2058 }, { "epoch": 0.13, "grad_norm": 1.8900253371769917, "learning_rate": 9.730819492319336e-06, "loss": 0.7796, "step": 2059 }, { "epoch": 0.13, "grad_norm": 1.7321743052736878, "learning_rate": 9.730483870129247e-06, "loss": 0.9236, "step": 2060 }, { "epoch": 0.13, "grad_norm": 1.8057583466300269, "learning_rate": 9.730148044632902e-06, "loss": 0.8568, "step": 2061 }, { "epoch": 0.13, "grad_norm": 1.69647287201922, "learning_rate": 9.729812015844733e-06, "loss": 0.7392, "step": 2062 }, { "epoch": 0.13, "grad_norm": 1.7039285703632239, "learning_rate": 9.729475783779182e-06, "loss": 0.8853, "step": 2063 }, { "epoch": 0.13, "grad_norm": 2.2342763419164604, "learning_rate": 9.729139348450701e-06, "loss": 0.8207, "step": 2064 }, { "epoch": 0.13, "grad_norm": 1.1744436038575865, "learning_rate": 9.728802709873747e-06, "loss": 0.6811, "step": 2065 }, { "epoch": 0.13, "grad_norm": 2.0184284453975874, "learning_rate": 9.72846586806279e-06, "loss": 0.9473, "step": 2066 }, { "epoch": 0.13, "grad_norm": 1.715136685186203, "learning_rate": 9.728128823032305e-06, "loss": 0.6838, "step": 2067 }, { "epoch": 0.13, "grad_norm": 2.021077525849792, "learning_rate": 9.727791574796779e-06, "loss": 0.7872, "step": 2068 }, { "epoch": 0.13, "grad_norm": 1.584471690037209, "learning_rate": 9.727454123370705e-06, "loss": 0.7686, "step": 2069 }, { "epoch": 0.13, "grad_norm": 1.6960145759659702, "learning_rate": 9.727116468768586e-06, "loss": 0.8132, "step": 2070 }, { "epoch": 0.13, "grad_norm": 1.6534763110877901, "learning_rate": 9.726778611004936e-06, "loss": 0.7933, "step": 2071 }, { "epoch": 0.13, "grad_norm": 2.203184836151618, "learning_rate": 9.726440550094272e-06, "loss": 0.8506, "step": 2072 }, { "epoch": 0.13, "grad_norm": 1.4552821481213287, "learning_rate": 9.726102286051126e-06, "loss": 0.5791, "step": 2073 }, { "epoch": 0.13, "grad_norm": 1.640912926181151, "learning_rate": 9.725763818890035e-06, "loss": 0.881, "step": 2074 }, { "epoch": 0.13, "grad_norm": 1.2118733989806365, "learning_rate": 9.725425148625544e-06, "loss": 0.6305, "step": 2075 }, { "epoch": 0.13, "grad_norm": 1.649954228552629, "learning_rate": 9.725086275272209e-06, "loss": 0.8115, "step": 2076 }, { "epoch": 0.13, "grad_norm": 2.3272807811636618, "learning_rate": 9.724747198844597e-06, "loss": 0.8428, "step": 2077 }, { "epoch": 0.13, "grad_norm": 1.2020422443883307, "learning_rate": 9.724407919357276e-06, "loss": 0.694, "step": 2078 }, { "epoch": 0.13, "grad_norm": 1.526249754317424, "learning_rate": 9.724068436824831e-06, "loss": 0.6357, "step": 2079 }, { "epoch": 0.13, "grad_norm": 1.7748356851668425, "learning_rate": 9.723728751261852e-06, "loss": 0.7985, "step": 2080 }, { "epoch": 0.13, "grad_norm": 1.7247031116056157, "learning_rate": 9.723388862682936e-06, "loss": 0.849, "step": 2081 }, { "epoch": 0.13, "grad_norm": 1.6404279935602593, "learning_rate": 9.723048771102692e-06, "loss": 0.8224, "step": 2082 }, { "epoch": 0.13, "grad_norm": 1.2880142718504286, "learning_rate": 9.722708476535736e-06, "loss": 0.6921, "step": 2083 }, { "epoch": 0.13, "grad_norm": 1.8418562366199793, "learning_rate": 9.722367978996694e-06, "loss": 0.8636, "step": 2084 }, { "epoch": 0.13, "grad_norm": 2.020154251133528, "learning_rate": 9.722027278500201e-06, "loss": 0.8133, "step": 2085 }, { "epoch": 0.13, "grad_norm": 2.524928824450462, "learning_rate": 9.721686375060897e-06, "loss": 0.7606, "step": 2086 }, { "epoch": 0.13, "grad_norm": 1.7036931495565346, "learning_rate": 9.721345268693433e-06, "loss": 0.7788, "step": 2087 }, { "epoch": 0.13, "grad_norm": 2.7355424915235425, "learning_rate": 9.72100395941247e-06, "loss": 0.7674, "step": 2088 }, { "epoch": 0.13, "grad_norm": 1.6804676431770758, "learning_rate": 9.720662447232679e-06, "loss": 0.8052, "step": 2089 }, { "epoch": 0.13, "grad_norm": 1.6551431962771994, "learning_rate": 9.720320732168733e-06, "loss": 0.8527, "step": 2090 }, { "epoch": 0.13, "grad_norm": 1.9138646224779703, "learning_rate": 9.719978814235323e-06, "loss": 0.8594, "step": 2091 }, { "epoch": 0.13, "grad_norm": 1.7252256271255375, "learning_rate": 9.71963669344714e-06, "loss": 0.8742, "step": 2092 }, { "epoch": 0.13, "grad_norm": 1.989319852157514, "learning_rate": 9.71929436981889e-06, "loss": 0.8048, "step": 2093 }, { "epoch": 0.13, "grad_norm": 1.5760443952333747, "learning_rate": 9.718951843365285e-06, "loss": 0.91, "step": 2094 }, { "epoch": 0.13, "grad_norm": 2.1987049078683474, "learning_rate": 9.718609114101045e-06, "loss": 0.8417, "step": 2095 }, { "epoch": 0.13, "grad_norm": 1.533080720877787, "learning_rate": 9.7182661820409e-06, "loss": 0.7427, "step": 2096 }, { "epoch": 0.13, "grad_norm": 1.780781523143082, "learning_rate": 9.717923047199591e-06, "loss": 0.8791, "step": 2097 }, { "epoch": 0.13, "grad_norm": 1.7607268579184847, "learning_rate": 9.71757970959186e-06, "loss": 0.7445, "step": 2098 }, { "epoch": 0.13, "grad_norm": 1.7179600640998245, "learning_rate": 9.71723616923247e-06, "loss": 0.8025, "step": 2099 }, { "epoch": 0.13, "grad_norm": 1.8204517446759965, "learning_rate": 9.716892426136179e-06, "loss": 0.8934, "step": 2100 }, { "epoch": 0.13, "grad_norm": 1.6710389682748623, "learning_rate": 9.716548480317763e-06, "loss": 0.7341, "step": 2101 }, { "epoch": 0.13, "grad_norm": 1.6253983618906618, "learning_rate": 9.716204331792005e-06, "loss": 0.9172, "step": 2102 }, { "epoch": 0.13, "grad_norm": 1.5367558075924852, "learning_rate": 9.715859980573694e-06, "loss": 0.8422, "step": 2103 }, { "epoch": 0.13, "grad_norm": 1.560204831001006, "learning_rate": 9.715515426677633e-06, "loss": 1.05, "step": 2104 }, { "epoch": 0.13, "grad_norm": 1.6594194013241392, "learning_rate": 9.715170670118625e-06, "loss": 0.8185, "step": 2105 }, { "epoch": 0.13, "grad_norm": 1.5157328101029377, "learning_rate": 9.714825710911489e-06, "loss": 0.7456, "step": 2106 }, { "epoch": 0.13, "grad_norm": 1.5929980895491895, "learning_rate": 9.714480549071053e-06, "loss": 0.8688, "step": 2107 }, { "epoch": 0.13, "grad_norm": 1.871416152201751, "learning_rate": 9.714135184612149e-06, "loss": 0.7613, "step": 2108 }, { "epoch": 0.13, "grad_norm": 1.568356115887701, "learning_rate": 9.713789617549621e-06, "loss": 0.8428, "step": 2109 }, { "epoch": 0.14, "grad_norm": 1.8666797120732272, "learning_rate": 9.71344384789832e-06, "loss": 0.833, "step": 2110 }, { "epoch": 0.14, "grad_norm": 1.780148193361289, "learning_rate": 9.713097875673105e-06, "loss": 0.8057, "step": 2111 }, { "epoch": 0.14, "grad_norm": 1.6010174219912912, "learning_rate": 9.71275170088885e-06, "loss": 0.9986, "step": 2112 }, { "epoch": 0.14, "grad_norm": 1.1950308501272748, "learning_rate": 9.712405323560427e-06, "loss": 0.7338, "step": 2113 }, { "epoch": 0.14, "grad_norm": 1.6727286300096849, "learning_rate": 9.712058743702727e-06, "loss": 0.792, "step": 2114 }, { "epoch": 0.14, "grad_norm": 1.8642751071778354, "learning_rate": 9.711711961330644e-06, "loss": 0.8558, "step": 2115 }, { "epoch": 0.14, "grad_norm": 1.0917280188473149, "learning_rate": 9.711364976459078e-06, "loss": 0.6495, "step": 2116 }, { "epoch": 0.14, "grad_norm": 1.101979360600155, "learning_rate": 9.711017789102948e-06, "loss": 0.6063, "step": 2117 }, { "epoch": 0.14, "grad_norm": 1.7081731667047642, "learning_rate": 9.710670399277174e-06, "loss": 0.893, "step": 2118 }, { "epoch": 0.14, "grad_norm": 2.6573879446939617, "learning_rate": 9.710322806996682e-06, "loss": 0.9497, "step": 2119 }, { "epoch": 0.14, "grad_norm": 1.1811015142551369, "learning_rate": 9.709975012276416e-06, "loss": 0.6139, "step": 2120 }, { "epoch": 0.14, "grad_norm": 1.7261693249287762, "learning_rate": 9.70962701513132e-06, "loss": 0.8623, "step": 2121 }, { "epoch": 0.14, "grad_norm": 1.7865233655570363, "learning_rate": 9.709278815576351e-06, "loss": 0.8419, "step": 2122 }, { "epoch": 0.14, "grad_norm": 1.9049763547860747, "learning_rate": 9.708930413626473e-06, "loss": 0.7745, "step": 2123 }, { "epoch": 0.14, "grad_norm": 3.234296540623235, "learning_rate": 9.708581809296662e-06, "loss": 0.9338, "step": 2124 }, { "epoch": 0.14, "grad_norm": 1.7835683438209964, "learning_rate": 9.708233002601897e-06, "loss": 1.1607, "step": 2125 }, { "epoch": 0.14, "grad_norm": 1.7721551761717058, "learning_rate": 9.707883993557173e-06, "loss": 0.9338, "step": 2126 }, { "epoch": 0.14, "grad_norm": 1.9127158331686276, "learning_rate": 9.707534782177487e-06, "loss": 0.6912, "step": 2127 }, { "epoch": 0.14, "grad_norm": 1.6540187985229722, "learning_rate": 9.707185368477848e-06, "loss": 0.8646, "step": 2128 }, { "epoch": 0.14, "grad_norm": 1.7998745411871977, "learning_rate": 9.706835752473273e-06, "loss": 0.7567, "step": 2129 }, { "epoch": 0.14, "grad_norm": 1.7259648980101023, "learning_rate": 9.706485934178788e-06, "loss": 0.7637, "step": 2130 }, { "epoch": 0.14, "grad_norm": 1.207113512338887, "learning_rate": 9.706135913609426e-06, "loss": 0.6551, "step": 2131 }, { "epoch": 0.14, "grad_norm": 1.7138590612807139, "learning_rate": 9.705785690780234e-06, "loss": 0.9144, "step": 2132 }, { "epoch": 0.14, "grad_norm": 1.783600724860357, "learning_rate": 9.70543526570626e-06, "loss": 0.8615, "step": 2133 }, { "epoch": 0.14, "grad_norm": 1.5437183935387027, "learning_rate": 9.705084638402565e-06, "loss": 0.78, "step": 2134 }, { "epoch": 0.14, "grad_norm": 1.7272031393533938, "learning_rate": 9.704733808884219e-06, "loss": 0.6351, "step": 2135 }, { "epoch": 0.14, "grad_norm": 1.6300560359888547, "learning_rate": 9.7043827771663e-06, "loss": 0.8115, "step": 2136 }, { "epoch": 0.14, "grad_norm": 1.3765466740107455, "learning_rate": 9.704031543263893e-06, "loss": 0.658, "step": 2137 }, { "epoch": 0.14, "grad_norm": 1.9285027510116675, "learning_rate": 9.703680107192098e-06, "loss": 0.9265, "step": 2138 }, { "epoch": 0.14, "grad_norm": 1.6379690345576998, "learning_rate": 9.703328468966016e-06, "loss": 0.7872, "step": 2139 }, { "epoch": 0.14, "grad_norm": 1.4387618134942664, "learning_rate": 9.702976628600756e-06, "loss": 0.759, "step": 2140 }, { "epoch": 0.14, "grad_norm": 1.664873741114533, "learning_rate": 9.702624586111446e-06, "loss": 0.8562, "step": 2141 }, { "epoch": 0.14, "grad_norm": 1.9242695238896401, "learning_rate": 9.70227234151321e-06, "loss": 0.8126, "step": 2142 }, { "epoch": 0.14, "grad_norm": 1.5738825131845882, "learning_rate": 9.701919894821192e-06, "loss": 0.8327, "step": 2143 }, { "epoch": 0.14, "grad_norm": 1.536384638623946, "learning_rate": 9.701567246050537e-06, "loss": 0.8311, "step": 2144 }, { "epoch": 0.14, "grad_norm": 2.3041768984952107, "learning_rate": 9.7012143952164e-06, "loss": 0.7022, "step": 2145 }, { "epoch": 0.14, "grad_norm": 1.8455082338276396, "learning_rate": 9.700861342333946e-06, "loss": 0.8355, "step": 2146 }, { "epoch": 0.14, "grad_norm": 1.5502481638726724, "learning_rate": 9.70050808741835e-06, "loss": 0.9653, "step": 2147 }, { "epoch": 0.14, "grad_norm": 1.685011746120064, "learning_rate": 9.700154630484795e-06, "loss": 0.7731, "step": 2148 }, { "epoch": 0.14, "grad_norm": 1.7231655504476686, "learning_rate": 9.69980097154847e-06, "loss": 0.8293, "step": 2149 }, { "epoch": 0.14, "grad_norm": 2.438185763087349, "learning_rate": 9.699447110624574e-06, "loss": 0.8089, "step": 2150 }, { "epoch": 0.14, "grad_norm": 1.5834078562455562, "learning_rate": 9.699093047728317e-06, "loss": 0.8494, "step": 2151 }, { "epoch": 0.14, "grad_norm": 3.6044647643982617, "learning_rate": 9.698738782874914e-06, "loss": 0.7939, "step": 2152 }, { "epoch": 0.14, "grad_norm": 1.7286560400136828, "learning_rate": 9.698384316079592e-06, "loss": 0.8622, "step": 2153 }, { "epoch": 0.14, "grad_norm": 1.4872280579238524, "learning_rate": 9.698029647357585e-06, "loss": 0.8365, "step": 2154 }, { "epoch": 0.14, "grad_norm": 1.0849195014727937, "learning_rate": 9.697674776724135e-06, "loss": 0.6549, "step": 2155 }, { "epoch": 0.14, "grad_norm": 1.808532068244751, "learning_rate": 9.697319704194495e-06, "loss": 0.7128, "step": 2156 }, { "epoch": 0.14, "grad_norm": 1.795621794715478, "learning_rate": 9.696964429783923e-06, "loss": 0.8753, "step": 2157 }, { "epoch": 0.14, "grad_norm": 1.8479424945054699, "learning_rate": 9.69660895350769e-06, "loss": 0.8025, "step": 2158 }, { "epoch": 0.14, "grad_norm": 1.6949402328485204, "learning_rate": 9.696253275381074e-06, "loss": 0.8106, "step": 2159 }, { "epoch": 0.14, "grad_norm": 1.7482904494665685, "learning_rate": 9.69589739541936e-06, "loss": 0.9257, "step": 2160 }, { "epoch": 0.14, "grad_norm": 1.7749137582571606, "learning_rate": 9.695541313637845e-06, "loss": 0.8815, "step": 2161 }, { "epoch": 0.14, "grad_norm": 1.1431050967108103, "learning_rate": 9.695185030051828e-06, "loss": 0.6645, "step": 2162 }, { "epoch": 0.14, "grad_norm": 1.674373983672311, "learning_rate": 9.694828544676626e-06, "loss": 0.7856, "step": 2163 }, { "epoch": 0.14, "grad_norm": 1.253971851792372, "learning_rate": 9.69447185752756e-06, "loss": 0.5751, "step": 2164 }, { "epoch": 0.14, "grad_norm": 1.2221712622668335, "learning_rate": 9.694114968619955e-06, "loss": 0.7303, "step": 2165 }, { "epoch": 0.14, "grad_norm": 1.3592843089254014, "learning_rate": 9.693757877969155e-06, "loss": 0.692, "step": 2166 }, { "epoch": 0.14, "grad_norm": 1.7575460995257333, "learning_rate": 9.693400585590502e-06, "loss": 0.7975, "step": 2167 }, { "epoch": 0.14, "grad_norm": 1.8626665591566445, "learning_rate": 9.693043091499355e-06, "loss": 0.8076, "step": 2168 }, { "epoch": 0.14, "grad_norm": 1.4601645559257206, "learning_rate": 9.692685395711077e-06, "loss": 0.744, "step": 2169 }, { "epoch": 0.14, "grad_norm": 1.7263420859858463, "learning_rate": 9.692327498241042e-06, "loss": 0.8808, "step": 2170 }, { "epoch": 0.14, "grad_norm": 1.7358592847570147, "learning_rate": 9.69196939910463e-06, "loss": 0.7547, "step": 2171 }, { "epoch": 0.14, "grad_norm": 1.6140107490877431, "learning_rate": 9.691611098317234e-06, "loss": 0.8129, "step": 2172 }, { "epoch": 0.14, "grad_norm": 1.1435955726771014, "learning_rate": 9.69125259589425e-06, "loss": 0.6073, "step": 2173 }, { "epoch": 0.14, "grad_norm": 1.5716048941416507, "learning_rate": 9.690893891851088e-06, "loss": 0.7201, "step": 2174 }, { "epoch": 0.14, "grad_norm": 2.186430051814985, "learning_rate": 9.690534986203164e-06, "loss": 0.9035, "step": 2175 }, { "epoch": 0.14, "grad_norm": 0.9610948787399074, "learning_rate": 9.690175878965902e-06, "loss": 0.5849, "step": 2176 }, { "epoch": 0.14, "grad_norm": 1.5384241276223962, "learning_rate": 9.689816570154735e-06, "loss": 0.804, "step": 2177 }, { "epoch": 0.14, "grad_norm": 1.9763018124804101, "learning_rate": 9.689457059785108e-06, "loss": 0.8966, "step": 2178 }, { "epoch": 0.14, "grad_norm": 1.6903662604953928, "learning_rate": 9.68909734787247e-06, "loss": 0.7834, "step": 2179 }, { "epoch": 0.14, "grad_norm": 1.721469654279223, "learning_rate": 9.688737434432281e-06, "loss": 0.8664, "step": 2180 }, { "epoch": 0.14, "grad_norm": 1.7939510091423163, "learning_rate": 9.688377319480008e-06, "loss": 0.8785, "step": 2181 }, { "epoch": 0.14, "grad_norm": 1.826469845362499, "learning_rate": 9.688017003031132e-06, "loss": 0.875, "step": 2182 }, { "epoch": 0.14, "grad_norm": 1.9990269274636883, "learning_rate": 9.687656485101134e-06, "loss": 0.7693, "step": 2183 }, { "epoch": 0.14, "grad_norm": 1.6638329439743968, "learning_rate": 9.687295765705512e-06, "loss": 0.8176, "step": 2184 }, { "epoch": 0.14, "grad_norm": 1.5551848293911423, "learning_rate": 9.686934844859766e-06, "loss": 0.7488, "step": 2185 }, { "epoch": 0.14, "grad_norm": 1.5701345942768996, "learning_rate": 9.68657372257941e-06, "loss": 0.8651, "step": 2186 }, { "epoch": 0.14, "grad_norm": 1.4473103000040426, "learning_rate": 9.686212398879963e-06, "loss": 0.691, "step": 2187 }, { "epoch": 0.14, "grad_norm": 1.130095112412737, "learning_rate": 9.685850873776954e-06, "loss": 0.5495, "step": 2188 }, { "epoch": 0.14, "grad_norm": 1.79532699228628, "learning_rate": 9.68548914728592e-06, "loss": 0.7712, "step": 2189 }, { "epoch": 0.14, "grad_norm": 3.195184181839472, "learning_rate": 9.68512721942241e-06, "loss": 0.737, "step": 2190 }, { "epoch": 0.14, "grad_norm": 1.7580346574222212, "learning_rate": 9.684765090201975e-06, "loss": 0.8106, "step": 2191 }, { "epoch": 0.14, "grad_norm": 1.6296766730271888, "learning_rate": 9.684402759640181e-06, "loss": 0.8088, "step": 2192 }, { "epoch": 0.14, "grad_norm": 1.7218305044403797, "learning_rate": 9.684040227752601e-06, "loss": 0.6951, "step": 2193 }, { "epoch": 0.14, "grad_norm": 2.15915217100395, "learning_rate": 9.683677494554813e-06, "loss": 0.869, "step": 2194 }, { "epoch": 0.14, "grad_norm": 1.4841456611280035, "learning_rate": 9.683314560062409e-06, "loss": 0.7622, "step": 2195 }, { "epoch": 0.14, "grad_norm": 1.7355704511643808, "learning_rate": 9.682951424290985e-06, "loss": 0.8213, "step": 2196 }, { "epoch": 0.14, "grad_norm": 1.523980645526861, "learning_rate": 9.68258808725615e-06, "loss": 0.6663, "step": 2197 }, { "epoch": 0.14, "grad_norm": 1.6545864756166715, "learning_rate": 9.682224548973518e-06, "loss": 0.8198, "step": 2198 }, { "epoch": 0.14, "grad_norm": 1.8345948754223136, "learning_rate": 9.681860809458713e-06, "loss": 0.8536, "step": 2199 }, { "epoch": 0.14, "grad_norm": 1.711310941687128, "learning_rate": 9.68149686872737e-06, "loss": 0.8935, "step": 2200 }, { "epoch": 0.14, "grad_norm": 1.7531399989828527, "learning_rate": 9.681132726795128e-06, "loss": 0.8686, "step": 2201 }, { "epoch": 0.14, "grad_norm": 1.6983748021972611, "learning_rate": 9.680768383677637e-06, "loss": 0.7986, "step": 2202 }, { "epoch": 0.14, "grad_norm": 2.168340378304522, "learning_rate": 9.680403839390558e-06, "loss": 0.699, "step": 2203 }, { "epoch": 0.14, "grad_norm": 1.783344525149568, "learning_rate": 9.680039093949556e-06, "loss": 0.9248, "step": 2204 }, { "epoch": 0.14, "grad_norm": 1.6242424703960836, "learning_rate": 9.679674147370308e-06, "loss": 0.6563, "step": 2205 }, { "epoch": 0.14, "grad_norm": 1.6577710448857998, "learning_rate": 9.679308999668499e-06, "loss": 1.0526, "step": 2206 }, { "epoch": 0.14, "grad_norm": 1.6205568153038268, "learning_rate": 9.67894365085982e-06, "loss": 0.7962, "step": 2207 }, { "epoch": 0.14, "grad_norm": 1.7205492918742429, "learning_rate": 9.678578100959977e-06, "loss": 0.8713, "step": 2208 }, { "epoch": 0.14, "grad_norm": 1.9086268823414712, "learning_rate": 9.678212349984677e-06, "loss": 0.7884, "step": 2209 }, { "epoch": 0.14, "grad_norm": 1.817281633101396, "learning_rate": 9.677846397949641e-06, "loss": 0.8862, "step": 2210 }, { "epoch": 0.14, "grad_norm": 1.5289952760787806, "learning_rate": 9.677480244870597e-06, "loss": 0.7533, "step": 2211 }, { "epoch": 0.14, "grad_norm": 1.8500822396671528, "learning_rate": 9.67711389076328e-06, "loss": 0.873, "step": 2212 }, { "epoch": 0.14, "grad_norm": 1.5553436530994587, "learning_rate": 9.676747335643435e-06, "loss": 0.6988, "step": 2213 }, { "epoch": 0.14, "grad_norm": 1.3214961953005089, "learning_rate": 9.676380579526817e-06, "loss": 0.6642, "step": 2214 }, { "epoch": 0.14, "grad_norm": 1.125901388199734, "learning_rate": 9.676013622429187e-06, "loss": 0.6336, "step": 2215 }, { "epoch": 0.14, "grad_norm": 1.5782774882794643, "learning_rate": 9.67564646436632e-06, "loss": 0.7594, "step": 2216 }, { "epoch": 0.14, "grad_norm": 1.6061555299395285, "learning_rate": 9.675279105353991e-06, "loss": 0.8569, "step": 2217 }, { "epoch": 0.14, "grad_norm": 1.8091594494173584, "learning_rate": 9.674911545407992e-06, "loss": 0.8793, "step": 2218 }, { "epoch": 0.14, "grad_norm": 1.7303801621525015, "learning_rate": 9.674543784544118e-06, "loss": 0.8831, "step": 2219 }, { "epoch": 0.14, "grad_norm": 1.8597274936819237, "learning_rate": 9.674175822778172e-06, "loss": 0.839, "step": 2220 }, { "epoch": 0.14, "grad_norm": 1.5598505512159295, "learning_rate": 9.673807660125974e-06, "loss": 0.7216, "step": 2221 }, { "epoch": 0.14, "grad_norm": 1.6724746224956115, "learning_rate": 9.673439296603341e-06, "loss": 0.8317, "step": 2222 }, { "epoch": 0.14, "grad_norm": 1.7662152362222237, "learning_rate": 9.673070732226109e-06, "loss": 0.8035, "step": 2223 }, { "epoch": 0.14, "grad_norm": 1.6779016308012162, "learning_rate": 9.672701967010117e-06, "loss": 0.7469, "step": 2224 }, { "epoch": 0.14, "grad_norm": 1.5341546855096835, "learning_rate": 9.67233300097121e-06, "loss": 0.712, "step": 2225 }, { "epoch": 0.14, "grad_norm": 1.626103823736912, "learning_rate": 9.671963834125251e-06, "loss": 0.9131, "step": 2226 }, { "epoch": 0.14, "grad_norm": 1.875615237593109, "learning_rate": 9.671594466488104e-06, "loss": 0.7107, "step": 2227 }, { "epoch": 0.14, "grad_norm": 1.6200572254046244, "learning_rate": 9.671224898075643e-06, "loss": 0.8503, "step": 2228 }, { "epoch": 0.14, "grad_norm": 1.6606413648228868, "learning_rate": 9.670855128903752e-06, "loss": 0.8448, "step": 2229 }, { "epoch": 0.14, "grad_norm": 1.628549545735719, "learning_rate": 9.67048515898832e-06, "loss": 0.8157, "step": 2230 }, { "epoch": 0.14, "grad_norm": 1.3775477999028956, "learning_rate": 9.670114988345252e-06, "loss": 0.6738, "step": 2231 }, { "epoch": 0.14, "grad_norm": 1.7202507049400706, "learning_rate": 9.669744616990454e-06, "loss": 0.8487, "step": 2232 }, { "epoch": 0.14, "grad_norm": 2.1882778920148813, "learning_rate": 9.669374044939846e-06, "loss": 0.7892, "step": 2233 }, { "epoch": 0.14, "grad_norm": 1.554904169609434, "learning_rate": 9.669003272209352e-06, "loss": 0.7469, "step": 2234 }, { "epoch": 0.14, "grad_norm": 1.6027629674441546, "learning_rate": 9.668632298814908e-06, "loss": 0.8561, "step": 2235 }, { "epoch": 0.14, "grad_norm": 1.7257297312914108, "learning_rate": 9.66826112477246e-06, "loss": 0.8972, "step": 2236 }, { "epoch": 0.14, "grad_norm": 1.848325378182813, "learning_rate": 9.667889750097956e-06, "loss": 0.817, "step": 2237 }, { "epoch": 0.14, "grad_norm": 2.11839065353396, "learning_rate": 9.66751817480736e-06, "loss": 0.8226, "step": 2238 }, { "epoch": 0.14, "grad_norm": 1.7649862236894087, "learning_rate": 9.66714639891664e-06, "loss": 0.8244, "step": 2239 }, { "epoch": 0.14, "grad_norm": 1.7036000135472702, "learning_rate": 9.666774422441776e-06, "loss": 0.8601, "step": 2240 }, { "epoch": 0.14, "grad_norm": 1.804296375384988, "learning_rate": 9.666402245398753e-06, "loss": 0.8561, "step": 2241 }, { "epoch": 0.14, "grad_norm": 1.822103923204561, "learning_rate": 9.666029867803569e-06, "loss": 0.8838, "step": 2242 }, { "epoch": 0.14, "grad_norm": 1.5208606024423086, "learning_rate": 9.665657289672222e-06, "loss": 0.8317, "step": 2243 }, { "epoch": 0.14, "grad_norm": 1.7414871943794783, "learning_rate": 9.665284511020732e-06, "loss": 0.9146, "step": 2244 }, { "epoch": 0.14, "grad_norm": 1.4939655390242768, "learning_rate": 9.664911531865115e-06, "loss": 0.8418, "step": 2245 }, { "epoch": 0.14, "grad_norm": 1.1339740336866697, "learning_rate": 9.664538352221401e-06, "loss": 0.6357, "step": 2246 }, { "epoch": 0.14, "grad_norm": 1.5954379652789183, "learning_rate": 9.664164972105634e-06, "loss": 0.7345, "step": 2247 }, { "epoch": 0.14, "grad_norm": 1.550528641682451, "learning_rate": 9.663791391533856e-06, "loss": 0.7737, "step": 2248 }, { "epoch": 0.14, "grad_norm": 2.131740682527222, "learning_rate": 9.663417610522124e-06, "loss": 0.6581, "step": 2249 }, { "epoch": 0.14, "grad_norm": 2.1996789576993505, "learning_rate": 9.663043629086501e-06, "loss": 0.722, "step": 2250 }, { "epoch": 0.14, "grad_norm": 1.0794532031806636, "learning_rate": 9.66266944724306e-06, "loss": 0.7035, "step": 2251 }, { "epoch": 0.14, "grad_norm": 1.7278278515194292, "learning_rate": 9.662295065007887e-06, "loss": 0.9115, "step": 2252 }, { "epoch": 0.14, "grad_norm": 2.1435585830001265, "learning_rate": 9.661920482397069e-06, "loss": 0.7289, "step": 2253 }, { "epoch": 0.14, "grad_norm": 2.2853780456198307, "learning_rate": 9.661545699426703e-06, "loss": 0.7839, "step": 2254 }, { "epoch": 0.14, "grad_norm": 1.7113393587482169, "learning_rate": 9.661170716112897e-06, "loss": 0.8696, "step": 2255 }, { "epoch": 0.14, "grad_norm": 1.6222231451692115, "learning_rate": 9.66079553247177e-06, "loss": 0.8334, "step": 2256 }, { "epoch": 0.14, "grad_norm": 1.472614585656672, "learning_rate": 9.660420148519444e-06, "loss": 0.8075, "step": 2257 }, { "epoch": 0.14, "grad_norm": 1.984120085972687, "learning_rate": 9.660044564272054e-06, "loss": 0.6505, "step": 2258 }, { "epoch": 0.14, "grad_norm": 1.829415427554273, "learning_rate": 9.65966877974574e-06, "loss": 0.7402, "step": 2259 }, { "epoch": 0.14, "grad_norm": 1.8515238512624654, "learning_rate": 9.659292794956652e-06, "loss": 0.7947, "step": 2260 }, { "epoch": 0.14, "grad_norm": 1.5296834982198138, "learning_rate": 9.658916609920951e-06, "loss": 0.8033, "step": 2261 }, { "epoch": 0.14, "grad_norm": 1.641307730625061, "learning_rate": 9.658540224654805e-06, "loss": 1.0632, "step": 2262 }, { "epoch": 0.14, "grad_norm": 1.3049100911724882, "learning_rate": 9.65816363917439e-06, "loss": 0.6844, "step": 2263 }, { "epoch": 0.14, "grad_norm": 1.655361246291061, "learning_rate": 9.657786853495888e-06, "loss": 0.8248, "step": 2264 }, { "epoch": 0.14, "grad_norm": 1.5978019369774616, "learning_rate": 9.657409867635494e-06, "loss": 0.7592, "step": 2265 }, { "epoch": 0.15, "grad_norm": 1.7904727084998073, "learning_rate": 9.65703268160941e-06, "loss": 0.8035, "step": 2266 }, { "epoch": 0.15, "grad_norm": 1.721266633816432, "learning_rate": 9.656655295433849e-06, "loss": 0.8542, "step": 2267 }, { "epoch": 0.15, "grad_norm": 1.3979637445721307, "learning_rate": 9.656277709125028e-06, "loss": 0.5961, "step": 2268 }, { "epoch": 0.15, "grad_norm": 1.6644317232910013, "learning_rate": 9.655899922699174e-06, "loss": 0.8142, "step": 2269 }, { "epoch": 0.15, "grad_norm": 1.1483273254121311, "learning_rate": 9.655521936172525e-06, "loss": 0.7529, "step": 2270 }, { "epoch": 0.15, "grad_norm": 1.6203696465632618, "learning_rate": 9.655143749561326e-06, "loss": 0.7059, "step": 2271 }, { "epoch": 0.15, "grad_norm": 1.6736049264404744, "learning_rate": 9.65476536288183e-06, "loss": 0.7215, "step": 2272 }, { "epoch": 0.15, "grad_norm": 2.3525828607510073, "learning_rate": 9.654386776150301e-06, "loss": 0.7866, "step": 2273 }, { "epoch": 0.15, "grad_norm": 1.2437520952050525, "learning_rate": 9.654007989383009e-06, "loss": 0.77, "step": 2274 }, { "epoch": 0.15, "grad_norm": 1.6503020435064044, "learning_rate": 9.653629002596232e-06, "loss": 0.8056, "step": 2275 }, { "epoch": 0.15, "grad_norm": 2.259466701232313, "learning_rate": 9.653249815806258e-06, "loss": 0.8002, "step": 2276 }, { "epoch": 0.15, "grad_norm": 1.5218062712090186, "learning_rate": 9.652870429029386e-06, "loss": 0.8478, "step": 2277 }, { "epoch": 0.15, "grad_norm": 1.6903117208093632, "learning_rate": 9.652490842281921e-06, "loss": 0.9283, "step": 2278 }, { "epoch": 0.15, "grad_norm": 1.7951216171297761, "learning_rate": 9.652111055580175e-06, "loss": 0.8539, "step": 2279 }, { "epoch": 0.15, "grad_norm": 1.612029788212388, "learning_rate": 9.651731068940472e-06, "loss": 0.8935, "step": 2280 }, { "epoch": 0.15, "grad_norm": 1.591546765425457, "learning_rate": 9.65135088237914e-06, "loss": 0.8083, "step": 2281 }, { "epoch": 0.15, "grad_norm": 1.7558373206833804, "learning_rate": 9.650970495912521e-06, "loss": 0.8794, "step": 2282 }, { "epoch": 0.15, "grad_norm": 1.7347070575720405, "learning_rate": 9.650589909556964e-06, "loss": 0.742, "step": 2283 }, { "epoch": 0.15, "grad_norm": 1.761635294465967, "learning_rate": 9.650209123328826e-06, "loss": 0.8742, "step": 2284 }, { "epoch": 0.15, "grad_norm": 1.8076304776615901, "learning_rate": 9.649828137244471e-06, "loss": 0.7935, "step": 2285 }, { "epoch": 0.15, "grad_norm": 1.5736235509655672, "learning_rate": 9.649446951320273e-06, "loss": 0.7805, "step": 2286 }, { "epoch": 0.15, "grad_norm": 1.6486426884073544, "learning_rate": 9.649065565572615e-06, "loss": 0.8292, "step": 2287 }, { "epoch": 0.15, "grad_norm": 1.1883182189836823, "learning_rate": 9.648683980017887e-06, "loss": 0.7618, "step": 2288 }, { "epoch": 0.15, "grad_norm": 1.472836720145251, "learning_rate": 9.64830219467249e-06, "loss": 0.7351, "step": 2289 }, { "epoch": 0.15, "grad_norm": 2.114117924908605, "learning_rate": 9.647920209552832e-06, "loss": 0.9209, "step": 2290 }, { "epoch": 0.15, "grad_norm": 1.8001530319815433, "learning_rate": 9.647538024675331e-06, "loss": 0.8743, "step": 2291 }, { "epoch": 0.15, "grad_norm": 1.7691686774518975, "learning_rate": 9.647155640056411e-06, "loss": 1.0269, "step": 2292 }, { "epoch": 0.15, "grad_norm": 1.494210387923841, "learning_rate": 9.646773055712508e-06, "loss": 0.7542, "step": 2293 }, { "epoch": 0.15, "grad_norm": 1.757684888281118, "learning_rate": 9.646390271660062e-06, "loss": 0.7752, "step": 2294 }, { "epoch": 0.15, "grad_norm": 1.9216622432994495, "learning_rate": 9.646007287915524e-06, "loss": 0.8869, "step": 2295 }, { "epoch": 0.15, "grad_norm": 1.9298727224138876, "learning_rate": 9.645624104495358e-06, "loss": 0.7553, "step": 2296 }, { "epoch": 0.15, "grad_norm": 1.3463114073896283, "learning_rate": 9.64524072141603e-06, "loss": 0.8328, "step": 2297 }, { "epoch": 0.15, "grad_norm": 1.6427857932863643, "learning_rate": 9.644857138694016e-06, "loss": 0.8523, "step": 2298 }, { "epoch": 0.15, "grad_norm": 1.593917673457162, "learning_rate": 9.6444733563458e-06, "loss": 0.7732, "step": 2299 }, { "epoch": 0.15, "grad_norm": 1.635646527678963, "learning_rate": 9.644089374387881e-06, "loss": 0.8263, "step": 2300 }, { "epoch": 0.15, "grad_norm": 1.619123976707013, "learning_rate": 9.643705192836758e-06, "loss": 0.9307, "step": 2301 }, { "epoch": 0.15, "grad_norm": 2.521653152334014, "learning_rate": 9.643320811708944e-06, "loss": 0.8521, "step": 2302 }, { "epoch": 0.15, "grad_norm": 1.611811257961502, "learning_rate": 9.64293623102096e-06, "loss": 0.8587, "step": 2303 }, { "epoch": 0.15, "grad_norm": 1.6870970905247236, "learning_rate": 9.642551450789331e-06, "loss": 0.829, "step": 2304 }, { "epoch": 0.15, "grad_norm": 1.9366227691991924, "learning_rate": 9.642166471030596e-06, "loss": 0.9093, "step": 2305 }, { "epoch": 0.15, "grad_norm": 1.8895112248893486, "learning_rate": 9.641781291761301e-06, "loss": 0.8609, "step": 2306 }, { "epoch": 0.15, "grad_norm": 1.7886744528428804, "learning_rate": 9.641395912998e-06, "loss": 0.8952, "step": 2307 }, { "epoch": 0.15, "grad_norm": 1.6107467069565709, "learning_rate": 9.641010334757255e-06, "loss": 0.7972, "step": 2308 }, { "epoch": 0.15, "grad_norm": 1.952286127427969, "learning_rate": 9.64062455705564e-06, "loss": 0.8085, "step": 2309 }, { "epoch": 0.15, "grad_norm": 1.564656771052411, "learning_rate": 9.64023857990973e-06, "loss": 1.0116, "step": 2310 }, { "epoch": 0.15, "grad_norm": 1.7114071837626759, "learning_rate": 9.639852403336118e-06, "loss": 0.7549, "step": 2311 }, { "epoch": 0.15, "grad_norm": 1.839906609808701, "learning_rate": 9.6394660273514e-06, "loss": 0.8448, "step": 2312 }, { "epoch": 0.15, "grad_norm": 1.6523699679396668, "learning_rate": 9.63907945197218e-06, "loss": 0.8489, "step": 2313 }, { "epoch": 0.15, "grad_norm": 1.6595119092061963, "learning_rate": 9.638692677215074e-06, "loss": 0.8364, "step": 2314 }, { "epoch": 0.15, "grad_norm": 1.6504091301933588, "learning_rate": 9.638305703096702e-06, "loss": 0.7481, "step": 2315 }, { "epoch": 0.15, "grad_norm": 1.6609012107627141, "learning_rate": 9.637918529633699e-06, "loss": 0.8132, "step": 2316 }, { "epoch": 0.15, "grad_norm": 1.6850678055051354, "learning_rate": 9.637531156842702e-06, "loss": 0.8423, "step": 2317 }, { "epoch": 0.15, "grad_norm": 1.791321610042236, "learning_rate": 9.637143584740363e-06, "loss": 0.9647, "step": 2318 }, { "epoch": 0.15, "grad_norm": 1.8406086277532536, "learning_rate": 9.636755813343334e-06, "loss": 0.8906, "step": 2319 }, { "epoch": 0.15, "grad_norm": 1.5289554504566374, "learning_rate": 9.636367842668284e-06, "loss": 0.7719, "step": 2320 }, { "epoch": 0.15, "grad_norm": 1.729629540493605, "learning_rate": 9.635979672731888e-06, "loss": 0.7297, "step": 2321 }, { "epoch": 0.15, "grad_norm": 1.729812459324554, "learning_rate": 9.635591303550826e-06, "loss": 0.8302, "step": 2322 }, { "epoch": 0.15, "grad_norm": 1.051521942570481, "learning_rate": 9.635202735141792e-06, "loss": 0.7185, "step": 2323 }, { "epoch": 0.15, "grad_norm": 1.7066938220180383, "learning_rate": 9.634813967521482e-06, "loss": 0.7898, "step": 2324 }, { "epoch": 0.15, "grad_norm": 1.7567590537093205, "learning_rate": 9.634425000706607e-06, "loss": 0.8272, "step": 2325 }, { "epoch": 0.15, "grad_norm": 1.7915097600024825, "learning_rate": 9.634035834713885e-06, "loss": 0.8931, "step": 2326 }, { "epoch": 0.15, "grad_norm": 1.9103348511892426, "learning_rate": 9.633646469560039e-06, "loss": 0.8716, "step": 2327 }, { "epoch": 0.15, "grad_norm": 1.1904386050480826, "learning_rate": 9.633256905261805e-06, "loss": 0.5337, "step": 2328 }, { "epoch": 0.15, "grad_norm": 1.6781923141021682, "learning_rate": 9.632867141835926e-06, "loss": 0.8044, "step": 2329 }, { "epoch": 0.15, "grad_norm": 1.7531017868813386, "learning_rate": 9.632477179299152e-06, "loss": 0.7852, "step": 2330 }, { "epoch": 0.15, "grad_norm": 1.2626645635537135, "learning_rate": 9.632087017668242e-06, "loss": 0.7659, "step": 2331 }, { "epoch": 0.15, "grad_norm": 1.5473961192393626, "learning_rate": 9.631696656959966e-06, "loss": 0.9127, "step": 2332 }, { "epoch": 0.15, "grad_norm": 1.4814707865629384, "learning_rate": 9.6313060971911e-06, "loss": 0.6874, "step": 2333 }, { "epoch": 0.15, "grad_norm": 1.5349183434347276, "learning_rate": 9.63091533837843e-06, "loss": 0.707, "step": 2334 }, { "epoch": 0.15, "grad_norm": 1.771156718628493, "learning_rate": 9.630524380538748e-06, "loss": 0.8254, "step": 2335 }, { "epoch": 0.15, "grad_norm": 1.5134202812707969, "learning_rate": 9.63013322368886e-06, "loss": 0.7485, "step": 2336 }, { "epoch": 0.15, "grad_norm": 1.6297820941980414, "learning_rate": 9.629741867845574e-06, "loss": 0.8471, "step": 2337 }, { "epoch": 0.15, "grad_norm": 1.6373626254910285, "learning_rate": 9.629350313025711e-06, "loss": 0.8335, "step": 2338 }, { "epoch": 0.15, "grad_norm": 3.8774226803652088, "learning_rate": 9.628958559246101e-06, "loss": 0.8066, "step": 2339 }, { "epoch": 0.15, "grad_norm": 1.62887677953711, "learning_rate": 9.628566606523578e-06, "loss": 0.7103, "step": 2340 }, { "epoch": 0.15, "grad_norm": 1.6756460017034938, "learning_rate": 9.628174454874988e-06, "loss": 0.8359, "step": 2341 }, { "epoch": 0.15, "grad_norm": 1.726034082460133, "learning_rate": 9.627782104317185e-06, "loss": 0.8111, "step": 2342 }, { "epoch": 0.15, "grad_norm": 1.6228789480808699, "learning_rate": 9.627389554867032e-06, "loss": 0.9147, "step": 2343 }, { "epoch": 0.15, "grad_norm": 1.7800246038404612, "learning_rate": 9.626996806541398e-06, "loss": 0.8858, "step": 2344 }, { "epoch": 0.15, "grad_norm": 1.7931878845934976, "learning_rate": 9.626603859357165e-06, "loss": 0.965, "step": 2345 }, { "epoch": 0.15, "grad_norm": 1.1672575202929802, "learning_rate": 9.62621071333122e-06, "loss": 0.6322, "step": 2346 }, { "epoch": 0.15, "grad_norm": 1.7751900285952078, "learning_rate": 9.625817368480459e-06, "loss": 0.7581, "step": 2347 }, { "epoch": 0.15, "grad_norm": 1.610355697226184, "learning_rate": 9.625423824821789e-06, "loss": 0.868, "step": 2348 }, { "epoch": 0.15, "grad_norm": 1.819247810003986, "learning_rate": 9.625030082372122e-06, "loss": 0.8684, "step": 2349 }, { "epoch": 0.15, "grad_norm": 1.1066545502986647, "learning_rate": 9.624636141148377e-06, "loss": 0.6979, "step": 2350 }, { "epoch": 0.15, "grad_norm": 1.0405712401390097, "learning_rate": 9.624242001167493e-06, "loss": 0.6735, "step": 2351 }, { "epoch": 0.15, "grad_norm": 1.739852724078392, "learning_rate": 9.623847662446404e-06, "loss": 0.8573, "step": 2352 }, { "epoch": 0.15, "grad_norm": 1.5456460114384287, "learning_rate": 9.623453125002056e-06, "loss": 0.8163, "step": 2353 }, { "epoch": 0.15, "grad_norm": 1.3051115138181437, "learning_rate": 9.62305838885141e-06, "loss": 0.6372, "step": 2354 }, { "epoch": 0.15, "grad_norm": 1.5780372795949382, "learning_rate": 9.622663454011429e-06, "loss": 0.8957, "step": 2355 }, { "epoch": 0.15, "grad_norm": 1.918240727314435, "learning_rate": 9.622268320499083e-06, "loss": 0.7595, "step": 2356 }, { "epoch": 0.15, "grad_norm": 2.153429697697343, "learning_rate": 9.621872988331362e-06, "loss": 0.7988, "step": 2357 }, { "epoch": 0.15, "grad_norm": 1.8568229131755918, "learning_rate": 9.62147745752525e-06, "loss": 0.8929, "step": 2358 }, { "epoch": 0.15, "grad_norm": 1.7845634540531234, "learning_rate": 9.621081728097747e-06, "loss": 0.7972, "step": 2359 }, { "epoch": 0.15, "grad_norm": 1.721004525156755, "learning_rate": 9.620685800065861e-06, "loss": 0.8043, "step": 2360 }, { "epoch": 0.15, "grad_norm": 1.8570931780626774, "learning_rate": 9.620289673446611e-06, "loss": 0.7094, "step": 2361 }, { "epoch": 0.15, "grad_norm": 1.5698448414932407, "learning_rate": 9.619893348257019e-06, "loss": 0.8003, "step": 2362 }, { "epoch": 0.15, "grad_norm": 1.8768278863832273, "learning_rate": 9.619496824514118e-06, "loss": 0.6243, "step": 2363 }, { "epoch": 0.15, "grad_norm": 2.1458898213430437, "learning_rate": 9.61910010223495e-06, "loss": 0.8149, "step": 2364 }, { "epoch": 0.15, "grad_norm": 1.7234425293265379, "learning_rate": 9.618703181436566e-06, "loss": 0.8614, "step": 2365 }, { "epoch": 0.15, "grad_norm": 2.0636924773377627, "learning_rate": 9.618306062136025e-06, "loss": 0.8062, "step": 2366 }, { "epoch": 0.15, "grad_norm": 1.008955872236221, "learning_rate": 9.617908744350392e-06, "loss": 0.7449, "step": 2367 }, { "epoch": 0.15, "grad_norm": 1.213058834769946, "learning_rate": 9.617511228096746e-06, "loss": 0.7188, "step": 2368 }, { "epoch": 0.15, "grad_norm": 1.7588016529985693, "learning_rate": 9.61711351339217e-06, "loss": 0.8509, "step": 2369 }, { "epoch": 0.15, "grad_norm": 2.9661847525369174, "learning_rate": 9.616715600253759e-06, "loss": 0.7795, "step": 2370 }, { "epoch": 0.15, "grad_norm": 1.6389537723181251, "learning_rate": 9.61631748869861e-06, "loss": 0.8725, "step": 2371 }, { "epoch": 0.15, "grad_norm": 2.127730417382783, "learning_rate": 9.615919178743836e-06, "loss": 0.881, "step": 2372 }, { "epoch": 0.15, "grad_norm": 2.283691248594114, "learning_rate": 9.615520670406555e-06, "loss": 0.9131, "step": 2373 }, { "epoch": 0.15, "grad_norm": 1.6566484278050404, "learning_rate": 9.615121963703895e-06, "loss": 0.9206, "step": 2374 }, { "epoch": 0.15, "grad_norm": 1.8382001272194382, "learning_rate": 9.61472305865299e-06, "loss": 0.8325, "step": 2375 }, { "epoch": 0.15, "grad_norm": 1.5688847417090952, "learning_rate": 9.614323955270985e-06, "loss": 0.7918, "step": 2376 }, { "epoch": 0.15, "grad_norm": 1.6199242510028211, "learning_rate": 9.613924653575034e-06, "loss": 0.7268, "step": 2377 }, { "epoch": 0.15, "grad_norm": 1.7997765554849185, "learning_rate": 9.613525153582295e-06, "loss": 0.7652, "step": 2378 }, { "epoch": 0.15, "grad_norm": 1.714305914026516, "learning_rate": 9.61312545530994e-06, "loss": 0.8059, "step": 2379 }, { "epoch": 0.15, "grad_norm": 1.6756351459968803, "learning_rate": 9.612725558775144e-06, "loss": 0.7441, "step": 2380 }, { "epoch": 0.15, "grad_norm": 1.7055279788396065, "learning_rate": 9.612325463995099e-06, "loss": 0.7672, "step": 2381 }, { "epoch": 0.15, "grad_norm": 0.923139575163432, "learning_rate": 9.611925170986996e-06, "loss": 0.6415, "step": 2382 }, { "epoch": 0.15, "grad_norm": 1.73027882940631, "learning_rate": 9.61152467976804e-06, "loss": 0.7837, "step": 2383 }, { "epoch": 0.15, "grad_norm": 1.800881927155776, "learning_rate": 9.611123990355445e-06, "loss": 0.743, "step": 2384 }, { "epoch": 0.15, "grad_norm": 2.0213988046683715, "learning_rate": 9.610723102766429e-06, "loss": 0.7522, "step": 2385 }, { "epoch": 0.15, "grad_norm": 1.5692209942721211, "learning_rate": 9.610322017018224e-06, "loss": 0.8603, "step": 2386 }, { "epoch": 0.15, "grad_norm": 1.0476936437621807, "learning_rate": 9.609920733128064e-06, "loss": 0.6797, "step": 2387 }, { "epoch": 0.15, "grad_norm": 1.5631057400069395, "learning_rate": 9.609519251113199e-06, "loss": 0.8738, "step": 2388 }, { "epoch": 0.15, "grad_norm": 1.8251595173483248, "learning_rate": 9.609117570990882e-06, "loss": 0.747, "step": 2389 }, { "epoch": 0.15, "grad_norm": 1.8255650222081028, "learning_rate": 9.608715692778377e-06, "loss": 0.7203, "step": 2390 }, { "epoch": 0.15, "grad_norm": 1.8325624315928792, "learning_rate": 9.608313616492954e-06, "loss": 0.8381, "step": 2391 }, { "epoch": 0.15, "grad_norm": 1.6225185213192697, "learning_rate": 9.607911342151898e-06, "loss": 0.7646, "step": 2392 }, { "epoch": 0.15, "grad_norm": 1.7056892221364752, "learning_rate": 9.607508869772495e-06, "loss": 0.8511, "step": 2393 }, { "epoch": 0.15, "grad_norm": 1.9760365911296067, "learning_rate": 9.60710619937204e-06, "loss": 0.8589, "step": 2394 }, { "epoch": 0.15, "grad_norm": 1.1999444096923433, "learning_rate": 9.606703330967843e-06, "loss": 0.6973, "step": 2395 }, { "epoch": 0.15, "grad_norm": 1.866349299627104, "learning_rate": 9.606300264577217e-06, "loss": 0.7168, "step": 2396 }, { "epoch": 0.15, "grad_norm": 2.0400232463684134, "learning_rate": 9.605897000217485e-06, "loss": 0.7809, "step": 2397 }, { "epoch": 0.15, "grad_norm": 1.4652258500590511, "learning_rate": 9.605493537905978e-06, "loss": 0.9165, "step": 2398 }, { "epoch": 0.15, "grad_norm": 1.6867375281336017, "learning_rate": 9.605089877660036e-06, "loss": 0.8192, "step": 2399 }, { "epoch": 0.15, "grad_norm": 1.8177516185168592, "learning_rate": 9.604686019497008e-06, "loss": 0.8745, "step": 2400 }, { "epoch": 0.15, "grad_norm": 1.5046239823526322, "learning_rate": 9.60428196343425e-06, "loss": 0.7799, "step": 2401 }, { "epoch": 0.15, "grad_norm": 1.595338299293668, "learning_rate": 9.603877709489128e-06, "loss": 0.7138, "step": 2402 }, { "epoch": 0.15, "grad_norm": 1.6288294041126565, "learning_rate": 9.603473257679018e-06, "loss": 0.8759, "step": 2403 }, { "epoch": 0.15, "grad_norm": 1.7694206141037137, "learning_rate": 9.6030686080213e-06, "loss": 0.9219, "step": 2404 }, { "epoch": 0.15, "grad_norm": 1.6411937201568385, "learning_rate": 9.602663760533364e-06, "loss": 0.7213, "step": 2405 }, { "epoch": 0.15, "grad_norm": 1.8556841190967701, "learning_rate": 9.602258715232611e-06, "loss": 0.8369, "step": 2406 }, { "epoch": 0.15, "grad_norm": 1.6176284536926673, "learning_rate": 9.601853472136451e-06, "loss": 0.7978, "step": 2407 }, { "epoch": 0.15, "grad_norm": 1.6561247023193915, "learning_rate": 9.601448031262298e-06, "loss": 0.7249, "step": 2408 }, { "epoch": 0.15, "grad_norm": 1.6613888096099565, "learning_rate": 9.601042392627577e-06, "loss": 0.8015, "step": 2409 }, { "epoch": 0.15, "grad_norm": 1.4131589433373861, "learning_rate": 9.600636556249722e-06, "loss": 0.673, "step": 2410 }, { "epoch": 0.15, "grad_norm": 1.522901587965145, "learning_rate": 9.600230522146176e-06, "loss": 0.7856, "step": 2411 }, { "epoch": 0.15, "grad_norm": 2.328959384475759, "learning_rate": 9.599824290334388e-06, "loss": 0.8242, "step": 2412 }, { "epoch": 0.15, "grad_norm": 1.6208123074613967, "learning_rate": 9.599417860831818e-06, "loss": 0.8081, "step": 2413 }, { "epoch": 0.15, "grad_norm": 1.6828389424341903, "learning_rate": 9.599011233655933e-06, "loss": 0.7422, "step": 2414 }, { "epoch": 0.15, "grad_norm": 1.6119258544709028, "learning_rate": 9.598604408824209e-06, "loss": 0.7812, "step": 2415 }, { "epoch": 0.15, "grad_norm": 1.7215315684749817, "learning_rate": 9.598197386354129e-06, "loss": 0.7938, "step": 2416 }, { "epoch": 0.15, "grad_norm": 1.8002557863856028, "learning_rate": 9.59779016626319e-06, "loss": 0.8427, "step": 2417 }, { "epoch": 0.15, "grad_norm": 1.8438613102632517, "learning_rate": 9.597382748568889e-06, "loss": 0.8349, "step": 2418 }, { "epoch": 0.15, "grad_norm": 1.804440178586903, "learning_rate": 9.596975133288738e-06, "loss": 0.8663, "step": 2419 }, { "epoch": 0.15, "grad_norm": 2.817878955182608, "learning_rate": 9.596567320440255e-06, "loss": 0.817, "step": 2420 }, { "epoch": 0.15, "grad_norm": 1.1897354489827974, "learning_rate": 9.596159310040968e-06, "loss": 0.7041, "step": 2421 }, { "epoch": 0.16, "grad_norm": 1.2386988642243497, "learning_rate": 9.595751102108412e-06, "loss": 0.7286, "step": 2422 }, { "epoch": 0.16, "grad_norm": 2.0643766525216676, "learning_rate": 9.595342696660133e-06, "loss": 0.8439, "step": 2423 }, { "epoch": 0.16, "grad_norm": 1.7546622227833837, "learning_rate": 9.594934093713677e-06, "loss": 0.8421, "step": 2424 }, { "epoch": 0.16, "grad_norm": 1.7312041341162943, "learning_rate": 9.594525293286611e-06, "loss": 1.1221, "step": 2425 }, { "epoch": 0.16, "grad_norm": 1.0592854770779019, "learning_rate": 9.594116295396502e-06, "loss": 0.693, "step": 2426 }, { "epoch": 0.16, "grad_norm": 1.5815427141114418, "learning_rate": 9.593707100060927e-06, "loss": 0.8022, "step": 2427 }, { "epoch": 0.16, "grad_norm": 2.0017770031757722, "learning_rate": 9.593297707297475e-06, "loss": 0.8042, "step": 2428 }, { "epoch": 0.16, "grad_norm": 1.979057347348071, "learning_rate": 9.59288811712374e-06, "loss": 0.8201, "step": 2429 }, { "epoch": 0.16, "grad_norm": 1.4721524589963326, "learning_rate": 9.592478329557323e-06, "loss": 0.7963, "step": 2430 }, { "epoch": 0.16, "grad_norm": 1.0354489965353038, "learning_rate": 9.592068344615837e-06, "loss": 0.6113, "step": 2431 }, { "epoch": 0.16, "grad_norm": 2.756225402474681, "learning_rate": 9.591658162316905e-06, "loss": 0.7753, "step": 2432 }, { "epoch": 0.16, "grad_norm": 2.269878288433211, "learning_rate": 9.591247782678153e-06, "loss": 0.7499, "step": 2433 }, { "epoch": 0.16, "grad_norm": 2.030043578131732, "learning_rate": 9.590837205717219e-06, "loss": 0.7824, "step": 2434 }, { "epoch": 0.16, "grad_norm": 1.77337480352883, "learning_rate": 9.590426431451748e-06, "loss": 0.7963, "step": 2435 }, { "epoch": 0.16, "grad_norm": 1.5859224629986275, "learning_rate": 9.590015459899394e-06, "loss": 0.8586, "step": 2436 }, { "epoch": 0.16, "grad_norm": 1.5972658255691918, "learning_rate": 9.58960429107782e-06, "loss": 0.7398, "step": 2437 }, { "epoch": 0.16, "grad_norm": 1.7666333642084586, "learning_rate": 9.5891929250047e-06, "loss": 0.9088, "step": 2438 }, { "epoch": 0.16, "grad_norm": 1.313371847330422, "learning_rate": 9.58878136169771e-06, "loss": 0.6279, "step": 2439 }, { "epoch": 0.16, "grad_norm": 1.851678887594224, "learning_rate": 9.58836960117454e-06, "loss": 0.9189, "step": 2440 }, { "epoch": 0.16, "grad_norm": 2.032472347302225, "learning_rate": 9.587957643452886e-06, "loss": 0.7785, "step": 2441 }, { "epoch": 0.16, "grad_norm": 1.7711325588939086, "learning_rate": 9.587545488550453e-06, "loss": 0.8624, "step": 2442 }, { "epoch": 0.16, "grad_norm": 1.5222529735390546, "learning_rate": 9.587133136484953e-06, "loss": 0.7798, "step": 2443 }, { "epoch": 0.16, "grad_norm": 1.565409050837391, "learning_rate": 9.586720587274113e-06, "loss": 0.705, "step": 2444 }, { "epoch": 0.16, "grad_norm": 1.686717902898469, "learning_rate": 9.586307840935658e-06, "loss": 0.7473, "step": 2445 }, { "epoch": 0.16, "grad_norm": 1.59486258981804, "learning_rate": 9.585894897487328e-06, "loss": 0.7414, "step": 2446 }, { "epoch": 0.16, "grad_norm": 1.5811480237412523, "learning_rate": 9.585481756946875e-06, "loss": 0.7894, "step": 2447 }, { "epoch": 0.16, "grad_norm": 1.6241376215981742, "learning_rate": 9.58506841933205e-06, "loss": 0.7328, "step": 2448 }, { "epoch": 0.16, "grad_norm": 1.6897121500406207, "learning_rate": 9.584654884660618e-06, "loss": 0.9545, "step": 2449 }, { "epoch": 0.16, "grad_norm": 1.6503235453892744, "learning_rate": 9.584241152950353e-06, "loss": 0.8506, "step": 2450 }, { "epoch": 0.16, "grad_norm": 1.7510145079364745, "learning_rate": 9.583827224219035e-06, "loss": 0.9128, "step": 2451 }, { "epoch": 0.16, "grad_norm": 1.447871677287431, "learning_rate": 9.583413098484457e-06, "loss": 0.7538, "step": 2452 }, { "epoch": 0.16, "grad_norm": 1.6687666073153775, "learning_rate": 9.582998775764414e-06, "loss": 0.8565, "step": 2453 }, { "epoch": 0.16, "grad_norm": 1.5560687131273039, "learning_rate": 9.582584256076715e-06, "loss": 0.728, "step": 2454 }, { "epoch": 0.16, "grad_norm": 1.6986491776549761, "learning_rate": 9.582169539439173e-06, "loss": 0.8529, "step": 2455 }, { "epoch": 0.16, "grad_norm": 1.1368561721850907, "learning_rate": 9.581754625869612e-06, "loss": 0.6891, "step": 2456 }, { "epoch": 0.16, "grad_norm": 1.9705461665075539, "learning_rate": 9.581339515385866e-06, "loss": 0.8117, "step": 2457 }, { "epoch": 0.16, "grad_norm": 1.6331127848830578, "learning_rate": 9.580924208005775e-06, "loss": 0.8332, "step": 2458 }, { "epoch": 0.16, "grad_norm": 1.5158195829712144, "learning_rate": 9.580508703747185e-06, "loss": 0.9083, "step": 2459 }, { "epoch": 0.16, "grad_norm": 1.5068936633543792, "learning_rate": 9.580093002627958e-06, "loss": 0.746, "step": 2460 }, { "epoch": 0.16, "grad_norm": 1.691532691550952, "learning_rate": 9.579677104665957e-06, "loss": 0.868, "step": 2461 }, { "epoch": 0.16, "grad_norm": 2.4888898635341747, "learning_rate": 9.579261009879057e-06, "loss": 0.8321, "step": 2462 }, { "epoch": 0.16, "grad_norm": 1.292599200343767, "learning_rate": 9.578844718285141e-06, "loss": 0.6806, "step": 2463 }, { "epoch": 0.16, "grad_norm": 1.936382850089883, "learning_rate": 9.578428229902102e-06, "loss": 0.8653, "step": 2464 }, { "epoch": 0.16, "grad_norm": 1.702174258011919, "learning_rate": 9.578011544747836e-06, "loss": 0.7575, "step": 2465 }, { "epoch": 0.16, "grad_norm": 1.147747608684818, "learning_rate": 9.577594662840256e-06, "loss": 0.6431, "step": 2466 }, { "epoch": 0.16, "grad_norm": 1.5630626190924743, "learning_rate": 9.577177584197274e-06, "loss": 0.847, "step": 2467 }, { "epoch": 0.16, "grad_norm": 2.6894961465380147, "learning_rate": 9.576760308836819e-06, "loss": 0.8672, "step": 2468 }, { "epoch": 0.16, "grad_norm": 1.8854673275410343, "learning_rate": 9.576342836776822e-06, "loss": 0.7584, "step": 2469 }, { "epoch": 0.16, "grad_norm": 1.7779242308299388, "learning_rate": 9.575925168035225e-06, "loss": 0.8747, "step": 2470 }, { "epoch": 0.16, "grad_norm": 1.1722248850015824, "learning_rate": 9.575507302629982e-06, "loss": 0.6513, "step": 2471 }, { "epoch": 0.16, "grad_norm": 1.6539036731007648, "learning_rate": 9.57508924057905e-06, "loss": 0.7564, "step": 2472 }, { "epoch": 0.16, "grad_norm": 1.8916723105588893, "learning_rate": 9.574670981900394e-06, "loss": 0.8049, "step": 2473 }, { "epoch": 0.16, "grad_norm": 1.995293859698826, "learning_rate": 9.574252526611994e-06, "loss": 0.9315, "step": 2474 }, { "epoch": 0.16, "grad_norm": 1.6107554676118179, "learning_rate": 9.57383387473183e-06, "loss": 0.7862, "step": 2475 }, { "epoch": 0.16, "grad_norm": 1.7449549274229943, "learning_rate": 9.573415026277896e-06, "loss": 0.8161, "step": 2476 }, { "epoch": 0.16, "grad_norm": 1.1786881081044116, "learning_rate": 9.572995981268199e-06, "loss": 0.6594, "step": 2477 }, { "epoch": 0.16, "grad_norm": 1.7700662015311124, "learning_rate": 9.57257673972074e-06, "loss": 0.8756, "step": 2478 }, { "epoch": 0.16, "grad_norm": 1.9923269967597723, "learning_rate": 9.572157301653542e-06, "loss": 0.7597, "step": 2479 }, { "epoch": 0.16, "grad_norm": 1.6693346259667552, "learning_rate": 9.571737667084631e-06, "loss": 0.7479, "step": 2480 }, { "epoch": 0.16, "grad_norm": 1.565621619238323, "learning_rate": 9.571317836032042e-06, "loss": 0.6593, "step": 2481 }, { "epoch": 0.16, "grad_norm": 1.7192057649599481, "learning_rate": 9.570897808513818e-06, "loss": 0.8597, "step": 2482 }, { "epoch": 0.16, "grad_norm": 1.6951801293540438, "learning_rate": 9.570477584548008e-06, "loss": 0.7243, "step": 2483 }, { "epoch": 0.16, "grad_norm": 1.482597225075042, "learning_rate": 9.570057164152679e-06, "loss": 0.7454, "step": 2484 }, { "epoch": 0.16, "grad_norm": 1.5963425961593878, "learning_rate": 9.569636547345895e-06, "loss": 0.847, "step": 2485 }, { "epoch": 0.16, "grad_norm": 1.630657039582354, "learning_rate": 9.569215734145733e-06, "loss": 0.8147, "step": 2486 }, { "epoch": 0.16, "grad_norm": 1.6279409031566752, "learning_rate": 9.568794724570282e-06, "loss": 0.8851, "step": 2487 }, { "epoch": 0.16, "grad_norm": 0.9768413462360866, "learning_rate": 9.568373518637632e-06, "loss": 0.5527, "step": 2488 }, { "epoch": 0.16, "grad_norm": 1.6800735565065086, "learning_rate": 9.567952116365889e-06, "loss": 0.8318, "step": 2489 }, { "epoch": 0.16, "grad_norm": 2.5179696068655417, "learning_rate": 9.567530517773163e-06, "loss": 0.8817, "step": 2490 }, { "epoch": 0.16, "grad_norm": 1.6596181660136498, "learning_rate": 9.567108722877572e-06, "loss": 0.7567, "step": 2491 }, { "epoch": 0.16, "grad_norm": 1.3915253305002095, "learning_rate": 9.566686731697246e-06, "loss": 0.7962, "step": 2492 }, { "epoch": 0.16, "grad_norm": 1.7837511895275373, "learning_rate": 9.566264544250319e-06, "loss": 0.9453, "step": 2493 }, { "epoch": 0.16, "grad_norm": 1.5532675467784616, "learning_rate": 9.565842160554938e-06, "loss": 0.8984, "step": 2494 }, { "epoch": 0.16, "grad_norm": 1.7568540278375444, "learning_rate": 9.565419580629254e-06, "loss": 0.7772, "step": 2495 }, { "epoch": 0.16, "grad_norm": 1.7425792343018178, "learning_rate": 9.56499680449143e-06, "loss": 0.9, "step": 2496 }, { "epoch": 0.16, "grad_norm": 1.598151295358326, "learning_rate": 9.564573832159638e-06, "loss": 0.8445, "step": 2497 }, { "epoch": 0.16, "grad_norm": 1.57831135428216, "learning_rate": 9.564150663652053e-06, "loss": 0.8007, "step": 2498 }, { "epoch": 0.16, "grad_norm": 1.8800410607590856, "learning_rate": 9.56372729898686e-06, "loss": 0.8525, "step": 2499 }, { "epoch": 0.16, "grad_norm": 1.5571693365001853, "learning_rate": 9.56330373818226e-06, "loss": 0.7544, "step": 2500 }, { "epoch": 0.16, "grad_norm": 1.9263583676782052, "learning_rate": 9.562879981256455e-06, "loss": 0.8679, "step": 2501 }, { "epoch": 0.16, "grad_norm": 1.6274133241933995, "learning_rate": 9.562456028227654e-06, "loss": 0.8416, "step": 2502 }, { "epoch": 0.16, "grad_norm": 1.2654253325680997, "learning_rate": 9.562031879114082e-06, "loss": 0.6945, "step": 2503 }, { "epoch": 0.16, "grad_norm": 1.7510362142501168, "learning_rate": 9.561607533933965e-06, "loss": 0.7497, "step": 2504 }, { "epoch": 0.16, "grad_norm": 1.6091840321631123, "learning_rate": 9.561182992705541e-06, "loss": 0.7889, "step": 2505 }, { "epoch": 0.16, "grad_norm": 1.3535596676015238, "learning_rate": 9.560758255447058e-06, "loss": 0.7015, "step": 2506 }, { "epoch": 0.16, "grad_norm": 1.7113775563771512, "learning_rate": 9.560333322176767e-06, "loss": 0.8039, "step": 2507 }, { "epoch": 0.16, "grad_norm": 1.9727807043847012, "learning_rate": 9.559908192912933e-06, "loss": 0.8621, "step": 2508 }, { "epoch": 0.16, "grad_norm": 1.6954011414075871, "learning_rate": 9.559482867673825e-06, "loss": 0.8692, "step": 2509 }, { "epoch": 0.16, "grad_norm": 1.663531060699655, "learning_rate": 9.559057346477726e-06, "loss": 0.9449, "step": 2510 }, { "epoch": 0.16, "grad_norm": 1.6405660896165766, "learning_rate": 9.558631629342922e-06, "loss": 0.8315, "step": 2511 }, { "epoch": 0.16, "grad_norm": 1.6350728339401812, "learning_rate": 9.558205716287711e-06, "loss": 0.764, "step": 2512 }, { "epoch": 0.16, "grad_norm": 1.6574161072512965, "learning_rate": 9.557779607330393e-06, "loss": 0.8588, "step": 2513 }, { "epoch": 0.16, "grad_norm": 1.591145751317284, "learning_rate": 9.557353302489286e-06, "loss": 0.7065, "step": 2514 }, { "epoch": 0.16, "grad_norm": 1.6938693657266948, "learning_rate": 9.556926801782714e-06, "loss": 0.884, "step": 2515 }, { "epoch": 0.16, "grad_norm": 1.7888770079895617, "learning_rate": 9.556500105229e-06, "loss": 0.7659, "step": 2516 }, { "epoch": 0.16, "grad_norm": 1.8253652920755947, "learning_rate": 9.556073212846485e-06, "loss": 0.7714, "step": 2517 }, { "epoch": 0.16, "grad_norm": 1.7890637564165581, "learning_rate": 9.555646124653519e-06, "loss": 0.8075, "step": 2518 }, { "epoch": 0.16, "grad_norm": 1.753747645324843, "learning_rate": 9.555218840668454e-06, "loss": 0.751, "step": 2519 }, { "epoch": 0.16, "grad_norm": 1.6997777624910888, "learning_rate": 9.554791360909657e-06, "loss": 0.7393, "step": 2520 }, { "epoch": 0.16, "grad_norm": 1.7652858762010988, "learning_rate": 9.554363685395496e-06, "loss": 0.8659, "step": 2521 }, { "epoch": 0.16, "grad_norm": 1.5457313459532134, "learning_rate": 9.553935814144355e-06, "loss": 0.7734, "step": 2522 }, { "epoch": 0.16, "grad_norm": 1.5716309110597468, "learning_rate": 9.553507747174622e-06, "loss": 0.7312, "step": 2523 }, { "epoch": 0.16, "grad_norm": 2.0535069315429633, "learning_rate": 9.553079484504693e-06, "loss": 0.8506, "step": 2524 }, { "epoch": 0.16, "grad_norm": 1.9588757237955348, "learning_rate": 9.552651026152978e-06, "loss": 0.7873, "step": 2525 }, { "epoch": 0.16, "grad_norm": 1.6235653438303128, "learning_rate": 9.552222372137884e-06, "loss": 0.7691, "step": 2526 }, { "epoch": 0.16, "grad_norm": 1.643427928473139, "learning_rate": 9.551793522477842e-06, "loss": 0.7644, "step": 2527 }, { "epoch": 0.16, "grad_norm": 1.8014068312370615, "learning_rate": 9.551364477191276e-06, "loss": 0.8666, "step": 2528 }, { "epoch": 0.16, "grad_norm": 1.9427121785869144, "learning_rate": 9.55093523629663e-06, "loss": 1.0248, "step": 2529 }, { "epoch": 0.16, "grad_norm": 1.0702231918169856, "learning_rate": 9.550505799812351e-06, "loss": 0.7421, "step": 2530 }, { "epoch": 0.16, "grad_norm": 1.8144945384003226, "learning_rate": 9.550076167756892e-06, "loss": 0.8884, "step": 2531 }, { "epoch": 0.16, "grad_norm": 1.6887396191306105, "learning_rate": 9.549646340148725e-06, "loss": 0.7651, "step": 2532 }, { "epoch": 0.16, "grad_norm": 2.121149954617946, "learning_rate": 9.549216317006313e-06, "loss": 0.9004, "step": 2533 }, { "epoch": 0.16, "grad_norm": 1.8116648472351904, "learning_rate": 9.548786098348146e-06, "loss": 0.8242, "step": 2534 }, { "epoch": 0.16, "grad_norm": 1.8515119888094513, "learning_rate": 9.548355684192712e-06, "loss": 0.9577, "step": 2535 }, { "epoch": 0.16, "grad_norm": 1.6720054468685952, "learning_rate": 9.547925074558505e-06, "loss": 0.8426, "step": 2536 }, { "epoch": 0.16, "grad_norm": 1.470864143299933, "learning_rate": 9.547494269464037e-06, "loss": 0.6808, "step": 2537 }, { "epoch": 0.16, "grad_norm": 1.9705912690064606, "learning_rate": 9.54706326892782e-06, "loss": 0.8419, "step": 2538 }, { "epoch": 0.16, "grad_norm": 1.743311006931928, "learning_rate": 9.546632072968379e-06, "loss": 0.7794, "step": 2539 }, { "epoch": 0.16, "grad_norm": 1.805749807894918, "learning_rate": 9.546200681604243e-06, "loss": 0.8058, "step": 2540 }, { "epoch": 0.16, "grad_norm": 1.8343419696423076, "learning_rate": 9.545769094853958e-06, "loss": 0.8567, "step": 2541 }, { "epoch": 0.16, "grad_norm": 1.839322254591611, "learning_rate": 9.545337312736066e-06, "loss": 0.8704, "step": 2542 }, { "epoch": 0.16, "grad_norm": 1.5647127057060228, "learning_rate": 9.54490533526913e-06, "loss": 0.8307, "step": 2543 }, { "epoch": 0.16, "grad_norm": 1.1183641699787867, "learning_rate": 9.544473162471713e-06, "loss": 0.6094, "step": 2544 }, { "epoch": 0.16, "grad_norm": 1.68124153629873, "learning_rate": 9.544040794362389e-06, "loss": 0.7658, "step": 2545 }, { "epoch": 0.16, "grad_norm": 1.431279166196842, "learning_rate": 9.543608230959738e-06, "loss": 0.709, "step": 2546 }, { "epoch": 0.16, "grad_norm": 1.8527608401221003, "learning_rate": 9.543175472282353e-06, "loss": 0.8967, "step": 2547 }, { "epoch": 0.16, "grad_norm": 1.6029590782811265, "learning_rate": 9.542742518348833e-06, "loss": 0.8405, "step": 2548 }, { "epoch": 0.16, "grad_norm": 1.5809313212708949, "learning_rate": 9.542309369177785e-06, "loss": 0.7061, "step": 2549 }, { "epoch": 0.16, "grad_norm": 1.566315059466292, "learning_rate": 9.541876024787825e-06, "loss": 0.7571, "step": 2550 }, { "epoch": 0.16, "grad_norm": 1.1709323565207281, "learning_rate": 9.541442485197577e-06, "loss": 0.7198, "step": 2551 }, { "epoch": 0.16, "grad_norm": 1.123136077754464, "learning_rate": 9.541008750425676e-06, "loss": 0.663, "step": 2552 }, { "epoch": 0.16, "grad_norm": 1.2347335315180583, "learning_rate": 9.540574820490759e-06, "loss": 0.6157, "step": 2553 }, { "epoch": 0.16, "grad_norm": 1.154620950293174, "learning_rate": 9.540140695411478e-06, "loss": 0.7081, "step": 2554 }, { "epoch": 0.16, "grad_norm": 1.656637989303231, "learning_rate": 9.539706375206487e-06, "loss": 0.8474, "step": 2555 }, { "epoch": 0.16, "grad_norm": 1.165440118625259, "learning_rate": 9.539271859894459e-06, "loss": 0.7346, "step": 2556 }, { "epoch": 0.16, "grad_norm": 3.742145196892895, "learning_rate": 9.538837149494065e-06, "loss": 0.8145, "step": 2557 }, { "epoch": 0.16, "grad_norm": 1.5021458674756765, "learning_rate": 9.538402244023986e-06, "loss": 0.7873, "step": 2558 }, { "epoch": 0.16, "grad_norm": 1.7819206491724582, "learning_rate": 9.537967143502915e-06, "loss": 0.7671, "step": 2559 }, { "epoch": 0.16, "grad_norm": 1.1158863975839275, "learning_rate": 9.537531847949553e-06, "loss": 0.6599, "step": 2560 }, { "epoch": 0.16, "grad_norm": 1.2851305319626412, "learning_rate": 9.537096357382606e-06, "loss": 0.693, "step": 2561 }, { "epoch": 0.16, "grad_norm": 1.7902575568319923, "learning_rate": 9.53666067182079e-06, "loss": 0.8378, "step": 2562 }, { "epoch": 0.16, "grad_norm": 1.5950914414036401, "learning_rate": 9.536224791282834e-06, "loss": 0.768, "step": 2563 }, { "epoch": 0.16, "grad_norm": 1.4089373887256322, "learning_rate": 9.535788715787465e-06, "loss": 0.8103, "step": 2564 }, { "epoch": 0.16, "grad_norm": 1.5300977694925093, "learning_rate": 9.53535244535343e-06, "loss": 0.755, "step": 2565 }, { "epoch": 0.16, "grad_norm": 1.7913766342200244, "learning_rate": 9.534915979999476e-06, "loss": 0.7604, "step": 2566 }, { "epoch": 0.16, "grad_norm": 1.4889032810905363, "learning_rate": 9.534479319744366e-06, "loss": 0.7864, "step": 2567 }, { "epoch": 0.16, "grad_norm": 1.2340603480720753, "learning_rate": 9.534042464606859e-06, "loss": 0.6427, "step": 2568 }, { "epoch": 0.16, "grad_norm": 1.6129703346449067, "learning_rate": 9.533605414605736e-06, "loss": 0.8, "step": 2569 }, { "epoch": 0.16, "grad_norm": 1.644084188541672, "learning_rate": 9.533168169759778e-06, "loss": 0.8047, "step": 2570 }, { "epoch": 0.16, "grad_norm": 1.743885333721941, "learning_rate": 9.532730730087779e-06, "loss": 0.8405, "step": 2571 }, { "epoch": 0.16, "grad_norm": 1.7085100285731232, "learning_rate": 9.532293095608535e-06, "loss": 0.7716, "step": 2572 }, { "epoch": 0.16, "grad_norm": 1.8776189542881905, "learning_rate": 9.53185526634086e-06, "loss": 0.7871, "step": 2573 }, { "epoch": 0.16, "grad_norm": 1.8408012888326886, "learning_rate": 9.531417242303566e-06, "loss": 0.8996, "step": 2574 }, { "epoch": 0.16, "grad_norm": 1.84387023706692, "learning_rate": 9.530979023515483e-06, "loss": 0.7548, "step": 2575 }, { "epoch": 0.16, "grad_norm": 1.802931396683545, "learning_rate": 9.530540609995441e-06, "loss": 0.8474, "step": 2576 }, { "epoch": 0.16, "grad_norm": 1.7865995797174798, "learning_rate": 9.530102001762285e-06, "loss": 0.789, "step": 2577 }, { "epoch": 0.17, "grad_norm": 1.497578256258931, "learning_rate": 9.529663198834862e-06, "loss": 0.7108, "step": 2578 }, { "epoch": 0.17, "grad_norm": 1.6330046652367356, "learning_rate": 9.529224201232034e-06, "loss": 0.794, "step": 2579 }, { "epoch": 0.17, "grad_norm": 1.6844651978063419, "learning_rate": 9.528785008972667e-06, "loss": 0.9816, "step": 2580 }, { "epoch": 0.17, "grad_norm": 1.7729958436783602, "learning_rate": 9.528345622075636e-06, "loss": 0.7461, "step": 2581 }, { "epoch": 0.17, "grad_norm": 1.6729717124004773, "learning_rate": 9.527906040559828e-06, "loss": 0.9127, "step": 2582 }, { "epoch": 0.17, "grad_norm": 1.6212870325319613, "learning_rate": 9.52746626444413e-06, "loss": 0.7998, "step": 2583 }, { "epoch": 0.17, "grad_norm": 1.5628402609642642, "learning_rate": 9.527026293747446e-06, "loss": 0.7666, "step": 2584 }, { "epoch": 0.17, "grad_norm": 1.82379675599705, "learning_rate": 9.526586128488686e-06, "loss": 0.8475, "step": 2585 }, { "epoch": 0.17, "grad_norm": 1.709023709614876, "learning_rate": 9.526145768686765e-06, "loss": 0.8423, "step": 2586 }, { "epoch": 0.17, "grad_norm": 1.0130618290842417, "learning_rate": 9.52570521436061e-06, "loss": 0.6374, "step": 2587 }, { "epoch": 0.17, "grad_norm": 2.1397955202926866, "learning_rate": 9.525264465529154e-06, "loss": 0.7461, "step": 2588 }, { "epoch": 0.17, "grad_norm": 1.960258416990151, "learning_rate": 9.52482352221134e-06, "loss": 0.7484, "step": 2589 }, { "epoch": 0.17, "grad_norm": 2.174152500842639, "learning_rate": 9.524382384426119e-06, "loss": 0.7951, "step": 2590 }, { "epoch": 0.17, "grad_norm": 1.1400809982589233, "learning_rate": 9.52394105219245e-06, "loss": 0.6727, "step": 2591 }, { "epoch": 0.17, "grad_norm": 1.2683999799636234, "learning_rate": 9.523499525529302e-06, "loss": 0.6964, "step": 2592 }, { "epoch": 0.17, "grad_norm": 1.5254877781948983, "learning_rate": 9.523057804455648e-06, "loss": 0.788, "step": 2593 }, { "epoch": 0.17, "grad_norm": 1.7711299755450007, "learning_rate": 9.522615888990476e-06, "loss": 0.9166, "step": 2594 }, { "epoch": 0.17, "grad_norm": 1.5555581292506966, "learning_rate": 9.522173779152773e-06, "loss": 0.7359, "step": 2595 }, { "epoch": 0.17, "grad_norm": 1.7097414690180037, "learning_rate": 9.521731474961547e-06, "loss": 0.7587, "step": 2596 }, { "epoch": 0.17, "grad_norm": 1.5305377707487005, "learning_rate": 9.5212889764358e-06, "loss": 0.7258, "step": 2597 }, { "epoch": 0.17, "grad_norm": 1.546823319558439, "learning_rate": 9.520846283594555e-06, "loss": 0.8187, "step": 2598 }, { "epoch": 0.17, "grad_norm": 1.7570825482436248, "learning_rate": 9.520403396456838e-06, "loss": 0.7965, "step": 2599 }, { "epoch": 0.17, "grad_norm": 1.2453874633213964, "learning_rate": 9.519960315041681e-06, "loss": 0.6884, "step": 2600 }, { "epoch": 0.17, "grad_norm": 1.2791333845192823, "learning_rate": 9.519517039368127e-06, "loss": 0.7028, "step": 2601 }, { "epoch": 0.17, "grad_norm": 1.8798993938604451, "learning_rate": 9.519073569455225e-06, "loss": 0.882, "step": 2602 }, { "epoch": 0.17, "grad_norm": 1.6553539495691632, "learning_rate": 9.518629905322041e-06, "loss": 0.7814, "step": 2603 }, { "epoch": 0.17, "grad_norm": 1.7391142294962052, "learning_rate": 9.518186046987636e-06, "loss": 0.8558, "step": 2604 }, { "epoch": 0.17, "grad_norm": 1.5204647514641034, "learning_rate": 9.517741994471091e-06, "loss": 0.7952, "step": 2605 }, { "epoch": 0.17, "grad_norm": 1.4963053864363067, "learning_rate": 9.517297747791485e-06, "loss": 0.8118, "step": 2606 }, { "epoch": 0.17, "grad_norm": 1.5684562697234306, "learning_rate": 9.516853306967917e-06, "loss": 0.7277, "step": 2607 }, { "epoch": 0.17, "grad_norm": 1.347132269280463, "learning_rate": 9.516408672019482e-06, "loss": 0.5718, "step": 2608 }, { "epoch": 0.17, "grad_norm": 1.711088456650685, "learning_rate": 9.515963842965294e-06, "loss": 0.7577, "step": 2609 }, { "epoch": 0.17, "grad_norm": 1.824215053942442, "learning_rate": 9.51551881982447e-06, "loss": 0.7603, "step": 2610 }, { "epoch": 0.17, "grad_norm": 2.2131175992784446, "learning_rate": 9.515073602616135e-06, "loss": 0.9799, "step": 2611 }, { "epoch": 0.17, "grad_norm": 1.6042116779461968, "learning_rate": 9.514628191359426e-06, "loss": 0.9412, "step": 2612 }, { "epoch": 0.17, "grad_norm": 1.5955192359405326, "learning_rate": 9.51418258607348e-06, "loss": 0.8731, "step": 2613 }, { "epoch": 0.17, "grad_norm": 1.8085564483076262, "learning_rate": 9.513736786777455e-06, "loss": 0.727, "step": 2614 }, { "epoch": 0.17, "grad_norm": 1.619340815014769, "learning_rate": 9.513290793490506e-06, "loss": 0.716, "step": 2615 }, { "epoch": 0.17, "grad_norm": 1.6669983976006841, "learning_rate": 9.512844606231804e-06, "loss": 0.9838, "step": 2616 }, { "epoch": 0.17, "grad_norm": 2.05676965836453, "learning_rate": 9.512398225020523e-06, "loss": 0.8724, "step": 2617 }, { "epoch": 0.17, "grad_norm": 1.899506484290337, "learning_rate": 9.511951649875846e-06, "loss": 0.904, "step": 2618 }, { "epoch": 0.17, "grad_norm": 1.7418922302759114, "learning_rate": 9.511504880816971e-06, "loss": 0.7735, "step": 2619 }, { "epoch": 0.17, "grad_norm": 1.6143198525306082, "learning_rate": 9.511057917863094e-06, "loss": 0.8152, "step": 2620 }, { "epoch": 0.17, "grad_norm": 1.7010319439158759, "learning_rate": 9.510610761033427e-06, "loss": 0.8576, "step": 2621 }, { "epoch": 0.17, "grad_norm": 1.8610861336694466, "learning_rate": 9.51016341034719e-06, "loss": 0.8657, "step": 2622 }, { "epoch": 0.17, "grad_norm": 1.374007572425876, "learning_rate": 9.509715865823605e-06, "loss": 0.8225, "step": 2623 }, { "epoch": 0.17, "grad_norm": 1.9122858017716606, "learning_rate": 9.509268127481907e-06, "loss": 0.8029, "step": 2624 }, { "epoch": 0.17, "grad_norm": 1.7319553838470723, "learning_rate": 9.508820195341343e-06, "loss": 0.7428, "step": 2625 }, { "epoch": 0.17, "grad_norm": 1.7568375241628955, "learning_rate": 9.508372069421159e-06, "loss": 0.887, "step": 2626 }, { "epoch": 0.17, "grad_norm": 1.653552062487871, "learning_rate": 9.507923749740619e-06, "loss": 0.8896, "step": 2627 }, { "epoch": 0.17, "grad_norm": 1.7178714076343453, "learning_rate": 9.507475236318987e-06, "loss": 0.8394, "step": 2628 }, { "epoch": 0.17, "grad_norm": 0.9722739518657735, "learning_rate": 9.50702652917554e-06, "loss": 0.6241, "step": 2629 }, { "epoch": 0.17, "grad_norm": 1.490173712580066, "learning_rate": 9.506577628329566e-06, "loss": 0.7771, "step": 2630 }, { "epoch": 0.17, "grad_norm": 1.7157069911735403, "learning_rate": 9.506128533800353e-06, "loss": 0.9427, "step": 2631 }, { "epoch": 0.17, "grad_norm": 1.9347003743267868, "learning_rate": 9.505679245607205e-06, "loss": 0.8005, "step": 2632 }, { "epoch": 0.17, "grad_norm": 1.111099998141523, "learning_rate": 9.505229763769432e-06, "loss": 0.655, "step": 2633 }, { "epoch": 0.17, "grad_norm": 1.7663804831382734, "learning_rate": 9.504780088306349e-06, "loss": 0.714, "step": 2634 }, { "epoch": 0.17, "grad_norm": 1.2736368291348232, "learning_rate": 9.504330219237284e-06, "loss": 0.7151, "step": 2635 }, { "epoch": 0.17, "grad_norm": 1.8301454626214866, "learning_rate": 9.503880156581571e-06, "loss": 0.8891, "step": 2636 }, { "epoch": 0.17, "grad_norm": 1.6879630860785662, "learning_rate": 9.503429900358554e-06, "loss": 0.8414, "step": 2637 }, { "epoch": 0.17, "grad_norm": 1.9775090786946998, "learning_rate": 9.502979450587582e-06, "loss": 0.7346, "step": 2638 }, { "epoch": 0.17, "grad_norm": 1.5063623726590212, "learning_rate": 9.502528807288014e-06, "loss": 0.7808, "step": 2639 }, { "epoch": 0.17, "grad_norm": 1.6568127019050731, "learning_rate": 9.50207797047922e-06, "loss": 0.7481, "step": 2640 }, { "epoch": 0.17, "grad_norm": 1.7210768274859265, "learning_rate": 9.501626940180574e-06, "loss": 0.9472, "step": 2641 }, { "epoch": 0.17, "grad_norm": 1.2045214965365147, "learning_rate": 9.501175716411464e-06, "loss": 0.6682, "step": 2642 }, { "epoch": 0.17, "grad_norm": 1.4807979569067888, "learning_rate": 9.50072429919128e-06, "loss": 0.8343, "step": 2643 }, { "epoch": 0.17, "grad_norm": 2.100532938425354, "learning_rate": 9.50027268853942e-06, "loss": 0.8149, "step": 2644 }, { "epoch": 0.17, "grad_norm": 1.8284756142567113, "learning_rate": 9.499820884475296e-06, "loss": 0.8679, "step": 2645 }, { "epoch": 0.17, "grad_norm": 1.9820247065082832, "learning_rate": 9.49936888701833e-06, "loss": 1.012, "step": 2646 }, { "epoch": 0.17, "grad_norm": 1.7364836067158573, "learning_rate": 9.498916696187942e-06, "loss": 0.6696, "step": 2647 }, { "epoch": 0.17, "grad_norm": 1.6805109913075946, "learning_rate": 9.498464312003565e-06, "loss": 0.8383, "step": 2648 }, { "epoch": 0.17, "grad_norm": 1.7426173250848997, "learning_rate": 9.498011734484647e-06, "loss": 0.8166, "step": 2649 }, { "epoch": 0.17, "grad_norm": 1.638094533814725, "learning_rate": 9.497558963650635e-06, "loss": 0.8127, "step": 2650 }, { "epoch": 0.17, "grad_norm": 1.1276212451218643, "learning_rate": 9.49710599952099e-06, "loss": 0.6147, "step": 2651 }, { "epoch": 0.17, "grad_norm": 1.7196425039247851, "learning_rate": 9.49665284211518e-06, "loss": 0.8639, "step": 2652 }, { "epoch": 0.17, "grad_norm": 1.8770572866431663, "learning_rate": 9.49619949145268e-06, "loss": 0.9463, "step": 2653 }, { "epoch": 0.17, "grad_norm": 1.5117262498971902, "learning_rate": 9.49574594755297e-06, "loss": 0.8116, "step": 2654 }, { "epoch": 0.17, "grad_norm": 1.7748395183239682, "learning_rate": 9.49529221043555e-06, "loss": 0.8762, "step": 2655 }, { "epoch": 0.17, "grad_norm": 1.6682138981125452, "learning_rate": 9.494838280119915e-06, "loss": 0.7931, "step": 2656 }, { "epoch": 0.17, "grad_norm": 1.659610801283884, "learning_rate": 9.494384156625575e-06, "loss": 0.8134, "step": 2657 }, { "epoch": 0.17, "grad_norm": 1.7702594097152988, "learning_rate": 9.493929839972048e-06, "loss": 0.7641, "step": 2658 }, { "epoch": 0.17, "grad_norm": 1.9024615026388056, "learning_rate": 9.49347533017886e-06, "loss": 0.8255, "step": 2659 }, { "epoch": 0.17, "grad_norm": 1.77709757040552, "learning_rate": 9.493020627265545e-06, "loss": 0.7078, "step": 2660 }, { "epoch": 0.17, "grad_norm": 1.636554843113569, "learning_rate": 9.492565731251645e-06, "loss": 0.8425, "step": 2661 }, { "epoch": 0.17, "grad_norm": 1.5404984973042832, "learning_rate": 9.492110642156708e-06, "loss": 0.753, "step": 2662 }, { "epoch": 0.17, "grad_norm": 1.604485730504611, "learning_rate": 9.491655360000298e-06, "loss": 0.7373, "step": 2663 }, { "epoch": 0.17, "grad_norm": 1.9485600087549602, "learning_rate": 9.491199884801976e-06, "loss": 0.924, "step": 2664 }, { "epoch": 0.17, "grad_norm": 2.1478703690919216, "learning_rate": 9.490744216581323e-06, "loss": 0.919, "step": 2665 }, { "epoch": 0.17, "grad_norm": 1.0238056962407445, "learning_rate": 9.490288355357918e-06, "loss": 0.6813, "step": 2666 }, { "epoch": 0.17, "grad_norm": 1.721856608462047, "learning_rate": 9.489832301151354e-06, "loss": 0.9411, "step": 2667 }, { "epoch": 0.17, "grad_norm": 1.5783758592277344, "learning_rate": 9.489376053981234e-06, "loss": 0.784, "step": 2668 }, { "epoch": 0.17, "grad_norm": 1.7690723286106522, "learning_rate": 9.488919613867162e-06, "loss": 0.9303, "step": 2669 }, { "epoch": 0.17, "grad_norm": 1.7764655864309826, "learning_rate": 9.48846298082876e-06, "loss": 0.6819, "step": 2670 }, { "epoch": 0.17, "grad_norm": 1.904949130640312, "learning_rate": 9.48800615488565e-06, "loss": 0.7033, "step": 2671 }, { "epoch": 0.17, "grad_norm": 1.5806590963109173, "learning_rate": 9.487549136057467e-06, "loss": 0.7952, "step": 2672 }, { "epoch": 0.17, "grad_norm": 1.5463505045473849, "learning_rate": 9.487091924363852e-06, "loss": 0.7521, "step": 2673 }, { "epoch": 0.17, "grad_norm": 1.7242485999629644, "learning_rate": 9.486634519824453e-06, "loss": 0.7837, "step": 2674 }, { "epoch": 0.17, "grad_norm": 1.8045609857181055, "learning_rate": 9.486176922458929e-06, "loss": 0.7601, "step": 2675 }, { "epoch": 0.17, "grad_norm": 1.6869679988103907, "learning_rate": 9.48571913228695e-06, "loss": 0.853, "step": 2676 }, { "epoch": 0.17, "grad_norm": 1.5637013678026304, "learning_rate": 9.485261149328189e-06, "loss": 0.7866, "step": 2677 }, { "epoch": 0.17, "grad_norm": 2.2096565954966327, "learning_rate": 9.484802973602328e-06, "loss": 0.7529, "step": 2678 }, { "epoch": 0.17, "grad_norm": 1.0758742713758285, "learning_rate": 9.484344605129057e-06, "loss": 0.597, "step": 2679 }, { "epoch": 0.17, "grad_norm": 1.5224631437967526, "learning_rate": 9.483886043928078e-06, "loss": 0.6297, "step": 2680 }, { "epoch": 0.17, "grad_norm": 1.2069005787612674, "learning_rate": 9.4834272900191e-06, "loss": 0.601, "step": 2681 }, { "epoch": 0.17, "grad_norm": 1.762903167904653, "learning_rate": 9.48296834342184e-06, "loss": 0.8451, "step": 2682 }, { "epoch": 0.17, "grad_norm": 1.318074594711498, "learning_rate": 9.482509204156019e-06, "loss": 0.7476, "step": 2683 }, { "epoch": 0.17, "grad_norm": 1.7388241809772664, "learning_rate": 9.48204987224137e-06, "loss": 0.7044, "step": 2684 }, { "epoch": 0.17, "grad_norm": 1.8305360547557272, "learning_rate": 9.481590347697638e-06, "loss": 0.7094, "step": 2685 }, { "epoch": 0.17, "grad_norm": 1.9985468645762436, "learning_rate": 9.481130630544569e-06, "loss": 0.8816, "step": 2686 }, { "epoch": 0.17, "grad_norm": 2.0749006196209745, "learning_rate": 9.480670720801921e-06, "loss": 0.8547, "step": 2687 }, { "epoch": 0.17, "grad_norm": 1.4352994507576202, "learning_rate": 9.48021061848946e-06, "loss": 0.6983, "step": 2688 }, { "epoch": 0.17, "grad_norm": 1.6577742302892917, "learning_rate": 9.479750323626963e-06, "loss": 0.9242, "step": 2689 }, { "epoch": 0.17, "grad_norm": 1.5052572649256415, "learning_rate": 9.47928983623421e-06, "loss": 0.7271, "step": 2690 }, { "epoch": 0.17, "grad_norm": 1.5602703277726846, "learning_rate": 9.47882915633099e-06, "loss": 0.8956, "step": 2691 }, { "epoch": 0.17, "grad_norm": 3.1088481338662377, "learning_rate": 9.478368283937106e-06, "loss": 0.8137, "step": 2692 }, { "epoch": 0.17, "grad_norm": 1.556553371250788, "learning_rate": 9.477907219072362e-06, "loss": 0.8036, "step": 2693 }, { "epoch": 0.17, "grad_norm": 1.6284940924871776, "learning_rate": 9.477445961756577e-06, "loss": 0.7599, "step": 2694 }, { "epoch": 0.17, "grad_norm": 1.545941553253548, "learning_rate": 9.476984512009572e-06, "loss": 0.8506, "step": 2695 }, { "epoch": 0.17, "grad_norm": 1.6760403057701174, "learning_rate": 9.47652286985118e-06, "loss": 0.81, "step": 2696 }, { "epoch": 0.17, "grad_norm": 1.7946824563382828, "learning_rate": 9.476061035301242e-06, "loss": 0.7533, "step": 2697 }, { "epoch": 0.17, "grad_norm": 1.9502427178013397, "learning_rate": 9.475599008379606e-06, "loss": 0.8998, "step": 2698 }, { "epoch": 0.17, "grad_norm": 1.8719334235362846, "learning_rate": 9.475136789106128e-06, "loss": 0.9259, "step": 2699 }, { "epoch": 0.17, "grad_norm": 4.092667070190972, "learning_rate": 9.474674377500677e-06, "loss": 0.8063, "step": 2700 }, { "epoch": 0.17, "grad_norm": 1.722752597969227, "learning_rate": 9.474211773583122e-06, "loss": 0.8743, "step": 2701 }, { "epoch": 0.17, "grad_norm": 1.9042881404411092, "learning_rate": 9.473748977373346e-06, "loss": 0.9604, "step": 2702 }, { "epoch": 0.17, "grad_norm": 1.6422624123721445, "learning_rate": 9.47328598889124e-06, "loss": 0.8488, "step": 2703 }, { "epoch": 0.17, "grad_norm": 1.7866484287916995, "learning_rate": 9.472822808156704e-06, "loss": 0.7397, "step": 2704 }, { "epoch": 0.17, "grad_norm": 1.1942595902362707, "learning_rate": 9.47235943518964e-06, "loss": 0.6721, "step": 2705 }, { "epoch": 0.17, "grad_norm": 1.5848365996914706, "learning_rate": 9.471895870009966e-06, "loss": 0.6777, "step": 2706 }, { "epoch": 0.17, "grad_norm": 1.7551197456454222, "learning_rate": 9.471432112637604e-06, "loss": 0.8254, "step": 2707 }, { "epoch": 0.17, "grad_norm": 1.8935936134263507, "learning_rate": 9.470968163092486e-06, "loss": 0.8465, "step": 2708 }, { "epoch": 0.17, "grad_norm": 1.280474174543626, "learning_rate": 9.47050402139455e-06, "loss": 0.6662, "step": 2709 }, { "epoch": 0.17, "grad_norm": 1.4591610312212824, "learning_rate": 9.470039687563747e-06, "loss": 0.8186, "step": 2710 }, { "epoch": 0.17, "grad_norm": 1.991074797758488, "learning_rate": 9.469575161620029e-06, "loss": 0.7201, "step": 2711 }, { "epoch": 0.17, "grad_norm": 1.6081286594021593, "learning_rate": 9.469110443583363e-06, "loss": 0.7686, "step": 2712 }, { "epoch": 0.17, "grad_norm": 1.9801295361852673, "learning_rate": 9.468645533473721e-06, "loss": 0.8316, "step": 2713 }, { "epoch": 0.17, "grad_norm": 3.230199734913487, "learning_rate": 9.468180431311085e-06, "loss": 0.7544, "step": 2714 }, { "epoch": 0.17, "grad_norm": 1.596048180171331, "learning_rate": 9.467715137115442e-06, "loss": 0.8268, "step": 2715 }, { "epoch": 0.17, "grad_norm": 1.6183140260676863, "learning_rate": 9.467249650906792e-06, "loss": 0.743, "step": 2716 }, { "epoch": 0.17, "grad_norm": 1.7704937276508375, "learning_rate": 9.466783972705138e-06, "loss": 0.9085, "step": 2717 }, { "epoch": 0.17, "grad_norm": 1.59527650376255, "learning_rate": 9.466318102530494e-06, "loss": 0.701, "step": 2718 }, { "epoch": 0.17, "grad_norm": 1.7629645967826149, "learning_rate": 9.465852040402883e-06, "loss": 0.7526, "step": 2719 }, { "epoch": 0.17, "grad_norm": 1.7047565224258032, "learning_rate": 9.465385786342337e-06, "loss": 0.8151, "step": 2720 }, { "epoch": 0.17, "grad_norm": 1.1428004325541805, "learning_rate": 9.46491934036889e-06, "loss": 0.692, "step": 2721 }, { "epoch": 0.17, "grad_norm": 1.5988404666894533, "learning_rate": 9.464452702502595e-06, "loss": 0.7966, "step": 2722 }, { "epoch": 0.17, "grad_norm": 2.0827170369559873, "learning_rate": 9.463985872763501e-06, "loss": 0.7976, "step": 2723 }, { "epoch": 0.17, "grad_norm": 1.8512186831107076, "learning_rate": 9.463518851171678e-06, "loss": 0.9404, "step": 2724 }, { "epoch": 0.17, "grad_norm": 1.6791028972906255, "learning_rate": 9.463051637747191e-06, "loss": 0.8322, "step": 2725 }, { "epoch": 0.17, "grad_norm": 1.6220852607677505, "learning_rate": 9.462584232510123e-06, "loss": 0.7512, "step": 2726 }, { "epoch": 0.17, "grad_norm": 2.444477140845627, "learning_rate": 9.462116635480562e-06, "loss": 0.8498, "step": 2727 }, { "epoch": 0.17, "grad_norm": 1.3027817682697649, "learning_rate": 9.461648846678605e-06, "loss": 0.7193, "step": 2728 }, { "epoch": 0.17, "grad_norm": 1.2451390650293863, "learning_rate": 9.461180866124357e-06, "loss": 0.6633, "step": 2729 }, { "epoch": 0.17, "grad_norm": 1.6376198730466376, "learning_rate": 9.460712693837928e-06, "loss": 0.8702, "step": 2730 }, { "epoch": 0.17, "grad_norm": 1.8316582278208833, "learning_rate": 9.46024432983944e-06, "loss": 0.9041, "step": 2731 }, { "epoch": 0.17, "grad_norm": 1.9776154766897074, "learning_rate": 9.459775774149024e-06, "loss": 0.856, "step": 2732 }, { "epoch": 0.17, "grad_norm": 1.3869535883540487, "learning_rate": 9.459307026786817e-06, "loss": 0.6761, "step": 2733 }, { "epoch": 0.17, "grad_norm": 1.710768723949833, "learning_rate": 9.458838087772963e-06, "loss": 0.8307, "step": 2734 }, { "epoch": 0.18, "grad_norm": 2.7148381942131783, "learning_rate": 9.458368957127616e-06, "loss": 0.7127, "step": 2735 }, { "epoch": 0.18, "grad_norm": 1.5434268781521694, "learning_rate": 9.457899634870944e-06, "loss": 0.9196, "step": 2736 }, { "epoch": 0.18, "grad_norm": 1.8116597215141492, "learning_rate": 9.45743012102311e-06, "loss": 0.8487, "step": 2737 }, { "epoch": 0.18, "grad_norm": 1.7402534935920637, "learning_rate": 9.456960415604295e-06, "loss": 0.8117, "step": 2738 }, { "epoch": 0.18, "grad_norm": 1.7205376783925426, "learning_rate": 9.456490518634688e-06, "loss": 0.9154, "step": 2739 }, { "epoch": 0.18, "grad_norm": 1.2880822011050184, "learning_rate": 9.456020430134483e-06, "loss": 0.5916, "step": 2740 }, { "epoch": 0.18, "grad_norm": 1.6345561993875517, "learning_rate": 9.455550150123884e-06, "loss": 0.8974, "step": 2741 }, { "epoch": 0.18, "grad_norm": 1.3530596948027065, "learning_rate": 9.4550796786231e-06, "loss": 0.7127, "step": 2742 }, { "epoch": 0.18, "grad_norm": 1.2686085556125672, "learning_rate": 9.454609015652355e-06, "loss": 0.594, "step": 2743 }, { "epoch": 0.18, "grad_norm": 1.9127898120019258, "learning_rate": 9.454138161231873e-06, "loss": 0.6918, "step": 2744 }, { "epoch": 0.18, "grad_norm": 1.722360772902297, "learning_rate": 9.453667115381893e-06, "loss": 0.7967, "step": 2745 }, { "epoch": 0.18, "grad_norm": 1.9308027967906065, "learning_rate": 9.453195878122659e-06, "loss": 0.8194, "step": 2746 }, { "epoch": 0.18, "grad_norm": 1.6935996613692512, "learning_rate": 9.452724449474423e-06, "loss": 0.8438, "step": 2747 }, { "epoch": 0.18, "grad_norm": 1.744899717539345, "learning_rate": 9.452252829457447e-06, "loss": 0.7983, "step": 2748 }, { "epoch": 0.18, "grad_norm": 1.1078032741273338, "learning_rate": 9.451781018092e-06, "loss": 0.6795, "step": 2749 }, { "epoch": 0.18, "grad_norm": 1.662297088169997, "learning_rate": 9.45130901539836e-06, "loss": 0.8206, "step": 2750 }, { "epoch": 0.18, "grad_norm": 1.5798913245809199, "learning_rate": 9.45083682139681e-06, "loss": 0.7095, "step": 2751 }, { "epoch": 0.18, "grad_norm": 1.813038942747635, "learning_rate": 9.450364436107647e-06, "loss": 0.8274, "step": 2752 }, { "epoch": 0.18, "grad_norm": 1.6995796560251417, "learning_rate": 9.449891859551172e-06, "loss": 0.8623, "step": 2753 }, { "epoch": 0.18, "grad_norm": 1.87919860258054, "learning_rate": 9.449419091747695e-06, "loss": 0.8668, "step": 2754 }, { "epoch": 0.18, "grad_norm": 1.8094300841415405, "learning_rate": 9.448946132717536e-06, "loss": 0.7485, "step": 2755 }, { "epoch": 0.18, "grad_norm": 1.6784385229597627, "learning_rate": 9.448472982481021e-06, "loss": 0.8597, "step": 2756 }, { "epoch": 0.18, "grad_norm": 1.4301617360909176, "learning_rate": 9.447999641058486e-06, "loss": 0.8268, "step": 2757 }, { "epoch": 0.18, "grad_norm": 1.0927992317354502, "learning_rate": 9.44752610847027e-06, "loss": 0.7558, "step": 2758 }, { "epoch": 0.18, "grad_norm": 1.5231673340147542, "learning_rate": 9.447052384736729e-06, "loss": 0.7396, "step": 2759 }, { "epoch": 0.18, "grad_norm": 1.8902937455671764, "learning_rate": 9.446578469878219e-06, "loss": 0.8615, "step": 2760 }, { "epoch": 0.18, "grad_norm": 1.7585956364011213, "learning_rate": 9.446104363915112e-06, "loss": 0.6023, "step": 2761 }, { "epoch": 0.18, "grad_norm": 1.946254973065542, "learning_rate": 9.445630066867781e-06, "loss": 0.8988, "step": 2762 }, { "epoch": 0.18, "grad_norm": 1.8461796934977304, "learning_rate": 9.445155578756612e-06, "loss": 0.8955, "step": 2763 }, { "epoch": 0.18, "grad_norm": 1.1261198628029259, "learning_rate": 9.444680899601997e-06, "loss": 0.6652, "step": 2764 }, { "epoch": 0.18, "grad_norm": 2.681517180273461, "learning_rate": 9.444206029424334e-06, "loss": 0.8664, "step": 2765 }, { "epoch": 0.18, "grad_norm": 1.4749645905371611, "learning_rate": 9.443730968244037e-06, "loss": 0.6247, "step": 2766 }, { "epoch": 0.18, "grad_norm": 1.4909248914868871, "learning_rate": 9.443255716081522e-06, "loss": 0.8294, "step": 2767 }, { "epoch": 0.18, "grad_norm": 2.046719463770825, "learning_rate": 9.442780272957208e-06, "loss": 0.7863, "step": 2768 }, { "epoch": 0.18, "grad_norm": 1.7602845634397895, "learning_rate": 9.442304638891538e-06, "loss": 0.8267, "step": 2769 }, { "epoch": 0.18, "grad_norm": 1.8272171869633134, "learning_rate": 9.441828813904948e-06, "loss": 0.9645, "step": 2770 }, { "epoch": 0.18, "grad_norm": 1.7806158898890743, "learning_rate": 9.441352798017886e-06, "loss": 0.9308, "step": 2771 }, { "epoch": 0.18, "grad_norm": 1.830042429755283, "learning_rate": 9.440876591250817e-06, "loss": 0.8183, "step": 2772 }, { "epoch": 0.18, "grad_norm": 1.4541997472891948, "learning_rate": 9.440400193624202e-06, "loss": 0.7026, "step": 2773 }, { "epoch": 0.18, "grad_norm": 1.7776006713486798, "learning_rate": 9.439923605158519e-06, "loss": 0.7684, "step": 2774 }, { "epoch": 0.18, "grad_norm": 1.6934962927082788, "learning_rate": 9.439446825874248e-06, "loss": 0.7597, "step": 2775 }, { "epoch": 0.18, "grad_norm": 2.057806530098997, "learning_rate": 9.43896985579188e-06, "loss": 0.6624, "step": 2776 }, { "epoch": 0.18, "grad_norm": 1.676983372030172, "learning_rate": 9.438492694931916e-06, "loss": 0.8207, "step": 2777 }, { "epoch": 0.18, "grad_norm": 1.4953607593161948, "learning_rate": 9.438015343314862e-06, "loss": 0.6986, "step": 2778 }, { "epoch": 0.18, "grad_norm": 1.8809180539660577, "learning_rate": 9.437537800961235e-06, "loss": 0.8197, "step": 2779 }, { "epoch": 0.18, "grad_norm": 1.872624084791358, "learning_rate": 9.437060067891556e-06, "loss": 0.8711, "step": 2780 }, { "epoch": 0.18, "grad_norm": 1.0938464924156306, "learning_rate": 9.436582144126362e-06, "loss": 0.5515, "step": 2781 }, { "epoch": 0.18, "grad_norm": 1.743789262151622, "learning_rate": 9.436104029686188e-06, "loss": 0.7017, "step": 2782 }, { "epoch": 0.18, "grad_norm": 1.5455995538009033, "learning_rate": 9.435625724591584e-06, "loss": 0.8114, "step": 2783 }, { "epoch": 0.18, "grad_norm": 1.486079235614101, "learning_rate": 9.435147228863109e-06, "loss": 0.8041, "step": 2784 }, { "epoch": 0.18, "grad_norm": 1.702993443740867, "learning_rate": 9.434668542521323e-06, "loss": 0.8117, "step": 2785 }, { "epoch": 0.18, "grad_norm": 1.8854744269390882, "learning_rate": 9.434189665586803e-06, "loss": 0.8059, "step": 2786 }, { "epoch": 0.18, "grad_norm": 1.713745211729304, "learning_rate": 9.433710598080128e-06, "loss": 0.7332, "step": 2787 }, { "epoch": 0.18, "grad_norm": 1.8890989232541897, "learning_rate": 9.43323134002189e-06, "loss": 0.826, "step": 2788 }, { "epoch": 0.18, "grad_norm": 1.4977764891462817, "learning_rate": 9.432751891432681e-06, "loss": 0.6466, "step": 2789 }, { "epoch": 0.18, "grad_norm": 1.0756630163257224, "learning_rate": 9.432272252333112e-06, "loss": 0.5747, "step": 2790 }, { "epoch": 0.18, "grad_norm": 1.4650930143254575, "learning_rate": 9.431792422743795e-06, "loss": 0.8402, "step": 2791 }, { "epoch": 0.18, "grad_norm": 1.2766806770068533, "learning_rate": 9.431312402685354e-06, "loss": 0.6545, "step": 2792 }, { "epoch": 0.18, "grad_norm": 1.8708248300313441, "learning_rate": 9.430832192178413e-06, "loss": 0.8123, "step": 2793 }, { "epoch": 0.18, "grad_norm": 1.566598460139554, "learning_rate": 9.43035179124362e-06, "loss": 0.6984, "step": 2794 }, { "epoch": 0.18, "grad_norm": 5.24898236677714, "learning_rate": 9.429871199901614e-06, "loss": 0.8563, "step": 2795 }, { "epoch": 0.18, "grad_norm": 1.6576670458022034, "learning_rate": 9.429390418173052e-06, "loss": 0.7653, "step": 2796 }, { "epoch": 0.18, "grad_norm": 1.645662439039437, "learning_rate": 9.428909446078597e-06, "loss": 0.7763, "step": 2797 }, { "epoch": 0.18, "grad_norm": 1.7152353249814523, "learning_rate": 9.428428283638922e-06, "loss": 0.8829, "step": 2798 }, { "epoch": 0.18, "grad_norm": 1.7912556559922486, "learning_rate": 9.427946930874704e-06, "loss": 0.7334, "step": 2799 }, { "epoch": 0.18, "grad_norm": 1.6499423152893882, "learning_rate": 9.42746538780663e-06, "loss": 0.7481, "step": 2800 }, { "epoch": 0.18, "grad_norm": 1.5863219184194406, "learning_rate": 9.426983654455399e-06, "loss": 0.8099, "step": 2801 }, { "epoch": 0.18, "grad_norm": 1.6994061177420137, "learning_rate": 9.42650173084171e-06, "loss": 0.7279, "step": 2802 }, { "epoch": 0.18, "grad_norm": 1.6477595081959762, "learning_rate": 9.426019616986281e-06, "loss": 0.7202, "step": 2803 }, { "epoch": 0.18, "grad_norm": 2.304091835370703, "learning_rate": 9.425537312909828e-06, "loss": 0.8421, "step": 2804 }, { "epoch": 0.18, "grad_norm": 1.640690481587602, "learning_rate": 9.425054818633081e-06, "loss": 0.8485, "step": 2805 }, { "epoch": 0.18, "grad_norm": 1.4991731423907744, "learning_rate": 9.424572134176776e-06, "loss": 0.7908, "step": 2806 }, { "epoch": 0.18, "grad_norm": 1.628315077172151, "learning_rate": 9.424089259561658e-06, "loss": 0.8105, "step": 2807 }, { "epoch": 0.18, "grad_norm": 1.5684217467670025, "learning_rate": 9.42360619480848e-06, "loss": 0.8073, "step": 2808 }, { "epoch": 0.18, "grad_norm": 1.871843686948868, "learning_rate": 9.423122939938003e-06, "loss": 0.9145, "step": 2809 }, { "epoch": 0.18, "grad_norm": 1.57622306796839, "learning_rate": 9.422639494970996e-06, "loss": 0.8315, "step": 2810 }, { "epoch": 0.18, "grad_norm": 1.6886171522453324, "learning_rate": 9.422155859928237e-06, "loss": 0.9082, "step": 2811 }, { "epoch": 0.18, "grad_norm": 1.7269926729346232, "learning_rate": 9.421672034830511e-06, "loss": 0.9199, "step": 2812 }, { "epoch": 0.18, "grad_norm": 1.645951733759207, "learning_rate": 9.421188019698613e-06, "loss": 0.8028, "step": 2813 }, { "epoch": 0.18, "grad_norm": 1.5612893312337215, "learning_rate": 9.420703814553343e-06, "loss": 0.721, "step": 2814 }, { "epoch": 0.18, "grad_norm": 1.0575293315503607, "learning_rate": 9.420219419415513e-06, "loss": 0.6604, "step": 2815 }, { "epoch": 0.18, "grad_norm": 1.9006125084644367, "learning_rate": 9.419734834305942e-06, "loss": 0.7954, "step": 2816 }, { "epoch": 0.18, "grad_norm": 1.6358185276767285, "learning_rate": 9.419250059245453e-06, "loss": 0.8159, "step": 2817 }, { "epoch": 0.18, "grad_norm": 1.834640344943155, "learning_rate": 9.418765094254882e-06, "loss": 0.8213, "step": 2818 }, { "epoch": 0.18, "grad_norm": 1.6313695224282565, "learning_rate": 9.418279939355073e-06, "loss": 0.8011, "step": 2819 }, { "epoch": 0.18, "grad_norm": 1.1581237001980198, "learning_rate": 9.417794594566878e-06, "loss": 0.6556, "step": 2820 }, { "epoch": 0.18, "grad_norm": 1.8066782201276688, "learning_rate": 9.417309059911152e-06, "loss": 0.9058, "step": 2821 }, { "epoch": 0.18, "grad_norm": 1.8128607454664047, "learning_rate": 9.416823335408766e-06, "loss": 0.7406, "step": 2822 }, { "epoch": 0.18, "grad_norm": 1.7346035645355702, "learning_rate": 9.416337421080594e-06, "loss": 0.8418, "step": 2823 }, { "epoch": 0.18, "grad_norm": 1.5537035983798506, "learning_rate": 9.41585131694752e-06, "loss": 0.7389, "step": 2824 }, { "epoch": 0.18, "grad_norm": 1.4989823388259913, "learning_rate": 9.415365023030433e-06, "loss": 0.728, "step": 2825 }, { "epoch": 0.18, "grad_norm": 1.637183107258097, "learning_rate": 9.414878539350237e-06, "loss": 0.7269, "step": 2826 }, { "epoch": 0.18, "grad_norm": 1.3984909355175343, "learning_rate": 9.41439186592784e-06, "loss": 0.7227, "step": 2827 }, { "epoch": 0.18, "grad_norm": 1.8116567341842607, "learning_rate": 9.413905002784153e-06, "loss": 0.703, "step": 2828 }, { "epoch": 0.18, "grad_norm": 1.9568364914325194, "learning_rate": 9.413417949940107e-06, "loss": 0.7966, "step": 2829 }, { "epoch": 0.18, "grad_norm": 2.411271343634316, "learning_rate": 9.41293070741663e-06, "loss": 0.9596, "step": 2830 }, { "epoch": 0.18, "grad_norm": 1.8409015211321462, "learning_rate": 9.412443275234663e-06, "loss": 0.9346, "step": 2831 }, { "epoch": 0.18, "grad_norm": 1.9729843419957473, "learning_rate": 9.411955653415157e-06, "loss": 0.8671, "step": 2832 }, { "epoch": 0.18, "grad_norm": 1.4605681853114567, "learning_rate": 9.411467841979069e-06, "loss": 0.7037, "step": 2833 }, { "epoch": 0.18, "grad_norm": 2.0358224732344667, "learning_rate": 9.41097984094736e-06, "loss": 0.8362, "step": 2834 }, { "epoch": 0.18, "grad_norm": 1.6718535032206094, "learning_rate": 9.410491650341009e-06, "loss": 0.8376, "step": 2835 }, { "epoch": 0.18, "grad_norm": 1.6390645065652616, "learning_rate": 9.410003270180992e-06, "loss": 0.8761, "step": 2836 }, { "epoch": 0.18, "grad_norm": 1.725742180999685, "learning_rate": 9.409514700488304e-06, "loss": 0.9297, "step": 2837 }, { "epoch": 0.18, "grad_norm": 1.0461320835427572, "learning_rate": 9.409025941283937e-06, "loss": 0.6002, "step": 2838 }, { "epoch": 0.18, "grad_norm": 1.2630165814652128, "learning_rate": 9.408536992588903e-06, "loss": 0.7668, "step": 2839 }, { "epoch": 0.18, "grad_norm": 1.6048534085146968, "learning_rate": 9.40804785442421e-06, "loss": 0.7916, "step": 2840 }, { "epoch": 0.18, "grad_norm": 1.4908013514585554, "learning_rate": 9.407558526810884e-06, "loss": 0.7553, "step": 2841 }, { "epoch": 0.18, "grad_norm": 1.6093723054187319, "learning_rate": 9.407069009769953e-06, "loss": 0.7715, "step": 2842 }, { "epoch": 0.18, "grad_norm": 1.6047348240249637, "learning_rate": 9.406579303322458e-06, "loss": 0.9373, "step": 2843 }, { "epoch": 0.18, "grad_norm": 1.8376782879505211, "learning_rate": 9.406089407489443e-06, "loss": 0.8004, "step": 2844 }, { "epoch": 0.18, "grad_norm": 1.586717431744826, "learning_rate": 9.405599322291963e-06, "loss": 0.7771, "step": 2845 }, { "epoch": 0.18, "grad_norm": 1.7270994331775913, "learning_rate": 9.405109047751083e-06, "loss": 0.8313, "step": 2846 }, { "epoch": 0.18, "grad_norm": 1.8250858330382054, "learning_rate": 9.404618583887872e-06, "loss": 0.87, "step": 2847 }, { "epoch": 0.18, "grad_norm": 1.9005981138218222, "learning_rate": 9.40412793072341e-06, "loss": 0.8939, "step": 2848 }, { "epoch": 0.18, "grad_norm": 1.4553838823944718, "learning_rate": 9.403637088278784e-06, "loss": 0.6602, "step": 2849 }, { "epoch": 0.18, "grad_norm": 1.6008439258200038, "learning_rate": 9.403146056575088e-06, "loss": 0.8242, "step": 2850 }, { "epoch": 0.18, "grad_norm": 1.213834243207658, "learning_rate": 9.402654835633429e-06, "loss": 0.6664, "step": 2851 }, { "epoch": 0.18, "grad_norm": 1.6640779640938212, "learning_rate": 9.402163425474914e-06, "loss": 0.8142, "step": 2852 }, { "epoch": 0.18, "grad_norm": 2.0480339287689007, "learning_rate": 9.401671826120667e-06, "loss": 0.8071, "step": 2853 }, { "epoch": 0.18, "grad_norm": 1.1093385679672165, "learning_rate": 9.401180037591813e-06, "loss": 0.611, "step": 2854 }, { "epoch": 0.18, "grad_norm": 1.838804513808765, "learning_rate": 9.400688059909489e-06, "loss": 0.8453, "step": 2855 }, { "epoch": 0.18, "grad_norm": 1.0532515107839515, "learning_rate": 9.40019589309484e-06, "loss": 0.5732, "step": 2856 }, { "epoch": 0.18, "grad_norm": 1.702897647851072, "learning_rate": 9.399703537169017e-06, "loss": 0.8181, "step": 2857 }, { "epoch": 0.18, "grad_norm": 1.451439347750458, "learning_rate": 9.399210992153181e-06, "loss": 0.7459, "step": 2858 }, { "epoch": 0.18, "grad_norm": 1.573446909386317, "learning_rate": 9.398718258068502e-06, "loss": 0.7034, "step": 2859 }, { "epoch": 0.18, "grad_norm": 1.6860891915596283, "learning_rate": 9.398225334936153e-06, "loss": 0.8181, "step": 2860 }, { "epoch": 0.18, "grad_norm": 2.1579149213617983, "learning_rate": 9.397732222777323e-06, "loss": 0.7396, "step": 2861 }, { "epoch": 0.18, "grad_norm": 1.8771593472913979, "learning_rate": 9.397238921613202e-06, "loss": 0.8583, "step": 2862 }, { "epoch": 0.18, "grad_norm": 1.9331690812316285, "learning_rate": 9.396745431464993e-06, "loss": 0.8614, "step": 2863 }, { "epoch": 0.18, "grad_norm": 1.664173532059735, "learning_rate": 9.396251752353903e-06, "loss": 0.8187, "step": 2864 }, { "epoch": 0.18, "grad_norm": 1.7225555987176369, "learning_rate": 9.395757884301152e-06, "loss": 0.8185, "step": 2865 }, { "epoch": 0.18, "grad_norm": 1.5499432682124838, "learning_rate": 9.395263827327963e-06, "loss": 0.7118, "step": 2866 }, { "epoch": 0.18, "grad_norm": 1.479713769965832, "learning_rate": 9.394769581455569e-06, "loss": 0.7643, "step": 2867 }, { "epoch": 0.18, "grad_norm": 1.6771149501512956, "learning_rate": 9.394275146705214e-06, "loss": 0.9372, "step": 2868 }, { "epoch": 0.18, "grad_norm": 1.1980892990869454, "learning_rate": 9.393780523098148e-06, "loss": 0.6846, "step": 2869 }, { "epoch": 0.18, "grad_norm": 1.5208671324895895, "learning_rate": 9.393285710655626e-06, "loss": 0.8156, "step": 2870 }, { "epoch": 0.18, "grad_norm": 1.7995969634048332, "learning_rate": 9.392790709398916e-06, "loss": 0.8474, "step": 2871 }, { "epoch": 0.18, "grad_norm": 1.9208651169309319, "learning_rate": 9.392295519349293e-06, "loss": 0.7783, "step": 2872 }, { "epoch": 0.18, "grad_norm": 1.8352206973775764, "learning_rate": 9.391800140528038e-06, "loss": 0.8818, "step": 2873 }, { "epoch": 0.18, "grad_norm": 1.7204535116518387, "learning_rate": 9.391304572956442e-06, "loss": 0.6956, "step": 2874 }, { "epoch": 0.18, "grad_norm": 1.0359514028479722, "learning_rate": 9.390808816655801e-06, "loss": 0.6503, "step": 2875 }, { "epoch": 0.18, "grad_norm": 2.117593825617667, "learning_rate": 9.390312871647423e-06, "loss": 0.8367, "step": 2876 }, { "epoch": 0.18, "grad_norm": 1.6615122555113464, "learning_rate": 9.389816737952624e-06, "loss": 0.7842, "step": 2877 }, { "epoch": 0.18, "grad_norm": 1.9341690402406004, "learning_rate": 9.389320415592726e-06, "loss": 0.8512, "step": 2878 }, { "epoch": 0.18, "grad_norm": 1.5410016739602317, "learning_rate": 9.388823904589062e-06, "loss": 0.8016, "step": 2879 }, { "epoch": 0.18, "grad_norm": 1.192815795522029, "learning_rate": 9.388327204962966e-06, "loss": 0.7397, "step": 2880 }, { "epoch": 0.18, "grad_norm": 1.6581169345017042, "learning_rate": 9.387830316735789e-06, "loss": 0.7095, "step": 2881 }, { "epoch": 0.18, "grad_norm": 1.4618196857496162, "learning_rate": 9.387333239928883e-06, "loss": 0.7268, "step": 2882 }, { "epoch": 0.18, "grad_norm": 1.7425702383820303, "learning_rate": 9.386835974563616e-06, "loss": 0.8359, "step": 2883 }, { "epoch": 0.18, "grad_norm": 1.634968302997704, "learning_rate": 9.386338520661355e-06, "loss": 0.663, "step": 2884 }, { "epoch": 0.18, "grad_norm": 1.5433634053738734, "learning_rate": 9.385840878243482e-06, "loss": 0.8416, "step": 2885 }, { "epoch": 0.18, "grad_norm": 1.711453670655956, "learning_rate": 9.385343047331385e-06, "loss": 0.7514, "step": 2886 }, { "epoch": 0.18, "grad_norm": 1.9228862645455607, "learning_rate": 9.384845027946458e-06, "loss": 0.6355, "step": 2887 }, { "epoch": 0.18, "grad_norm": 2.136234327548966, "learning_rate": 9.384346820110107e-06, "loss": 0.7812, "step": 2888 }, { "epoch": 0.18, "grad_norm": 1.5411995387020827, "learning_rate": 9.38384842384374e-06, "loss": 0.7892, "step": 2889 }, { "epoch": 0.18, "grad_norm": 1.4407812838682141, "learning_rate": 9.383349839168781e-06, "loss": 0.7693, "step": 2890 }, { "epoch": 0.19, "grad_norm": 1.844764184723645, "learning_rate": 9.382851066106655e-06, "loss": 0.8757, "step": 2891 }, { "epoch": 0.19, "grad_norm": 2.066569189433429, "learning_rate": 9.3823521046788e-06, "loss": 0.8043, "step": 2892 }, { "epoch": 0.19, "grad_norm": 1.6526954878192315, "learning_rate": 9.381852954906662e-06, "loss": 0.7331, "step": 2893 }, { "epoch": 0.19, "grad_norm": 1.586603012327117, "learning_rate": 9.38135361681169e-06, "loss": 0.961, "step": 2894 }, { "epoch": 0.19, "grad_norm": 3.6083272860662263, "learning_rate": 9.380854090415347e-06, "loss": 0.8497, "step": 2895 }, { "epoch": 0.19, "grad_norm": 1.6360332711144592, "learning_rate": 9.3803543757391e-06, "loss": 0.7728, "step": 2896 }, { "epoch": 0.19, "grad_norm": 1.4716522750407681, "learning_rate": 9.379854472804425e-06, "loss": 0.7927, "step": 2897 }, { "epoch": 0.19, "grad_norm": 1.6058548274272075, "learning_rate": 9.37935438163281e-06, "loss": 0.7325, "step": 2898 }, { "epoch": 0.19, "grad_norm": 1.1456321958701114, "learning_rate": 9.378854102245747e-06, "loss": 0.6217, "step": 2899 }, { "epoch": 0.19, "grad_norm": 1.625412372761381, "learning_rate": 9.378353634664735e-06, "loss": 0.8658, "step": 2900 }, { "epoch": 0.19, "grad_norm": 1.6258101393480802, "learning_rate": 9.377852978911282e-06, "loss": 0.8929, "step": 2901 }, { "epoch": 0.19, "grad_norm": 1.1446274722771859, "learning_rate": 9.377352135006909e-06, "loss": 0.6316, "step": 2902 }, { "epoch": 0.19, "grad_norm": 1.4323034097577991, "learning_rate": 9.376851102973139e-06, "loss": 0.7985, "step": 2903 }, { "epoch": 0.19, "grad_norm": 1.6762669345861259, "learning_rate": 9.376349882831507e-06, "loss": 0.8864, "step": 2904 }, { "epoch": 0.19, "grad_norm": 1.822659268052369, "learning_rate": 9.375848474603549e-06, "loss": 0.8731, "step": 2905 }, { "epoch": 0.19, "grad_norm": 1.6451473802863028, "learning_rate": 9.375346878310822e-06, "loss": 0.8552, "step": 2906 }, { "epoch": 0.19, "grad_norm": 1.2142800424409492, "learning_rate": 9.374845093974879e-06, "loss": 0.7546, "step": 2907 }, { "epoch": 0.19, "grad_norm": 1.313324237773003, "learning_rate": 9.374343121617286e-06, "loss": 0.6404, "step": 2908 }, { "epoch": 0.19, "grad_norm": 1.6789616303125152, "learning_rate": 9.373840961259619e-06, "loss": 0.8592, "step": 2909 }, { "epoch": 0.19, "grad_norm": 1.77719265226133, "learning_rate": 9.373338612923457e-06, "loss": 0.9161, "step": 2910 }, { "epoch": 0.19, "grad_norm": 1.180726592581588, "learning_rate": 9.372836076630391e-06, "loss": 0.6994, "step": 2911 }, { "epoch": 0.19, "grad_norm": 1.5692281206150636, "learning_rate": 9.372333352402019e-06, "loss": 0.8077, "step": 2912 }, { "epoch": 0.19, "grad_norm": 1.6147094178974877, "learning_rate": 9.371830440259948e-06, "loss": 0.8044, "step": 2913 }, { "epoch": 0.19, "grad_norm": 1.232337045625667, "learning_rate": 9.371327340225791e-06, "loss": 0.6602, "step": 2914 }, { "epoch": 0.19, "grad_norm": 2.0321125485088607, "learning_rate": 9.370824052321168e-06, "loss": 0.8323, "step": 2915 }, { "epoch": 0.19, "grad_norm": 1.6651468994009682, "learning_rate": 9.370320576567715e-06, "loss": 0.7574, "step": 2916 }, { "epoch": 0.19, "grad_norm": 1.8863753720837435, "learning_rate": 9.369816912987066e-06, "loss": 0.8997, "step": 2917 }, { "epoch": 0.19, "grad_norm": 1.7351102345701817, "learning_rate": 9.369313061600867e-06, "loss": 0.8833, "step": 2918 }, { "epoch": 0.19, "grad_norm": 2.0792595009252413, "learning_rate": 9.368809022430773e-06, "loss": 0.7908, "step": 2919 }, { "epoch": 0.19, "grad_norm": 1.7236719436897023, "learning_rate": 9.36830479549845e-06, "loss": 0.8473, "step": 2920 }, { "epoch": 0.19, "grad_norm": 1.9678830560340048, "learning_rate": 9.367800380825564e-06, "loss": 0.8458, "step": 2921 }, { "epoch": 0.19, "grad_norm": 1.5159281832660447, "learning_rate": 9.367295778433798e-06, "loss": 0.9773, "step": 2922 }, { "epoch": 0.19, "grad_norm": 1.9721939510123594, "learning_rate": 9.366790988344835e-06, "loss": 1.0297, "step": 2923 }, { "epoch": 0.19, "grad_norm": 1.6359837855459576, "learning_rate": 9.36628601058037e-06, "loss": 0.7934, "step": 2924 }, { "epoch": 0.19, "grad_norm": 2.5510916682873406, "learning_rate": 9.365780845162109e-06, "loss": 0.8589, "step": 2925 }, { "epoch": 0.19, "grad_norm": 1.724218846410138, "learning_rate": 9.365275492111761e-06, "loss": 0.8312, "step": 2926 }, { "epoch": 0.19, "grad_norm": 1.408199690157177, "learning_rate": 9.364769951451045e-06, "loss": 0.8241, "step": 2927 }, { "epoch": 0.19, "grad_norm": 1.7421328527748774, "learning_rate": 9.364264223201687e-06, "loss": 0.746, "step": 2928 }, { "epoch": 0.19, "grad_norm": 1.6254821420694, "learning_rate": 9.363758307385423e-06, "loss": 0.7933, "step": 2929 }, { "epoch": 0.19, "grad_norm": 1.0973105512448909, "learning_rate": 9.363252204023999e-06, "loss": 0.7508, "step": 2930 }, { "epoch": 0.19, "grad_norm": 0.9896195296948608, "learning_rate": 9.36274591313916e-06, "loss": 0.6689, "step": 2931 }, { "epoch": 0.19, "grad_norm": 1.5839705615968187, "learning_rate": 9.36223943475267e-06, "loss": 0.8337, "step": 2932 }, { "epoch": 0.19, "grad_norm": 1.8216303479102258, "learning_rate": 9.361732768886299e-06, "loss": 0.8864, "step": 2933 }, { "epoch": 0.19, "grad_norm": 1.9514913221088188, "learning_rate": 9.361225915561815e-06, "loss": 0.8132, "step": 2934 }, { "epoch": 0.19, "grad_norm": 1.655813329032349, "learning_rate": 9.360718874801007e-06, "loss": 0.7667, "step": 2935 }, { "epoch": 0.19, "grad_norm": 1.7729721562809448, "learning_rate": 9.360211646625664e-06, "loss": 0.7721, "step": 2936 }, { "epoch": 0.19, "grad_norm": 2.9380881431481263, "learning_rate": 9.359704231057587e-06, "loss": 0.8232, "step": 2937 }, { "epoch": 0.19, "grad_norm": 1.0357765545638948, "learning_rate": 9.359196628118584e-06, "loss": 0.6648, "step": 2938 }, { "epoch": 0.19, "grad_norm": 1.563401436424673, "learning_rate": 9.358688837830469e-06, "loss": 0.708, "step": 2939 }, { "epoch": 0.19, "grad_norm": 2.6869242533338342, "learning_rate": 9.358180860215065e-06, "loss": 0.9415, "step": 2940 }, { "epoch": 0.19, "grad_norm": 1.1758914482512892, "learning_rate": 9.357672695294208e-06, "loss": 0.7137, "step": 2941 }, { "epoch": 0.19, "grad_norm": 1.857267952043107, "learning_rate": 9.357164343089734e-06, "loss": 0.8319, "step": 2942 }, { "epoch": 0.19, "grad_norm": 1.5938912731246313, "learning_rate": 9.356655803623493e-06, "loss": 0.8646, "step": 2943 }, { "epoch": 0.19, "grad_norm": 1.7415246237344006, "learning_rate": 9.35614707691734e-06, "loss": 0.7759, "step": 2944 }, { "epoch": 0.19, "grad_norm": 1.6319054567402687, "learning_rate": 9.355638162993139e-06, "loss": 0.8182, "step": 2945 }, { "epoch": 0.19, "grad_norm": 1.894859349857244, "learning_rate": 9.355129061872762e-06, "loss": 0.7437, "step": 2946 }, { "epoch": 0.19, "grad_norm": 1.7718201537834102, "learning_rate": 9.354619773578088e-06, "loss": 0.8002, "step": 2947 }, { "epoch": 0.19, "grad_norm": 1.1044390281134102, "learning_rate": 9.35411029813101e-06, "loss": 0.7472, "step": 2948 }, { "epoch": 0.19, "grad_norm": 1.1487991706026555, "learning_rate": 9.353600635553418e-06, "loss": 0.8002, "step": 2949 }, { "epoch": 0.19, "grad_norm": 1.7768554776599417, "learning_rate": 9.353090785867219e-06, "loss": 0.8259, "step": 2950 }, { "epoch": 0.19, "grad_norm": 1.8962408812137466, "learning_rate": 9.352580749094325e-06, "loss": 0.8102, "step": 2951 }, { "epoch": 0.19, "grad_norm": 1.6745109239040297, "learning_rate": 9.352070525256656e-06, "loss": 0.8484, "step": 2952 }, { "epoch": 0.19, "grad_norm": 1.6647997546439168, "learning_rate": 9.351560114376142e-06, "loss": 0.8593, "step": 2953 }, { "epoch": 0.19, "grad_norm": 1.7671455861659568, "learning_rate": 9.35104951647472e-06, "loss": 0.9699, "step": 2954 }, { "epoch": 0.19, "grad_norm": 1.7603969287387742, "learning_rate": 9.350538731574329e-06, "loss": 0.7605, "step": 2955 }, { "epoch": 0.19, "grad_norm": 1.1872003099855986, "learning_rate": 9.350027759696928e-06, "loss": 0.8088, "step": 2956 }, { "epoch": 0.19, "grad_norm": 2.393092339179483, "learning_rate": 9.349516600864473e-06, "loss": 0.8514, "step": 2957 }, { "epoch": 0.19, "grad_norm": 1.6383912452383207, "learning_rate": 9.349005255098932e-06, "loss": 0.7253, "step": 2958 }, { "epoch": 0.19, "grad_norm": 1.1258881583516727, "learning_rate": 9.348493722422288e-06, "loss": 0.661, "step": 2959 }, { "epoch": 0.19, "grad_norm": 1.7132753579158049, "learning_rate": 9.347982002856517e-06, "loss": 0.8471, "step": 2960 }, { "epoch": 0.19, "grad_norm": 1.6563216217670989, "learning_rate": 9.347470096423618e-06, "loss": 0.9333, "step": 2961 }, { "epoch": 0.19, "grad_norm": 1.5053967802078922, "learning_rate": 9.34695800314559e-06, "loss": 0.7203, "step": 2962 }, { "epoch": 0.19, "grad_norm": 1.6785105169147412, "learning_rate": 9.346445723044441e-06, "loss": 0.8333, "step": 2963 }, { "epoch": 0.19, "grad_norm": 1.617748232176745, "learning_rate": 9.345933256142187e-06, "loss": 0.6549, "step": 2964 }, { "epoch": 0.19, "grad_norm": 1.6418287982754725, "learning_rate": 9.345420602460856e-06, "loss": 0.7515, "step": 2965 }, { "epoch": 0.19, "grad_norm": 2.533863691440421, "learning_rate": 9.344907762022476e-06, "loss": 0.8676, "step": 2966 }, { "epoch": 0.19, "grad_norm": 1.7748806764421214, "learning_rate": 9.344394734849092e-06, "loss": 0.8549, "step": 2967 }, { "epoch": 0.19, "grad_norm": 1.5054450606089578, "learning_rate": 9.343881520962749e-06, "loss": 0.7512, "step": 2968 }, { "epoch": 0.19, "grad_norm": 1.6244712419192289, "learning_rate": 9.343368120385508e-06, "loss": 0.6259, "step": 2969 }, { "epoch": 0.19, "grad_norm": 1.8678046368096908, "learning_rate": 9.342854533139431e-06, "loss": 0.7934, "step": 2970 }, { "epoch": 0.19, "grad_norm": 1.6819183796373152, "learning_rate": 9.342340759246591e-06, "loss": 0.938, "step": 2971 }, { "epoch": 0.19, "grad_norm": 1.8495404822849808, "learning_rate": 9.341826798729071e-06, "loss": 0.757, "step": 2972 }, { "epoch": 0.19, "grad_norm": 2.2348213998109383, "learning_rate": 9.341312651608957e-06, "loss": 0.7984, "step": 2973 }, { "epoch": 0.19, "grad_norm": 1.2928466332634323, "learning_rate": 9.34079831790835e-06, "loss": 0.588, "step": 2974 }, { "epoch": 0.19, "grad_norm": 2.054540160791334, "learning_rate": 9.340283797649352e-06, "loss": 1.0173, "step": 2975 }, { "epoch": 0.19, "grad_norm": 1.149260028451828, "learning_rate": 9.339769090854075e-06, "loss": 0.6695, "step": 2976 }, { "epoch": 0.19, "grad_norm": 1.9571953618254618, "learning_rate": 9.339254197544642e-06, "loss": 0.9252, "step": 2977 }, { "epoch": 0.19, "grad_norm": 1.4952643685966178, "learning_rate": 9.338739117743183e-06, "loss": 0.7315, "step": 2978 }, { "epoch": 0.19, "grad_norm": 1.8024216289087287, "learning_rate": 9.338223851471833e-06, "loss": 0.8899, "step": 2979 }, { "epoch": 0.19, "grad_norm": 1.8028863387256775, "learning_rate": 9.337708398752738e-06, "loss": 0.8174, "step": 2980 }, { "epoch": 0.19, "grad_norm": 1.5770469516341343, "learning_rate": 9.33719275960805e-06, "loss": 0.7238, "step": 2981 }, { "epoch": 0.19, "grad_norm": 1.6424729492172763, "learning_rate": 9.336676934059932e-06, "loss": 0.9163, "step": 2982 }, { "epoch": 0.19, "grad_norm": 1.6032316372502733, "learning_rate": 9.33616092213055e-06, "loss": 0.8107, "step": 2983 }, { "epoch": 0.19, "grad_norm": 1.7960392741863949, "learning_rate": 9.335644723842086e-06, "loss": 0.7802, "step": 2984 }, { "epoch": 0.19, "grad_norm": 1.3242842483580772, "learning_rate": 9.335128339216719e-06, "loss": 0.6389, "step": 2985 }, { "epoch": 0.19, "grad_norm": 1.099072190840349, "learning_rate": 9.334611768276648e-06, "loss": 0.6877, "step": 2986 }, { "epoch": 0.19, "grad_norm": 1.9926853799814275, "learning_rate": 9.33409501104407e-06, "loss": 0.7637, "step": 2987 }, { "epoch": 0.19, "grad_norm": 1.6216868153018065, "learning_rate": 9.333578067541196e-06, "loss": 0.817, "step": 2988 }, { "epoch": 0.19, "grad_norm": 2.007136874885836, "learning_rate": 9.333060937790243e-06, "loss": 0.9425, "step": 2989 }, { "epoch": 0.19, "grad_norm": 1.6390972438866018, "learning_rate": 9.332543621813434e-06, "loss": 0.7866, "step": 2990 }, { "epoch": 0.19, "grad_norm": 1.0978173214431848, "learning_rate": 9.332026119633007e-06, "loss": 0.6295, "step": 2991 }, { "epoch": 0.19, "grad_norm": 9.92205570337673, "learning_rate": 9.3315084312712e-06, "loss": 0.7318, "step": 2992 }, { "epoch": 0.19, "grad_norm": 1.9181614207959374, "learning_rate": 9.33099055675026e-06, "loss": 0.823, "step": 2993 }, { "epoch": 0.19, "grad_norm": 1.3551716240757696, "learning_rate": 9.330472496092449e-06, "loss": 0.6651, "step": 2994 }, { "epoch": 0.19, "grad_norm": 1.7007985539680652, "learning_rate": 9.329954249320028e-06, "loss": 0.8266, "step": 2995 }, { "epoch": 0.19, "grad_norm": 1.5457770434678397, "learning_rate": 9.329435816455273e-06, "loss": 0.8334, "step": 2996 }, { "epoch": 0.19, "grad_norm": 2.0326175155394317, "learning_rate": 9.328917197520461e-06, "loss": 0.8946, "step": 2997 }, { "epoch": 0.19, "grad_norm": 1.6017478575247792, "learning_rate": 9.328398392537887e-06, "loss": 0.7981, "step": 2998 }, { "epoch": 0.19, "grad_norm": 1.8420731043749212, "learning_rate": 9.327879401529846e-06, "loss": 0.877, "step": 2999 }, { "epoch": 0.19, "grad_norm": 1.5697809907171227, "learning_rate": 9.327360224518641e-06, "loss": 0.7759, "step": 3000 }, { "epoch": 0.19, "grad_norm": 1.6417710917380113, "learning_rate": 9.326840861526587e-06, "loss": 0.8228, "step": 3001 }, { "epoch": 0.19, "grad_norm": 1.1751708849348164, "learning_rate": 9.326321312576006e-06, "loss": 0.7318, "step": 3002 }, { "epoch": 0.19, "grad_norm": 2.0942799872454354, "learning_rate": 9.325801577689224e-06, "loss": 0.853, "step": 3003 }, { "epoch": 0.19, "grad_norm": 1.416990487722702, "learning_rate": 9.325281656888579e-06, "loss": 0.7921, "step": 3004 }, { "epoch": 0.19, "grad_norm": 1.268771381504234, "learning_rate": 9.32476155019642e-06, "loss": 0.7551, "step": 3005 }, { "epoch": 0.19, "grad_norm": 1.628917064271471, "learning_rate": 9.324241257635095e-06, "loss": 0.8118, "step": 3006 }, { "epoch": 0.19, "grad_norm": 1.9803923991204313, "learning_rate": 9.323720779226969e-06, "loss": 0.7321, "step": 3007 }, { "epoch": 0.19, "grad_norm": 2.148718007292986, "learning_rate": 9.323200114994407e-06, "loss": 0.8976, "step": 3008 }, { "epoch": 0.19, "grad_norm": 1.928497086841126, "learning_rate": 9.322679264959789e-06, "loss": 0.9621, "step": 3009 }, { "epoch": 0.19, "grad_norm": 1.6971539040443286, "learning_rate": 9.322158229145501e-06, "loss": 0.7964, "step": 3010 }, { "epoch": 0.19, "grad_norm": 1.5399038294888914, "learning_rate": 9.321637007573932e-06, "loss": 0.7294, "step": 3011 }, { "epoch": 0.19, "grad_norm": 2.3470127724274343, "learning_rate": 9.321115600267488e-06, "loss": 0.789, "step": 3012 }, { "epoch": 0.19, "grad_norm": 1.6564147586578883, "learning_rate": 9.320594007248573e-06, "loss": 0.8013, "step": 3013 }, { "epoch": 0.19, "grad_norm": 2.0819780583611704, "learning_rate": 9.320072228539607e-06, "loss": 0.7531, "step": 3014 }, { "epoch": 0.19, "grad_norm": 1.6221938060733756, "learning_rate": 9.319550264163015e-06, "loss": 0.8612, "step": 3015 }, { "epoch": 0.19, "grad_norm": 1.0127627317140901, "learning_rate": 9.31902811414123e-06, "loss": 0.7669, "step": 3016 }, { "epoch": 0.19, "grad_norm": 2.298774076780384, "learning_rate": 9.318505778496692e-06, "loss": 0.6738, "step": 3017 }, { "epoch": 0.19, "grad_norm": 1.3752175696725957, "learning_rate": 9.317983257251848e-06, "loss": 0.6873, "step": 3018 }, { "epoch": 0.19, "grad_norm": 1.672338119098468, "learning_rate": 9.317460550429159e-06, "loss": 0.8009, "step": 3019 }, { "epoch": 0.19, "grad_norm": 1.6553007644278097, "learning_rate": 9.316937658051088e-06, "loss": 0.8008, "step": 3020 }, { "epoch": 0.19, "grad_norm": 1.5332438939273278, "learning_rate": 9.316414580140105e-06, "loss": 0.9797, "step": 3021 }, { "epoch": 0.19, "grad_norm": 1.477282564034818, "learning_rate": 9.315891316718696e-06, "loss": 0.7743, "step": 3022 }, { "epoch": 0.19, "grad_norm": 1.7615773175078493, "learning_rate": 9.315367867809346e-06, "loss": 0.8206, "step": 3023 }, { "epoch": 0.19, "grad_norm": 1.4486532012283606, "learning_rate": 9.314844233434553e-06, "loss": 0.7434, "step": 3024 }, { "epoch": 0.19, "grad_norm": 1.6368783476020534, "learning_rate": 9.314320413616821e-06, "loss": 0.827, "step": 3025 }, { "epoch": 0.19, "grad_norm": 1.5417661724364327, "learning_rate": 9.313796408378666e-06, "loss": 0.8129, "step": 3026 }, { "epoch": 0.19, "grad_norm": 1.8865644466291447, "learning_rate": 9.313272217742604e-06, "loss": 0.7039, "step": 3027 }, { "epoch": 0.19, "grad_norm": 1.7556745087188226, "learning_rate": 9.312747841731165e-06, "loss": 0.8617, "step": 3028 }, { "epoch": 0.19, "grad_norm": 1.9292160019733982, "learning_rate": 9.312223280366886e-06, "loss": 0.785, "step": 3029 }, { "epoch": 0.19, "grad_norm": 1.6982754058982983, "learning_rate": 9.311698533672313e-06, "loss": 0.7022, "step": 3030 }, { "epoch": 0.19, "grad_norm": 2.1330157962063647, "learning_rate": 9.311173601669996e-06, "loss": 0.792, "step": 3031 }, { "epoch": 0.19, "grad_norm": 1.7240627815046647, "learning_rate": 9.310648484382496e-06, "loss": 0.7286, "step": 3032 }, { "epoch": 0.19, "grad_norm": 1.6302492233733947, "learning_rate": 9.310123181832382e-06, "loss": 0.7484, "step": 3033 }, { "epoch": 0.19, "grad_norm": 1.7243307088012552, "learning_rate": 9.309597694042232e-06, "loss": 0.8138, "step": 3034 }, { "epoch": 0.19, "grad_norm": 1.651151011588128, "learning_rate": 9.309072021034629e-06, "loss": 0.7916, "step": 3035 }, { "epoch": 0.19, "grad_norm": 2.2412494770900024, "learning_rate": 9.308546162832164e-06, "loss": 0.8774, "step": 3036 }, { "epoch": 0.19, "grad_norm": 1.8862971122293424, "learning_rate": 9.308020119457439e-06, "loss": 0.8774, "step": 3037 }, { "epoch": 0.19, "grad_norm": 1.4287984382976797, "learning_rate": 9.307493890933062e-06, "loss": 0.6758, "step": 3038 }, { "epoch": 0.19, "grad_norm": 1.7670651116507474, "learning_rate": 9.306967477281648e-06, "loss": 0.7194, "step": 3039 }, { "epoch": 0.19, "grad_norm": 1.7805794255879308, "learning_rate": 9.306440878525823e-06, "loss": 0.9075, "step": 3040 }, { "epoch": 0.19, "grad_norm": 1.5939396991993566, "learning_rate": 9.305914094688216e-06, "loss": 0.8451, "step": 3041 }, { "epoch": 0.19, "grad_norm": 1.6217699878883485, "learning_rate": 9.305387125791472e-06, "loss": 0.8474, "step": 3042 }, { "epoch": 0.19, "grad_norm": 1.633190985393773, "learning_rate": 9.304859971858233e-06, "loss": 0.7667, "step": 3043 }, { "epoch": 0.19, "grad_norm": 1.6399086939228, "learning_rate": 9.304332632911162e-06, "loss": 0.856, "step": 3044 }, { "epoch": 0.19, "grad_norm": 1.816673714729369, "learning_rate": 9.303805108972916e-06, "loss": 0.8619, "step": 3045 }, { "epoch": 0.19, "grad_norm": 1.6868504951261787, "learning_rate": 9.30327740006617e-06, "loss": 0.7891, "step": 3046 }, { "epoch": 0.2, "grad_norm": 2.714497818455796, "learning_rate": 9.302749506213607e-06, "loss": 0.8322, "step": 3047 }, { "epoch": 0.2, "grad_norm": 1.4386751160714168, "learning_rate": 9.302221427437907e-06, "loss": 0.7642, "step": 3048 }, { "epoch": 0.2, "grad_norm": 1.8797631311860512, "learning_rate": 9.301693163761774e-06, "loss": 0.796, "step": 3049 }, { "epoch": 0.2, "grad_norm": 1.6412697760895052, "learning_rate": 9.301164715207907e-06, "loss": 0.7492, "step": 3050 }, { "epoch": 0.2, "grad_norm": 1.7215236177079596, "learning_rate": 9.300636081799017e-06, "loss": 0.8067, "step": 3051 }, { "epoch": 0.2, "grad_norm": 1.7496114949220638, "learning_rate": 9.300107263557827e-06, "loss": 0.8555, "step": 3052 }, { "epoch": 0.2, "grad_norm": 2.347786977433742, "learning_rate": 9.299578260507061e-06, "loss": 0.7951, "step": 3053 }, { "epoch": 0.2, "grad_norm": 1.9882841365906148, "learning_rate": 9.299049072669458e-06, "loss": 0.7774, "step": 3054 }, { "epoch": 0.2, "grad_norm": 1.7310630996863652, "learning_rate": 9.298519700067757e-06, "loss": 0.8105, "step": 3055 }, { "epoch": 0.2, "grad_norm": 1.554066386749785, "learning_rate": 9.297990142724714e-06, "loss": 0.8237, "step": 3056 }, { "epoch": 0.2, "grad_norm": 1.6914995059248443, "learning_rate": 9.297460400663083e-06, "loss": 0.8007, "step": 3057 }, { "epoch": 0.2, "grad_norm": 1.872951553874393, "learning_rate": 9.296930473905636e-06, "loss": 0.7358, "step": 3058 }, { "epoch": 0.2, "grad_norm": 1.2571235188880643, "learning_rate": 9.296400362475149e-06, "loss": 0.6458, "step": 3059 }, { "epoch": 0.2, "grad_norm": 1.686436697725251, "learning_rate": 9.2958700663944e-06, "loss": 0.7852, "step": 3060 }, { "epoch": 0.2, "grad_norm": 1.7807882563731832, "learning_rate": 9.29533958568618e-06, "loss": 0.7472, "step": 3061 }, { "epoch": 0.2, "grad_norm": 1.7998074992239992, "learning_rate": 9.294808920373293e-06, "loss": 0.7963, "step": 3062 }, { "epoch": 0.2, "grad_norm": 1.7726876418555855, "learning_rate": 9.294278070478544e-06, "loss": 0.8154, "step": 3063 }, { "epoch": 0.2, "grad_norm": 2.3799989349041955, "learning_rate": 9.293747036024746e-06, "loss": 1.0567, "step": 3064 }, { "epoch": 0.2, "grad_norm": 1.201363424326316, "learning_rate": 9.293215817034722e-06, "loss": 0.7313, "step": 3065 }, { "epoch": 0.2, "grad_norm": 1.7599213781106307, "learning_rate": 9.292684413531305e-06, "loss": 0.9054, "step": 3066 }, { "epoch": 0.2, "grad_norm": 1.4652881841516556, "learning_rate": 9.29215282553733e-06, "loss": 0.7372, "step": 3067 }, { "epoch": 0.2, "grad_norm": 2.265393679361022, "learning_rate": 9.291621053075647e-06, "loss": 0.7872, "step": 3068 }, { "epoch": 0.2, "grad_norm": 2.216889240321716, "learning_rate": 9.291089096169109e-06, "loss": 0.8779, "step": 3069 }, { "epoch": 0.2, "grad_norm": 1.0972749956943044, "learning_rate": 9.29055695484058e-06, "loss": 0.702, "step": 3070 }, { "epoch": 0.2, "grad_norm": 1.782599998910276, "learning_rate": 9.290024629112927e-06, "loss": 0.8431, "step": 3071 }, { "epoch": 0.2, "grad_norm": 1.7036692613206392, "learning_rate": 9.28949211900903e-06, "loss": 0.7287, "step": 3072 }, { "epoch": 0.2, "grad_norm": 2.2757923617471896, "learning_rate": 9.288959424551775e-06, "loss": 0.7959, "step": 3073 }, { "epoch": 0.2, "grad_norm": 1.14372726789594, "learning_rate": 9.288426545764055e-06, "loss": 0.663, "step": 3074 }, { "epoch": 0.2, "grad_norm": 1.6695324426156772, "learning_rate": 9.287893482668774e-06, "loss": 0.7821, "step": 3075 }, { "epoch": 0.2, "grad_norm": 2.570463255888333, "learning_rate": 9.287360235288843e-06, "loss": 0.73, "step": 3076 }, { "epoch": 0.2, "grad_norm": 1.6335833743699582, "learning_rate": 9.286826803647178e-06, "loss": 0.7133, "step": 3077 }, { "epoch": 0.2, "grad_norm": 1.5063321702525365, "learning_rate": 9.286293187766703e-06, "loss": 0.703, "step": 3078 }, { "epoch": 0.2, "grad_norm": 1.6530815989611158, "learning_rate": 9.285759387670357e-06, "loss": 0.9098, "step": 3079 }, { "epoch": 0.2, "grad_norm": 1.6618068685683507, "learning_rate": 9.285225403381075e-06, "loss": 0.9038, "step": 3080 }, { "epoch": 0.2, "grad_norm": 1.7783494426324553, "learning_rate": 9.28469123492181e-06, "loss": 0.7873, "step": 3081 }, { "epoch": 0.2, "grad_norm": 1.7194118294177223, "learning_rate": 9.28415688231552e-06, "loss": 0.8223, "step": 3082 }, { "epoch": 0.2, "grad_norm": 1.6077181391597044, "learning_rate": 9.283622345585169e-06, "loss": 0.7143, "step": 3083 }, { "epoch": 0.2, "grad_norm": 1.7498277807671614, "learning_rate": 9.28308762475373e-06, "loss": 0.6887, "step": 3084 }, { "epoch": 0.2, "grad_norm": 1.6072041220332975, "learning_rate": 9.282552719844186e-06, "loss": 0.8249, "step": 3085 }, { "epoch": 0.2, "grad_norm": 1.891098710580628, "learning_rate": 9.282017630879524e-06, "loss": 0.761, "step": 3086 }, { "epoch": 0.2, "grad_norm": 1.7360145802325349, "learning_rate": 9.281482357882743e-06, "loss": 0.7826, "step": 3087 }, { "epoch": 0.2, "grad_norm": 1.6221343149834864, "learning_rate": 9.280946900876847e-06, "loss": 0.8885, "step": 3088 }, { "epoch": 0.2, "grad_norm": 1.6411303535395214, "learning_rate": 9.280411259884847e-06, "loss": 0.9462, "step": 3089 }, { "epoch": 0.2, "grad_norm": 1.7384756760547357, "learning_rate": 9.279875434929767e-06, "loss": 0.8269, "step": 3090 }, { "epoch": 0.2, "grad_norm": 1.776083366136317, "learning_rate": 9.279339426034634e-06, "loss": 0.8702, "step": 3091 }, { "epoch": 0.2, "grad_norm": 1.6167306282754588, "learning_rate": 9.278803233222484e-06, "loss": 0.7839, "step": 3092 }, { "epoch": 0.2, "grad_norm": 1.5480313510761434, "learning_rate": 9.278266856516363e-06, "loss": 0.7792, "step": 3093 }, { "epoch": 0.2, "grad_norm": 1.208099577676267, "learning_rate": 9.277730295939322e-06, "loss": 0.7258, "step": 3094 }, { "epoch": 0.2, "grad_norm": 2.1805239764642024, "learning_rate": 9.27719355151442e-06, "loss": 0.761, "step": 3095 }, { "epoch": 0.2, "grad_norm": 1.8087853591060898, "learning_rate": 9.276656623264729e-06, "loss": 0.7851, "step": 3096 }, { "epoch": 0.2, "grad_norm": 1.5173460769135354, "learning_rate": 9.276119511213321e-06, "loss": 0.7685, "step": 3097 }, { "epoch": 0.2, "grad_norm": 1.5950318325433344, "learning_rate": 9.275582215383283e-06, "loss": 0.7635, "step": 3098 }, { "epoch": 0.2, "grad_norm": 1.7715339105618284, "learning_rate": 9.275044735797705e-06, "loss": 0.8754, "step": 3099 }, { "epoch": 0.2, "grad_norm": 1.8286567546023347, "learning_rate": 9.274507072479688e-06, "loss": 0.8122, "step": 3100 }, { "epoch": 0.2, "grad_norm": 2.160581405211749, "learning_rate": 9.27396922545234e-06, "loss": 1.091, "step": 3101 }, { "epoch": 0.2, "grad_norm": 1.6559886123162983, "learning_rate": 9.273431194738773e-06, "loss": 0.7701, "step": 3102 }, { "epoch": 0.2, "grad_norm": 2.7717115790815403, "learning_rate": 9.272892980362113e-06, "loss": 0.8824, "step": 3103 }, { "epoch": 0.2, "grad_norm": 1.5903257036001055, "learning_rate": 9.272354582345492e-06, "loss": 0.7056, "step": 3104 }, { "epoch": 0.2, "grad_norm": 1.5831809375051613, "learning_rate": 9.27181600071205e-06, "loss": 0.8817, "step": 3105 }, { "epoch": 0.2, "grad_norm": 1.7297895062800692, "learning_rate": 9.271277235484932e-06, "loss": 0.7912, "step": 3106 }, { "epoch": 0.2, "grad_norm": 1.732431690491456, "learning_rate": 9.270738286687293e-06, "loss": 0.852, "step": 3107 }, { "epoch": 0.2, "grad_norm": 1.858043489070986, "learning_rate": 9.270199154342297e-06, "loss": 0.7579, "step": 3108 }, { "epoch": 0.2, "grad_norm": 1.7971490073265615, "learning_rate": 9.269659838473113e-06, "loss": 0.8332, "step": 3109 }, { "epoch": 0.2, "grad_norm": 1.8395948693338824, "learning_rate": 9.269120339102922e-06, "loss": 0.8897, "step": 3110 }, { "epoch": 0.2, "grad_norm": 1.0564728466330064, "learning_rate": 9.26858065625491e-06, "loss": 0.5715, "step": 3111 }, { "epoch": 0.2, "grad_norm": 1.742369764192591, "learning_rate": 9.26804078995227e-06, "loss": 0.8195, "step": 3112 }, { "epoch": 0.2, "grad_norm": 1.7126871851184018, "learning_rate": 9.267500740218205e-06, "loss": 0.8142, "step": 3113 }, { "epoch": 0.2, "grad_norm": 2.2176698330655054, "learning_rate": 9.266960507075927e-06, "loss": 0.9476, "step": 3114 }, { "epoch": 0.2, "grad_norm": 1.7381003256903818, "learning_rate": 9.266420090548652e-06, "loss": 0.8407, "step": 3115 }, { "epoch": 0.2, "grad_norm": 1.147654385479871, "learning_rate": 9.265879490659606e-06, "loss": 0.7287, "step": 3116 }, { "epoch": 0.2, "grad_norm": 1.7127084365160437, "learning_rate": 9.265338707432024e-06, "loss": 0.8728, "step": 3117 }, { "epoch": 0.2, "grad_norm": 1.7250045894226154, "learning_rate": 9.264797740889146e-06, "loss": 0.8025, "step": 3118 }, { "epoch": 0.2, "grad_norm": 2.3912611291995223, "learning_rate": 9.264256591054222e-06, "loss": 0.8811, "step": 3119 }, { "epoch": 0.2, "grad_norm": 1.2489239625210422, "learning_rate": 9.263715257950511e-06, "loss": 0.7054, "step": 3120 }, { "epoch": 0.2, "grad_norm": 1.776429465369923, "learning_rate": 9.263173741601278e-06, "loss": 0.8699, "step": 3121 }, { "epoch": 0.2, "grad_norm": 1.541215342215278, "learning_rate": 9.262632042029796e-06, "loss": 0.8066, "step": 3122 }, { "epoch": 0.2, "grad_norm": 1.6561145619988502, "learning_rate": 9.262090159259345e-06, "loss": 0.8224, "step": 3123 }, { "epoch": 0.2, "grad_norm": 1.5953954925504228, "learning_rate": 9.261548093313216e-06, "loss": 0.7666, "step": 3124 }, { "epoch": 0.2, "grad_norm": 1.561910227940748, "learning_rate": 9.261005844214703e-06, "loss": 0.7285, "step": 3125 }, { "epoch": 0.2, "grad_norm": 1.7456276529840133, "learning_rate": 9.260463411987113e-06, "loss": 0.7745, "step": 3126 }, { "epoch": 0.2, "grad_norm": 1.6680039955096562, "learning_rate": 9.25992079665376e-06, "loss": 0.7544, "step": 3127 }, { "epoch": 0.2, "grad_norm": 1.0841302676346865, "learning_rate": 9.25937799823796e-06, "loss": 0.5994, "step": 3128 }, { "epoch": 0.2, "grad_norm": 1.0462877381801876, "learning_rate": 9.258835016763044e-06, "loss": 0.7062, "step": 3129 }, { "epoch": 0.2, "grad_norm": 1.2855579276837334, "learning_rate": 9.258291852252349e-06, "loss": 0.5543, "step": 3130 }, { "epoch": 0.2, "grad_norm": 0.9904066686726054, "learning_rate": 9.257748504729218e-06, "loss": 0.6268, "step": 3131 }, { "epoch": 0.2, "grad_norm": 1.697573637911834, "learning_rate": 9.257204974217002e-06, "loss": 0.7197, "step": 3132 }, { "epoch": 0.2, "grad_norm": 1.7785904770128949, "learning_rate": 9.256661260739065e-06, "loss": 0.774, "step": 3133 }, { "epoch": 0.2, "grad_norm": 1.2342365996670652, "learning_rate": 9.256117364318768e-06, "loss": 0.6562, "step": 3134 }, { "epoch": 0.2, "grad_norm": 1.3137764978127136, "learning_rate": 9.255573284979491e-06, "loss": 0.6355, "step": 3135 }, { "epoch": 0.2, "grad_norm": 1.799119651737079, "learning_rate": 9.255029022744615e-06, "loss": 0.8052, "step": 3136 }, { "epoch": 0.2, "grad_norm": 2.0027399396740573, "learning_rate": 9.254484577637534e-06, "loss": 0.8115, "step": 3137 }, { "epoch": 0.2, "grad_norm": 10.102773183171523, "learning_rate": 9.253939949681648e-06, "loss": 0.8801, "step": 3138 }, { "epoch": 0.2, "grad_norm": 1.7093361262155757, "learning_rate": 9.253395138900359e-06, "loss": 0.7598, "step": 3139 }, { "epoch": 0.2, "grad_norm": 1.7581584294012846, "learning_rate": 9.252850145317085e-06, "loss": 0.708, "step": 3140 }, { "epoch": 0.2, "grad_norm": 1.8259553604568393, "learning_rate": 9.252304968955248e-06, "loss": 0.8337, "step": 3141 }, { "epoch": 0.2, "grad_norm": 1.554809379185726, "learning_rate": 9.251759609838278e-06, "loss": 0.768, "step": 3142 }, { "epoch": 0.2, "grad_norm": 2.0496492503726342, "learning_rate": 9.251214067989617e-06, "loss": 0.8865, "step": 3143 }, { "epoch": 0.2, "grad_norm": 1.0363359576524966, "learning_rate": 9.250668343432707e-06, "loss": 0.6523, "step": 3144 }, { "epoch": 0.2, "grad_norm": 1.5256005147415599, "learning_rate": 9.250122436191002e-06, "loss": 0.836, "step": 3145 }, { "epoch": 0.2, "grad_norm": 1.6500750594454598, "learning_rate": 9.249576346287967e-06, "loss": 0.8347, "step": 3146 }, { "epoch": 0.2, "grad_norm": 1.2067455792958375, "learning_rate": 9.249030073747069e-06, "loss": 0.5611, "step": 3147 }, { "epoch": 0.2, "grad_norm": 1.7725955857343294, "learning_rate": 9.248483618591788e-06, "loss": 0.8658, "step": 3148 }, { "epoch": 0.2, "grad_norm": 1.1315625129098652, "learning_rate": 9.247936980845608e-06, "loss": 0.6384, "step": 3149 }, { "epoch": 0.2, "grad_norm": 1.547432814559269, "learning_rate": 9.247390160532024e-06, "loss": 0.8313, "step": 3150 }, { "epoch": 0.2, "grad_norm": 1.8126920292400452, "learning_rate": 9.246843157674535e-06, "loss": 0.6895, "step": 3151 }, { "epoch": 0.2, "grad_norm": 2.012688943706133, "learning_rate": 9.246295972296651e-06, "loss": 0.8096, "step": 3152 }, { "epoch": 0.2, "grad_norm": 1.5514234899204424, "learning_rate": 9.245748604421888e-06, "loss": 0.7109, "step": 3153 }, { "epoch": 0.2, "grad_norm": 1.8002868302408928, "learning_rate": 9.245201054073772e-06, "loss": 0.8112, "step": 3154 }, { "epoch": 0.2, "grad_norm": 1.66962948808013, "learning_rate": 9.244653321275834e-06, "loss": 0.8482, "step": 3155 }, { "epoch": 0.2, "grad_norm": 1.8593704221179503, "learning_rate": 9.244105406051617e-06, "loss": 0.8651, "step": 3156 }, { "epoch": 0.2, "grad_norm": 1.1564382623374436, "learning_rate": 9.243557308424667e-06, "loss": 0.6502, "step": 3157 }, { "epoch": 0.2, "grad_norm": 1.5495320334369376, "learning_rate": 9.243009028418542e-06, "loss": 0.7903, "step": 3158 }, { "epoch": 0.2, "grad_norm": 1.5424764473864085, "learning_rate": 9.242460566056803e-06, "loss": 0.9311, "step": 3159 }, { "epoch": 0.2, "grad_norm": 1.5763526848939617, "learning_rate": 9.241911921363025e-06, "loss": 0.7567, "step": 3160 }, { "epoch": 0.2, "grad_norm": 1.8150670226720718, "learning_rate": 9.241363094360785e-06, "loss": 0.8612, "step": 3161 }, { "epoch": 0.2, "grad_norm": 1.2522227446521683, "learning_rate": 9.240814085073673e-06, "loss": 0.7424, "step": 3162 }, { "epoch": 0.2, "grad_norm": 1.1449747448241985, "learning_rate": 9.240264893525281e-06, "loss": 0.7393, "step": 3163 }, { "epoch": 0.2, "grad_norm": 1.6208382665534626, "learning_rate": 9.239715519739215e-06, "loss": 0.8059, "step": 3164 }, { "epoch": 0.2, "grad_norm": 1.5996464382781164, "learning_rate": 9.239165963739084e-06, "loss": 0.7874, "step": 3165 }, { "epoch": 0.2, "grad_norm": 2.3269651635844038, "learning_rate": 9.23861622554851e-06, "loss": 0.7737, "step": 3166 }, { "epoch": 0.2, "grad_norm": 1.5868192797699239, "learning_rate": 9.238066305191113e-06, "loss": 0.8611, "step": 3167 }, { "epoch": 0.2, "grad_norm": 1.6103098847193305, "learning_rate": 9.237516202690535e-06, "loss": 0.8385, "step": 3168 }, { "epoch": 0.2, "grad_norm": 1.1968570339548998, "learning_rate": 9.236965918070413e-06, "loss": 0.6637, "step": 3169 }, { "epoch": 0.2, "grad_norm": 1.6821170210264726, "learning_rate": 9.2364154513544e-06, "loss": 0.7696, "step": 3170 }, { "epoch": 0.2, "grad_norm": 1.6344495433188837, "learning_rate": 9.235864802566153e-06, "loss": 0.6532, "step": 3171 }, { "epoch": 0.2, "grad_norm": 1.679009762370236, "learning_rate": 9.235313971729338e-06, "loss": 0.8635, "step": 3172 }, { "epoch": 0.2, "grad_norm": 1.6086390172474796, "learning_rate": 9.234762958867627e-06, "loss": 0.8903, "step": 3173 }, { "epoch": 0.2, "grad_norm": 1.648841782639422, "learning_rate": 9.234211764004701e-06, "loss": 0.814, "step": 3174 }, { "epoch": 0.2, "grad_norm": 1.7063270737593146, "learning_rate": 9.233660387164254e-06, "loss": 0.841, "step": 3175 }, { "epoch": 0.2, "grad_norm": 2.0406887610324786, "learning_rate": 9.233108828369977e-06, "loss": 0.7287, "step": 3176 }, { "epoch": 0.2, "grad_norm": 1.751064434594627, "learning_rate": 9.23255708764558e-06, "loss": 0.8516, "step": 3177 }, { "epoch": 0.2, "grad_norm": 1.2188109461266146, "learning_rate": 9.23200516501477e-06, "loss": 0.6274, "step": 3178 }, { "epoch": 0.2, "grad_norm": 1.6118177156334803, "learning_rate": 9.231453060501274e-06, "loss": 0.8003, "step": 3179 }, { "epoch": 0.2, "grad_norm": 1.796799332009598, "learning_rate": 9.230900774128815e-06, "loss": 0.7868, "step": 3180 }, { "epoch": 0.2, "grad_norm": 1.2605002052486332, "learning_rate": 9.230348305921131e-06, "loss": 0.7532, "step": 3181 }, { "epoch": 0.2, "grad_norm": 1.798099155378188, "learning_rate": 9.229795655901967e-06, "loss": 0.8099, "step": 3182 }, { "epoch": 0.2, "grad_norm": 1.9271071529884019, "learning_rate": 9.229242824095074e-06, "loss": 0.6958, "step": 3183 }, { "epoch": 0.2, "grad_norm": 1.1678721522387954, "learning_rate": 9.22868981052421e-06, "loss": 0.6476, "step": 3184 }, { "epoch": 0.2, "grad_norm": 1.6538009994061462, "learning_rate": 9.228136615213144e-06, "loss": 0.7301, "step": 3185 }, { "epoch": 0.2, "grad_norm": 1.657976068453235, "learning_rate": 9.227583238185651e-06, "loss": 0.9638, "step": 3186 }, { "epoch": 0.2, "grad_norm": 5.441806707528161, "learning_rate": 9.227029679465515e-06, "loss": 0.7213, "step": 3187 }, { "epoch": 0.2, "grad_norm": 1.6360275845190435, "learning_rate": 9.226475939076525e-06, "loss": 0.8701, "step": 3188 }, { "epoch": 0.2, "grad_norm": 1.7000294257187554, "learning_rate": 9.225922017042478e-06, "loss": 0.8163, "step": 3189 }, { "epoch": 0.2, "grad_norm": 1.8009817925340739, "learning_rate": 9.225367913387184e-06, "loss": 0.8209, "step": 3190 }, { "epoch": 0.2, "grad_norm": 1.8309815468751123, "learning_rate": 9.224813628134457e-06, "loss": 0.7579, "step": 3191 }, { "epoch": 0.2, "grad_norm": 1.7835876106408521, "learning_rate": 9.224259161308116e-06, "loss": 0.7406, "step": 3192 }, { "epoch": 0.2, "grad_norm": 1.630979190070516, "learning_rate": 9.223704512931994e-06, "loss": 0.777, "step": 3193 }, { "epoch": 0.2, "grad_norm": 1.7926322920013047, "learning_rate": 9.223149683029927e-06, "loss": 0.8844, "step": 3194 }, { "epoch": 0.2, "grad_norm": 1.5131588387665895, "learning_rate": 9.22259467162576e-06, "loss": 0.7396, "step": 3195 }, { "epoch": 0.2, "grad_norm": 1.8054947083795, "learning_rate": 9.222039478743348e-06, "loss": 0.8446, "step": 3196 }, { "epoch": 0.2, "grad_norm": 1.6805853651622458, "learning_rate": 9.221484104406551e-06, "loss": 0.8197, "step": 3197 }, { "epoch": 0.2, "grad_norm": 1.6148066245507846, "learning_rate": 9.220928548639237e-06, "loss": 0.7228, "step": 3198 }, { "epoch": 0.2, "grad_norm": 1.7238027229295982, "learning_rate": 9.220372811465282e-06, "loss": 0.8065, "step": 3199 }, { "epoch": 0.2, "grad_norm": 1.5244446559021771, "learning_rate": 9.219816892908573e-06, "loss": 0.7675, "step": 3200 }, { "epoch": 0.2, "grad_norm": 1.624465152905155, "learning_rate": 9.219260792993001e-06, "loss": 0.7594, "step": 3201 }, { "epoch": 0.2, "grad_norm": 1.155840282855999, "learning_rate": 9.218704511742466e-06, "loss": 0.6561, "step": 3202 }, { "epoch": 0.21, "grad_norm": 1.8057635628326754, "learning_rate": 9.218148049180878e-06, "loss": 0.8495, "step": 3203 }, { "epoch": 0.21, "grad_norm": 1.6217121211324153, "learning_rate": 9.217591405332147e-06, "loss": 0.8089, "step": 3204 }, { "epoch": 0.21, "grad_norm": 1.6782966695531725, "learning_rate": 9.217034580220203e-06, "loss": 0.8145, "step": 3205 }, { "epoch": 0.21, "grad_norm": 1.7349491658506233, "learning_rate": 9.216477573868972e-06, "loss": 0.9038, "step": 3206 }, { "epoch": 0.21, "grad_norm": 1.7142504579162614, "learning_rate": 9.215920386302393e-06, "loss": 0.7304, "step": 3207 }, { "epoch": 0.21, "grad_norm": 1.5716599494652659, "learning_rate": 9.21536301754442e-06, "loss": 0.8253, "step": 3208 }, { "epoch": 0.21, "grad_norm": 2.149556340062186, "learning_rate": 9.214805467618996e-06, "loss": 0.8065, "step": 3209 }, { "epoch": 0.21, "grad_norm": 1.5183549751946313, "learning_rate": 9.214247736550092e-06, "loss": 0.8796, "step": 3210 }, { "epoch": 0.21, "grad_norm": 1.6628514501563807, "learning_rate": 9.213689824361678e-06, "loss": 0.8607, "step": 3211 }, { "epoch": 0.21, "grad_norm": 1.4447924702839705, "learning_rate": 9.213131731077726e-06, "loss": 0.682, "step": 3212 }, { "epoch": 0.21, "grad_norm": 1.1193556028978584, "learning_rate": 9.212573456722227e-06, "loss": 0.6521, "step": 3213 }, { "epoch": 0.21, "grad_norm": 3.1039602894997653, "learning_rate": 9.212015001319173e-06, "loss": 0.7585, "step": 3214 }, { "epoch": 0.21, "grad_norm": 1.6346912871426562, "learning_rate": 9.211456364892566e-06, "loss": 0.8029, "step": 3215 }, { "epoch": 0.21, "grad_norm": 1.4764255622029023, "learning_rate": 9.21089754746641e-06, "loss": 0.7458, "step": 3216 }, { "epoch": 0.21, "grad_norm": 1.665671790185496, "learning_rate": 9.210338549064728e-06, "loss": 0.7602, "step": 3217 }, { "epoch": 0.21, "grad_norm": 2.323042169342874, "learning_rate": 9.209779369711544e-06, "loss": 0.766, "step": 3218 }, { "epoch": 0.21, "grad_norm": 2.0369386978807067, "learning_rate": 9.209220009430886e-06, "loss": 0.7282, "step": 3219 }, { "epoch": 0.21, "grad_norm": 1.3515920510279777, "learning_rate": 9.208660468246799e-06, "loss": 0.6918, "step": 3220 }, { "epoch": 0.21, "grad_norm": 1.5575123275046874, "learning_rate": 9.208100746183327e-06, "loss": 0.7997, "step": 3221 }, { "epoch": 0.21, "grad_norm": 1.849900949642393, "learning_rate": 9.207540843264527e-06, "loss": 0.8393, "step": 3222 }, { "epoch": 0.21, "grad_norm": 1.7293539436159564, "learning_rate": 9.206980759514464e-06, "loss": 0.8837, "step": 3223 }, { "epoch": 0.21, "grad_norm": 2.350659580915371, "learning_rate": 9.206420494957208e-06, "loss": 0.8941, "step": 3224 }, { "epoch": 0.21, "grad_norm": 1.820041203661987, "learning_rate": 9.205860049616837e-06, "loss": 0.7921, "step": 3225 }, { "epoch": 0.21, "grad_norm": 1.7349835670682532, "learning_rate": 9.20529942351744e-06, "loss": 0.7356, "step": 3226 }, { "epoch": 0.21, "grad_norm": 1.599604696238101, "learning_rate": 9.20473861668311e-06, "loss": 0.7242, "step": 3227 }, { "epoch": 0.21, "grad_norm": 1.6246952432936923, "learning_rate": 9.20417762913795e-06, "loss": 0.9666, "step": 3228 }, { "epoch": 0.21, "grad_norm": 1.74775130102566, "learning_rate": 9.203616460906068e-06, "loss": 0.8792, "step": 3229 }, { "epoch": 0.21, "grad_norm": 3.1815357520472447, "learning_rate": 9.203055112011585e-06, "loss": 0.8519, "step": 3230 }, { "epoch": 0.21, "grad_norm": 1.0700040482175697, "learning_rate": 9.202493582478625e-06, "loss": 0.6314, "step": 3231 }, { "epoch": 0.21, "grad_norm": 1.7374081196679345, "learning_rate": 9.201931872331322e-06, "loss": 0.8616, "step": 3232 }, { "epoch": 0.21, "grad_norm": 1.6570991067587466, "learning_rate": 9.201369981593816e-06, "loss": 0.776, "step": 3233 }, { "epoch": 0.21, "grad_norm": 1.849138680448563, "learning_rate": 9.200807910290256e-06, "loss": 0.8102, "step": 3234 }, { "epoch": 0.21, "grad_norm": 1.535479992497044, "learning_rate": 9.200245658444799e-06, "loss": 0.8488, "step": 3235 }, { "epoch": 0.21, "grad_norm": 1.7258789863016906, "learning_rate": 9.199683226081611e-06, "loss": 0.7922, "step": 3236 }, { "epoch": 0.21, "grad_norm": 1.1396565022017846, "learning_rate": 9.199120613224862e-06, "loss": 0.6001, "step": 3237 }, { "epoch": 0.21, "grad_norm": 1.6209581338757062, "learning_rate": 9.198557819898732e-06, "loss": 0.6797, "step": 3238 }, { "epoch": 0.21, "grad_norm": 1.7902150971599826, "learning_rate": 9.197994846127409e-06, "loss": 0.7463, "step": 3239 }, { "epoch": 0.21, "grad_norm": 1.9040422324926667, "learning_rate": 9.19743169193509e-06, "loss": 0.8799, "step": 3240 }, { "epoch": 0.21, "grad_norm": 0.9481635703443806, "learning_rate": 9.196868357345976e-06, "loss": 0.6014, "step": 3241 }, { "epoch": 0.21, "grad_norm": 1.2311431487942, "learning_rate": 9.19630484238428e-06, "loss": 0.6328, "step": 3242 }, { "epoch": 0.21, "grad_norm": 2.2074542318463015, "learning_rate": 9.195741147074218e-06, "loss": 0.8738, "step": 3243 }, { "epoch": 0.21, "grad_norm": 1.0245395881838517, "learning_rate": 9.195177271440019e-06, "loss": 0.5777, "step": 3244 }, { "epoch": 0.21, "grad_norm": 1.6534749767145893, "learning_rate": 9.194613215505916e-06, "loss": 0.7026, "step": 3245 }, { "epoch": 0.21, "grad_norm": 1.8262650650256491, "learning_rate": 9.194048979296151e-06, "loss": 0.917, "step": 3246 }, { "epoch": 0.21, "grad_norm": 3.4982545581403754, "learning_rate": 9.193484562834973e-06, "loss": 0.6467, "step": 3247 }, { "epoch": 0.21, "grad_norm": 1.6800083185256127, "learning_rate": 9.19291996614664e-06, "loss": 0.9572, "step": 3248 }, { "epoch": 0.21, "grad_norm": 1.1915864464760813, "learning_rate": 9.192355189255418e-06, "loss": 0.6796, "step": 3249 }, { "epoch": 0.21, "grad_norm": 1.1680521343290877, "learning_rate": 9.19179023218558e-06, "loss": 0.6329, "step": 3250 }, { "epoch": 0.21, "grad_norm": 1.6688093870474066, "learning_rate": 9.191225094961407e-06, "loss": 0.8597, "step": 3251 }, { "epoch": 0.21, "grad_norm": 1.5648565346455583, "learning_rate": 9.190659777607185e-06, "loss": 0.7677, "step": 3252 }, { "epoch": 0.21, "grad_norm": 1.7567840057076196, "learning_rate": 9.19009428014721e-06, "loss": 0.7868, "step": 3253 }, { "epoch": 0.21, "grad_norm": 1.4443118718513637, "learning_rate": 9.189528602605789e-06, "loss": 0.7561, "step": 3254 }, { "epoch": 0.21, "grad_norm": 1.436834636003776, "learning_rate": 9.188962745007233e-06, "loss": 0.7913, "step": 3255 }, { "epoch": 0.21, "grad_norm": 1.1203935593617163, "learning_rate": 9.18839670737586e-06, "loss": 0.6194, "step": 3256 }, { "epoch": 0.21, "grad_norm": 1.6390930813591829, "learning_rate": 9.187830489735996e-06, "loss": 0.6909, "step": 3257 }, { "epoch": 0.21, "grad_norm": 1.7948883444574018, "learning_rate": 9.187264092111978e-06, "loss": 0.8777, "step": 3258 }, { "epoch": 0.21, "grad_norm": 1.056317360345089, "learning_rate": 9.18669751452815e-06, "loss": 0.7575, "step": 3259 }, { "epoch": 0.21, "grad_norm": 1.505041215331889, "learning_rate": 9.186130757008858e-06, "loss": 0.7367, "step": 3260 }, { "epoch": 0.21, "grad_norm": 1.4649337129617754, "learning_rate": 9.185563819578462e-06, "loss": 0.7407, "step": 3261 }, { "epoch": 0.21, "grad_norm": 1.612812012387621, "learning_rate": 9.18499670226133e-06, "loss": 0.7551, "step": 3262 }, { "epoch": 0.21, "grad_norm": 1.7869716818911936, "learning_rate": 9.184429405081832e-06, "loss": 0.7073, "step": 3263 }, { "epoch": 0.21, "grad_norm": 2.364454443050548, "learning_rate": 9.183861928064353e-06, "loss": 0.8003, "step": 3264 }, { "epoch": 0.21, "grad_norm": 1.778360661797625, "learning_rate": 9.183294271233278e-06, "loss": 0.8714, "step": 3265 }, { "epoch": 0.21, "grad_norm": 1.6008284759964215, "learning_rate": 9.182726434613006e-06, "loss": 0.7933, "step": 3266 }, { "epoch": 0.21, "grad_norm": 1.742925969356523, "learning_rate": 9.18215841822794e-06, "loss": 0.9496, "step": 3267 }, { "epoch": 0.21, "grad_norm": 1.555652962007114, "learning_rate": 9.181590222102494e-06, "loss": 0.7821, "step": 3268 }, { "epoch": 0.21, "grad_norm": 1.5994844390692557, "learning_rate": 9.181021846261088e-06, "loss": 0.8429, "step": 3269 }, { "epoch": 0.21, "grad_norm": 1.61264927328936, "learning_rate": 9.180453290728146e-06, "loss": 0.7923, "step": 3270 }, { "epoch": 0.21, "grad_norm": 1.703781742493331, "learning_rate": 9.17988455552811e-06, "loss": 0.8552, "step": 3271 }, { "epoch": 0.21, "grad_norm": 1.4885220066739928, "learning_rate": 9.179315640685416e-06, "loss": 0.834, "step": 3272 }, { "epoch": 0.21, "grad_norm": 4.148368474058063, "learning_rate": 9.178746546224517e-06, "loss": 0.7111, "step": 3273 }, { "epoch": 0.21, "grad_norm": 1.6238006317630267, "learning_rate": 9.178177272169874e-06, "loss": 0.9552, "step": 3274 }, { "epoch": 0.21, "grad_norm": 1.873837015730229, "learning_rate": 9.177607818545951e-06, "loss": 0.8799, "step": 3275 }, { "epoch": 0.21, "grad_norm": 1.5418115162863033, "learning_rate": 9.177038185377222e-06, "loss": 0.8257, "step": 3276 }, { "epoch": 0.21, "grad_norm": 1.5909012374642901, "learning_rate": 9.176468372688168e-06, "loss": 0.7969, "step": 3277 }, { "epoch": 0.21, "grad_norm": 1.5327265734387807, "learning_rate": 9.175898380503281e-06, "loss": 0.7844, "step": 3278 }, { "epoch": 0.21, "grad_norm": 1.7170266022549991, "learning_rate": 9.175328208847056e-06, "loss": 0.7503, "step": 3279 }, { "epoch": 0.21, "grad_norm": 1.7861351231353053, "learning_rate": 9.174757857743997e-06, "loss": 0.8236, "step": 3280 }, { "epoch": 0.21, "grad_norm": 1.6116623784630635, "learning_rate": 9.17418732721862e-06, "loss": 0.7779, "step": 3281 }, { "epoch": 0.21, "grad_norm": 1.4896495401919942, "learning_rate": 9.173616617295442e-06, "loss": 0.7093, "step": 3282 }, { "epoch": 0.21, "grad_norm": 1.6172255413395356, "learning_rate": 9.17304572799899e-06, "loss": 0.8167, "step": 3283 }, { "epoch": 0.21, "grad_norm": 1.7056246288645174, "learning_rate": 9.172474659353803e-06, "loss": 0.8805, "step": 3284 }, { "epoch": 0.21, "grad_norm": 1.7243089639936786, "learning_rate": 9.171903411384422e-06, "loss": 0.7825, "step": 3285 }, { "epoch": 0.21, "grad_norm": 2.0795557312590947, "learning_rate": 9.171331984115399e-06, "loss": 0.6898, "step": 3286 }, { "epoch": 0.21, "grad_norm": 1.5242046527258724, "learning_rate": 9.170760377571293e-06, "loss": 0.8037, "step": 3287 }, { "epoch": 0.21, "grad_norm": 1.8638308517223243, "learning_rate": 9.17018859177667e-06, "loss": 0.8095, "step": 3288 }, { "epoch": 0.21, "grad_norm": 1.6293579932805389, "learning_rate": 9.169616626756103e-06, "loss": 0.8745, "step": 3289 }, { "epoch": 0.21, "grad_norm": 1.0366431477498226, "learning_rate": 9.169044482534175e-06, "loss": 0.5631, "step": 3290 }, { "epoch": 0.21, "grad_norm": 1.5941761364286935, "learning_rate": 9.168472159135477e-06, "loss": 0.8521, "step": 3291 }, { "epoch": 0.21, "grad_norm": 1.6366583511057786, "learning_rate": 9.167899656584602e-06, "loss": 0.8625, "step": 3292 }, { "epoch": 0.21, "grad_norm": 2.0917527074595834, "learning_rate": 9.167326974906161e-06, "loss": 0.8649, "step": 3293 }, { "epoch": 0.21, "grad_norm": 1.7560326074807011, "learning_rate": 9.166754114124761e-06, "loss": 0.9593, "step": 3294 }, { "epoch": 0.21, "grad_norm": 2.2748109872294284, "learning_rate": 9.166181074265027e-06, "loss": 0.8346, "step": 3295 }, { "epoch": 0.21, "grad_norm": 1.7450345036595145, "learning_rate": 9.165607855351583e-06, "loss": 0.9113, "step": 3296 }, { "epoch": 0.21, "grad_norm": 1.677944644670091, "learning_rate": 9.165034457409066e-06, "loss": 0.8427, "step": 3297 }, { "epoch": 0.21, "grad_norm": 1.7786490497599692, "learning_rate": 9.164460880462121e-06, "loss": 0.8096, "step": 3298 }, { "epoch": 0.21, "grad_norm": 1.7076564975529318, "learning_rate": 9.163887124535398e-06, "loss": 0.8561, "step": 3299 }, { "epoch": 0.21, "grad_norm": 1.7527342899838738, "learning_rate": 9.163313189653557e-06, "loss": 0.8079, "step": 3300 }, { "epoch": 0.21, "grad_norm": 1.7809095304839793, "learning_rate": 9.162739075841263e-06, "loss": 0.8524, "step": 3301 }, { "epoch": 0.21, "grad_norm": 1.0814685162907576, "learning_rate": 9.162164783123188e-06, "loss": 0.7851, "step": 3302 }, { "epoch": 0.21, "grad_norm": 1.7264793795489772, "learning_rate": 9.16159031152402e-06, "loss": 0.9292, "step": 3303 }, { "epoch": 0.21, "grad_norm": 1.044813364086506, "learning_rate": 9.161015661068443e-06, "loss": 0.6297, "step": 3304 }, { "epoch": 0.21, "grad_norm": 1.4907203942404479, "learning_rate": 9.160440831781157e-06, "loss": 0.8406, "step": 3305 }, { "epoch": 0.21, "grad_norm": 0.990803367261688, "learning_rate": 9.159865823686867e-06, "loss": 0.6564, "step": 3306 }, { "epoch": 0.21, "grad_norm": 2.6818241128239597, "learning_rate": 9.159290636810284e-06, "loss": 0.7904, "step": 3307 }, { "epoch": 0.21, "grad_norm": 1.5771675200524609, "learning_rate": 9.158715271176131e-06, "loss": 0.7536, "step": 3308 }, { "epoch": 0.21, "grad_norm": 1.9994438960693313, "learning_rate": 9.158139726809132e-06, "loss": 0.9088, "step": 3309 }, { "epoch": 0.21, "grad_norm": 1.514469176247582, "learning_rate": 9.157564003734025e-06, "loss": 0.7307, "step": 3310 }, { "epoch": 0.21, "grad_norm": 1.5752689755039309, "learning_rate": 9.156988101975555e-06, "loss": 0.8742, "step": 3311 }, { "epoch": 0.21, "grad_norm": 1.9079589865375304, "learning_rate": 9.156412021558471e-06, "loss": 0.7548, "step": 3312 }, { "epoch": 0.21, "grad_norm": 2.050761287407636, "learning_rate": 9.155835762507532e-06, "loss": 0.7791, "step": 3313 }, { "epoch": 0.21, "grad_norm": 1.5985228522607349, "learning_rate": 9.155259324847504e-06, "loss": 0.7567, "step": 3314 }, { "epoch": 0.21, "grad_norm": 1.6217644700467604, "learning_rate": 9.154682708603162e-06, "loss": 0.8067, "step": 3315 }, { "epoch": 0.21, "grad_norm": 1.867034897124534, "learning_rate": 9.154105913799289e-06, "loss": 0.7743, "step": 3316 }, { "epoch": 0.21, "grad_norm": 1.5734077736116547, "learning_rate": 9.153528940460669e-06, "loss": 0.6986, "step": 3317 }, { "epoch": 0.21, "grad_norm": 1.5241127487714734, "learning_rate": 9.152951788612105e-06, "loss": 0.8488, "step": 3318 }, { "epoch": 0.21, "grad_norm": 1.729480786558474, "learning_rate": 9.152374458278402e-06, "loss": 0.8298, "step": 3319 }, { "epoch": 0.21, "grad_norm": 1.6382461841814377, "learning_rate": 9.151796949484367e-06, "loss": 0.6968, "step": 3320 }, { "epoch": 0.21, "grad_norm": 1.5677121633015265, "learning_rate": 9.151219262254824e-06, "loss": 0.8495, "step": 3321 }, { "epoch": 0.21, "grad_norm": 1.6576620233968073, "learning_rate": 9.150641396614601e-06, "loss": 0.8023, "step": 3322 }, { "epoch": 0.21, "grad_norm": 1.9107156325863393, "learning_rate": 9.150063352588531e-06, "loss": 0.7509, "step": 3323 }, { "epoch": 0.21, "grad_norm": 1.849028967512775, "learning_rate": 9.14948513020146e-06, "loss": 0.8618, "step": 3324 }, { "epoch": 0.21, "grad_norm": 1.60234088570366, "learning_rate": 9.148906729478236e-06, "loss": 0.7684, "step": 3325 }, { "epoch": 0.21, "grad_norm": 1.6568410916591993, "learning_rate": 9.148328150443719e-06, "loss": 0.786, "step": 3326 }, { "epoch": 0.21, "grad_norm": 1.5304229680282795, "learning_rate": 9.147749393122776e-06, "loss": 0.8311, "step": 3327 }, { "epoch": 0.21, "grad_norm": 1.633119818312114, "learning_rate": 9.14717045754028e-06, "loss": 0.9008, "step": 3328 }, { "epoch": 0.21, "grad_norm": 1.7715810997396817, "learning_rate": 9.14659134372111e-06, "loss": 0.7422, "step": 3329 }, { "epoch": 0.21, "grad_norm": 1.4958857919490924, "learning_rate": 9.14601205169016e-06, "loss": 0.8044, "step": 3330 }, { "epoch": 0.21, "grad_norm": 1.574335084219585, "learning_rate": 9.145432581472323e-06, "loss": 0.7744, "step": 3331 }, { "epoch": 0.21, "grad_norm": 1.696694450182587, "learning_rate": 9.144852933092503e-06, "loss": 0.7841, "step": 3332 }, { "epoch": 0.21, "grad_norm": 1.8441393525732999, "learning_rate": 9.144273106575614e-06, "loss": 0.7551, "step": 3333 }, { "epoch": 0.21, "grad_norm": 1.6712487877961268, "learning_rate": 9.143693101946576e-06, "loss": 0.7554, "step": 3334 }, { "epoch": 0.21, "grad_norm": 1.5231848823289076, "learning_rate": 9.143112919230314e-06, "loss": 0.7273, "step": 3335 }, { "epoch": 0.21, "grad_norm": 1.795647646687611, "learning_rate": 9.142532558451767e-06, "loss": 0.8214, "step": 3336 }, { "epoch": 0.21, "grad_norm": 1.29144967956605, "learning_rate": 9.141952019635874e-06, "loss": 0.6651, "step": 3337 }, { "epoch": 0.21, "grad_norm": 1.5240600976863403, "learning_rate": 9.141371302807586e-06, "loss": 0.7825, "step": 3338 }, { "epoch": 0.21, "grad_norm": 1.7487446716074138, "learning_rate": 9.140790407991862e-06, "loss": 0.816, "step": 3339 }, { "epoch": 0.21, "grad_norm": 3.651674518514841, "learning_rate": 9.140209335213667e-06, "loss": 0.7954, "step": 3340 }, { "epoch": 0.21, "grad_norm": 1.7012899889801856, "learning_rate": 9.139628084497975e-06, "loss": 0.8781, "step": 3341 }, { "epoch": 0.21, "grad_norm": 2.1413619268183703, "learning_rate": 9.139046655869767e-06, "loss": 0.8078, "step": 3342 }, { "epoch": 0.21, "grad_norm": 1.8680536133613126, "learning_rate": 9.138465049354031e-06, "loss": 0.7075, "step": 3343 }, { "epoch": 0.21, "grad_norm": 1.6952553530330365, "learning_rate": 9.137883264975763e-06, "loss": 0.898, "step": 3344 }, { "epoch": 0.21, "grad_norm": 1.800084772547307, "learning_rate": 9.137301302759968e-06, "loss": 0.8721, "step": 3345 }, { "epoch": 0.21, "grad_norm": 1.8225684929036792, "learning_rate": 9.136719162731655e-06, "loss": 0.9572, "step": 3346 }, { "epoch": 0.21, "grad_norm": 2.306406453302287, "learning_rate": 9.136136844915847e-06, "loss": 0.7695, "step": 3347 }, { "epoch": 0.21, "grad_norm": 1.5556082939362892, "learning_rate": 9.135554349337567e-06, "loss": 0.7192, "step": 3348 }, { "epoch": 0.21, "grad_norm": 1.8172345789913464, "learning_rate": 9.134971676021851e-06, "loss": 0.8601, "step": 3349 }, { "epoch": 0.21, "grad_norm": 2.0103749714563928, "learning_rate": 9.13438882499374e-06, "loss": 0.8739, "step": 3350 }, { "epoch": 0.21, "grad_norm": 1.7643882859183395, "learning_rate": 9.133805796278288e-06, "loss": 0.8316, "step": 3351 }, { "epoch": 0.21, "grad_norm": 1.684701477969709, "learning_rate": 9.133222589900547e-06, "loss": 0.7563, "step": 3352 }, { "epoch": 0.21, "grad_norm": 1.5386680369303232, "learning_rate": 9.132639205885584e-06, "loss": 0.7849, "step": 3353 }, { "epoch": 0.21, "grad_norm": 1.487215590559732, "learning_rate": 9.132055644258473e-06, "loss": 0.8508, "step": 3354 }, { "epoch": 0.21, "grad_norm": 1.6920732231258462, "learning_rate": 9.131471905044294e-06, "loss": 0.8688, "step": 3355 }, { "epoch": 0.21, "grad_norm": 1.5597575379497082, "learning_rate": 9.130887988268131e-06, "loss": 0.7569, "step": 3356 }, { "epoch": 0.21, "grad_norm": 1.6285911296473192, "learning_rate": 9.130303893955084e-06, "loss": 0.8933, "step": 3357 }, { "epoch": 0.21, "grad_norm": 1.5502346369324829, "learning_rate": 9.129719622130255e-06, "loss": 0.774, "step": 3358 }, { "epoch": 0.21, "grad_norm": 1.560092408443634, "learning_rate": 9.129135172818754e-06, "loss": 0.9432, "step": 3359 }, { "epoch": 0.22, "grad_norm": 1.6770949699515636, "learning_rate": 9.1285505460457e-06, "loss": 0.7264, "step": 3360 }, { "epoch": 0.22, "grad_norm": 1.7688102179503271, "learning_rate": 9.127965741836218e-06, "loss": 0.8122, "step": 3361 }, { "epoch": 0.22, "grad_norm": 1.1929681416869538, "learning_rate": 9.127380760215443e-06, "loss": 0.5873, "step": 3362 }, { "epoch": 0.22, "grad_norm": 1.4732178691904778, "learning_rate": 9.126795601208516e-06, "loss": 0.7888, "step": 3363 }, { "epoch": 0.22, "grad_norm": 2.092478506649342, "learning_rate": 9.126210264840585e-06, "loss": 0.8061, "step": 3364 }, { "epoch": 0.22, "grad_norm": 1.7607280761107387, "learning_rate": 9.125624751136809e-06, "loss": 0.7401, "step": 3365 }, { "epoch": 0.22, "grad_norm": 1.6367783944857044, "learning_rate": 9.125039060122348e-06, "loss": 0.7882, "step": 3366 }, { "epoch": 0.22, "grad_norm": 2.458462978439416, "learning_rate": 9.124453191822376e-06, "loss": 0.9142, "step": 3367 }, { "epoch": 0.22, "grad_norm": 2.4938278247818286, "learning_rate": 9.123867146262074e-06, "loss": 0.7407, "step": 3368 }, { "epoch": 0.22, "grad_norm": 1.6540434561432398, "learning_rate": 9.123280923466627e-06, "loss": 0.8061, "step": 3369 }, { "epoch": 0.22, "grad_norm": 2.001404263662969, "learning_rate": 9.12269452346123e-06, "loss": 0.7912, "step": 3370 }, { "epoch": 0.22, "grad_norm": 1.8148243361950673, "learning_rate": 9.122107946271086e-06, "loss": 0.713, "step": 3371 }, { "epoch": 0.22, "grad_norm": 1.99665620851221, "learning_rate": 9.121521191921403e-06, "loss": 0.8396, "step": 3372 }, { "epoch": 0.22, "grad_norm": 1.7146640476680732, "learning_rate": 9.1209342604374e-06, "loss": 0.9052, "step": 3373 }, { "epoch": 0.22, "grad_norm": 1.2797679390449896, "learning_rate": 9.120347151844301e-06, "loss": 0.7097, "step": 3374 }, { "epoch": 0.22, "grad_norm": 1.6318453157267099, "learning_rate": 9.119759866167342e-06, "loss": 0.7417, "step": 3375 }, { "epoch": 0.22, "grad_norm": 1.9207369297896437, "learning_rate": 9.11917240343176e-06, "loss": 0.9572, "step": 3376 }, { "epoch": 0.22, "grad_norm": 1.9203929411542706, "learning_rate": 9.118584763662803e-06, "loss": 0.6919, "step": 3377 }, { "epoch": 0.22, "grad_norm": 1.7220673861807954, "learning_rate": 9.117996946885727e-06, "loss": 0.7748, "step": 3378 }, { "epoch": 0.22, "grad_norm": 1.6956434284123536, "learning_rate": 9.117408953125794e-06, "loss": 0.7537, "step": 3379 }, { "epoch": 0.22, "grad_norm": 1.1560181279030517, "learning_rate": 9.116820782408279e-06, "loss": 0.6959, "step": 3380 }, { "epoch": 0.22, "grad_norm": 1.590838623132751, "learning_rate": 9.116232434758456e-06, "loss": 0.846, "step": 3381 }, { "epoch": 0.22, "grad_norm": 1.6823442117338925, "learning_rate": 9.115643910201612e-06, "loss": 0.8171, "step": 3382 }, { "epoch": 0.22, "grad_norm": 1.4122460196157327, "learning_rate": 9.115055208763042e-06, "loss": 0.7355, "step": 3383 }, { "epoch": 0.22, "grad_norm": 1.5358831040171355, "learning_rate": 9.114466330468045e-06, "loss": 0.7223, "step": 3384 }, { "epoch": 0.22, "grad_norm": 1.5673125968522252, "learning_rate": 9.113877275341932e-06, "loss": 0.7525, "step": 3385 }, { "epoch": 0.22, "grad_norm": 1.0499715795921059, "learning_rate": 9.113288043410015e-06, "loss": 0.6094, "step": 3386 }, { "epoch": 0.22, "grad_norm": 1.890055650163448, "learning_rate": 9.112698634697625e-06, "loss": 0.9033, "step": 3387 }, { "epoch": 0.22, "grad_norm": 1.7162319027314727, "learning_rate": 9.112109049230087e-06, "loss": 0.8804, "step": 3388 }, { "epoch": 0.22, "grad_norm": 2.323671542159651, "learning_rate": 9.111519287032745e-06, "loss": 0.857, "step": 3389 }, { "epoch": 0.22, "grad_norm": 0.9900224176006595, "learning_rate": 9.110929348130942e-06, "loss": 0.6258, "step": 3390 }, { "epoch": 0.22, "grad_norm": 1.653371393816168, "learning_rate": 9.110339232550034e-06, "loss": 0.7634, "step": 3391 }, { "epoch": 0.22, "grad_norm": 1.8750453601736854, "learning_rate": 9.109748940315383e-06, "loss": 0.9041, "step": 3392 }, { "epoch": 0.22, "grad_norm": 1.629303619525052, "learning_rate": 9.109158471452358e-06, "loss": 0.8395, "step": 3393 }, { "epoch": 0.22, "grad_norm": 1.3225308316745086, "learning_rate": 9.108567825986336e-06, "loss": 0.6594, "step": 3394 }, { "epoch": 0.22, "grad_norm": 1.9646930885319807, "learning_rate": 9.107977003942703e-06, "loss": 0.7388, "step": 3395 }, { "epoch": 0.22, "grad_norm": 3.061278725431848, "learning_rate": 9.107386005346852e-06, "loss": 0.8576, "step": 3396 }, { "epoch": 0.22, "grad_norm": 1.6689237150622243, "learning_rate": 9.106794830224179e-06, "loss": 0.7604, "step": 3397 }, { "epoch": 0.22, "grad_norm": 1.8647550729160425, "learning_rate": 9.106203478600094e-06, "loss": 0.8849, "step": 3398 }, { "epoch": 0.22, "grad_norm": 1.9982331609132438, "learning_rate": 9.105611950500012e-06, "loss": 0.7985, "step": 3399 }, { "epoch": 0.22, "grad_norm": 1.2179999246069515, "learning_rate": 9.105020245949355e-06, "loss": 0.7166, "step": 3400 }, { "epoch": 0.22, "grad_norm": 1.8138359017101786, "learning_rate": 9.104428364973555e-06, "loss": 0.8487, "step": 3401 }, { "epoch": 0.22, "grad_norm": 1.4132614565677266, "learning_rate": 9.103836307598049e-06, "loss": 0.69, "step": 3402 }, { "epoch": 0.22, "grad_norm": 2.6603798498398827, "learning_rate": 9.10324407384828e-06, "loss": 0.8237, "step": 3403 }, { "epoch": 0.22, "grad_norm": 1.673798428827803, "learning_rate": 9.102651663749703e-06, "loss": 0.7371, "step": 3404 }, { "epoch": 0.22, "grad_norm": 1.502939827083371, "learning_rate": 9.102059077327779e-06, "loss": 0.7384, "step": 3405 }, { "epoch": 0.22, "grad_norm": 1.672825208234964, "learning_rate": 9.101466314607974e-06, "loss": 0.9526, "step": 3406 }, { "epoch": 0.22, "grad_norm": 1.1856592638533234, "learning_rate": 9.100873375615767e-06, "loss": 0.7004, "step": 3407 }, { "epoch": 0.22, "grad_norm": 1.5313131352671798, "learning_rate": 9.100280260376639e-06, "loss": 0.7896, "step": 3408 }, { "epoch": 0.22, "grad_norm": 1.7533475058422092, "learning_rate": 9.099686968916081e-06, "loss": 0.7828, "step": 3409 }, { "epoch": 0.22, "grad_norm": 1.5935812475167124, "learning_rate": 9.099093501259592e-06, "loss": 0.7801, "step": 3410 }, { "epoch": 0.22, "grad_norm": 1.8822802954439979, "learning_rate": 9.098499857432677e-06, "loss": 0.8342, "step": 3411 }, { "epoch": 0.22, "grad_norm": 1.6493532533306343, "learning_rate": 9.097906037460852e-06, "loss": 0.8366, "step": 3412 }, { "epoch": 0.22, "grad_norm": 1.6411227450501509, "learning_rate": 9.097312041369634e-06, "loss": 0.7649, "step": 3413 }, { "epoch": 0.22, "grad_norm": 1.5620648767205394, "learning_rate": 9.096717869184555e-06, "loss": 0.8278, "step": 3414 }, { "epoch": 0.22, "grad_norm": 1.7497296096229384, "learning_rate": 9.09612352093115e-06, "loss": 0.7643, "step": 3415 }, { "epoch": 0.22, "grad_norm": 1.5501606630924138, "learning_rate": 9.095528996634966e-06, "loss": 0.92, "step": 3416 }, { "epoch": 0.22, "grad_norm": 1.5833419698372937, "learning_rate": 9.094934296321549e-06, "loss": 0.8006, "step": 3417 }, { "epoch": 0.22, "grad_norm": 1.9471435835723778, "learning_rate": 9.094339420016461e-06, "loss": 0.7949, "step": 3418 }, { "epoch": 0.22, "grad_norm": 1.8522097192118783, "learning_rate": 9.093744367745267e-06, "loss": 0.7731, "step": 3419 }, { "epoch": 0.22, "grad_norm": 1.1798800267661262, "learning_rate": 9.093149139533543e-06, "loss": 0.6878, "step": 3420 }, { "epoch": 0.22, "grad_norm": 1.8968550797100148, "learning_rate": 9.092553735406871e-06, "loss": 0.7948, "step": 3421 }, { "epoch": 0.22, "grad_norm": 1.5729392812589313, "learning_rate": 9.091958155390839e-06, "loss": 0.8326, "step": 3422 }, { "epoch": 0.22, "grad_norm": 1.6035213681612335, "learning_rate": 9.091362399511043e-06, "loss": 0.7972, "step": 3423 }, { "epoch": 0.22, "grad_norm": 1.6299017858292653, "learning_rate": 9.090766467793088e-06, "loss": 0.7754, "step": 3424 }, { "epoch": 0.22, "grad_norm": 1.8755575127912787, "learning_rate": 9.090170360262587e-06, "loss": 0.8476, "step": 3425 }, { "epoch": 0.22, "grad_norm": 1.270642161221034, "learning_rate": 9.089574076945158e-06, "loss": 0.655, "step": 3426 }, { "epoch": 0.22, "grad_norm": 1.7304627148767582, "learning_rate": 9.088977617866428e-06, "loss": 0.795, "step": 3427 }, { "epoch": 0.22, "grad_norm": 1.663761528078763, "learning_rate": 9.088380983052033e-06, "loss": 0.8288, "step": 3428 }, { "epoch": 0.22, "grad_norm": 1.1368502149826936, "learning_rate": 9.087784172527614e-06, "loss": 0.6008, "step": 3429 }, { "epoch": 0.22, "grad_norm": 1.7120087224296223, "learning_rate": 9.087187186318821e-06, "loss": 0.7928, "step": 3430 }, { "epoch": 0.22, "grad_norm": 1.7374501969838811, "learning_rate": 9.086590024451312e-06, "loss": 0.7217, "step": 3431 }, { "epoch": 0.22, "grad_norm": 1.7250720631214176, "learning_rate": 9.08599268695075e-06, "loss": 0.9299, "step": 3432 }, { "epoch": 0.22, "grad_norm": 2.4805099185570834, "learning_rate": 9.085395173842807e-06, "loss": 0.6151, "step": 3433 }, { "epoch": 0.22, "grad_norm": 1.6702224995696109, "learning_rate": 9.084797485153165e-06, "loss": 0.7804, "step": 3434 }, { "epoch": 0.22, "grad_norm": 1.2014296920184055, "learning_rate": 9.08419962090751e-06, "loss": 0.7405, "step": 3435 }, { "epoch": 0.22, "grad_norm": 1.9484235197634505, "learning_rate": 9.083601581131538e-06, "loss": 0.8759, "step": 3436 }, { "epoch": 0.22, "grad_norm": 1.6942601344740364, "learning_rate": 9.08300336585095e-06, "loss": 0.8961, "step": 3437 }, { "epoch": 0.22, "grad_norm": 1.503907376170722, "learning_rate": 9.08240497509146e-06, "loss": 0.7889, "step": 3438 }, { "epoch": 0.22, "grad_norm": 1.8173943004479174, "learning_rate": 9.081806408878778e-06, "loss": 0.8657, "step": 3439 }, { "epoch": 0.22, "grad_norm": 2.0413951115668536, "learning_rate": 9.081207667238637e-06, "loss": 0.7855, "step": 3440 }, { "epoch": 0.22, "grad_norm": 1.8820113796022877, "learning_rate": 9.080608750196764e-06, "loss": 0.8581, "step": 3441 }, { "epoch": 0.22, "grad_norm": 1.9910538201737646, "learning_rate": 9.080009657778903e-06, "loss": 0.9328, "step": 3442 }, { "epoch": 0.22, "grad_norm": 1.6608817541062262, "learning_rate": 9.079410390010798e-06, "loss": 0.8243, "step": 3443 }, { "epoch": 0.22, "grad_norm": 1.5836445747733585, "learning_rate": 9.078810946918209e-06, "loss": 0.8151, "step": 3444 }, { "epoch": 0.22, "grad_norm": 1.8722091470124647, "learning_rate": 9.078211328526896e-06, "loss": 0.8678, "step": 3445 }, { "epoch": 0.22, "grad_norm": 1.6593969697229167, "learning_rate": 9.077611534862628e-06, "loss": 0.8616, "step": 3446 }, { "epoch": 0.22, "grad_norm": 1.776740442803796, "learning_rate": 9.077011565951186e-06, "loss": 0.7943, "step": 3447 }, { "epoch": 0.22, "grad_norm": 1.4986924223331846, "learning_rate": 9.076411421818354e-06, "loss": 0.7297, "step": 3448 }, { "epoch": 0.22, "grad_norm": 1.604291440607476, "learning_rate": 9.075811102489923e-06, "loss": 0.8364, "step": 3449 }, { "epoch": 0.22, "grad_norm": 1.607294780151397, "learning_rate": 9.075210607991696e-06, "loss": 0.8429, "step": 3450 }, { "epoch": 0.22, "grad_norm": 1.6906655197312583, "learning_rate": 9.074609938349481e-06, "loss": 0.7959, "step": 3451 }, { "epoch": 0.22, "grad_norm": 2.0761190076455165, "learning_rate": 9.074009093589093e-06, "loss": 0.9334, "step": 3452 }, { "epoch": 0.22, "grad_norm": 1.8211268117505273, "learning_rate": 9.073408073736355e-06, "loss": 0.8344, "step": 3453 }, { "epoch": 0.22, "grad_norm": 1.6140670314498018, "learning_rate": 9.072806878817095e-06, "loss": 0.7388, "step": 3454 }, { "epoch": 0.22, "grad_norm": 1.9496188145348803, "learning_rate": 9.072205508857155e-06, "loss": 0.88, "step": 3455 }, { "epoch": 0.22, "grad_norm": 1.1838878984431553, "learning_rate": 9.07160396388238e-06, "loss": 0.6126, "step": 3456 }, { "epoch": 0.22, "grad_norm": 1.6168845652191253, "learning_rate": 9.071002243918621e-06, "loss": 0.8081, "step": 3457 }, { "epoch": 0.22, "grad_norm": 1.697496376235737, "learning_rate": 9.070400348991742e-06, "loss": 0.8605, "step": 3458 }, { "epoch": 0.22, "grad_norm": 1.856512788411322, "learning_rate": 9.069798279127606e-06, "loss": 0.8831, "step": 3459 }, { "epoch": 0.22, "grad_norm": 1.870536867553196, "learning_rate": 9.069196034352094e-06, "loss": 0.7191, "step": 3460 }, { "epoch": 0.22, "grad_norm": 2.288317001578053, "learning_rate": 9.068593614691086e-06, "loss": 0.7173, "step": 3461 }, { "epoch": 0.22, "grad_norm": 1.401341143773674, "learning_rate": 9.067991020170474e-06, "loss": 0.746, "step": 3462 }, { "epoch": 0.22, "grad_norm": 1.9102345636054678, "learning_rate": 9.067388250816155e-06, "loss": 0.7233, "step": 3463 }, { "epoch": 0.22, "grad_norm": 1.5525477109756205, "learning_rate": 9.066785306654038e-06, "loss": 0.7036, "step": 3464 }, { "epoch": 0.22, "grad_norm": 1.5794146091391883, "learning_rate": 9.066182187710032e-06, "loss": 0.7468, "step": 3465 }, { "epoch": 0.22, "grad_norm": 1.6279623042695914, "learning_rate": 9.06557889401006e-06, "loss": 0.882, "step": 3466 }, { "epoch": 0.22, "grad_norm": 1.9968298970239757, "learning_rate": 9.064975425580051e-06, "loss": 0.9249, "step": 3467 }, { "epoch": 0.22, "grad_norm": 1.5988990103621874, "learning_rate": 9.06437178244594e-06, "loss": 0.8395, "step": 3468 }, { "epoch": 0.22, "grad_norm": 1.661019703357777, "learning_rate": 9.06376796463367e-06, "loss": 0.8668, "step": 3469 }, { "epoch": 0.22, "grad_norm": 1.4139060992478365, "learning_rate": 9.06316397216919e-06, "loss": 0.7366, "step": 3470 }, { "epoch": 0.22, "grad_norm": 1.7827052973321973, "learning_rate": 9.062559805078463e-06, "loss": 0.8741, "step": 3471 }, { "epoch": 0.22, "grad_norm": 1.6908341625154832, "learning_rate": 9.061955463387454e-06, "loss": 0.7169, "step": 3472 }, { "epoch": 0.22, "grad_norm": 1.332156159219384, "learning_rate": 9.061350947122129e-06, "loss": 0.6817, "step": 3473 }, { "epoch": 0.22, "grad_norm": 1.5014374186103652, "learning_rate": 9.060746256308479e-06, "loss": 0.8224, "step": 3474 }, { "epoch": 0.22, "grad_norm": 1.8466793872979994, "learning_rate": 9.060141390972486e-06, "loss": 0.6997, "step": 3475 }, { "epoch": 0.22, "grad_norm": 1.5871818056618527, "learning_rate": 9.059536351140146e-06, "loss": 0.8503, "step": 3476 }, { "epoch": 0.22, "grad_norm": 2.617027110625886, "learning_rate": 9.058931136837465e-06, "loss": 0.8859, "step": 3477 }, { "epoch": 0.22, "grad_norm": 1.0941626794288122, "learning_rate": 9.058325748090454e-06, "loss": 0.616, "step": 3478 }, { "epoch": 0.22, "grad_norm": 1.7277197098834, "learning_rate": 9.05772018492513e-06, "loss": 0.743, "step": 3479 }, { "epoch": 0.22, "grad_norm": 1.5556990350067428, "learning_rate": 9.057114447367517e-06, "loss": 0.8144, "step": 3480 }, { "epoch": 0.22, "grad_norm": 1.4791096337855525, "learning_rate": 9.056508535443652e-06, "loss": 0.716, "step": 3481 }, { "epoch": 0.22, "grad_norm": 1.699077838894071, "learning_rate": 9.055902449179573e-06, "loss": 0.7764, "step": 3482 }, { "epoch": 0.22, "grad_norm": 1.5508594346110685, "learning_rate": 9.05529618860133e-06, "loss": 0.769, "step": 3483 }, { "epoch": 0.22, "grad_norm": 1.3310239324449862, "learning_rate": 9.054689753734978e-06, "loss": 0.7524, "step": 3484 }, { "epoch": 0.22, "grad_norm": 1.765652236367995, "learning_rate": 9.05408314460658e-06, "loss": 0.8697, "step": 3485 }, { "epoch": 0.22, "grad_norm": 1.7614884416484597, "learning_rate": 9.053476361242208e-06, "loss": 0.8689, "step": 3486 }, { "epoch": 0.22, "grad_norm": 1.8110090931638676, "learning_rate": 9.052869403667938e-06, "loss": 0.8515, "step": 3487 }, { "epoch": 0.22, "grad_norm": 1.6348540676184207, "learning_rate": 9.052262271909859e-06, "loss": 0.7133, "step": 3488 }, { "epoch": 0.22, "grad_norm": 1.6211369625719931, "learning_rate": 9.051654965994062e-06, "loss": 0.7062, "step": 3489 }, { "epoch": 0.22, "grad_norm": 1.7719175653728576, "learning_rate": 9.051047485946648e-06, "loss": 0.8214, "step": 3490 }, { "epoch": 0.22, "grad_norm": 2.534694569195107, "learning_rate": 9.050439831793726e-06, "loss": 0.8043, "step": 3491 }, { "epoch": 0.22, "grad_norm": 1.6755159672609268, "learning_rate": 9.04983200356141e-06, "loss": 0.7478, "step": 3492 }, { "epoch": 0.22, "grad_norm": 1.824844729877922, "learning_rate": 9.049224001275825e-06, "loss": 0.9403, "step": 3493 }, { "epoch": 0.22, "grad_norm": 1.813142215446141, "learning_rate": 9.048615824963102e-06, "loss": 0.8925, "step": 3494 }, { "epoch": 0.22, "grad_norm": 1.5706170816571596, "learning_rate": 9.048007474649377e-06, "loss": 0.778, "step": 3495 }, { "epoch": 0.22, "grad_norm": 1.5703629818794178, "learning_rate": 9.047398950360798e-06, "loss": 0.7673, "step": 3496 }, { "epoch": 0.22, "grad_norm": 1.5516932436055606, "learning_rate": 9.046790252123514e-06, "loss": 0.7334, "step": 3497 }, { "epoch": 0.22, "grad_norm": 1.1957451148043494, "learning_rate": 9.046181379963692e-06, "loss": 0.7361, "step": 3498 }, { "epoch": 0.22, "grad_norm": 1.5420432270765305, "learning_rate": 9.045572333907495e-06, "loss": 0.8149, "step": 3499 }, { "epoch": 0.22, "grad_norm": 1.6717334080842388, "learning_rate": 9.0449631139811e-06, "loss": 0.647, "step": 3500 }, { "epoch": 0.22, "grad_norm": 1.8142625188529187, "learning_rate": 9.04435372021069e-06, "loss": 0.8767, "step": 3501 }, { "epoch": 0.22, "grad_norm": 1.1053826011256103, "learning_rate": 9.043744152622455e-06, "loss": 0.5602, "step": 3502 }, { "epoch": 0.22, "grad_norm": 1.5031770760083496, "learning_rate": 9.043134411242593e-06, "loss": 0.8602, "step": 3503 }, { "epoch": 0.22, "grad_norm": 1.6256037620070776, "learning_rate": 9.042524496097312e-06, "loss": 0.7279, "step": 3504 }, { "epoch": 0.22, "grad_norm": 1.294683026964428, "learning_rate": 9.041914407212821e-06, "loss": 0.6712, "step": 3505 }, { "epoch": 0.22, "grad_norm": 1.7084679073028777, "learning_rate": 9.041304144615341e-06, "loss": 0.7482, "step": 3506 }, { "epoch": 0.22, "grad_norm": 1.8434572012589485, "learning_rate": 9.040693708331103e-06, "loss": 0.7343, "step": 3507 }, { "epoch": 0.22, "grad_norm": 1.4181313658544956, "learning_rate": 9.040083098386338e-06, "loss": 0.7255, "step": 3508 }, { "epoch": 0.22, "grad_norm": 1.4620438847510906, "learning_rate": 9.039472314807293e-06, "loss": 0.7928, "step": 3509 }, { "epoch": 0.22, "grad_norm": 1.608852404068969, "learning_rate": 9.038861357620215e-06, "loss": 0.8439, "step": 3510 }, { "epoch": 0.22, "grad_norm": 1.6008152970065792, "learning_rate": 9.038250226851362e-06, "loss": 0.9584, "step": 3511 }, { "epoch": 0.22, "grad_norm": 1.9024615878886475, "learning_rate": 9.037638922527002e-06, "loss": 0.7887, "step": 3512 }, { "epoch": 0.22, "grad_norm": 1.5758724396976393, "learning_rate": 9.037027444673402e-06, "loss": 0.7624, "step": 3513 }, { "epoch": 0.22, "grad_norm": 1.6666591089916039, "learning_rate": 9.036415793316848e-06, "loss": 0.8594, "step": 3514 }, { "epoch": 0.22, "grad_norm": 1.6850096716778942, "learning_rate": 9.035803968483625e-06, "loss": 0.8164, "step": 3515 }, { "epoch": 0.23, "grad_norm": 1.6677698555593345, "learning_rate": 9.035191970200025e-06, "loss": 0.8525, "step": 3516 }, { "epoch": 0.23, "grad_norm": 1.5005079720032648, "learning_rate": 9.034579798492356e-06, "loss": 0.7328, "step": 3517 }, { "epoch": 0.23, "grad_norm": 1.1467270693669442, "learning_rate": 9.033967453386924e-06, "loss": 0.6943, "step": 3518 }, { "epoch": 0.23, "grad_norm": 1.6932353867107621, "learning_rate": 9.033354934910049e-06, "loss": 0.8224, "step": 3519 }, { "epoch": 0.23, "grad_norm": 1.677078232473085, "learning_rate": 9.032742243088053e-06, "loss": 0.783, "step": 3520 }, { "epoch": 0.23, "grad_norm": 1.6131848509399132, "learning_rate": 9.032129377947267e-06, "loss": 0.7743, "step": 3521 }, { "epoch": 0.23, "grad_norm": 1.7227073184483273, "learning_rate": 9.031516339514036e-06, "loss": 0.9379, "step": 3522 }, { "epoch": 0.23, "grad_norm": 2.0434125324778805, "learning_rate": 9.030903127814704e-06, "loss": 0.892, "step": 3523 }, { "epoch": 0.23, "grad_norm": 1.6805903724448963, "learning_rate": 9.030289742875624e-06, "loss": 0.8529, "step": 3524 }, { "epoch": 0.23, "grad_norm": 1.5092421904693278, "learning_rate": 9.029676184723161e-06, "loss": 0.7866, "step": 3525 }, { "epoch": 0.23, "grad_norm": 1.6096697100002895, "learning_rate": 9.029062453383683e-06, "loss": 0.7788, "step": 3526 }, { "epoch": 0.23, "grad_norm": 1.558423452373245, "learning_rate": 9.028448548883566e-06, "loss": 0.7193, "step": 3527 }, { "epoch": 0.23, "grad_norm": 1.8648945680826232, "learning_rate": 9.027834471249196e-06, "loss": 0.8328, "step": 3528 }, { "epoch": 0.23, "grad_norm": 1.8815080580445473, "learning_rate": 9.027220220506964e-06, "loss": 0.8241, "step": 3529 }, { "epoch": 0.23, "grad_norm": 1.6290082858476658, "learning_rate": 9.02660579668327e-06, "loss": 0.7314, "step": 3530 }, { "epoch": 0.23, "grad_norm": 1.5250408859394908, "learning_rate": 9.025991199804518e-06, "loss": 0.8632, "step": 3531 }, { "epoch": 0.23, "grad_norm": 1.8062528589997526, "learning_rate": 9.025376429897126e-06, "loss": 0.8747, "step": 3532 }, { "epoch": 0.23, "grad_norm": 1.6285767859860873, "learning_rate": 9.024761486987512e-06, "loss": 0.7223, "step": 3533 }, { "epoch": 0.23, "grad_norm": 1.8718377283931678, "learning_rate": 9.024146371102107e-06, "loss": 0.7678, "step": 3534 }, { "epoch": 0.23, "grad_norm": 1.8772261861773545, "learning_rate": 9.023531082267347e-06, "loss": 0.7778, "step": 3535 }, { "epoch": 0.23, "grad_norm": 1.7260944876954336, "learning_rate": 9.022915620509677e-06, "loss": 0.9028, "step": 3536 }, { "epoch": 0.23, "grad_norm": 1.729891517940884, "learning_rate": 9.022299985855544e-06, "loss": 0.6924, "step": 3537 }, { "epoch": 0.23, "grad_norm": 3.0215431116360816, "learning_rate": 9.021684178331413e-06, "loss": 0.7574, "step": 3538 }, { "epoch": 0.23, "grad_norm": 1.6441368317376328, "learning_rate": 9.021068197963744e-06, "loss": 0.7834, "step": 3539 }, { "epoch": 0.23, "grad_norm": 1.5023260438212358, "learning_rate": 9.020452044779015e-06, "loss": 1.0154, "step": 3540 }, { "epoch": 0.23, "grad_norm": 1.9118201766755671, "learning_rate": 9.019835718803704e-06, "loss": 0.9091, "step": 3541 }, { "epoch": 0.23, "grad_norm": 1.6676748625617068, "learning_rate": 9.019219220064301e-06, "loss": 0.8202, "step": 3542 }, { "epoch": 0.23, "grad_norm": 1.696068955696363, "learning_rate": 9.018602548587303e-06, "loss": 0.8801, "step": 3543 }, { "epoch": 0.23, "grad_norm": 1.8496517669138974, "learning_rate": 9.01798570439921e-06, "loss": 0.9208, "step": 3544 }, { "epoch": 0.23, "grad_norm": 1.2138456810839822, "learning_rate": 9.017368687526535e-06, "loss": 0.594, "step": 3545 }, { "epoch": 0.23, "grad_norm": 1.7036449437934869, "learning_rate": 9.016751497995795e-06, "loss": 0.857, "step": 3546 }, { "epoch": 0.23, "grad_norm": 1.8202453787945987, "learning_rate": 9.016134135833517e-06, "loss": 0.7585, "step": 3547 }, { "epoch": 0.23, "grad_norm": 1.4934093183974482, "learning_rate": 9.015516601066232e-06, "loss": 0.8128, "step": 3548 }, { "epoch": 0.23, "grad_norm": 1.817421234087891, "learning_rate": 9.01489889372048e-06, "loss": 0.848, "step": 3549 }, { "epoch": 0.23, "grad_norm": 1.4607798857478074, "learning_rate": 9.014281013822813e-06, "loss": 0.7829, "step": 3550 }, { "epoch": 0.23, "grad_norm": 1.5039335936178708, "learning_rate": 9.01366296139978e-06, "loss": 0.7279, "step": 3551 }, { "epoch": 0.23, "grad_norm": 1.5993958896263607, "learning_rate": 9.01304473647795e-06, "loss": 0.7235, "step": 3552 }, { "epoch": 0.23, "grad_norm": 1.3539598167133242, "learning_rate": 9.012426339083887e-06, "loss": 0.8089, "step": 3553 }, { "epoch": 0.23, "grad_norm": 1.726967345274172, "learning_rate": 9.011807769244174e-06, "loss": 0.8134, "step": 3554 }, { "epoch": 0.23, "grad_norm": 1.349064102639901, "learning_rate": 9.01118902698539e-06, "loss": 0.6253, "step": 3555 }, { "epoch": 0.23, "grad_norm": 1.8072094986035867, "learning_rate": 9.010570112334132e-06, "loss": 0.9397, "step": 3556 }, { "epoch": 0.23, "grad_norm": 1.5746455328647377, "learning_rate": 9.009951025316998e-06, "loss": 0.7074, "step": 3557 }, { "epoch": 0.23, "grad_norm": 1.6287141530245777, "learning_rate": 9.009331765960593e-06, "loss": 0.7372, "step": 3558 }, { "epoch": 0.23, "grad_norm": 1.0801897648357062, "learning_rate": 9.008712334291536e-06, "loss": 0.7091, "step": 3559 }, { "epoch": 0.23, "grad_norm": 1.3966636848574483, "learning_rate": 9.008092730336446e-06, "loss": 0.7134, "step": 3560 }, { "epoch": 0.23, "grad_norm": 1.5351436819965285, "learning_rate": 9.007472954121952e-06, "loss": 0.9527, "step": 3561 }, { "epoch": 0.23, "grad_norm": 1.7634337610372146, "learning_rate": 9.006853005674692e-06, "loss": 0.8223, "step": 3562 }, { "epoch": 0.23, "grad_norm": 1.2257921263794285, "learning_rate": 9.006232885021309e-06, "loss": 0.7223, "step": 3563 }, { "epoch": 0.23, "grad_norm": 1.8627208440592884, "learning_rate": 9.005612592188454e-06, "loss": 0.7075, "step": 3564 }, { "epoch": 0.23, "grad_norm": 1.938231149318346, "learning_rate": 9.004992127202787e-06, "loss": 0.7187, "step": 3565 }, { "epoch": 0.23, "grad_norm": 1.626532984483036, "learning_rate": 9.004371490090975e-06, "loss": 0.7843, "step": 3566 }, { "epoch": 0.23, "grad_norm": 1.7837588187004405, "learning_rate": 9.00375068087969e-06, "loss": 0.7712, "step": 3567 }, { "epoch": 0.23, "grad_norm": 1.4404867470446967, "learning_rate": 9.003129699595614e-06, "loss": 0.7569, "step": 3568 }, { "epoch": 0.23, "grad_norm": 1.2796773598255853, "learning_rate": 9.002508546265433e-06, "loss": 0.6238, "step": 3569 }, { "epoch": 0.23, "grad_norm": 1.6955574339437314, "learning_rate": 9.001887220915848e-06, "loss": 0.7507, "step": 3570 }, { "epoch": 0.23, "grad_norm": 1.5849824406610185, "learning_rate": 9.001265723573559e-06, "loss": 0.8435, "step": 3571 }, { "epoch": 0.23, "grad_norm": 1.7732195235391672, "learning_rate": 9.000644054265278e-06, "loss": 0.9243, "step": 3572 }, { "epoch": 0.23, "grad_norm": 1.6925266796971095, "learning_rate": 9.00002221301772e-06, "loss": 0.8697, "step": 3573 }, { "epoch": 0.23, "grad_norm": 1.4946368049351997, "learning_rate": 8.999400199857613e-06, "loss": 0.7691, "step": 3574 }, { "epoch": 0.23, "grad_norm": 1.7517867393913649, "learning_rate": 8.998778014811688e-06, "loss": 0.9154, "step": 3575 }, { "epoch": 0.23, "grad_norm": 1.475399655325638, "learning_rate": 8.998155657906689e-06, "loss": 0.7449, "step": 3576 }, { "epoch": 0.23, "grad_norm": 2.111704201398186, "learning_rate": 8.99753312916936e-06, "loss": 0.7333, "step": 3577 }, { "epoch": 0.23, "grad_norm": 1.4633131593483766, "learning_rate": 8.996910428626458e-06, "loss": 0.7919, "step": 3578 }, { "epoch": 0.23, "grad_norm": 1.309031128192826, "learning_rate": 8.996287556304743e-06, "loss": 0.6458, "step": 3579 }, { "epoch": 0.23, "grad_norm": 1.89599163867626, "learning_rate": 8.995664512230987e-06, "loss": 0.6219, "step": 3580 }, { "epoch": 0.23, "grad_norm": 1.1916346258988173, "learning_rate": 8.995041296431965e-06, "loss": 0.6449, "step": 3581 }, { "epoch": 0.23, "grad_norm": 1.463174642690785, "learning_rate": 8.994417908934465e-06, "loss": 0.7089, "step": 3582 }, { "epoch": 0.23, "grad_norm": 1.1830708053945036, "learning_rate": 8.993794349765275e-06, "loss": 0.648, "step": 3583 }, { "epoch": 0.23, "grad_norm": 1.1091619374153299, "learning_rate": 8.993170618951196e-06, "loss": 0.6164, "step": 3584 }, { "epoch": 0.23, "grad_norm": 1.5746552410916617, "learning_rate": 8.992546716519034e-06, "loss": 0.6702, "step": 3585 }, { "epoch": 0.23, "grad_norm": 1.7812839408064043, "learning_rate": 8.991922642495607e-06, "loss": 0.9276, "step": 3586 }, { "epoch": 0.23, "grad_norm": 1.6737538996398895, "learning_rate": 8.99129839690773e-06, "loss": 0.7963, "step": 3587 }, { "epoch": 0.23, "grad_norm": 1.596723388180633, "learning_rate": 8.990673979782236e-06, "loss": 0.7507, "step": 3588 }, { "epoch": 0.23, "grad_norm": 1.8484470383724012, "learning_rate": 8.99004939114596e-06, "loss": 0.8387, "step": 3589 }, { "epoch": 0.23, "grad_norm": 1.497915040230765, "learning_rate": 8.989424631025746e-06, "loss": 0.7899, "step": 3590 }, { "epoch": 0.23, "grad_norm": 1.9513050730946289, "learning_rate": 8.988799699448442e-06, "loss": 0.7677, "step": 3591 }, { "epoch": 0.23, "grad_norm": 1.7065826023256343, "learning_rate": 8.98817459644091e-06, "loss": 0.8813, "step": 3592 }, { "epoch": 0.23, "grad_norm": 1.7696390081391309, "learning_rate": 8.987549322030013e-06, "loss": 1.0442, "step": 3593 }, { "epoch": 0.23, "grad_norm": 2.023887220029423, "learning_rate": 8.986923876242626e-06, "loss": 0.8523, "step": 3594 }, { "epoch": 0.23, "grad_norm": 1.6981298206294126, "learning_rate": 8.986298259105629e-06, "loss": 0.7079, "step": 3595 }, { "epoch": 0.23, "grad_norm": 1.7128217051606454, "learning_rate": 8.985672470645908e-06, "loss": 0.8396, "step": 3596 }, { "epoch": 0.23, "grad_norm": 1.76017518646364, "learning_rate": 8.98504651089036e-06, "loss": 0.7498, "step": 3597 }, { "epoch": 0.23, "grad_norm": 1.7413673271669714, "learning_rate": 8.984420379865887e-06, "loss": 0.7842, "step": 3598 }, { "epoch": 0.23, "grad_norm": 1.130152931952505, "learning_rate": 8.983794077599398e-06, "loss": 0.6826, "step": 3599 }, { "epoch": 0.23, "grad_norm": 1.9588178926741306, "learning_rate": 8.98316760411781e-06, "loss": 0.9293, "step": 3600 }, { "epoch": 0.23, "grad_norm": 1.5271097004085128, "learning_rate": 8.98254095944805e-06, "loss": 0.7914, "step": 3601 }, { "epoch": 0.23, "grad_norm": 1.94246130592705, "learning_rate": 8.981914143617048e-06, "loss": 0.8045, "step": 3602 }, { "epoch": 0.23, "grad_norm": 1.5641031206145581, "learning_rate": 8.981287156651741e-06, "loss": 0.7913, "step": 3603 }, { "epoch": 0.23, "grad_norm": 1.87800380117487, "learning_rate": 8.98065999857908e-06, "loss": 0.9135, "step": 3604 }, { "epoch": 0.23, "grad_norm": 1.4689940491643605, "learning_rate": 8.980032669426015e-06, "loss": 0.7323, "step": 3605 }, { "epoch": 0.23, "grad_norm": 1.73143492990536, "learning_rate": 8.97940516921951e-06, "loss": 0.7728, "step": 3606 }, { "epoch": 0.23, "grad_norm": 1.6332165970287347, "learning_rate": 8.978777497986533e-06, "loss": 0.8113, "step": 3607 }, { "epoch": 0.23, "grad_norm": 1.7031762678418727, "learning_rate": 8.97814965575406e-06, "loss": 0.8226, "step": 3608 }, { "epoch": 0.23, "grad_norm": 1.6036955762729963, "learning_rate": 8.977521642549073e-06, "loss": 0.7142, "step": 3609 }, { "epoch": 0.23, "grad_norm": 1.6469098240671491, "learning_rate": 8.976893458398564e-06, "loss": 0.9038, "step": 3610 }, { "epoch": 0.23, "grad_norm": 1.139591797510259, "learning_rate": 8.97626510332953e-06, "loss": 0.7264, "step": 3611 }, { "epoch": 0.23, "grad_norm": 1.8044774913574246, "learning_rate": 8.97563657736898e-06, "loss": 0.8163, "step": 3612 }, { "epoch": 0.23, "grad_norm": 1.8243748033003482, "learning_rate": 8.97500788054392e-06, "loss": 0.7981, "step": 3613 }, { "epoch": 0.23, "grad_norm": 1.6325067179723363, "learning_rate": 8.974379012881376e-06, "loss": 0.7307, "step": 3614 }, { "epoch": 0.23, "grad_norm": 1.3155808317965572, "learning_rate": 8.973749974408375e-06, "loss": 0.5966, "step": 3615 }, { "epoch": 0.23, "grad_norm": 1.2212879603882996, "learning_rate": 8.973120765151948e-06, "loss": 0.6681, "step": 3616 }, { "epoch": 0.23, "grad_norm": 1.1200993300804671, "learning_rate": 8.972491385139138e-06, "loss": 0.678, "step": 3617 }, { "epoch": 0.23, "grad_norm": 1.8081193418264896, "learning_rate": 8.971861834396997e-06, "loss": 0.7682, "step": 3618 }, { "epoch": 0.23, "grad_norm": 1.6523811821104242, "learning_rate": 8.971232112952581e-06, "loss": 0.731, "step": 3619 }, { "epoch": 0.23, "grad_norm": 1.4974730613880645, "learning_rate": 8.970602220832954e-06, "loss": 0.7939, "step": 3620 }, { "epoch": 0.23, "grad_norm": 1.8338506136265451, "learning_rate": 8.969972158065185e-06, "loss": 0.8663, "step": 3621 }, { "epoch": 0.23, "grad_norm": 1.4257011554394337, "learning_rate": 8.969341924676356e-06, "loss": 0.7773, "step": 3622 }, { "epoch": 0.23, "grad_norm": 1.907631076517593, "learning_rate": 8.968711520693551e-06, "loss": 0.8352, "step": 3623 }, { "epoch": 0.23, "grad_norm": 1.7066416772642672, "learning_rate": 8.968080946143864e-06, "loss": 0.7608, "step": 3624 }, { "epoch": 0.23, "grad_norm": 1.741627967333178, "learning_rate": 8.967450201054397e-06, "loss": 0.8944, "step": 3625 }, { "epoch": 0.23, "grad_norm": 1.5638284985888398, "learning_rate": 8.966819285452257e-06, "loss": 0.9936, "step": 3626 }, { "epoch": 0.23, "grad_norm": 1.426023890025196, "learning_rate": 8.96618819936456e-06, "loss": 0.6618, "step": 3627 }, { "epoch": 0.23, "grad_norm": 1.4755699797791217, "learning_rate": 8.965556942818427e-06, "loss": 0.8397, "step": 3628 }, { "epoch": 0.23, "grad_norm": 1.595084278053458, "learning_rate": 8.964925515840991e-06, "loss": 0.7035, "step": 3629 }, { "epoch": 0.23, "grad_norm": 1.7971251459329616, "learning_rate": 8.964293918459388e-06, "loss": 0.8252, "step": 3630 }, { "epoch": 0.23, "grad_norm": 1.7077954341943355, "learning_rate": 8.963662150700761e-06, "loss": 0.7082, "step": 3631 }, { "epoch": 0.23, "grad_norm": 1.4322326909500476, "learning_rate": 8.963030212592264e-06, "loss": 0.741, "step": 3632 }, { "epoch": 0.23, "grad_norm": 1.712964165496001, "learning_rate": 8.962398104161055e-06, "loss": 0.7834, "step": 3633 }, { "epoch": 0.23, "grad_norm": 1.1514168518318824, "learning_rate": 8.961765825434304e-06, "loss": 0.7273, "step": 3634 }, { "epoch": 0.23, "grad_norm": 1.8265406574200227, "learning_rate": 8.96113337643918e-06, "loss": 0.827, "step": 3635 }, { "epoch": 0.23, "grad_norm": 1.5802210144495683, "learning_rate": 8.960500757202869e-06, "loss": 0.7613, "step": 3636 }, { "epoch": 0.23, "grad_norm": 1.8643271516200857, "learning_rate": 8.959867967752556e-06, "loss": 0.8019, "step": 3637 }, { "epoch": 0.23, "grad_norm": 1.8062026420425092, "learning_rate": 8.95923500811544e-06, "loss": 0.789, "step": 3638 }, { "epoch": 0.23, "grad_norm": 1.6467610510978514, "learning_rate": 8.95860187831872e-06, "loss": 0.7694, "step": 3639 }, { "epoch": 0.23, "grad_norm": 1.619776381724138, "learning_rate": 8.957968578389613e-06, "loss": 0.7578, "step": 3640 }, { "epoch": 0.23, "grad_norm": 1.7112550652871117, "learning_rate": 8.957335108355332e-06, "loss": 0.8268, "step": 3641 }, { "epoch": 0.23, "grad_norm": 0.973073864337585, "learning_rate": 8.956701468243103e-06, "loss": 0.5755, "step": 3642 }, { "epoch": 0.23, "grad_norm": 1.7382848994933588, "learning_rate": 8.956067658080158e-06, "loss": 0.7585, "step": 3643 }, { "epoch": 0.23, "grad_norm": 1.5502865416002565, "learning_rate": 8.955433677893742e-06, "loss": 0.8492, "step": 3644 }, { "epoch": 0.23, "grad_norm": 1.6580701089174792, "learning_rate": 8.954799527711094e-06, "loss": 0.7456, "step": 3645 }, { "epoch": 0.23, "grad_norm": 1.5874425412085515, "learning_rate": 8.954165207559475e-06, "loss": 0.735, "step": 3646 }, { "epoch": 0.23, "grad_norm": 1.759847954846904, "learning_rate": 8.953530717466143e-06, "loss": 0.8524, "step": 3647 }, { "epoch": 0.23, "grad_norm": 1.6312213013226124, "learning_rate": 8.952896057458368e-06, "loss": 0.8797, "step": 3648 }, { "epoch": 0.23, "grad_norm": 2.6414827130147964, "learning_rate": 8.952261227563427e-06, "loss": 0.7263, "step": 3649 }, { "epoch": 0.23, "grad_norm": 1.6787853252355849, "learning_rate": 8.951626227808603e-06, "loss": 0.9647, "step": 3650 }, { "epoch": 0.23, "grad_norm": 1.7663077657504496, "learning_rate": 8.950991058221187e-06, "loss": 0.7268, "step": 3651 }, { "epoch": 0.23, "grad_norm": 1.6091224765233387, "learning_rate": 8.95035571882848e-06, "loss": 0.7759, "step": 3652 }, { "epoch": 0.23, "grad_norm": 1.660851489735086, "learning_rate": 8.949720209657781e-06, "loss": 0.7031, "step": 3653 }, { "epoch": 0.23, "grad_norm": 1.0874920839500974, "learning_rate": 8.94908453073641e-06, "loss": 0.6352, "step": 3654 }, { "epoch": 0.23, "grad_norm": 2.069658584506672, "learning_rate": 8.948448682091684e-06, "loss": 0.8336, "step": 3655 }, { "epoch": 0.23, "grad_norm": 1.3031915218935566, "learning_rate": 8.947812663750929e-06, "loss": 0.6826, "step": 3656 }, { "epoch": 0.23, "grad_norm": 1.6997853722512792, "learning_rate": 8.947176475741482e-06, "loss": 0.8972, "step": 3657 }, { "epoch": 0.23, "grad_norm": 1.5040920751717248, "learning_rate": 8.946540118090685e-06, "loss": 0.7626, "step": 3658 }, { "epoch": 0.23, "grad_norm": 1.7173630470528907, "learning_rate": 8.945903590825884e-06, "loss": 0.7204, "step": 3659 }, { "epoch": 0.23, "grad_norm": 1.0711855575520883, "learning_rate": 8.94526689397444e-06, "loss": 0.7681, "step": 3660 }, { "epoch": 0.23, "grad_norm": 1.525096894009469, "learning_rate": 8.944630027563714e-06, "loss": 0.6938, "step": 3661 }, { "epoch": 0.23, "grad_norm": 2.0191542906874447, "learning_rate": 8.94399299162108e-06, "loss": 0.7932, "step": 3662 }, { "epoch": 0.23, "grad_norm": 2.0193204386649084, "learning_rate": 8.943355786173913e-06, "loss": 0.7596, "step": 3663 }, { "epoch": 0.23, "grad_norm": 1.8497715135883641, "learning_rate": 8.942718411249601e-06, "loss": 0.7896, "step": 3664 }, { "epoch": 0.23, "grad_norm": 1.8038838795600083, "learning_rate": 8.942080866875538e-06, "loss": 0.757, "step": 3665 }, { "epoch": 0.23, "grad_norm": 1.892072827631748, "learning_rate": 8.941443153079121e-06, "loss": 0.8594, "step": 3666 }, { "epoch": 0.23, "grad_norm": 1.570558929723304, "learning_rate": 8.94080526988776e-06, "loss": 0.7555, "step": 3667 }, { "epoch": 0.23, "grad_norm": 1.7490101092433297, "learning_rate": 8.940167217328869e-06, "loss": 0.9405, "step": 3668 }, { "epoch": 0.23, "grad_norm": 1.876947822756858, "learning_rate": 8.93952899542987e-06, "loss": 0.7434, "step": 3669 }, { "epoch": 0.23, "grad_norm": 1.7294563898502635, "learning_rate": 8.938890604218193e-06, "loss": 0.8843, "step": 3670 }, { "epoch": 0.23, "grad_norm": 1.6754522302305577, "learning_rate": 8.938252043721275e-06, "loss": 0.807, "step": 3671 }, { "epoch": 0.24, "grad_norm": 2.8118658802282086, "learning_rate": 8.93761331396656e-06, "loss": 0.6954, "step": 3672 }, { "epoch": 0.24, "grad_norm": 1.7520834995047272, "learning_rate": 8.936974414981498e-06, "loss": 0.8717, "step": 3673 }, { "epoch": 0.24, "grad_norm": 1.6195763780386125, "learning_rate": 8.93633534679355e-06, "loss": 0.7814, "step": 3674 }, { "epoch": 0.24, "grad_norm": 1.7230979613536217, "learning_rate": 8.935696109430178e-06, "loss": 0.7833, "step": 3675 }, { "epoch": 0.24, "grad_norm": 1.8573016367102262, "learning_rate": 8.935056702918858e-06, "loss": 0.7246, "step": 3676 }, { "epoch": 0.24, "grad_norm": 1.794717272886611, "learning_rate": 8.93441712728707e-06, "loss": 0.8783, "step": 3677 }, { "epoch": 0.24, "grad_norm": 1.686115188920535, "learning_rate": 8.933777382562301e-06, "loss": 0.8717, "step": 3678 }, { "epoch": 0.24, "grad_norm": 1.0894650457347357, "learning_rate": 8.933137468772047e-06, "loss": 0.6716, "step": 3679 }, { "epoch": 0.24, "grad_norm": 1.0227759935675087, "learning_rate": 8.932497385943806e-06, "loss": 0.5788, "step": 3680 }, { "epoch": 0.24, "grad_norm": 1.8131404153153987, "learning_rate": 8.931857134105093e-06, "loss": 0.8225, "step": 3681 }, { "epoch": 0.24, "grad_norm": 1.783897815459125, "learning_rate": 8.931216713283423e-06, "loss": 0.8539, "step": 3682 }, { "epoch": 0.24, "grad_norm": 1.7267426232171947, "learning_rate": 8.930576123506318e-06, "loss": 0.8148, "step": 3683 }, { "epoch": 0.24, "grad_norm": 1.5075355140587872, "learning_rate": 8.929935364801312e-06, "loss": 0.8119, "step": 3684 }, { "epoch": 0.24, "grad_norm": 1.6676433212315518, "learning_rate": 8.929294437195942e-06, "loss": 0.8472, "step": 3685 }, { "epoch": 0.24, "grad_norm": 1.5953036689497677, "learning_rate": 8.928653340717752e-06, "loss": 0.8604, "step": 3686 }, { "epoch": 0.24, "grad_norm": 1.1244937034999993, "learning_rate": 8.928012075394297e-06, "loss": 0.6909, "step": 3687 }, { "epoch": 0.24, "grad_norm": 1.0371491087836133, "learning_rate": 8.927370641253137e-06, "loss": 0.7283, "step": 3688 }, { "epoch": 0.24, "grad_norm": 1.6973603069116208, "learning_rate": 8.92672903832184e-06, "loss": 0.7293, "step": 3689 }, { "epoch": 0.24, "grad_norm": 1.5931068106233655, "learning_rate": 8.92608726662798e-06, "loss": 0.7247, "step": 3690 }, { "epoch": 0.24, "grad_norm": 1.6372632156064342, "learning_rate": 8.925445326199138e-06, "loss": 0.719, "step": 3691 }, { "epoch": 0.24, "grad_norm": 1.6190641197608762, "learning_rate": 8.924803217062906e-06, "loss": 0.884, "step": 3692 }, { "epoch": 0.24, "grad_norm": 1.0789643719580082, "learning_rate": 8.924160939246877e-06, "loss": 0.6714, "step": 3693 }, { "epoch": 0.24, "grad_norm": 1.615828360539447, "learning_rate": 8.923518492778659e-06, "loss": 0.7654, "step": 3694 }, { "epoch": 0.24, "grad_norm": 2.117289369091333, "learning_rate": 8.922875877685859e-06, "loss": 0.8851, "step": 3695 }, { "epoch": 0.24, "grad_norm": 1.498342508871235, "learning_rate": 8.922233093996098e-06, "loss": 0.8265, "step": 3696 }, { "epoch": 0.24, "grad_norm": 1.8497714656169004, "learning_rate": 8.921590141737e-06, "loss": 0.8202, "step": 3697 }, { "epoch": 0.24, "grad_norm": 1.6053874604792515, "learning_rate": 8.920947020936196e-06, "loss": 0.7165, "step": 3698 }, { "epoch": 0.24, "grad_norm": 2.2800373873833073, "learning_rate": 8.920303731621332e-06, "loss": 0.8224, "step": 3699 }, { "epoch": 0.24, "grad_norm": 2.2211410008200247, "learning_rate": 8.919660273820047e-06, "loss": 0.8502, "step": 3700 }, { "epoch": 0.24, "grad_norm": 1.5093157812616784, "learning_rate": 8.919016647560003e-06, "loss": 0.7624, "step": 3701 }, { "epoch": 0.24, "grad_norm": 1.6462349919402355, "learning_rate": 8.918372852868858e-06, "loss": 0.7971, "step": 3702 }, { "epoch": 0.24, "grad_norm": 1.598277572890283, "learning_rate": 8.91772888977428e-06, "loss": 0.7488, "step": 3703 }, { "epoch": 0.24, "grad_norm": 1.6535658050718445, "learning_rate": 8.917084758303948e-06, "loss": 0.8781, "step": 3704 }, { "epoch": 0.24, "grad_norm": 1.0020905013592034, "learning_rate": 8.916440458485542e-06, "loss": 0.6379, "step": 3705 }, { "epoch": 0.24, "grad_norm": 1.68329107913379, "learning_rate": 8.915795990346758e-06, "loss": 0.8476, "step": 3706 }, { "epoch": 0.24, "grad_norm": 1.463976586185754, "learning_rate": 8.915151353915288e-06, "loss": 0.8193, "step": 3707 }, { "epoch": 0.24, "grad_norm": 1.6021764498733622, "learning_rate": 8.91450654921884e-06, "loss": 0.8278, "step": 3708 }, { "epoch": 0.24, "grad_norm": 1.854483442394455, "learning_rate": 8.913861576285126e-06, "loss": 0.797, "step": 3709 }, { "epoch": 0.24, "grad_norm": 1.4948901051112307, "learning_rate": 8.913216435141867e-06, "loss": 0.9296, "step": 3710 }, { "epoch": 0.24, "grad_norm": 1.8718327274362287, "learning_rate": 8.912571125816787e-06, "loss": 0.9091, "step": 3711 }, { "epoch": 0.24, "grad_norm": 1.6230200815505786, "learning_rate": 8.911925648337622e-06, "loss": 0.809, "step": 3712 }, { "epoch": 0.24, "grad_norm": 1.4463638645631685, "learning_rate": 8.911280002732112e-06, "loss": 0.8181, "step": 3713 }, { "epoch": 0.24, "grad_norm": 1.20365632010465, "learning_rate": 8.910634189028006e-06, "loss": 0.5833, "step": 3714 }, { "epoch": 0.24, "grad_norm": 1.6636093370377207, "learning_rate": 8.909988207253062e-06, "loss": 0.8996, "step": 3715 }, { "epoch": 0.24, "grad_norm": 1.7900279117953697, "learning_rate": 8.90934205743504e-06, "loss": 0.8277, "step": 3716 }, { "epoch": 0.24, "grad_norm": 1.5571346332978329, "learning_rate": 8.90869573960171e-06, "loss": 0.745, "step": 3717 }, { "epoch": 0.24, "grad_norm": 1.775907535843662, "learning_rate": 8.90804925378085e-06, "loss": 0.8614, "step": 3718 }, { "epoch": 0.24, "grad_norm": 1.48086690882972, "learning_rate": 8.907402600000248e-06, "loss": 0.732, "step": 3719 }, { "epoch": 0.24, "grad_norm": 1.6226420577950227, "learning_rate": 8.90675577828769e-06, "loss": 0.8067, "step": 3720 }, { "epoch": 0.24, "grad_norm": 1.686207093652188, "learning_rate": 8.906108788670979e-06, "loss": 0.8743, "step": 3721 }, { "epoch": 0.24, "grad_norm": 1.6434352388805737, "learning_rate": 8.90546163117792e-06, "loss": 0.7436, "step": 3722 }, { "epoch": 0.24, "grad_norm": 1.4757874146124454, "learning_rate": 8.904814305836326e-06, "loss": 0.7271, "step": 3723 }, { "epoch": 0.24, "grad_norm": 1.760898088588842, "learning_rate": 8.904166812674019e-06, "loss": 0.8491, "step": 3724 }, { "epoch": 0.24, "grad_norm": 1.7287371335637447, "learning_rate": 8.903519151718826e-06, "loss": 0.811, "step": 3725 }, { "epoch": 0.24, "grad_norm": 1.9901247809624818, "learning_rate": 8.902871322998582e-06, "loss": 0.723, "step": 3726 }, { "epoch": 0.24, "grad_norm": 1.1969245609922192, "learning_rate": 8.90222332654113e-06, "loss": 0.6495, "step": 3727 }, { "epoch": 0.24, "grad_norm": 1.6903133780091188, "learning_rate": 8.901575162374318e-06, "loss": 0.7824, "step": 3728 }, { "epoch": 0.24, "grad_norm": 1.7829584765747535, "learning_rate": 8.900926830526006e-06, "loss": 0.7893, "step": 3729 }, { "epoch": 0.24, "grad_norm": 1.6960638745653482, "learning_rate": 8.900278331024055e-06, "loss": 0.6503, "step": 3730 }, { "epoch": 0.24, "grad_norm": 1.6637384947989227, "learning_rate": 8.899629663896336e-06, "loss": 0.7421, "step": 3731 }, { "epoch": 0.24, "grad_norm": 2.794069341135884, "learning_rate": 8.89898082917073e-06, "loss": 0.805, "step": 3732 }, { "epoch": 0.24, "grad_norm": 1.5777210097887715, "learning_rate": 8.898331826875119e-06, "loss": 0.7822, "step": 3733 }, { "epoch": 0.24, "grad_norm": 1.488995186839063, "learning_rate": 8.897682657037398e-06, "loss": 0.7292, "step": 3734 }, { "epoch": 0.24, "grad_norm": 1.6579602330544339, "learning_rate": 8.897033319685466e-06, "loss": 0.797, "step": 3735 }, { "epoch": 0.24, "grad_norm": 1.7579967633337628, "learning_rate": 8.896383814847232e-06, "loss": 0.7586, "step": 3736 }, { "epoch": 0.24, "grad_norm": 1.4359452292850574, "learning_rate": 8.89573414255061e-06, "loss": 0.7975, "step": 3737 }, { "epoch": 0.24, "grad_norm": 1.5679570594789343, "learning_rate": 8.895084302823518e-06, "loss": 0.8195, "step": 3738 }, { "epoch": 0.24, "grad_norm": 1.3316176864550773, "learning_rate": 8.89443429569389e-06, "loss": 0.7681, "step": 3739 }, { "epoch": 0.24, "grad_norm": 1.9432580151267, "learning_rate": 8.893784121189656e-06, "loss": 0.8413, "step": 3740 }, { "epoch": 0.24, "grad_norm": 1.8132399850923901, "learning_rate": 8.893133779338765e-06, "loss": 0.823, "step": 3741 }, { "epoch": 0.24, "grad_norm": 1.6122643847982587, "learning_rate": 8.892483270169165e-06, "loss": 0.7868, "step": 3742 }, { "epoch": 0.24, "grad_norm": 1.747444341815124, "learning_rate": 8.89183259370881e-06, "loss": 0.9306, "step": 3743 }, { "epoch": 0.24, "grad_norm": 1.9911480023538761, "learning_rate": 8.891181749985672e-06, "loss": 0.7616, "step": 3744 }, { "epoch": 0.24, "grad_norm": 2.5867436416421206, "learning_rate": 8.890530739027718e-06, "loss": 0.7231, "step": 3745 }, { "epoch": 0.24, "grad_norm": 1.679400597124273, "learning_rate": 8.889879560862926e-06, "loss": 0.8545, "step": 3746 }, { "epoch": 0.24, "grad_norm": 2.408430068545921, "learning_rate": 8.889228215519286e-06, "loss": 0.838, "step": 3747 }, { "epoch": 0.24, "grad_norm": 1.7274868412727602, "learning_rate": 8.888576703024789e-06, "loss": 0.7256, "step": 3748 }, { "epoch": 0.24, "grad_norm": 1.1347768652730061, "learning_rate": 8.887925023407437e-06, "loss": 0.6938, "step": 3749 }, { "epoch": 0.24, "grad_norm": 1.7603365946289335, "learning_rate": 8.887273176695237e-06, "loss": 0.8291, "step": 3750 }, { "epoch": 0.24, "grad_norm": 1.6921741207354415, "learning_rate": 8.886621162916204e-06, "loss": 0.651, "step": 3751 }, { "epoch": 0.24, "grad_norm": 1.6513383481051735, "learning_rate": 8.88596898209836e-06, "loss": 0.7997, "step": 3752 }, { "epoch": 0.24, "grad_norm": 1.616687574916316, "learning_rate": 8.885316634269735e-06, "loss": 0.7809, "step": 3753 }, { "epoch": 0.24, "grad_norm": 1.8125842158729868, "learning_rate": 8.884664119458366e-06, "loss": 0.7616, "step": 3754 }, { "epoch": 0.24, "grad_norm": 1.570244964555748, "learning_rate": 8.884011437692295e-06, "loss": 0.8132, "step": 3755 }, { "epoch": 0.24, "grad_norm": 1.5659345493706405, "learning_rate": 8.883358588999573e-06, "loss": 0.8417, "step": 3756 }, { "epoch": 0.24, "grad_norm": 1.0874943549486442, "learning_rate": 8.882705573408258e-06, "loss": 0.5902, "step": 3757 }, { "epoch": 0.24, "grad_norm": 1.8549010328708262, "learning_rate": 8.88205239094642e-06, "loss": 0.7529, "step": 3758 }, { "epoch": 0.24, "grad_norm": 1.544923808064895, "learning_rate": 8.881399041642125e-06, "loss": 0.7427, "step": 3759 }, { "epoch": 0.24, "grad_norm": 1.6120324203723384, "learning_rate": 8.880745525523455e-06, "loss": 0.8851, "step": 3760 }, { "epoch": 0.24, "grad_norm": 1.677194303167372, "learning_rate": 8.880091842618498e-06, "loss": 0.8111, "step": 3761 }, { "epoch": 0.24, "grad_norm": 1.7629474548776676, "learning_rate": 8.879437992955346e-06, "loss": 0.8218, "step": 3762 }, { "epoch": 0.24, "grad_norm": 1.597296714727216, "learning_rate": 8.878783976562102e-06, "loss": 0.7496, "step": 3763 }, { "epoch": 0.24, "grad_norm": 1.4648019666478718, "learning_rate": 8.878129793466872e-06, "loss": 0.816, "step": 3764 }, { "epoch": 0.24, "grad_norm": 1.6372891866161223, "learning_rate": 8.877475443697773e-06, "loss": 0.7674, "step": 3765 }, { "epoch": 0.24, "grad_norm": 1.651934888902152, "learning_rate": 8.876820927282928e-06, "loss": 0.7039, "step": 3766 }, { "epoch": 0.24, "grad_norm": 1.7550026170224249, "learning_rate": 8.876166244250463e-06, "loss": 0.858, "step": 3767 }, { "epoch": 0.24, "grad_norm": 1.7985976882972183, "learning_rate": 8.87551139462852e-06, "loss": 0.7935, "step": 3768 }, { "epoch": 0.24, "grad_norm": 1.5618832588059364, "learning_rate": 8.87485637844524e-06, "loss": 0.8672, "step": 3769 }, { "epoch": 0.24, "grad_norm": 1.6735885618700574, "learning_rate": 8.874201195728777e-06, "loss": 0.8528, "step": 3770 }, { "epoch": 0.24, "grad_norm": 1.1951463061761813, "learning_rate": 8.873545846507286e-06, "loss": 0.6107, "step": 3771 }, { "epoch": 0.24, "grad_norm": 1.7078148160972677, "learning_rate": 8.872890330808933e-06, "loss": 0.8056, "step": 3772 }, { "epoch": 0.24, "grad_norm": 1.2121425790628528, "learning_rate": 8.872234648661893e-06, "loss": 0.6094, "step": 3773 }, { "epoch": 0.24, "grad_norm": 1.7919239778682958, "learning_rate": 8.871578800094345e-06, "loss": 0.8471, "step": 3774 }, { "epoch": 0.24, "grad_norm": 1.7664031774644473, "learning_rate": 8.870922785134473e-06, "loss": 0.8502, "step": 3775 }, { "epoch": 0.24, "grad_norm": 1.8799215450764197, "learning_rate": 8.870266603810476e-06, "loss": 0.8195, "step": 3776 }, { "epoch": 0.24, "grad_norm": 1.9470995945931204, "learning_rate": 8.869610256150552e-06, "loss": 0.7792, "step": 3777 }, { "epoch": 0.24, "grad_norm": 1.5041963457239047, "learning_rate": 8.86895374218291e-06, "loss": 0.873, "step": 3778 }, { "epoch": 0.24, "grad_norm": 1.0639384624268045, "learning_rate": 8.868297061935767e-06, "loss": 0.6617, "step": 3779 }, { "epoch": 0.24, "grad_norm": 1.5984459486221636, "learning_rate": 8.867640215437344e-06, "loss": 0.7972, "step": 3780 }, { "epoch": 0.24, "grad_norm": 1.6959228954792365, "learning_rate": 8.86698320271587e-06, "loss": 0.7911, "step": 3781 }, { "epoch": 0.24, "grad_norm": 1.7265642360401767, "learning_rate": 8.866326023799586e-06, "loss": 0.7761, "step": 3782 }, { "epoch": 0.24, "grad_norm": 1.4669257762034136, "learning_rate": 8.865668678716734e-06, "loss": 0.7236, "step": 3783 }, { "epoch": 0.24, "grad_norm": 1.9339298041589792, "learning_rate": 8.865011167495564e-06, "loss": 0.8061, "step": 3784 }, { "epoch": 0.24, "grad_norm": 1.6258317826170585, "learning_rate": 8.864353490164335e-06, "loss": 0.8152, "step": 3785 }, { "epoch": 0.24, "grad_norm": 1.6413709323578134, "learning_rate": 8.863695646751313e-06, "loss": 0.7979, "step": 3786 }, { "epoch": 0.24, "grad_norm": 1.2002319017472531, "learning_rate": 8.863037637284773e-06, "loss": 0.8101, "step": 3787 }, { "epoch": 0.24, "grad_norm": 1.78288582299322, "learning_rate": 8.862379461792992e-06, "loss": 0.9937, "step": 3788 }, { "epoch": 0.24, "grad_norm": 1.9206259722831107, "learning_rate": 8.861721120304257e-06, "loss": 0.8384, "step": 3789 }, { "epoch": 0.24, "grad_norm": 2.2997084604579325, "learning_rate": 8.861062612846862e-06, "loss": 0.9418, "step": 3790 }, { "epoch": 0.24, "grad_norm": 3.110524574762617, "learning_rate": 8.86040393944911e-06, "loss": 0.8072, "step": 3791 }, { "epoch": 0.24, "grad_norm": 1.32794039891244, "learning_rate": 8.859745100139307e-06, "loss": 0.6495, "step": 3792 }, { "epoch": 0.24, "grad_norm": 1.659787457355343, "learning_rate": 8.859086094945773e-06, "loss": 0.7362, "step": 3793 }, { "epoch": 0.24, "grad_norm": 1.7006817521482458, "learning_rate": 8.858426923896826e-06, "loss": 0.7163, "step": 3794 }, { "epoch": 0.24, "grad_norm": 1.850998819539995, "learning_rate": 8.857767587020798e-06, "loss": 0.7871, "step": 3795 }, { "epoch": 0.24, "grad_norm": 2.353535226720062, "learning_rate": 8.857108084346025e-06, "loss": 0.8196, "step": 3796 }, { "epoch": 0.24, "grad_norm": 1.2740871018771431, "learning_rate": 8.856448415900851e-06, "loss": 0.7096, "step": 3797 }, { "epoch": 0.24, "grad_norm": 1.7257333851791812, "learning_rate": 8.855788581713629e-06, "loss": 0.8395, "step": 3798 }, { "epoch": 0.24, "grad_norm": 1.8819946623513961, "learning_rate": 8.855128581812714e-06, "loss": 0.9684, "step": 3799 }, { "epoch": 0.24, "grad_norm": 2.3405712029479573, "learning_rate": 8.854468416226473e-06, "loss": 0.8311, "step": 3800 }, { "epoch": 0.24, "grad_norm": 1.6657513508494748, "learning_rate": 8.853808084983282e-06, "loss": 0.7629, "step": 3801 }, { "epoch": 0.24, "grad_norm": 1.27565961349519, "learning_rate": 8.853147588111515e-06, "loss": 0.5786, "step": 3802 }, { "epoch": 0.24, "grad_norm": 1.7161097695616654, "learning_rate": 8.852486925639562e-06, "loss": 0.8693, "step": 3803 }, { "epoch": 0.24, "grad_norm": 2.064609599623157, "learning_rate": 8.851826097595815e-06, "loss": 0.7266, "step": 3804 }, { "epoch": 0.24, "grad_norm": 1.4556014184456243, "learning_rate": 8.851165104008678e-06, "loss": 0.776, "step": 3805 }, { "epoch": 0.24, "grad_norm": 1.8477805152988724, "learning_rate": 8.850503944906557e-06, "loss": 0.7768, "step": 3806 }, { "epoch": 0.24, "grad_norm": 1.6383486413565767, "learning_rate": 8.849842620317865e-06, "loss": 0.812, "step": 3807 }, { "epoch": 0.24, "grad_norm": 1.5617138237285473, "learning_rate": 8.84918113027103e-06, "loss": 0.8339, "step": 3808 }, { "epoch": 0.24, "grad_norm": 1.1003476648422568, "learning_rate": 8.848519474794476e-06, "loss": 0.6064, "step": 3809 }, { "epoch": 0.24, "grad_norm": 1.617391264424931, "learning_rate": 8.847857653916643e-06, "loss": 0.7441, "step": 3810 }, { "epoch": 0.24, "grad_norm": 1.4748567858959312, "learning_rate": 8.847195667665974e-06, "loss": 0.8241, "step": 3811 }, { "epoch": 0.24, "grad_norm": 1.8323267185998058, "learning_rate": 8.84653351607092e-06, "loss": 0.8518, "step": 3812 }, { "epoch": 0.24, "grad_norm": 1.2508522858344049, "learning_rate": 8.845871199159935e-06, "loss": 0.6402, "step": 3813 }, { "epoch": 0.24, "grad_norm": 1.567324118889176, "learning_rate": 8.845208716961489e-06, "loss": 0.72, "step": 3814 }, { "epoch": 0.24, "grad_norm": 2.0157953158644686, "learning_rate": 8.844546069504051e-06, "loss": 1.029, "step": 3815 }, { "epoch": 0.24, "grad_norm": 1.5214157241088397, "learning_rate": 8.843883256816104e-06, "loss": 0.6565, "step": 3816 }, { "epoch": 0.24, "grad_norm": 1.7310533772382057, "learning_rate": 8.843220278926128e-06, "loss": 0.9033, "step": 3817 }, { "epoch": 0.24, "grad_norm": 1.736140502585393, "learning_rate": 8.842557135862624e-06, "loss": 0.8125, "step": 3818 }, { "epoch": 0.24, "grad_norm": 1.6727719203537326, "learning_rate": 8.841893827654087e-06, "loss": 0.8896, "step": 3819 }, { "epoch": 0.24, "grad_norm": 1.6368656028882291, "learning_rate": 8.841230354329026e-06, "loss": 0.7804, "step": 3820 }, { "epoch": 0.24, "grad_norm": 1.6406022411345282, "learning_rate": 8.840566715915955e-06, "loss": 0.8655, "step": 3821 }, { "epoch": 0.24, "grad_norm": 3.1192222659865125, "learning_rate": 8.839902912443398e-06, "loss": 0.7693, "step": 3822 }, { "epoch": 0.24, "grad_norm": 1.2133290336781064, "learning_rate": 8.839238943939883e-06, "loss": 0.7251, "step": 3823 }, { "epoch": 0.24, "grad_norm": 1.135124773433207, "learning_rate": 8.838574810433945e-06, "loss": 0.6885, "step": 3824 }, { "epoch": 0.24, "grad_norm": 1.622607469365241, "learning_rate": 8.837910511954128e-06, "loss": 0.7951, "step": 3825 }, { "epoch": 0.24, "grad_norm": 1.7545105981016627, "learning_rate": 8.83724604852898e-06, "loss": 0.9067, "step": 3826 }, { "epoch": 0.24, "grad_norm": 2.1738586037009844, "learning_rate": 8.836581420187062e-06, "loss": 0.6478, "step": 3827 }, { "epoch": 0.25, "grad_norm": 1.6078040527977029, "learning_rate": 8.835916626956935e-06, "loss": 0.6532, "step": 3828 }, { "epoch": 0.25, "grad_norm": 4.0767432613644035, "learning_rate": 8.835251668867172e-06, "loss": 0.938, "step": 3829 }, { "epoch": 0.25, "grad_norm": 1.6668021538994986, "learning_rate": 8.834586545946353e-06, "loss": 0.6593, "step": 3830 }, { "epoch": 0.25, "grad_norm": 1.5259759668039408, "learning_rate": 8.833921258223059e-06, "loss": 0.7092, "step": 3831 }, { "epoch": 0.25, "grad_norm": 1.8243383875090249, "learning_rate": 8.833255805725887e-06, "loss": 0.796, "step": 3832 }, { "epoch": 0.25, "grad_norm": 1.688547578906459, "learning_rate": 8.832590188483437e-06, "loss": 0.7821, "step": 3833 }, { "epoch": 0.25, "grad_norm": 1.2928965489696709, "learning_rate": 8.831924406524312e-06, "loss": 0.7249, "step": 3834 }, { "epoch": 0.25, "grad_norm": 1.173565276804432, "learning_rate": 8.831258459877128e-06, "loss": 0.7739, "step": 3835 }, { "epoch": 0.25, "grad_norm": 1.2866550792050688, "learning_rate": 8.830592348570505e-06, "loss": 0.7463, "step": 3836 }, { "epoch": 0.25, "grad_norm": 1.8304189851515817, "learning_rate": 8.829926072633075e-06, "loss": 0.7704, "step": 3837 }, { "epoch": 0.25, "grad_norm": 1.7849217672962046, "learning_rate": 8.829259632093468e-06, "loss": 0.9617, "step": 3838 }, { "epoch": 0.25, "grad_norm": 1.7193831893417935, "learning_rate": 8.828593026980328e-06, "loss": 0.8377, "step": 3839 }, { "epoch": 0.25, "grad_norm": 1.6763109900060396, "learning_rate": 8.827926257322306e-06, "loss": 0.6738, "step": 3840 }, { "epoch": 0.25, "grad_norm": 1.9113694672966313, "learning_rate": 8.827259323148056e-06, "loss": 0.7497, "step": 3841 }, { "epoch": 0.25, "grad_norm": 1.4482105472280904, "learning_rate": 8.826592224486243e-06, "loss": 0.6903, "step": 3842 }, { "epoch": 0.25, "grad_norm": 1.81486521571732, "learning_rate": 8.825924961365538e-06, "loss": 0.8075, "step": 3843 }, { "epoch": 0.25, "grad_norm": 1.5265527475696323, "learning_rate": 8.825257533814614e-06, "loss": 0.7289, "step": 3844 }, { "epoch": 0.25, "grad_norm": 2.153310435072796, "learning_rate": 8.824589941862164e-06, "loss": 0.8064, "step": 3845 }, { "epoch": 0.25, "grad_norm": 1.4723234780942347, "learning_rate": 8.823922185536872e-06, "loss": 0.6861, "step": 3846 }, { "epoch": 0.25, "grad_norm": 1.9919074886868553, "learning_rate": 8.82325426486744e-06, "loss": 0.8631, "step": 3847 }, { "epoch": 0.25, "grad_norm": 1.7081102138788347, "learning_rate": 8.822586179882574e-06, "loss": 0.8015, "step": 3848 }, { "epoch": 0.25, "grad_norm": 1.6749340161821118, "learning_rate": 8.821917930610987e-06, "loss": 0.7748, "step": 3849 }, { "epoch": 0.25, "grad_norm": 2.1231808170824675, "learning_rate": 8.821249517081397e-06, "loss": 0.7022, "step": 3850 }, { "epoch": 0.25, "grad_norm": 1.6548651543795285, "learning_rate": 8.820580939322532e-06, "loss": 0.7363, "step": 3851 }, { "epoch": 0.25, "grad_norm": 1.853434488442324, "learning_rate": 8.819912197363128e-06, "loss": 0.7067, "step": 3852 }, { "epoch": 0.25, "grad_norm": 1.6504592802224514, "learning_rate": 8.819243291231922e-06, "loss": 0.7564, "step": 3853 }, { "epoch": 0.25, "grad_norm": 2.144866164229079, "learning_rate": 8.818574220957666e-06, "loss": 0.7568, "step": 3854 }, { "epoch": 0.25, "grad_norm": 1.9586464689412517, "learning_rate": 8.817904986569115e-06, "loss": 0.726, "step": 3855 }, { "epoch": 0.25, "grad_norm": 1.5420776101941416, "learning_rate": 8.81723558809503e-06, "loss": 0.8757, "step": 3856 }, { "epoch": 0.25, "grad_norm": 1.202147455484519, "learning_rate": 8.81656602556418e-06, "loss": 0.5955, "step": 3857 }, { "epoch": 0.25, "grad_norm": 1.6867742867509554, "learning_rate": 8.81589629900534e-06, "loss": 0.8474, "step": 3858 }, { "epoch": 0.25, "grad_norm": 1.6318394699824506, "learning_rate": 8.815226408447298e-06, "loss": 0.7619, "step": 3859 }, { "epoch": 0.25, "grad_norm": 2.035664629196218, "learning_rate": 8.81455635391884e-06, "loss": 0.8126, "step": 3860 }, { "epoch": 0.25, "grad_norm": 1.7736003385587473, "learning_rate": 8.813886135448766e-06, "loss": 0.8175, "step": 3861 }, { "epoch": 0.25, "grad_norm": 1.6355907636428642, "learning_rate": 8.813215753065882e-06, "loss": 0.7339, "step": 3862 }, { "epoch": 0.25, "grad_norm": 1.1217065537858248, "learning_rate": 8.812545206798995e-06, "loss": 0.7441, "step": 3863 }, { "epoch": 0.25, "grad_norm": 1.8074973254146742, "learning_rate": 8.811874496676927e-06, "loss": 0.9873, "step": 3864 }, { "epoch": 0.25, "grad_norm": 1.7682894229674497, "learning_rate": 8.811203622728504e-06, "loss": 0.9053, "step": 3865 }, { "epoch": 0.25, "grad_norm": 2.933922472221376, "learning_rate": 8.810532584982557e-06, "loss": 0.6814, "step": 3866 }, { "epoch": 0.25, "grad_norm": 1.8780496375273947, "learning_rate": 8.809861383467926e-06, "loss": 0.8386, "step": 3867 }, { "epoch": 0.25, "grad_norm": 1.2998783416299882, "learning_rate": 8.80919001821346e-06, "loss": 0.7942, "step": 3868 }, { "epoch": 0.25, "grad_norm": 1.1930629548124927, "learning_rate": 8.808518489248009e-06, "loss": 0.6213, "step": 3869 }, { "epoch": 0.25, "grad_norm": 1.5500712427046905, "learning_rate": 8.807846796600436e-06, "loss": 0.766, "step": 3870 }, { "epoch": 0.25, "grad_norm": 1.7452783999099057, "learning_rate": 8.807174940299613e-06, "loss": 0.8627, "step": 3871 }, { "epoch": 0.25, "grad_norm": 1.6672229785749546, "learning_rate": 8.806502920374406e-06, "loss": 0.7384, "step": 3872 }, { "epoch": 0.25, "grad_norm": 1.9308271682807898, "learning_rate": 8.805830736853705e-06, "loss": 0.694, "step": 3873 }, { "epoch": 0.25, "grad_norm": 1.85800624753589, "learning_rate": 8.805158389766395e-06, "loss": 0.6667, "step": 3874 }, { "epoch": 0.25, "grad_norm": 1.540517802567628, "learning_rate": 8.804485879141375e-06, "loss": 0.7841, "step": 3875 }, { "epoch": 0.25, "grad_norm": 2.3451528201480234, "learning_rate": 8.803813205007544e-06, "loss": 0.8768, "step": 3876 }, { "epoch": 0.25, "grad_norm": 1.0935434555922579, "learning_rate": 8.803140367393815e-06, "loss": 0.5944, "step": 3877 }, { "epoch": 0.25, "grad_norm": 1.7521343165938423, "learning_rate": 8.802467366329106e-06, "loss": 0.8326, "step": 3878 }, { "epoch": 0.25, "grad_norm": 1.7328571811166873, "learning_rate": 8.801794201842337e-06, "loss": 0.8173, "step": 3879 }, { "epoch": 0.25, "grad_norm": 1.636041170458891, "learning_rate": 8.801120873962445e-06, "loss": 0.8104, "step": 3880 }, { "epoch": 0.25, "grad_norm": 1.743045659778306, "learning_rate": 8.800447382718362e-06, "loss": 0.7055, "step": 3881 }, { "epoch": 0.25, "grad_norm": 1.0335170435614804, "learning_rate": 8.799773728139038e-06, "loss": 0.6284, "step": 3882 }, { "epoch": 0.25, "grad_norm": 1.6509549751378705, "learning_rate": 8.799099910253424e-06, "loss": 0.7007, "step": 3883 }, { "epoch": 0.25, "grad_norm": 1.8371531182782408, "learning_rate": 8.798425929090477e-06, "loss": 0.8564, "step": 3884 }, { "epoch": 0.25, "grad_norm": 2.463328953103161, "learning_rate": 8.797751784679167e-06, "loss": 0.8715, "step": 3885 }, { "epoch": 0.25, "grad_norm": 1.3561107134340111, "learning_rate": 8.797077477048464e-06, "loss": 0.733, "step": 3886 }, { "epoch": 0.25, "grad_norm": 1.743028036169716, "learning_rate": 8.796403006227352e-06, "loss": 0.734, "step": 3887 }, { "epoch": 0.25, "grad_norm": 1.595163893844569, "learning_rate": 8.795728372244813e-06, "loss": 0.772, "step": 3888 }, { "epoch": 0.25, "grad_norm": 1.8070978763377274, "learning_rate": 8.795053575129846e-06, "loss": 0.8352, "step": 3889 }, { "epoch": 0.25, "grad_norm": 1.5630732332937252, "learning_rate": 8.794378614911452e-06, "loss": 0.8283, "step": 3890 }, { "epoch": 0.25, "grad_norm": 1.5965986618852543, "learning_rate": 8.793703491618638e-06, "loss": 0.7631, "step": 3891 }, { "epoch": 0.25, "grad_norm": 1.3443257437090144, "learning_rate": 8.793028205280419e-06, "loss": 0.6535, "step": 3892 }, { "epoch": 0.25, "grad_norm": 1.5891299464727917, "learning_rate": 8.792352755925817e-06, "loss": 0.8195, "step": 3893 }, { "epoch": 0.25, "grad_norm": 1.7039666395842084, "learning_rate": 8.791677143583863e-06, "loss": 0.8022, "step": 3894 }, { "epoch": 0.25, "grad_norm": 1.5122698605045324, "learning_rate": 8.791001368283593e-06, "loss": 0.7564, "step": 3895 }, { "epoch": 0.25, "grad_norm": 1.533901710117323, "learning_rate": 8.79032543005405e-06, "loss": 0.9151, "step": 3896 }, { "epoch": 0.25, "grad_norm": 1.6748410826317606, "learning_rate": 8.789649328924286e-06, "loss": 0.7902, "step": 3897 }, { "epoch": 0.25, "grad_norm": 1.8793148336837167, "learning_rate": 8.788973064923355e-06, "loss": 0.8453, "step": 3898 }, { "epoch": 0.25, "grad_norm": 1.5948259008508872, "learning_rate": 8.788296638080325e-06, "loss": 0.8532, "step": 3899 }, { "epoch": 0.25, "grad_norm": 1.6262249920210674, "learning_rate": 8.787620048424264e-06, "loss": 0.7463, "step": 3900 }, { "epoch": 0.25, "grad_norm": 1.5796259025225694, "learning_rate": 8.786943295984254e-06, "loss": 0.6811, "step": 3901 }, { "epoch": 0.25, "grad_norm": 1.6465934568918146, "learning_rate": 8.786266380789377e-06, "loss": 0.8252, "step": 3902 }, { "epoch": 0.25, "grad_norm": 1.735921095541719, "learning_rate": 8.785589302868729e-06, "loss": 0.9157, "step": 3903 }, { "epoch": 0.25, "grad_norm": 2.1131555755515, "learning_rate": 8.784912062251405e-06, "loss": 0.8494, "step": 3904 }, { "epoch": 0.25, "grad_norm": 1.7071966776099892, "learning_rate": 8.784234658966514e-06, "loss": 0.6902, "step": 3905 }, { "epoch": 0.25, "grad_norm": 1.5347221966595677, "learning_rate": 8.783557093043172e-06, "loss": 0.72, "step": 3906 }, { "epoch": 0.25, "grad_norm": 1.0543346008838026, "learning_rate": 8.782879364510494e-06, "loss": 0.6266, "step": 3907 }, { "epoch": 0.25, "grad_norm": 1.762794079072018, "learning_rate": 8.78220147339761e-06, "loss": 0.8522, "step": 3908 }, { "epoch": 0.25, "grad_norm": 1.5620473292224188, "learning_rate": 8.781523419733655e-06, "loss": 0.7849, "step": 3909 }, { "epoch": 0.25, "grad_norm": 1.6883893837709358, "learning_rate": 8.780845203547769e-06, "loss": 0.7852, "step": 3910 }, { "epoch": 0.25, "grad_norm": 1.5988480681140622, "learning_rate": 8.7801668248691e-06, "loss": 0.976, "step": 3911 }, { "epoch": 0.25, "grad_norm": 2.347227550943021, "learning_rate": 8.779488283726806e-06, "loss": 0.8341, "step": 3912 }, { "epoch": 0.25, "grad_norm": 1.1420233617693496, "learning_rate": 8.778809580150044e-06, "loss": 0.616, "step": 3913 }, { "epoch": 0.25, "grad_norm": 1.1692564714784432, "learning_rate": 8.778130714167991e-06, "loss": 0.6922, "step": 3914 }, { "epoch": 0.25, "grad_norm": 1.972609887573859, "learning_rate": 8.777451685809817e-06, "loss": 0.6465, "step": 3915 }, { "epoch": 0.25, "grad_norm": 1.6811021082912745, "learning_rate": 8.776772495104705e-06, "loss": 0.813, "step": 3916 }, { "epoch": 0.25, "grad_norm": 1.534508144717871, "learning_rate": 8.77609314208185e-06, "loss": 0.7864, "step": 3917 }, { "epoch": 0.25, "grad_norm": 1.6434914738598267, "learning_rate": 8.775413626770447e-06, "loss": 0.835, "step": 3918 }, { "epoch": 0.25, "grad_norm": 1.6312067678124962, "learning_rate": 8.774733949199696e-06, "loss": 0.8111, "step": 3919 }, { "epoch": 0.25, "grad_norm": 1.5269347879919377, "learning_rate": 8.774054109398815e-06, "loss": 0.7242, "step": 3920 }, { "epoch": 0.25, "grad_norm": 1.603415489509232, "learning_rate": 8.773374107397017e-06, "loss": 0.8827, "step": 3921 }, { "epoch": 0.25, "grad_norm": 3.1533973975482312, "learning_rate": 8.772693943223529e-06, "loss": 0.8596, "step": 3922 }, { "epoch": 0.25, "grad_norm": 1.6372485439750142, "learning_rate": 8.772013616907584e-06, "loss": 0.6968, "step": 3923 }, { "epoch": 0.25, "grad_norm": 1.6144039385824585, "learning_rate": 8.771333128478419e-06, "loss": 0.8144, "step": 3924 }, { "epoch": 0.25, "grad_norm": 1.1962990233459576, "learning_rate": 8.77065247796528e-06, "loss": 0.6274, "step": 3925 }, { "epoch": 0.25, "grad_norm": 1.5814112636840443, "learning_rate": 8.769971665397423e-06, "loss": 0.7467, "step": 3926 }, { "epoch": 0.25, "grad_norm": 1.53777554567061, "learning_rate": 8.769290690804104e-06, "loss": 0.8313, "step": 3927 }, { "epoch": 0.25, "grad_norm": 1.5295090130503564, "learning_rate": 8.768609554214591e-06, "loss": 0.7599, "step": 3928 }, { "epoch": 0.25, "grad_norm": 1.7092294188638697, "learning_rate": 8.767928255658158e-06, "loss": 0.7053, "step": 3929 }, { "epoch": 0.25, "grad_norm": 1.3053488643343159, "learning_rate": 8.767246795164089e-06, "loss": 0.7249, "step": 3930 }, { "epoch": 0.25, "grad_norm": 1.3776958440094642, "learning_rate": 8.766565172761666e-06, "loss": 0.7431, "step": 3931 }, { "epoch": 0.25, "grad_norm": 1.5833156570483848, "learning_rate": 8.765883388480188e-06, "loss": 0.772, "step": 3932 }, { "epoch": 0.25, "grad_norm": 1.683384183270417, "learning_rate": 8.765201442348953e-06, "loss": 0.784, "step": 3933 }, { "epoch": 0.25, "grad_norm": 1.6815093650934936, "learning_rate": 8.764519334397275e-06, "loss": 0.6547, "step": 3934 }, { "epoch": 0.25, "grad_norm": 1.3278401784250815, "learning_rate": 8.763837064654464e-06, "loss": 0.6614, "step": 3935 }, { "epoch": 0.25, "grad_norm": 1.5115057525828537, "learning_rate": 8.763154633149846e-06, "loss": 0.7035, "step": 3936 }, { "epoch": 0.25, "grad_norm": 1.784111594543393, "learning_rate": 8.762472039912748e-06, "loss": 0.785, "step": 3937 }, { "epoch": 0.25, "grad_norm": 1.4920322809313478, "learning_rate": 8.761789284972508e-06, "loss": 0.7422, "step": 3938 }, { "epoch": 0.25, "grad_norm": 1.6666542302894785, "learning_rate": 8.76110636835847e-06, "loss": 0.7608, "step": 3939 }, { "epoch": 0.25, "grad_norm": 1.8827026829238491, "learning_rate": 8.760423290099983e-06, "loss": 0.843, "step": 3940 }, { "epoch": 0.25, "grad_norm": 1.609097283884154, "learning_rate": 8.759740050226406e-06, "loss": 0.8154, "step": 3941 }, { "epoch": 0.25, "grad_norm": 1.4581840804175108, "learning_rate": 8.7590566487671e-06, "loss": 0.9189, "step": 3942 }, { "epoch": 0.25, "grad_norm": 1.691499514004638, "learning_rate": 8.758373085751439e-06, "loss": 0.7547, "step": 3943 }, { "epoch": 0.25, "grad_norm": 1.8833572313611084, "learning_rate": 8.7576893612088e-06, "loss": 0.8808, "step": 3944 }, { "epoch": 0.25, "grad_norm": 1.0848672709282192, "learning_rate": 8.75700547516857e-06, "loss": 0.6238, "step": 3945 }, { "epoch": 0.25, "grad_norm": 1.5710437166877047, "learning_rate": 8.756321427660137e-06, "loss": 0.7231, "step": 3946 }, { "epoch": 0.25, "grad_norm": 1.1397551187554222, "learning_rate": 8.755637218712902e-06, "loss": 0.6538, "step": 3947 }, { "epoch": 0.25, "grad_norm": 1.076800866478013, "learning_rate": 8.754952848356272e-06, "loss": 0.735, "step": 3948 }, { "epoch": 0.25, "grad_norm": 1.5903614050875796, "learning_rate": 8.75426831661966e-06, "loss": 0.8493, "step": 3949 }, { "epoch": 0.25, "grad_norm": 1.8395626072989586, "learning_rate": 8.753583623532483e-06, "loss": 0.9049, "step": 3950 }, { "epoch": 0.25, "grad_norm": 1.9117400020382869, "learning_rate": 8.75289876912417e-06, "loss": 0.8745, "step": 3951 }, { "epoch": 0.25, "grad_norm": 1.8301891233067689, "learning_rate": 8.752213753424153e-06, "loss": 0.7419, "step": 3952 }, { "epoch": 0.25, "grad_norm": 1.186351096853561, "learning_rate": 8.751528576461873e-06, "loss": 0.6764, "step": 3953 }, { "epoch": 0.25, "grad_norm": 1.707380820945174, "learning_rate": 8.75084323826678e-06, "loss": 0.8385, "step": 3954 }, { "epoch": 0.25, "grad_norm": 1.549633249548265, "learning_rate": 8.750157738868323e-06, "loss": 0.7976, "step": 3955 }, { "epoch": 0.25, "grad_norm": 1.4899287956006915, "learning_rate": 8.749472078295968e-06, "loss": 0.8499, "step": 3956 }, { "epoch": 0.25, "grad_norm": 1.7679910972137167, "learning_rate": 8.748786256579182e-06, "loss": 0.8382, "step": 3957 }, { "epoch": 0.25, "grad_norm": 1.624271548169347, "learning_rate": 8.748100273747442e-06, "loss": 0.9651, "step": 3958 }, { "epoch": 0.25, "grad_norm": 1.8344626343939443, "learning_rate": 8.747414129830225e-06, "loss": 0.8117, "step": 3959 }, { "epoch": 0.25, "grad_norm": 1.6152306059577448, "learning_rate": 8.746727824857024e-06, "loss": 0.7583, "step": 3960 }, { "epoch": 0.25, "grad_norm": 1.7242117685952287, "learning_rate": 8.746041358857334e-06, "loss": 0.732, "step": 3961 }, { "epoch": 0.25, "grad_norm": 1.6412568215655745, "learning_rate": 8.74535473186066e-06, "loss": 0.8217, "step": 3962 }, { "epoch": 0.25, "grad_norm": 1.6433326026801804, "learning_rate": 8.744667943896507e-06, "loss": 0.7783, "step": 3963 }, { "epoch": 0.25, "grad_norm": 1.3959667387293038, "learning_rate": 8.743980994994394e-06, "loss": 0.7401, "step": 3964 }, { "epoch": 0.25, "grad_norm": 1.1691453729901644, "learning_rate": 8.743293885183847e-06, "loss": 0.6854, "step": 3965 }, { "epoch": 0.25, "grad_norm": 1.767086394619879, "learning_rate": 8.742606614494395e-06, "loss": 0.8048, "step": 3966 }, { "epoch": 0.25, "grad_norm": 1.645731863290697, "learning_rate": 8.741919182955573e-06, "loss": 0.7043, "step": 3967 }, { "epoch": 0.25, "grad_norm": 1.5557091862910382, "learning_rate": 8.741231590596928e-06, "loss": 0.7231, "step": 3968 }, { "epoch": 0.25, "grad_norm": 2.971535187641906, "learning_rate": 8.740543837448012e-06, "loss": 0.918, "step": 3969 }, { "epoch": 0.25, "grad_norm": 1.5885594066564968, "learning_rate": 8.73985592353838e-06, "loss": 0.7168, "step": 3970 }, { "epoch": 0.25, "grad_norm": 1.799712348860701, "learning_rate": 8.739167848897601e-06, "loss": 0.7984, "step": 3971 }, { "epoch": 0.25, "grad_norm": 1.5611452824028293, "learning_rate": 8.738479613555243e-06, "loss": 1.0503, "step": 3972 }, { "epoch": 0.25, "grad_norm": 1.6579802559871784, "learning_rate": 8.737791217540887e-06, "loss": 0.8716, "step": 3973 }, { "epoch": 0.25, "grad_norm": 1.776245832096215, "learning_rate": 8.73710266088412e-06, "loss": 0.8925, "step": 3974 }, { "epoch": 0.25, "grad_norm": 2.1497675943653216, "learning_rate": 8.736413943614533e-06, "loss": 0.7588, "step": 3975 }, { "epoch": 0.25, "grad_norm": 1.5547491185298705, "learning_rate": 8.735725065761724e-06, "loss": 0.6752, "step": 3976 }, { "epoch": 0.25, "grad_norm": 1.8244173773108736, "learning_rate": 8.735036027355304e-06, "loss": 0.7492, "step": 3977 }, { "epoch": 0.25, "grad_norm": 1.6693034201016161, "learning_rate": 8.734346828424881e-06, "loss": 0.8792, "step": 3978 }, { "epoch": 0.25, "grad_norm": 1.4976336434455164, "learning_rate": 8.733657469000081e-06, "loss": 0.8236, "step": 3979 }, { "epoch": 0.25, "grad_norm": 1.7752398816698904, "learning_rate": 8.732967949110528e-06, "loss": 0.808, "step": 3980 }, { "epoch": 0.25, "grad_norm": 1.5643012349904115, "learning_rate": 8.732278268785856e-06, "loss": 0.8605, "step": 3981 }, { "epoch": 0.25, "grad_norm": 1.4991877199124546, "learning_rate": 8.731588428055708e-06, "loss": 0.7346, "step": 3982 }, { "epoch": 0.25, "grad_norm": 1.6850930558699395, "learning_rate": 8.730898426949728e-06, "loss": 0.7721, "step": 3983 }, { "epoch": 0.26, "grad_norm": 1.765371625751711, "learning_rate": 8.730208265497575e-06, "loss": 0.8461, "step": 3984 }, { "epoch": 0.26, "grad_norm": 1.6083782980123016, "learning_rate": 8.729517943728909e-06, "loss": 0.7176, "step": 3985 }, { "epoch": 0.26, "grad_norm": 1.9465005795694879, "learning_rate": 8.728827461673398e-06, "loss": 0.8396, "step": 3986 }, { "epoch": 0.26, "grad_norm": 1.7903038182417685, "learning_rate": 8.728136819360717e-06, "loss": 0.7589, "step": 3987 }, { "epoch": 0.26, "grad_norm": 1.1201318265733067, "learning_rate": 8.727446016820553e-06, "loss": 0.6675, "step": 3988 }, { "epoch": 0.26, "grad_norm": 2.4080176650716814, "learning_rate": 8.726755054082589e-06, "loss": 0.7652, "step": 3989 }, { "epoch": 0.26, "grad_norm": 1.6018385174499727, "learning_rate": 8.726063931176522e-06, "loss": 0.7796, "step": 3990 }, { "epoch": 0.26, "grad_norm": 1.6143017478970687, "learning_rate": 8.72537264813206e-06, "loss": 0.6946, "step": 3991 }, { "epoch": 0.26, "grad_norm": 1.7251541614329369, "learning_rate": 8.724681204978908e-06, "loss": 0.7508, "step": 3992 }, { "epoch": 0.26, "grad_norm": 1.8753718933516823, "learning_rate": 8.723989601746785e-06, "loss": 0.8161, "step": 3993 }, { "epoch": 0.26, "grad_norm": 1.489413532319722, "learning_rate": 8.723297838465414e-06, "loss": 0.6478, "step": 3994 }, { "epoch": 0.26, "grad_norm": 1.5835989144972225, "learning_rate": 8.722605915164526e-06, "loss": 0.7847, "step": 3995 }, { "epoch": 0.26, "grad_norm": 1.1935526548030833, "learning_rate": 8.721913831873859e-06, "loss": 0.7278, "step": 3996 }, { "epoch": 0.26, "grad_norm": 1.7819433924209762, "learning_rate": 8.721221588623154e-06, "loss": 0.8241, "step": 3997 }, { "epoch": 0.26, "grad_norm": 1.6264898329826192, "learning_rate": 8.720529185442167e-06, "loss": 0.6607, "step": 3998 }, { "epoch": 0.26, "grad_norm": 1.7455131827605892, "learning_rate": 8.71983662236065e-06, "loss": 0.702, "step": 3999 }, { "epoch": 0.26, "grad_norm": 1.6721769964400144, "learning_rate": 8.719143899408376e-06, "loss": 0.7638, "step": 4000 }, { "epoch": 0.26, "grad_norm": 3.133365934130329, "learning_rate": 8.718451016615108e-06, "loss": 0.7624, "step": 4001 }, { "epoch": 0.26, "grad_norm": 1.041284058588272, "learning_rate": 8.71775797401063e-06, "loss": 0.5765, "step": 4002 }, { "epoch": 0.26, "grad_norm": 1.9884325673194345, "learning_rate": 8.717064771624728e-06, "loss": 0.7636, "step": 4003 }, { "epoch": 0.26, "grad_norm": 1.8024045130227846, "learning_rate": 8.716371409487191e-06, "loss": 0.8147, "step": 4004 }, { "epoch": 0.26, "grad_norm": 1.6586846952101497, "learning_rate": 8.715677887627822e-06, "loss": 0.9313, "step": 4005 }, { "epoch": 0.26, "grad_norm": 1.4989577309077264, "learning_rate": 8.714984206076423e-06, "loss": 0.7377, "step": 4006 }, { "epoch": 0.26, "grad_norm": 1.8666313215047567, "learning_rate": 8.71429036486281e-06, "loss": 0.8732, "step": 4007 }, { "epoch": 0.26, "grad_norm": 1.6061560511398878, "learning_rate": 8.713596364016802e-06, "loss": 0.7568, "step": 4008 }, { "epoch": 0.26, "grad_norm": 0.9514650337087606, "learning_rate": 8.712902203568226e-06, "loss": 0.5917, "step": 4009 }, { "epoch": 0.26, "grad_norm": 1.6280895469550012, "learning_rate": 8.712207883546913e-06, "loss": 0.8091, "step": 4010 }, { "epoch": 0.26, "grad_norm": 1.851996050865811, "learning_rate": 8.711513403982708e-06, "loss": 0.9756, "step": 4011 }, { "epoch": 0.26, "grad_norm": 1.5359373552694566, "learning_rate": 8.710818764905455e-06, "loss": 0.6608, "step": 4012 }, { "epoch": 0.26, "grad_norm": 1.750070067632335, "learning_rate": 8.710123966345008e-06, "loss": 0.9137, "step": 4013 }, { "epoch": 0.26, "grad_norm": 1.7117280938780604, "learning_rate": 8.70942900833123e-06, "loss": 0.725, "step": 4014 }, { "epoch": 0.26, "grad_norm": 1.7128690556665576, "learning_rate": 8.708733890893987e-06, "loss": 0.7469, "step": 4015 }, { "epoch": 0.26, "grad_norm": 1.6640389969443823, "learning_rate": 8.708038614063156e-06, "loss": 0.7788, "step": 4016 }, { "epoch": 0.26, "grad_norm": 1.5998756335900783, "learning_rate": 8.707343177868616e-06, "loss": 0.7417, "step": 4017 }, { "epoch": 0.26, "grad_norm": 1.5765213144436427, "learning_rate": 8.706647582340258e-06, "loss": 0.8392, "step": 4018 }, { "epoch": 0.26, "grad_norm": 1.6956249809887656, "learning_rate": 8.705951827507974e-06, "loss": 0.6846, "step": 4019 }, { "epoch": 0.26, "grad_norm": 1.9084588404610392, "learning_rate": 8.705255913401668e-06, "loss": 0.846, "step": 4020 }, { "epoch": 0.26, "grad_norm": 1.5831749293635267, "learning_rate": 8.704559840051249e-06, "loss": 0.7507, "step": 4021 }, { "epoch": 0.26, "grad_norm": 1.5827550940392454, "learning_rate": 8.703863607486631e-06, "loss": 0.7057, "step": 4022 }, { "epoch": 0.26, "grad_norm": 1.527478746505697, "learning_rate": 8.70316721573774e-06, "loss": 0.7274, "step": 4023 }, { "epoch": 0.26, "grad_norm": 1.6361432858303213, "learning_rate": 8.702470664834503e-06, "loss": 0.6666, "step": 4024 }, { "epoch": 0.26, "grad_norm": 1.7380832171176652, "learning_rate": 8.701773954806856e-06, "loss": 0.8803, "step": 4025 }, { "epoch": 0.26, "grad_norm": 1.5433200742636974, "learning_rate": 8.701077085684744e-06, "loss": 0.7508, "step": 4026 }, { "epoch": 0.26, "grad_norm": 1.808741381670794, "learning_rate": 8.700380057498115e-06, "loss": 0.7787, "step": 4027 }, { "epoch": 0.26, "grad_norm": 1.8605212956873967, "learning_rate": 8.699682870276927e-06, "loss": 0.8018, "step": 4028 }, { "epoch": 0.26, "grad_norm": 1.828619176247998, "learning_rate": 8.698985524051143e-06, "loss": 0.9155, "step": 4029 }, { "epoch": 0.26, "grad_norm": 1.0261757636819777, "learning_rate": 8.698288018850735e-06, "loss": 0.7033, "step": 4030 }, { "epoch": 0.26, "grad_norm": 1.4340861649807335, "learning_rate": 8.697590354705679e-06, "loss": 0.6675, "step": 4031 }, { "epoch": 0.26, "grad_norm": 1.720033209906944, "learning_rate": 8.696892531645958e-06, "loss": 0.8009, "step": 4032 }, { "epoch": 0.26, "grad_norm": 1.5884371100164465, "learning_rate": 8.696194549701564e-06, "loss": 0.7912, "step": 4033 }, { "epoch": 0.26, "grad_norm": 1.5328878099675747, "learning_rate": 8.695496408902496e-06, "loss": 0.7132, "step": 4034 }, { "epoch": 0.26, "grad_norm": 1.9828633245343843, "learning_rate": 8.694798109278758e-06, "loss": 0.6987, "step": 4035 }, { "epoch": 0.26, "grad_norm": 1.5841008066095414, "learning_rate": 8.69409965086036e-06, "loss": 0.7438, "step": 4036 }, { "epoch": 0.26, "grad_norm": 1.645479309551935, "learning_rate": 8.693401033677322e-06, "loss": 0.7718, "step": 4037 }, { "epoch": 0.26, "grad_norm": 1.6441946445136193, "learning_rate": 8.692702257759669e-06, "loss": 0.7486, "step": 4038 }, { "epoch": 0.26, "grad_norm": 1.7576714502625401, "learning_rate": 8.69200332313743e-06, "loss": 0.8622, "step": 4039 }, { "epoch": 0.26, "grad_norm": 2.005808553175904, "learning_rate": 8.691304229840649e-06, "loss": 0.6628, "step": 4040 }, { "epoch": 0.26, "grad_norm": 2.0239737916584684, "learning_rate": 8.690604977899369e-06, "loss": 0.8149, "step": 4041 }, { "epoch": 0.26, "grad_norm": 1.6131115293945175, "learning_rate": 8.689905567343639e-06, "loss": 0.8396, "step": 4042 }, { "epoch": 0.26, "grad_norm": 1.9936088260162421, "learning_rate": 8.689205998203522e-06, "loss": 0.728, "step": 4043 }, { "epoch": 0.26, "grad_norm": 1.9354648652889952, "learning_rate": 8.688506270509085e-06, "loss": 0.8296, "step": 4044 }, { "epoch": 0.26, "grad_norm": 1.5983177929850545, "learning_rate": 8.6878063842904e-06, "loss": 0.7856, "step": 4045 }, { "epoch": 0.26, "grad_norm": 1.8089289908420756, "learning_rate": 8.687106339577543e-06, "loss": 0.7445, "step": 4046 }, { "epoch": 0.26, "grad_norm": 1.236342189682186, "learning_rate": 8.686406136400604e-06, "loss": 0.6679, "step": 4047 }, { "epoch": 0.26, "grad_norm": 1.527086277716354, "learning_rate": 8.685705774789677e-06, "loss": 0.7383, "step": 4048 }, { "epoch": 0.26, "grad_norm": 2.147358090089281, "learning_rate": 8.685005254774859e-06, "loss": 0.779, "step": 4049 }, { "epoch": 0.26, "grad_norm": 1.7211533775647057, "learning_rate": 8.68430457638626e-06, "loss": 0.7801, "step": 4050 }, { "epoch": 0.26, "grad_norm": 1.6790381081290942, "learning_rate": 8.68360373965399e-06, "loss": 0.8589, "step": 4051 }, { "epoch": 0.26, "grad_norm": 1.690140303585107, "learning_rate": 8.682902744608173e-06, "loss": 0.8268, "step": 4052 }, { "epoch": 0.26, "grad_norm": 1.351168862375643, "learning_rate": 8.682201591278934e-06, "loss": 0.7144, "step": 4053 }, { "epoch": 0.26, "grad_norm": 1.7202768915308209, "learning_rate": 8.681500279696408e-06, "loss": 0.7441, "step": 4054 }, { "epoch": 0.26, "grad_norm": 1.5481136369490822, "learning_rate": 8.680798809890737e-06, "loss": 0.8122, "step": 4055 }, { "epoch": 0.26, "grad_norm": 1.4626572228293166, "learning_rate": 8.680097181892067e-06, "loss": 0.7527, "step": 4056 }, { "epoch": 0.26, "grad_norm": 1.3963051650927218, "learning_rate": 8.679395395730552e-06, "loss": 0.7196, "step": 4057 }, { "epoch": 0.26, "grad_norm": 1.61132909279697, "learning_rate": 8.678693451436355e-06, "loss": 0.7673, "step": 4058 }, { "epoch": 0.26, "grad_norm": 1.4729522518906366, "learning_rate": 8.677991349039644e-06, "loss": 0.8617, "step": 4059 }, { "epoch": 0.26, "grad_norm": 1.5996166425640646, "learning_rate": 8.677289088570594e-06, "loss": 0.7841, "step": 4060 }, { "epoch": 0.26, "grad_norm": 1.5660184504167411, "learning_rate": 8.676586670059383e-06, "loss": 0.8347, "step": 4061 }, { "epoch": 0.26, "grad_norm": 1.4762919013003535, "learning_rate": 8.675884093536206e-06, "loss": 0.7963, "step": 4062 }, { "epoch": 0.26, "grad_norm": 1.5596175023133236, "learning_rate": 8.675181359031253e-06, "loss": 0.7791, "step": 4063 }, { "epoch": 0.26, "grad_norm": 1.8229747453248837, "learning_rate": 8.674478466574727e-06, "loss": 0.8308, "step": 4064 }, { "epoch": 0.26, "grad_norm": 1.5099465750610528, "learning_rate": 8.673775416196838e-06, "loss": 0.7442, "step": 4065 }, { "epoch": 0.26, "grad_norm": 1.1159162719357283, "learning_rate": 8.673072207927805e-06, "loss": 0.6868, "step": 4066 }, { "epoch": 0.26, "grad_norm": 1.7006114537266934, "learning_rate": 8.672368841797842e-06, "loss": 0.7565, "step": 4067 }, { "epoch": 0.26, "grad_norm": 1.528508588973013, "learning_rate": 8.671665317837185e-06, "loss": 0.7778, "step": 4068 }, { "epoch": 0.26, "grad_norm": 1.5641188552039502, "learning_rate": 8.670961636076067e-06, "loss": 0.9469, "step": 4069 }, { "epoch": 0.26, "grad_norm": 1.7709032877561959, "learning_rate": 8.670257796544732e-06, "loss": 0.82, "step": 4070 }, { "epoch": 0.26, "grad_norm": 1.8431468715436574, "learning_rate": 8.669553799273429e-06, "loss": 0.8802, "step": 4071 }, { "epoch": 0.26, "grad_norm": 1.1948707238069893, "learning_rate": 8.668849644292416e-06, "loss": 0.7247, "step": 4072 }, { "epoch": 0.26, "grad_norm": 1.4386499309233476, "learning_rate": 8.668145331631953e-06, "loss": 0.7616, "step": 4073 }, { "epoch": 0.26, "grad_norm": 1.5739709554782273, "learning_rate": 8.667440861322312e-06, "loss": 0.8771, "step": 4074 }, { "epoch": 0.26, "grad_norm": 1.7228480849533738, "learning_rate": 8.666736233393769e-06, "loss": 0.7944, "step": 4075 }, { "epoch": 0.26, "grad_norm": 1.6961678972980225, "learning_rate": 8.666031447876607e-06, "loss": 0.9139, "step": 4076 }, { "epoch": 0.26, "grad_norm": 1.0908808889971395, "learning_rate": 8.665326504801117e-06, "loss": 0.6749, "step": 4077 }, { "epoch": 0.26, "grad_norm": 1.7296038248915333, "learning_rate": 8.664621404197598e-06, "loss": 0.7752, "step": 4078 }, { "epoch": 0.26, "grad_norm": 1.50964623525495, "learning_rate": 8.663916146096348e-06, "loss": 0.7045, "step": 4079 }, { "epoch": 0.26, "grad_norm": 1.6178614265048579, "learning_rate": 8.663210730527683e-06, "loss": 0.7577, "step": 4080 }, { "epoch": 0.26, "grad_norm": 1.8502497597478722, "learning_rate": 8.662505157521918e-06, "loss": 0.8713, "step": 4081 }, { "epoch": 0.26, "grad_norm": 1.696173253111632, "learning_rate": 8.661799427109377e-06, "loss": 0.5749, "step": 4082 }, { "epoch": 0.26, "grad_norm": 2.190678156673568, "learning_rate": 8.66109353932039e-06, "loss": 0.7668, "step": 4083 }, { "epoch": 0.26, "grad_norm": 1.7196141180698838, "learning_rate": 8.660387494185298e-06, "loss": 0.834, "step": 4084 }, { "epoch": 0.26, "grad_norm": 2.8528785609224965, "learning_rate": 8.659681291734441e-06, "loss": 0.7849, "step": 4085 }, { "epoch": 0.26, "grad_norm": 1.044244387932859, "learning_rate": 8.658974931998174e-06, "loss": 0.7092, "step": 4086 }, { "epoch": 0.26, "grad_norm": 1.6048504099830794, "learning_rate": 8.658268415006853e-06, "loss": 0.7567, "step": 4087 }, { "epoch": 0.26, "grad_norm": 1.6785961420587834, "learning_rate": 8.657561740790841e-06, "loss": 0.8112, "step": 4088 }, { "epoch": 0.26, "grad_norm": 1.5509126181077109, "learning_rate": 8.656854909380512e-06, "loss": 0.6742, "step": 4089 }, { "epoch": 0.26, "grad_norm": 1.023840766198388, "learning_rate": 8.656147920806241e-06, "loss": 0.7038, "step": 4090 }, { "epoch": 0.26, "grad_norm": 1.6695680768423835, "learning_rate": 8.655440775098418e-06, "loss": 0.7539, "step": 4091 }, { "epoch": 0.26, "grad_norm": 2.2351891785885654, "learning_rate": 8.65473347228743e-06, "loss": 0.8059, "step": 4092 }, { "epoch": 0.26, "grad_norm": 1.7496372844531707, "learning_rate": 8.654026012403678e-06, "loss": 0.8078, "step": 4093 }, { "epoch": 0.26, "grad_norm": 1.682891660481985, "learning_rate": 8.653318395477565e-06, "loss": 0.7241, "step": 4094 }, { "epoch": 0.26, "grad_norm": 1.6881153665264355, "learning_rate": 8.652610621539505e-06, "loss": 0.7691, "step": 4095 }, { "epoch": 0.26, "grad_norm": 1.713958826708956, "learning_rate": 8.651902690619916e-06, "loss": 0.7393, "step": 4096 }, { "epoch": 0.26, "grad_norm": 1.6146127757680395, "learning_rate": 8.651194602749223e-06, "loss": 0.7016, "step": 4097 }, { "epoch": 0.26, "grad_norm": 1.729078311064582, "learning_rate": 8.650486357957856e-06, "loss": 0.742, "step": 4098 }, { "epoch": 0.26, "grad_norm": 1.572593343629676, "learning_rate": 8.649777956276257e-06, "loss": 0.7179, "step": 4099 }, { "epoch": 0.26, "grad_norm": 2.320594528656888, "learning_rate": 8.649069397734873e-06, "loss": 0.7698, "step": 4100 }, { "epoch": 0.26, "grad_norm": 2.077884019054971, "learning_rate": 8.648360682364153e-06, "loss": 0.7983, "step": 4101 }, { "epoch": 0.26, "grad_norm": 2.316125865760831, "learning_rate": 8.647651810194556e-06, "loss": 0.8083, "step": 4102 }, { "epoch": 0.26, "grad_norm": 1.781111847788101, "learning_rate": 8.646942781256548e-06, "loss": 0.832, "step": 4103 }, { "epoch": 0.26, "grad_norm": 1.5191678825805934, "learning_rate": 8.646233595580604e-06, "loss": 0.7865, "step": 4104 }, { "epoch": 0.26, "grad_norm": 1.6041799324846608, "learning_rate": 8.645524253197202e-06, "loss": 0.7314, "step": 4105 }, { "epoch": 0.26, "grad_norm": 1.6970523599426472, "learning_rate": 8.644814754136827e-06, "loss": 0.6904, "step": 4106 }, { "epoch": 0.26, "grad_norm": 1.729283973359414, "learning_rate": 8.644105098429975e-06, "loss": 0.8862, "step": 4107 }, { "epoch": 0.26, "grad_norm": 1.1739128897606212, "learning_rate": 8.64339528610714e-06, "loss": 0.6544, "step": 4108 }, { "epoch": 0.26, "grad_norm": 1.9149739472678144, "learning_rate": 8.642685317198833e-06, "loss": 0.7806, "step": 4109 }, { "epoch": 0.26, "grad_norm": 1.2842155534556976, "learning_rate": 8.641975191735567e-06, "loss": 0.7988, "step": 4110 }, { "epoch": 0.26, "grad_norm": 1.8572759782611639, "learning_rate": 8.64126490974786e-06, "loss": 0.7516, "step": 4111 }, { "epoch": 0.26, "grad_norm": 2.0666890277925765, "learning_rate": 8.640554471266236e-06, "loss": 0.7617, "step": 4112 }, { "epoch": 0.26, "grad_norm": 1.218307234885658, "learning_rate": 8.639843876321232e-06, "loss": 0.6457, "step": 4113 }, { "epoch": 0.26, "grad_norm": 2.1838263676944436, "learning_rate": 8.639133124943386e-06, "loss": 0.7783, "step": 4114 }, { "epoch": 0.26, "grad_norm": 1.7125174525373075, "learning_rate": 8.638422217163244e-06, "loss": 0.7815, "step": 4115 }, { "epoch": 0.26, "grad_norm": 1.0249918093575454, "learning_rate": 8.637711153011363e-06, "loss": 0.6545, "step": 4116 }, { "epoch": 0.26, "grad_norm": 1.0537261382695045, "learning_rate": 8.6369999325183e-06, "loss": 0.7409, "step": 4117 }, { "epoch": 0.26, "grad_norm": 1.3528625130439869, "learning_rate": 8.63628855571462e-06, "loss": 0.7695, "step": 4118 }, { "epoch": 0.26, "grad_norm": 1.3205891006355088, "learning_rate": 8.6355770226309e-06, "loss": 0.6866, "step": 4119 }, { "epoch": 0.26, "grad_norm": 1.53531583110755, "learning_rate": 8.634865333297722e-06, "loss": 0.7246, "step": 4120 }, { "epoch": 0.26, "grad_norm": 1.5755852004498498, "learning_rate": 8.634153487745667e-06, "loss": 0.7383, "step": 4121 }, { "epoch": 0.26, "grad_norm": 1.6441834837622997, "learning_rate": 8.633441486005331e-06, "loss": 0.8197, "step": 4122 }, { "epoch": 0.26, "grad_norm": 1.4249038078516365, "learning_rate": 8.632729328107317e-06, "loss": 0.7446, "step": 4123 }, { "epoch": 0.26, "grad_norm": 1.5042625216232017, "learning_rate": 8.632017014082227e-06, "loss": 0.644, "step": 4124 }, { "epoch": 0.26, "grad_norm": 1.5343941213712537, "learning_rate": 8.63130454396068e-06, "loss": 0.7228, "step": 4125 }, { "epoch": 0.26, "grad_norm": 1.6353224169660594, "learning_rate": 8.630591917773294e-06, "loss": 1.0995, "step": 4126 }, { "epoch": 0.26, "grad_norm": 1.778925546424242, "learning_rate": 8.629879135550695e-06, "loss": 0.8001, "step": 4127 }, { "epoch": 0.26, "grad_norm": 1.8638980143745445, "learning_rate": 8.62916619732352e-06, "loss": 0.8176, "step": 4128 }, { "epoch": 0.26, "grad_norm": 1.0492503015013273, "learning_rate": 8.628453103122404e-06, "loss": 0.6423, "step": 4129 }, { "epoch": 0.26, "grad_norm": 1.8009816637992049, "learning_rate": 8.627739852978003e-06, "loss": 0.896, "step": 4130 }, { "epoch": 0.26, "grad_norm": 1.6386757346017489, "learning_rate": 8.627026446920963e-06, "loss": 0.7104, "step": 4131 }, { "epoch": 0.26, "grad_norm": 1.5583678432485668, "learning_rate": 8.62631288498195e-06, "loss": 0.6886, "step": 4132 }, { "epoch": 0.26, "grad_norm": 1.6234146049006228, "learning_rate": 8.625599167191627e-06, "loss": 0.8633, "step": 4133 }, { "epoch": 0.26, "grad_norm": 1.7403953500095422, "learning_rate": 8.624885293580671e-06, "loss": 0.8166, "step": 4134 }, { "epoch": 0.26, "grad_norm": 1.798978595636048, "learning_rate": 8.624171264179761e-06, "loss": 0.7312, "step": 4135 }, { "epoch": 0.26, "grad_norm": 1.82588362658196, "learning_rate": 8.623457079019585e-06, "loss": 0.7812, "step": 4136 }, { "epoch": 0.26, "grad_norm": 1.2646305712645298, "learning_rate": 8.62274273813084e-06, "loss": 0.6971, "step": 4137 }, { "epoch": 0.26, "grad_norm": 1.0822614249240647, "learning_rate": 8.622028241544225e-06, "loss": 0.5739, "step": 4138 }, { "epoch": 0.26, "grad_norm": 1.0131732079464404, "learning_rate": 8.621313589290446e-06, "loss": 0.6613, "step": 4139 }, { "epoch": 0.26, "grad_norm": 1.7333005649616586, "learning_rate": 8.620598781400216e-06, "loss": 0.8233, "step": 4140 }, { "epoch": 0.27, "grad_norm": 1.0985848619628507, "learning_rate": 8.619883817904262e-06, "loss": 0.7265, "step": 4141 }, { "epoch": 0.27, "grad_norm": 1.7151103162705525, "learning_rate": 8.619168698833306e-06, "loss": 0.7802, "step": 4142 }, { "epoch": 0.27, "grad_norm": 1.7820683897279708, "learning_rate": 8.618453424218085e-06, "loss": 0.8949, "step": 4143 }, { "epoch": 0.27, "grad_norm": 1.5100067267783288, "learning_rate": 8.61773799408934e-06, "loss": 0.7711, "step": 4144 }, { "epoch": 0.27, "grad_norm": 1.8176221791265585, "learning_rate": 8.617022408477816e-06, "loss": 0.7251, "step": 4145 }, { "epoch": 0.27, "grad_norm": 1.5349185628704018, "learning_rate": 8.61630666741427e-06, "loss": 0.8052, "step": 4146 }, { "epoch": 0.27, "grad_norm": 1.065897947754778, "learning_rate": 8.615590770929461e-06, "loss": 0.606, "step": 4147 }, { "epoch": 0.27, "grad_norm": 1.7055273742954775, "learning_rate": 8.61487471905416e-06, "loss": 0.822, "step": 4148 }, { "epoch": 0.27, "grad_norm": 1.4251231468457417, "learning_rate": 8.614158511819138e-06, "loss": 0.5955, "step": 4149 }, { "epoch": 0.27, "grad_norm": 1.7129395110877972, "learning_rate": 8.613442149255179e-06, "loss": 0.8887, "step": 4150 }, { "epoch": 0.27, "grad_norm": 1.124809744294895, "learning_rate": 8.612725631393068e-06, "loss": 0.6713, "step": 4151 }, { "epoch": 0.27, "grad_norm": 1.1759454584681035, "learning_rate": 8.612008958263603e-06, "loss": 0.7311, "step": 4152 }, { "epoch": 0.27, "grad_norm": 1.4859429140117653, "learning_rate": 8.611292129897581e-06, "loss": 0.7698, "step": 4153 }, { "epoch": 0.27, "grad_norm": 1.804996935033432, "learning_rate": 8.610575146325813e-06, "loss": 0.6355, "step": 4154 }, { "epoch": 0.27, "grad_norm": 1.208927046306869, "learning_rate": 8.60985800757911e-06, "loss": 0.6126, "step": 4155 }, { "epoch": 0.27, "grad_norm": 1.5091391721863512, "learning_rate": 8.609140713688296e-06, "loss": 0.7629, "step": 4156 }, { "epoch": 0.27, "grad_norm": 1.9153700094039967, "learning_rate": 8.608423264684198e-06, "loss": 0.8487, "step": 4157 }, { "epoch": 0.27, "grad_norm": 1.83015214279733, "learning_rate": 8.607705660597652e-06, "loss": 0.7023, "step": 4158 }, { "epoch": 0.27, "grad_norm": 1.8010513891455273, "learning_rate": 8.606987901459497e-06, "loss": 0.8415, "step": 4159 }, { "epoch": 0.27, "grad_norm": 1.631290043419788, "learning_rate": 8.60626998730058e-06, "loss": 0.7955, "step": 4160 }, { "epoch": 0.27, "grad_norm": 1.617233993367281, "learning_rate": 8.605551918151755e-06, "loss": 0.728, "step": 4161 }, { "epoch": 0.27, "grad_norm": 1.0281360258306975, "learning_rate": 8.604833694043889e-06, "loss": 0.7171, "step": 4162 }, { "epoch": 0.27, "grad_norm": 2.5418797699331686, "learning_rate": 8.604115315007844e-06, "loss": 0.766, "step": 4163 }, { "epoch": 0.27, "grad_norm": 3.116568301595423, "learning_rate": 8.603396781074495e-06, "loss": 0.759, "step": 4164 }, { "epoch": 0.27, "grad_norm": 1.66667460577687, "learning_rate": 8.602678092274725e-06, "loss": 0.8994, "step": 4165 }, { "epoch": 0.27, "grad_norm": 3.025775659366972, "learning_rate": 8.601959248639421e-06, "loss": 0.9111, "step": 4166 }, { "epoch": 0.27, "grad_norm": 1.6165080299311645, "learning_rate": 8.601240250199476e-06, "loss": 0.7457, "step": 4167 }, { "epoch": 0.27, "grad_norm": 1.0601623393678201, "learning_rate": 8.600521096985795e-06, "loss": 0.6133, "step": 4168 }, { "epoch": 0.27, "grad_norm": 1.6774454704056476, "learning_rate": 8.599801789029281e-06, "loss": 0.7682, "step": 4169 }, { "epoch": 0.27, "grad_norm": 1.4403176866920773, "learning_rate": 8.59908232636085e-06, "loss": 0.7678, "step": 4170 }, { "epoch": 0.27, "grad_norm": 1.7286069880480346, "learning_rate": 8.598362709011425e-06, "loss": 0.8705, "step": 4171 }, { "epoch": 0.27, "grad_norm": 1.689081242720693, "learning_rate": 8.59764293701193e-06, "loss": 0.7993, "step": 4172 }, { "epoch": 0.27, "grad_norm": 1.5132760144366295, "learning_rate": 8.596923010393303e-06, "loss": 0.7559, "step": 4173 }, { "epoch": 0.27, "grad_norm": 1.6211805161486785, "learning_rate": 8.596202929186483e-06, "loss": 0.723, "step": 4174 }, { "epoch": 0.27, "grad_norm": 1.8591664893292523, "learning_rate": 8.595482693422416e-06, "loss": 0.8769, "step": 4175 }, { "epoch": 0.27, "grad_norm": 1.9894264145365077, "learning_rate": 8.59476230313206e-06, "loss": 0.8145, "step": 4176 }, { "epoch": 0.27, "grad_norm": 1.6609292017508426, "learning_rate": 8.594041758346372e-06, "loss": 0.6859, "step": 4177 }, { "epoch": 0.27, "grad_norm": 1.2126622494600345, "learning_rate": 8.593321059096322e-06, "loss": 0.6541, "step": 4178 }, { "epoch": 0.27, "grad_norm": 1.6077983830689666, "learning_rate": 8.592600205412884e-06, "loss": 0.6747, "step": 4179 }, { "epoch": 0.27, "grad_norm": 1.6505194359253992, "learning_rate": 8.591879197327039e-06, "loss": 0.8228, "step": 4180 }, { "epoch": 0.27, "grad_norm": 1.6874502278476942, "learning_rate": 8.591158034869773e-06, "loss": 0.7423, "step": 4181 }, { "epoch": 0.27, "grad_norm": 1.626067688852219, "learning_rate": 8.590436718072081e-06, "loss": 0.6963, "step": 4182 }, { "epoch": 0.27, "grad_norm": 1.606826883409219, "learning_rate": 8.589715246964963e-06, "loss": 0.8354, "step": 4183 }, { "epoch": 0.27, "grad_norm": 1.6288033206070331, "learning_rate": 8.588993621579427e-06, "loss": 0.6784, "step": 4184 }, { "epoch": 0.27, "grad_norm": 1.6471514658806272, "learning_rate": 8.588271841946485e-06, "loss": 0.7472, "step": 4185 }, { "epoch": 0.27, "grad_norm": 1.414705635546286, "learning_rate": 8.587549908097161e-06, "loss": 0.7897, "step": 4186 }, { "epoch": 0.27, "grad_norm": 2.12319392968897, "learning_rate": 8.58682782006248e-06, "loss": 0.7796, "step": 4187 }, { "epoch": 0.27, "grad_norm": 1.687948700515777, "learning_rate": 8.586105577873476e-06, "loss": 0.7792, "step": 4188 }, { "epoch": 0.27, "grad_norm": 1.1127530293242867, "learning_rate": 8.585383181561191e-06, "loss": 0.7, "step": 4189 }, { "epoch": 0.27, "grad_norm": 1.5822934847215209, "learning_rate": 8.58466063115667e-06, "loss": 0.7278, "step": 4190 }, { "epoch": 0.27, "grad_norm": 1.979973689376353, "learning_rate": 8.583937926690967e-06, "loss": 0.7716, "step": 4191 }, { "epoch": 0.27, "grad_norm": 1.4521549232522601, "learning_rate": 8.583215068195141e-06, "loss": 0.7021, "step": 4192 }, { "epoch": 0.27, "grad_norm": 1.1430152617488731, "learning_rate": 8.582492055700264e-06, "loss": 0.6464, "step": 4193 }, { "epoch": 0.27, "grad_norm": 1.7177812488588946, "learning_rate": 8.581768889237405e-06, "loss": 0.8877, "step": 4194 }, { "epoch": 0.27, "grad_norm": 1.749294628625861, "learning_rate": 8.581045568837647e-06, "loss": 0.6906, "step": 4195 }, { "epoch": 0.27, "grad_norm": 1.6517829393440995, "learning_rate": 8.580322094532072e-06, "loss": 0.8378, "step": 4196 }, { "epoch": 0.27, "grad_norm": 0.9571970639935092, "learning_rate": 8.57959846635178e-06, "loss": 0.6382, "step": 4197 }, { "epoch": 0.27, "grad_norm": 1.4819071340088619, "learning_rate": 8.578874684327866e-06, "loss": 0.6492, "step": 4198 }, { "epoch": 0.27, "grad_norm": 1.6329299170303033, "learning_rate": 8.578150748491438e-06, "loss": 0.7399, "step": 4199 }, { "epoch": 0.27, "grad_norm": 1.724408306439968, "learning_rate": 8.57742665887361e-06, "loss": 0.835, "step": 4200 }, { "epoch": 0.27, "grad_norm": 1.7200029508335823, "learning_rate": 8.576702415505501e-06, "loss": 0.8352, "step": 4201 }, { "epoch": 0.27, "grad_norm": 1.778338589530881, "learning_rate": 8.575978018418239e-06, "loss": 0.9143, "step": 4202 }, { "epoch": 0.27, "grad_norm": 1.2223029708557485, "learning_rate": 8.575253467642954e-06, "loss": 0.6106, "step": 4203 }, { "epoch": 0.27, "grad_norm": 1.568874195826112, "learning_rate": 8.57452876321079e-06, "loss": 0.7224, "step": 4204 }, { "epoch": 0.27, "grad_norm": 1.7060308647708042, "learning_rate": 8.57380390515289e-06, "loss": 0.8262, "step": 4205 }, { "epoch": 0.27, "grad_norm": 1.7111409715990389, "learning_rate": 8.573078893500406e-06, "loss": 0.8044, "step": 4206 }, { "epoch": 0.27, "grad_norm": 1.5799349779367924, "learning_rate": 8.5723537282845e-06, "loss": 0.9236, "step": 4207 }, { "epoch": 0.27, "grad_norm": 1.5173116990675906, "learning_rate": 8.57162840953634e-06, "loss": 0.6936, "step": 4208 }, { "epoch": 0.27, "grad_norm": 1.6372950716261725, "learning_rate": 8.570902937287093e-06, "loss": 0.7461, "step": 4209 }, { "epoch": 0.27, "grad_norm": 1.5520664809862779, "learning_rate": 8.570177311567942e-06, "loss": 0.6729, "step": 4210 }, { "epoch": 0.27, "grad_norm": 1.4834635787384802, "learning_rate": 8.569451532410073e-06, "loss": 0.6894, "step": 4211 }, { "epoch": 0.27, "grad_norm": 1.8993037166924893, "learning_rate": 8.568725599844679e-06, "loss": 0.8835, "step": 4212 }, { "epoch": 0.27, "grad_norm": 1.5082463419703351, "learning_rate": 8.567999513902953e-06, "loss": 0.7495, "step": 4213 }, { "epoch": 0.27, "grad_norm": 1.634620177653217, "learning_rate": 8.56727327461611e-06, "loss": 0.7818, "step": 4214 }, { "epoch": 0.27, "grad_norm": 1.1928851942622987, "learning_rate": 8.566546882015355e-06, "loss": 0.7163, "step": 4215 }, { "epoch": 0.27, "grad_norm": 1.3974511692044642, "learning_rate": 8.56582033613191e-06, "loss": 0.7322, "step": 4216 }, { "epoch": 0.27, "grad_norm": 1.6659531593024612, "learning_rate": 8.565093636996999e-06, "loss": 0.8197, "step": 4217 }, { "epoch": 0.27, "grad_norm": 1.6047500564094164, "learning_rate": 8.564366784641855e-06, "loss": 0.7996, "step": 4218 }, { "epoch": 0.27, "grad_norm": 2.043538111892944, "learning_rate": 8.563639779097717e-06, "loss": 0.7145, "step": 4219 }, { "epoch": 0.27, "grad_norm": 1.816800144188728, "learning_rate": 8.56291262039583e-06, "loss": 0.9726, "step": 4220 }, { "epoch": 0.27, "grad_norm": 1.6799535694256067, "learning_rate": 8.562185308567443e-06, "loss": 0.7623, "step": 4221 }, { "epoch": 0.27, "grad_norm": 1.6800745605539218, "learning_rate": 8.56145784364382e-06, "loss": 0.8046, "step": 4222 }, { "epoch": 0.27, "grad_norm": 1.9324237296522235, "learning_rate": 8.56073022565622e-06, "loss": 0.743, "step": 4223 }, { "epoch": 0.27, "grad_norm": 1.8213749699332153, "learning_rate": 8.560002454635917e-06, "loss": 0.8891, "step": 4224 }, { "epoch": 0.27, "grad_norm": 1.7705477966540062, "learning_rate": 8.55927453061419e-06, "loss": 0.9065, "step": 4225 }, { "epoch": 0.27, "grad_norm": 2.0386521101689596, "learning_rate": 8.558546453622322e-06, "loss": 0.6943, "step": 4226 }, { "epoch": 0.27, "grad_norm": 1.2592018864693404, "learning_rate": 8.557818223691607e-06, "loss": 0.7586, "step": 4227 }, { "epoch": 0.27, "grad_norm": 1.1633703662753885, "learning_rate": 8.557089840853338e-06, "loss": 0.6116, "step": 4228 }, { "epoch": 0.27, "grad_norm": 1.7727079047854886, "learning_rate": 8.556361305138825e-06, "loss": 0.8136, "step": 4229 }, { "epoch": 0.27, "grad_norm": 1.640818089024876, "learning_rate": 8.555632616579374e-06, "loss": 0.8144, "step": 4230 }, { "epoch": 0.27, "grad_norm": 1.6964758872366676, "learning_rate": 8.554903775206305e-06, "loss": 0.7978, "step": 4231 }, { "epoch": 0.27, "grad_norm": 1.9148473092146616, "learning_rate": 8.554174781050941e-06, "loss": 0.8146, "step": 4232 }, { "epoch": 0.27, "grad_norm": 1.6884270903045153, "learning_rate": 8.553445634144614e-06, "loss": 0.7904, "step": 4233 }, { "epoch": 0.27, "grad_norm": 1.7593717214139708, "learning_rate": 8.55271633451866e-06, "loss": 0.6919, "step": 4234 }, { "epoch": 0.27, "grad_norm": 1.699436885918801, "learning_rate": 8.551986882204424e-06, "loss": 0.7331, "step": 4235 }, { "epoch": 0.27, "grad_norm": 1.702303729443637, "learning_rate": 8.551257277233256e-06, "loss": 0.6862, "step": 4236 }, { "epoch": 0.27, "grad_norm": 1.5993037301977187, "learning_rate": 8.550527519636511e-06, "loss": 0.6453, "step": 4237 }, { "epoch": 0.27, "grad_norm": 1.5784726569852907, "learning_rate": 8.549797609445555e-06, "loss": 0.6097, "step": 4238 }, { "epoch": 0.27, "grad_norm": 1.274858089620252, "learning_rate": 8.549067546691756e-06, "loss": 0.6179, "step": 4239 }, { "epoch": 0.27, "grad_norm": 1.6802732813746446, "learning_rate": 8.548337331406491e-06, "loss": 0.798, "step": 4240 }, { "epoch": 0.27, "grad_norm": 1.9108468810566237, "learning_rate": 8.547606963621146e-06, "loss": 0.7982, "step": 4241 }, { "epoch": 0.27, "grad_norm": 1.5546951865340846, "learning_rate": 8.546876443367105e-06, "loss": 0.7247, "step": 4242 }, { "epoch": 0.27, "grad_norm": 1.7052611036720002, "learning_rate": 8.54614577067577e-06, "loss": 0.729, "step": 4243 }, { "epoch": 0.27, "grad_norm": 1.6133610745731413, "learning_rate": 8.54541494557854e-06, "loss": 0.6996, "step": 4244 }, { "epoch": 0.27, "grad_norm": 1.7398879275121224, "learning_rate": 8.544683968106827e-06, "loss": 0.7038, "step": 4245 }, { "epoch": 0.27, "grad_norm": 1.4013932801452695, "learning_rate": 8.543952838292043e-06, "loss": 0.6669, "step": 4246 }, { "epoch": 0.27, "grad_norm": 1.8864995750056557, "learning_rate": 8.543221556165615e-06, "loss": 0.7931, "step": 4247 }, { "epoch": 0.27, "grad_norm": 1.5284094849731051, "learning_rate": 8.54249012175897e-06, "loss": 0.7406, "step": 4248 }, { "epoch": 0.27, "grad_norm": 2.0602714672835862, "learning_rate": 8.541758535103542e-06, "loss": 0.8649, "step": 4249 }, { "epoch": 0.27, "grad_norm": 0.9919764946259843, "learning_rate": 8.541026796230775e-06, "loss": 0.629, "step": 4250 }, { "epoch": 0.27, "grad_norm": 2.128137170401887, "learning_rate": 8.540294905172117e-06, "loss": 0.7494, "step": 4251 }, { "epoch": 0.27, "grad_norm": 1.5170560197052285, "learning_rate": 8.539562861959023e-06, "loss": 0.6935, "step": 4252 }, { "epoch": 0.27, "grad_norm": 1.7415258277445689, "learning_rate": 8.538830666622955e-06, "loss": 0.7268, "step": 4253 }, { "epoch": 0.27, "grad_norm": 1.8847061529850233, "learning_rate": 8.53809831919538e-06, "loss": 0.7332, "step": 4254 }, { "epoch": 0.27, "grad_norm": 1.7751763551575708, "learning_rate": 8.537365819707776e-06, "loss": 0.784, "step": 4255 }, { "epoch": 0.27, "grad_norm": 1.8085138909090415, "learning_rate": 8.53663316819162e-06, "loss": 0.7983, "step": 4256 }, { "epoch": 0.27, "grad_norm": 2.020273174882671, "learning_rate": 8.535900364678403e-06, "loss": 0.7682, "step": 4257 }, { "epoch": 0.27, "grad_norm": 1.6052075223414881, "learning_rate": 8.535167409199618e-06, "loss": 0.8487, "step": 4258 }, { "epoch": 0.27, "grad_norm": 2.2084004221523745, "learning_rate": 8.534434301786767e-06, "loss": 0.794, "step": 4259 }, { "epoch": 0.27, "grad_norm": 1.1874501008543843, "learning_rate": 8.533701042471356e-06, "loss": 0.5849, "step": 4260 }, { "epoch": 0.27, "grad_norm": 1.7599402197155165, "learning_rate": 8.532967631284898e-06, "loss": 0.8411, "step": 4261 }, { "epoch": 0.27, "grad_norm": 1.6971861218026247, "learning_rate": 8.532234068258918e-06, "loss": 0.7399, "step": 4262 }, { "epoch": 0.27, "grad_norm": 1.6935439749383807, "learning_rate": 8.531500353424937e-06, "loss": 0.8797, "step": 4263 }, { "epoch": 0.27, "grad_norm": 1.6698758274752539, "learning_rate": 8.530766486814495e-06, "loss": 0.7617, "step": 4264 }, { "epoch": 0.27, "grad_norm": 1.4720404941384806, "learning_rate": 8.530032468459126e-06, "loss": 0.7384, "step": 4265 }, { "epoch": 0.27, "grad_norm": 1.5699479281264495, "learning_rate": 8.529298298390379e-06, "loss": 0.8015, "step": 4266 }, { "epoch": 0.27, "grad_norm": 1.0207940078502575, "learning_rate": 8.528563976639807e-06, "loss": 0.7115, "step": 4267 }, { "epoch": 0.27, "grad_norm": 1.792746389867331, "learning_rate": 8.527829503238972e-06, "loss": 0.7162, "step": 4268 }, { "epoch": 0.27, "grad_norm": 1.8072831649942491, "learning_rate": 8.527094878219435e-06, "loss": 0.8786, "step": 4269 }, { "epoch": 0.27, "grad_norm": 1.7005136224418813, "learning_rate": 8.526360101612774e-06, "loss": 0.7379, "step": 4270 }, { "epoch": 0.27, "grad_norm": 2.2035409944932995, "learning_rate": 8.525625173450564e-06, "loss": 0.8539, "step": 4271 }, { "epoch": 0.27, "grad_norm": 1.4797757833282885, "learning_rate": 8.524890093764395e-06, "loss": 0.697, "step": 4272 }, { "epoch": 0.27, "grad_norm": 2.02071822776457, "learning_rate": 8.524154862585854e-06, "loss": 0.881, "step": 4273 }, { "epoch": 0.27, "grad_norm": 1.909725017457007, "learning_rate": 8.523419479946545e-06, "loss": 0.8403, "step": 4274 }, { "epoch": 0.27, "grad_norm": 1.5825968354604865, "learning_rate": 8.522683945878068e-06, "loss": 0.7189, "step": 4275 }, { "epoch": 0.27, "grad_norm": 1.4756456619328593, "learning_rate": 8.521948260412038e-06, "loss": 0.7315, "step": 4276 }, { "epoch": 0.27, "grad_norm": 1.6385529372809853, "learning_rate": 8.52121242358007e-06, "loss": 0.8226, "step": 4277 }, { "epoch": 0.27, "grad_norm": 1.7342319795014416, "learning_rate": 8.520476435413794e-06, "loss": 0.7603, "step": 4278 }, { "epoch": 0.27, "grad_norm": 1.5786921308039938, "learning_rate": 8.519740295944838e-06, "loss": 0.7797, "step": 4279 }, { "epoch": 0.27, "grad_norm": 1.4851042778524874, "learning_rate": 8.51900400520484e-06, "loss": 0.7551, "step": 4280 }, { "epoch": 0.27, "grad_norm": 1.6456451271732644, "learning_rate": 8.518267563225443e-06, "loss": 0.7588, "step": 4281 }, { "epoch": 0.27, "grad_norm": 1.6706851020066449, "learning_rate": 8.5175309700383e-06, "loss": 0.7718, "step": 4282 }, { "epoch": 0.27, "grad_norm": 2.522992467001703, "learning_rate": 8.51679422567507e-06, "loss": 0.7973, "step": 4283 }, { "epoch": 0.27, "grad_norm": 2.032385993985024, "learning_rate": 8.51605733016741e-06, "loss": 0.8395, "step": 4284 }, { "epoch": 0.27, "grad_norm": 1.1528843014418693, "learning_rate": 8.515320283546996e-06, "loss": 0.5962, "step": 4285 }, { "epoch": 0.27, "grad_norm": 1.2277353991879336, "learning_rate": 8.514583085845502e-06, "loss": 0.6518, "step": 4286 }, { "epoch": 0.27, "grad_norm": 2.451926250223998, "learning_rate": 8.513845737094613e-06, "loss": 0.7597, "step": 4287 }, { "epoch": 0.27, "grad_norm": 1.036666417639086, "learning_rate": 8.513108237326016e-06, "loss": 0.7413, "step": 4288 }, { "epoch": 0.27, "grad_norm": 1.7168188665316382, "learning_rate": 8.51237058657141e-06, "loss": 0.6926, "step": 4289 }, { "epoch": 0.27, "grad_norm": 1.699431212899662, "learning_rate": 8.511632784862498e-06, "loss": 0.752, "step": 4290 }, { "epoch": 0.27, "grad_norm": 1.7124983921844468, "learning_rate": 8.510894832230988e-06, "loss": 0.7295, "step": 4291 }, { "epoch": 0.27, "grad_norm": 2.0254770915108513, "learning_rate": 8.510156728708594e-06, "loss": 0.8032, "step": 4292 }, { "epoch": 0.27, "grad_norm": 1.530975507182397, "learning_rate": 8.50941847432704e-06, "loss": 0.7569, "step": 4293 }, { "epoch": 0.27, "grad_norm": 1.2420061498021184, "learning_rate": 8.508680069118055e-06, "loss": 0.7333, "step": 4294 }, { "epoch": 0.27, "grad_norm": 1.6288688446207347, "learning_rate": 8.507941513113372e-06, "loss": 0.825, "step": 4295 }, { "epoch": 0.27, "grad_norm": 4.272948643461886, "learning_rate": 8.507202806344735e-06, "loss": 1.0301, "step": 4296 }, { "epoch": 0.28, "grad_norm": 1.6910621403044241, "learning_rate": 8.506463948843891e-06, "loss": 0.7966, "step": 4297 }, { "epoch": 0.28, "grad_norm": 1.941971282040224, "learning_rate": 8.505724940642595e-06, "loss": 0.8497, "step": 4298 }, { "epoch": 0.28, "grad_norm": 1.6800549558138218, "learning_rate": 8.504985781772606e-06, "loss": 0.7997, "step": 4299 }, { "epoch": 0.28, "grad_norm": 1.7157230508031907, "learning_rate": 8.504246472265693e-06, "loss": 0.7958, "step": 4300 }, { "epoch": 0.28, "grad_norm": 1.71815369600009, "learning_rate": 8.503507012153632e-06, "loss": 0.8622, "step": 4301 }, { "epoch": 0.28, "grad_norm": 2.1632875458127456, "learning_rate": 8.502767401468202e-06, "loss": 0.749, "step": 4302 }, { "epoch": 0.28, "grad_norm": 1.6790769878892542, "learning_rate": 8.502027640241188e-06, "loss": 0.7051, "step": 4303 }, { "epoch": 0.28, "grad_norm": 1.6357857181046456, "learning_rate": 8.501287728504383e-06, "loss": 0.7857, "step": 4304 }, { "epoch": 0.28, "grad_norm": 1.4838897961930075, "learning_rate": 8.500547666289592e-06, "loss": 0.7148, "step": 4305 }, { "epoch": 0.28, "grad_norm": 1.7048820809639693, "learning_rate": 8.499807453628616e-06, "loss": 0.7742, "step": 4306 }, { "epoch": 0.28, "grad_norm": 1.0201954879529873, "learning_rate": 8.49906709055327e-06, "loss": 0.6467, "step": 4307 }, { "epoch": 0.28, "grad_norm": 1.1468875762582873, "learning_rate": 8.498326577095372e-06, "loss": 0.5425, "step": 4308 }, { "epoch": 0.28, "grad_norm": 1.644881533229438, "learning_rate": 8.497585913286752e-06, "loss": 0.8701, "step": 4309 }, { "epoch": 0.28, "grad_norm": 1.679325068111299, "learning_rate": 8.496845099159236e-06, "loss": 0.7857, "step": 4310 }, { "epoch": 0.28, "grad_norm": 1.6588281986895916, "learning_rate": 8.496104134744667e-06, "loss": 0.9117, "step": 4311 }, { "epoch": 0.28, "grad_norm": 1.662180280177273, "learning_rate": 8.495363020074886e-06, "loss": 0.825, "step": 4312 }, { "epoch": 0.28, "grad_norm": 2.079377733709957, "learning_rate": 8.49462175518175e-06, "loss": 0.7942, "step": 4313 }, { "epoch": 0.28, "grad_norm": 1.5508106114189228, "learning_rate": 8.493880340097114e-06, "loss": 0.7808, "step": 4314 }, { "epoch": 0.28, "grad_norm": 1.6812444707120928, "learning_rate": 8.493138774852842e-06, "loss": 0.7193, "step": 4315 }, { "epoch": 0.28, "grad_norm": 1.878540709850238, "learning_rate": 8.492397059480805e-06, "loss": 0.8249, "step": 4316 }, { "epoch": 0.28, "grad_norm": 1.7603952965463805, "learning_rate": 8.49165519401288e-06, "loss": 0.8938, "step": 4317 }, { "epoch": 0.28, "grad_norm": 1.7686057281081657, "learning_rate": 8.490913178480954e-06, "loss": 0.6201, "step": 4318 }, { "epoch": 0.28, "grad_norm": 1.1809692702522452, "learning_rate": 8.490171012916915e-06, "loss": 0.6407, "step": 4319 }, { "epoch": 0.28, "grad_norm": 2.1342016089476212, "learning_rate": 8.489428697352658e-06, "loss": 0.8165, "step": 4320 }, { "epoch": 0.28, "grad_norm": 1.2667683307471918, "learning_rate": 8.488686231820089e-06, "loss": 0.7823, "step": 4321 }, { "epoch": 0.28, "grad_norm": 1.804385154770755, "learning_rate": 8.487943616351118e-06, "loss": 0.7479, "step": 4322 }, { "epoch": 0.28, "grad_norm": 1.466199930176963, "learning_rate": 8.487200850977657e-06, "loss": 0.7799, "step": 4323 }, { "epoch": 0.28, "grad_norm": 1.5699701710085232, "learning_rate": 8.486457935731632e-06, "loss": 0.7405, "step": 4324 }, { "epoch": 0.28, "grad_norm": 1.5958684896273512, "learning_rate": 8.48571487064497e-06, "loss": 0.8373, "step": 4325 }, { "epoch": 0.28, "grad_norm": 1.9306484382966007, "learning_rate": 8.484971655749607e-06, "loss": 0.8523, "step": 4326 }, { "epoch": 0.28, "grad_norm": 1.825457425404425, "learning_rate": 8.484228291077488e-06, "loss": 0.8094, "step": 4327 }, { "epoch": 0.28, "grad_norm": 1.7497128604404393, "learning_rate": 8.483484776660556e-06, "loss": 0.881, "step": 4328 }, { "epoch": 0.28, "grad_norm": 1.7398982891501484, "learning_rate": 8.48274111253077e-06, "loss": 0.8133, "step": 4329 }, { "epoch": 0.28, "grad_norm": 1.6882090503235918, "learning_rate": 8.481997298720089e-06, "loss": 0.8976, "step": 4330 }, { "epoch": 0.28, "grad_norm": 1.9954847277131675, "learning_rate": 8.481253335260478e-06, "loss": 0.7712, "step": 4331 }, { "epoch": 0.28, "grad_norm": 1.8782160305172175, "learning_rate": 8.480509222183916e-06, "loss": 0.6911, "step": 4332 }, { "epoch": 0.28, "grad_norm": 1.2907071641918695, "learning_rate": 8.479764959522381e-06, "loss": 0.6632, "step": 4333 }, { "epoch": 0.28, "grad_norm": 2.003990058945138, "learning_rate": 8.47902054730786e-06, "loss": 0.66, "step": 4334 }, { "epoch": 0.28, "grad_norm": 1.815193320087648, "learning_rate": 8.478275985572346e-06, "loss": 0.8567, "step": 4335 }, { "epoch": 0.28, "grad_norm": 1.7615009419500784, "learning_rate": 8.477531274347839e-06, "loss": 0.8148, "step": 4336 }, { "epoch": 0.28, "grad_norm": 1.0900802349044199, "learning_rate": 8.476786413666346e-06, "loss": 0.7581, "step": 4337 }, { "epoch": 0.28, "grad_norm": 1.6839316150621526, "learning_rate": 8.476041403559878e-06, "loss": 0.7218, "step": 4338 }, { "epoch": 0.28, "grad_norm": 1.7915762268016335, "learning_rate": 8.475296244060454e-06, "loss": 0.8248, "step": 4339 }, { "epoch": 0.28, "grad_norm": 1.6722986723815862, "learning_rate": 8.4745509352001e-06, "loss": 0.7784, "step": 4340 }, { "epoch": 0.28, "grad_norm": 1.520224830372607, "learning_rate": 8.473805477010848e-06, "loss": 0.7719, "step": 4341 }, { "epoch": 0.28, "grad_norm": 1.6716934819052696, "learning_rate": 8.473059869524738e-06, "loss": 0.7338, "step": 4342 }, { "epoch": 0.28, "grad_norm": 1.51825333263434, "learning_rate": 8.472314112773813e-06, "loss": 0.7918, "step": 4343 }, { "epoch": 0.28, "grad_norm": 1.8146976851120524, "learning_rate": 8.47156820679012e-06, "loss": 0.8082, "step": 4344 }, { "epoch": 0.28, "grad_norm": 1.748003718310498, "learning_rate": 8.470822151605723e-06, "loss": 0.7675, "step": 4345 }, { "epoch": 0.28, "grad_norm": 1.2054111743778093, "learning_rate": 8.470075947252683e-06, "loss": 0.723, "step": 4346 }, { "epoch": 0.28, "grad_norm": 1.0519027616225405, "learning_rate": 8.469329593763069e-06, "loss": 0.7085, "step": 4347 }, { "epoch": 0.28, "grad_norm": 1.8978988043649259, "learning_rate": 8.46858309116896e-06, "loss": 0.8603, "step": 4348 }, { "epoch": 0.28, "grad_norm": 1.681370200093299, "learning_rate": 8.467836439502439e-06, "loss": 0.7624, "step": 4349 }, { "epoch": 0.28, "grad_norm": 2.007775358135663, "learning_rate": 8.467089638795593e-06, "loss": 0.7783, "step": 4350 }, { "epoch": 0.28, "grad_norm": 1.0414667476255282, "learning_rate": 8.46634268908052e-06, "loss": 0.6685, "step": 4351 }, { "epoch": 0.28, "grad_norm": 1.5084823813907793, "learning_rate": 8.465595590389324e-06, "loss": 0.7801, "step": 4352 }, { "epoch": 0.28, "grad_norm": 1.656281139253698, "learning_rate": 8.46484834275411e-06, "loss": 0.8731, "step": 4353 }, { "epoch": 0.28, "grad_norm": 1.6889600003643532, "learning_rate": 8.464100946206996e-06, "loss": 1.0602, "step": 4354 }, { "epoch": 0.28, "grad_norm": 1.6507968860582523, "learning_rate": 8.463353400780101e-06, "loss": 0.7809, "step": 4355 }, { "epoch": 0.28, "grad_norm": 1.8964914730941587, "learning_rate": 8.462605706505556e-06, "loss": 0.8923, "step": 4356 }, { "epoch": 0.28, "grad_norm": 1.6741508802082603, "learning_rate": 8.461857863415493e-06, "loss": 0.8506, "step": 4357 }, { "epoch": 0.28, "grad_norm": 1.4665709833284795, "learning_rate": 8.461109871542053e-06, "loss": 0.7855, "step": 4358 }, { "epoch": 0.28, "grad_norm": 1.2375891899847484, "learning_rate": 8.460361730917384e-06, "loss": 0.6959, "step": 4359 }, { "epoch": 0.28, "grad_norm": 2.1728641896806766, "learning_rate": 8.459613441573637e-06, "loss": 0.8328, "step": 4360 }, { "epoch": 0.28, "grad_norm": 1.8624347540515613, "learning_rate": 8.458865003542975e-06, "loss": 0.847, "step": 4361 }, { "epoch": 0.28, "grad_norm": 1.712281671616326, "learning_rate": 8.458116416857565e-06, "loss": 0.7403, "step": 4362 }, { "epoch": 0.28, "grad_norm": 1.6849131493288352, "learning_rate": 8.457367681549577e-06, "loss": 0.6754, "step": 4363 }, { "epoch": 0.28, "grad_norm": 1.5425813106914927, "learning_rate": 8.456618797651191e-06, "loss": 0.7802, "step": 4364 }, { "epoch": 0.28, "grad_norm": 1.6270463337785874, "learning_rate": 8.455869765194592e-06, "loss": 0.738, "step": 4365 }, { "epoch": 0.28, "grad_norm": 1.122107622957036, "learning_rate": 8.455120584211972e-06, "loss": 0.5638, "step": 4366 }, { "epoch": 0.28, "grad_norm": 1.7223485647004302, "learning_rate": 8.45437125473553e-06, "loss": 0.8579, "step": 4367 }, { "epoch": 0.28, "grad_norm": 1.902421373664125, "learning_rate": 8.45362177679747e-06, "loss": 0.6964, "step": 4368 }, { "epoch": 0.28, "grad_norm": 1.5661737000278588, "learning_rate": 8.452872150430002e-06, "loss": 0.8382, "step": 4369 }, { "epoch": 0.28, "grad_norm": 2.3322566495577894, "learning_rate": 8.452122375665346e-06, "loss": 0.7818, "step": 4370 }, { "epoch": 0.28, "grad_norm": 1.6961715042212802, "learning_rate": 8.451372452535724e-06, "loss": 0.7871, "step": 4371 }, { "epoch": 0.28, "grad_norm": 1.8150918084938656, "learning_rate": 8.450622381073367e-06, "loss": 0.7546, "step": 4372 }, { "epoch": 0.28, "grad_norm": 1.68981453758858, "learning_rate": 8.44987216131051e-06, "loss": 0.7762, "step": 4373 }, { "epoch": 0.28, "grad_norm": 2.677193794707099, "learning_rate": 8.449121793279395e-06, "loss": 0.7858, "step": 4374 }, { "epoch": 0.28, "grad_norm": 1.6990079074098374, "learning_rate": 8.448371277012275e-06, "loss": 0.7399, "step": 4375 }, { "epoch": 0.28, "grad_norm": 1.733797689708031, "learning_rate": 8.447620612541405e-06, "loss": 0.7477, "step": 4376 }, { "epoch": 0.28, "grad_norm": 0.9906277529915194, "learning_rate": 8.446869799899042e-06, "loss": 0.6447, "step": 4377 }, { "epoch": 0.28, "grad_norm": 2.2132880304540605, "learning_rate": 8.44611883911746e-06, "loss": 0.9104, "step": 4378 }, { "epoch": 0.28, "grad_norm": 1.6741142948062064, "learning_rate": 8.44536773022893e-06, "loss": 0.7752, "step": 4379 }, { "epoch": 0.28, "grad_norm": 0.9915829229966913, "learning_rate": 8.444616473265737e-06, "loss": 0.6514, "step": 4380 }, { "epoch": 0.28, "grad_norm": 1.6199935665130878, "learning_rate": 8.443865068260164e-06, "loss": 0.7636, "step": 4381 }, { "epoch": 0.28, "grad_norm": 1.969072293882751, "learning_rate": 8.443113515244508e-06, "loss": 0.7877, "step": 4382 }, { "epoch": 0.28, "grad_norm": 1.5954311227351905, "learning_rate": 8.442361814251069e-06, "loss": 0.7856, "step": 4383 }, { "epoch": 0.28, "grad_norm": 2.464498838185639, "learning_rate": 8.44160996531215e-06, "loss": 0.8339, "step": 4384 }, { "epoch": 0.28, "grad_norm": 4.408336164107793, "learning_rate": 8.440857968460068e-06, "loss": 0.7762, "step": 4385 }, { "epoch": 0.28, "grad_norm": 1.8139247708080815, "learning_rate": 8.440105823727143e-06, "loss": 0.9391, "step": 4386 }, { "epoch": 0.28, "grad_norm": 1.6469325613723262, "learning_rate": 8.439353531145695e-06, "loss": 0.6349, "step": 4387 }, { "epoch": 0.28, "grad_norm": 1.7218544017147612, "learning_rate": 8.43860109074806e-06, "loss": 0.8682, "step": 4388 }, { "epoch": 0.28, "grad_norm": 1.5690330173271754, "learning_rate": 8.437848502566576e-06, "loss": 0.8086, "step": 4389 }, { "epoch": 0.28, "grad_norm": 1.6359389003425826, "learning_rate": 8.437095766633587e-06, "loss": 0.7548, "step": 4390 }, { "epoch": 0.28, "grad_norm": 1.1932064618651066, "learning_rate": 8.436342882981445e-06, "loss": 0.7114, "step": 4391 }, { "epoch": 0.28, "grad_norm": 1.9170886750175906, "learning_rate": 8.435589851642507e-06, "loss": 0.8072, "step": 4392 }, { "epoch": 0.28, "grad_norm": 1.6623662767794274, "learning_rate": 8.434836672649134e-06, "loss": 0.8502, "step": 4393 }, { "epoch": 0.28, "grad_norm": 1.6786574830319532, "learning_rate": 8.4340833460337e-06, "loss": 0.7123, "step": 4394 }, { "epoch": 0.28, "grad_norm": 1.6302975476449415, "learning_rate": 8.433329871828582e-06, "loss": 0.7313, "step": 4395 }, { "epoch": 0.28, "grad_norm": 1.410474042119805, "learning_rate": 8.432576250066158e-06, "loss": 0.7269, "step": 4396 }, { "epoch": 0.28, "grad_norm": 1.5871955211842694, "learning_rate": 8.431822480778818e-06, "loss": 0.7828, "step": 4397 }, { "epoch": 0.28, "grad_norm": 1.6554326556492587, "learning_rate": 8.431068563998962e-06, "loss": 0.8821, "step": 4398 }, { "epoch": 0.28, "grad_norm": 1.6280745333237134, "learning_rate": 8.430314499758986e-06, "loss": 0.7152, "step": 4399 }, { "epoch": 0.28, "grad_norm": 1.2803160364664072, "learning_rate": 8.429560288091305e-06, "loss": 0.6726, "step": 4400 }, { "epoch": 0.28, "grad_norm": 1.7378542212287555, "learning_rate": 8.428805929028327e-06, "loss": 0.7368, "step": 4401 }, { "epoch": 0.28, "grad_norm": 1.276984227567959, "learning_rate": 8.428051422602475e-06, "loss": 0.672, "step": 4402 }, { "epoch": 0.28, "grad_norm": 2.0313075400144793, "learning_rate": 8.427296768846176e-06, "loss": 0.8261, "step": 4403 }, { "epoch": 0.28, "grad_norm": 1.9020513673793231, "learning_rate": 8.426541967791863e-06, "loss": 0.6969, "step": 4404 }, { "epoch": 0.28, "grad_norm": 1.718308535373971, "learning_rate": 8.425787019471979e-06, "loss": 0.763, "step": 4405 }, { "epoch": 0.28, "grad_norm": 1.5724565063853386, "learning_rate": 8.425031923918964e-06, "loss": 0.7273, "step": 4406 }, { "epoch": 0.28, "grad_norm": 2.040144487792473, "learning_rate": 8.424276681165276e-06, "loss": 0.7308, "step": 4407 }, { "epoch": 0.28, "grad_norm": 1.5938177551002386, "learning_rate": 8.42352129124337e-06, "loss": 0.808, "step": 4408 }, { "epoch": 0.28, "grad_norm": 1.593730052898299, "learning_rate": 8.422765754185716e-06, "loss": 0.8329, "step": 4409 }, { "epoch": 0.28, "grad_norm": 1.7257015642389473, "learning_rate": 8.422010070024779e-06, "loss": 0.8493, "step": 4410 }, { "epoch": 0.28, "grad_norm": 1.9228260687641343, "learning_rate": 8.421254238793041e-06, "loss": 0.7545, "step": 4411 }, { "epoch": 0.28, "grad_norm": 1.8314312132978325, "learning_rate": 8.420498260522985e-06, "loss": 0.8583, "step": 4412 }, { "epoch": 0.28, "grad_norm": 0.9893947971396478, "learning_rate": 8.419742135247099e-06, "loss": 0.6912, "step": 4413 }, { "epoch": 0.28, "grad_norm": 1.6043917342072487, "learning_rate": 8.418985862997886e-06, "loss": 1.0337, "step": 4414 }, { "epoch": 0.28, "grad_norm": 1.6848032034994471, "learning_rate": 8.418229443807842e-06, "loss": 0.9288, "step": 4415 }, { "epoch": 0.28, "grad_norm": 1.5063254748881116, "learning_rate": 8.417472877709479e-06, "loss": 0.638, "step": 4416 }, { "epoch": 0.28, "grad_norm": 1.5308380696837427, "learning_rate": 8.416716164735315e-06, "loss": 0.7335, "step": 4417 }, { "epoch": 0.28, "grad_norm": 1.653614321212712, "learning_rate": 8.415959304917868e-06, "loss": 0.7862, "step": 4418 }, { "epoch": 0.28, "grad_norm": 1.1274939309667216, "learning_rate": 8.415202298289668e-06, "loss": 0.603, "step": 4419 }, { "epoch": 0.28, "grad_norm": 1.5298698472186998, "learning_rate": 8.41444514488325e-06, "loss": 0.6754, "step": 4420 }, { "epoch": 0.28, "grad_norm": 1.9832331151613733, "learning_rate": 8.413687844731155e-06, "loss": 0.7646, "step": 4421 }, { "epoch": 0.28, "grad_norm": 1.6804538902622739, "learning_rate": 8.41293039786593e-06, "loss": 0.849, "step": 4422 }, { "epoch": 0.28, "grad_norm": 1.7015585444118133, "learning_rate": 8.412172804320127e-06, "loss": 0.7352, "step": 4423 }, { "epoch": 0.28, "grad_norm": 1.6048728959665322, "learning_rate": 8.411415064126306e-06, "loss": 0.8644, "step": 4424 }, { "epoch": 0.28, "grad_norm": 1.4868574496999338, "learning_rate": 8.410657177317035e-06, "loss": 0.7551, "step": 4425 }, { "epoch": 0.28, "grad_norm": 0.9946131952465439, "learning_rate": 8.409899143924885e-06, "loss": 0.6817, "step": 4426 }, { "epoch": 0.28, "grad_norm": 1.5072632205050482, "learning_rate": 8.409140963982436e-06, "loss": 0.7541, "step": 4427 }, { "epoch": 0.28, "grad_norm": 1.7286599477160933, "learning_rate": 8.40838263752227e-06, "loss": 1.1296, "step": 4428 }, { "epoch": 0.28, "grad_norm": 1.7916806557159553, "learning_rate": 8.407624164576982e-06, "loss": 0.7988, "step": 4429 }, { "epoch": 0.28, "grad_norm": 1.1668069696256227, "learning_rate": 8.406865545179165e-06, "loss": 0.6613, "step": 4430 }, { "epoch": 0.28, "grad_norm": 1.2692087344096303, "learning_rate": 8.406106779361429e-06, "loss": 0.7022, "step": 4431 }, { "epoch": 0.28, "grad_norm": 1.7459209859465754, "learning_rate": 8.405347867156379e-06, "loss": 0.6972, "step": 4432 }, { "epoch": 0.28, "grad_norm": 1.9575419781288586, "learning_rate": 8.404588808596635e-06, "loss": 0.7937, "step": 4433 }, { "epoch": 0.28, "grad_norm": 1.6729879635888536, "learning_rate": 8.403829603714817e-06, "loss": 0.8115, "step": 4434 }, { "epoch": 0.28, "grad_norm": 1.6445070116947897, "learning_rate": 8.403070252543555e-06, "loss": 0.8325, "step": 4435 }, { "epoch": 0.28, "grad_norm": 1.6054300470182108, "learning_rate": 8.402310755115483e-06, "loss": 0.8267, "step": 4436 }, { "epoch": 0.28, "grad_norm": 1.2616420796990744, "learning_rate": 8.401551111463246e-06, "loss": 0.7431, "step": 4437 }, { "epoch": 0.28, "grad_norm": 1.6225766465270026, "learning_rate": 8.400791321619489e-06, "loss": 0.8768, "step": 4438 }, { "epoch": 0.28, "grad_norm": 2.329512121499445, "learning_rate": 8.400031385616868e-06, "loss": 0.6907, "step": 4439 }, { "epoch": 0.28, "grad_norm": 1.585473007593403, "learning_rate": 8.399271303488041e-06, "loss": 0.7219, "step": 4440 }, { "epoch": 0.28, "grad_norm": 1.1776920857185138, "learning_rate": 8.398511075265677e-06, "loss": 0.6778, "step": 4441 }, { "epoch": 0.28, "grad_norm": 1.9093548161436673, "learning_rate": 8.397750700982449e-06, "loss": 0.8455, "step": 4442 }, { "epoch": 0.28, "grad_norm": 1.132253895945719, "learning_rate": 8.396990180671034e-06, "loss": 0.5809, "step": 4443 }, { "epoch": 0.28, "grad_norm": 1.6391229727898557, "learning_rate": 8.39622951436412e-06, "loss": 0.7006, "step": 4444 }, { "epoch": 0.28, "grad_norm": 1.5768986650221832, "learning_rate": 8.395468702094399e-06, "loss": 0.75, "step": 4445 }, { "epoch": 0.28, "grad_norm": 1.7881528065299368, "learning_rate": 8.394707743894565e-06, "loss": 0.7678, "step": 4446 }, { "epoch": 0.28, "grad_norm": 1.5067350439985, "learning_rate": 8.393946639797328e-06, "loss": 0.6888, "step": 4447 }, { "epoch": 0.28, "grad_norm": 1.4593707758698171, "learning_rate": 8.393185389835396e-06, "loss": 0.7105, "step": 4448 }, { "epoch": 0.28, "grad_norm": 1.670779723010952, "learning_rate": 8.392423994041486e-06, "loss": 0.8579, "step": 4449 }, { "epoch": 0.28, "grad_norm": 1.680718133873467, "learning_rate": 8.39166245244832e-06, "loss": 0.795, "step": 4450 }, { "epoch": 0.28, "grad_norm": 1.8131847992502184, "learning_rate": 8.39090076508863e-06, "loss": 0.8435, "step": 4451 }, { "epoch": 0.28, "grad_norm": 1.7761221314527151, "learning_rate": 8.390138931995148e-06, "loss": 0.8971, "step": 4452 }, { "epoch": 0.29, "grad_norm": 1.7409394709119326, "learning_rate": 8.389376953200622e-06, "loss": 0.8969, "step": 4453 }, { "epoch": 0.29, "grad_norm": 1.7249677611829792, "learning_rate": 8.388614828737794e-06, "loss": 0.7835, "step": 4454 }, { "epoch": 0.29, "grad_norm": 1.5705790166275, "learning_rate": 8.387852558639422e-06, "loss": 0.7499, "step": 4455 }, { "epoch": 0.29, "grad_norm": 1.5469892994929233, "learning_rate": 8.387090142938264e-06, "loss": 0.641, "step": 4456 }, { "epoch": 0.29, "grad_norm": 1.555671762678239, "learning_rate": 8.386327581667091e-06, "loss": 1.0254, "step": 4457 }, { "epoch": 0.29, "grad_norm": 1.5481307036507737, "learning_rate": 8.385564874858674e-06, "loss": 0.731, "step": 4458 }, { "epoch": 0.29, "grad_norm": 1.5147920850972316, "learning_rate": 8.384802022545793e-06, "loss": 0.6265, "step": 4459 }, { "epoch": 0.29, "grad_norm": 1.4187748479984692, "learning_rate": 8.384039024761233e-06, "loss": 0.6984, "step": 4460 }, { "epoch": 0.29, "grad_norm": 2.0767479789986854, "learning_rate": 8.383275881537786e-06, "loss": 0.845, "step": 4461 }, { "epoch": 0.29, "grad_norm": 1.4914093697237072, "learning_rate": 8.382512592908251e-06, "loss": 0.6856, "step": 4462 }, { "epoch": 0.29, "grad_norm": 1.5894839623648844, "learning_rate": 8.381749158905433e-06, "loss": 0.8318, "step": 4463 }, { "epoch": 0.29, "grad_norm": 1.6153753342980612, "learning_rate": 8.380985579562142e-06, "loss": 0.7667, "step": 4464 }, { "epoch": 0.29, "grad_norm": 1.7070360971730538, "learning_rate": 8.380221854911195e-06, "loss": 0.7418, "step": 4465 }, { "epoch": 0.29, "grad_norm": 1.1299789343162059, "learning_rate": 8.379457984985416e-06, "loss": 0.664, "step": 4466 }, { "epoch": 0.29, "grad_norm": 1.6736701499392794, "learning_rate": 8.378693969817633e-06, "loss": 0.7474, "step": 4467 }, { "epoch": 0.29, "grad_norm": 1.1370174267055382, "learning_rate": 8.377929809440684e-06, "loss": 0.6321, "step": 4468 }, { "epoch": 0.29, "grad_norm": 1.9218339862509657, "learning_rate": 8.37716550388741e-06, "loss": 0.9201, "step": 4469 }, { "epoch": 0.29, "grad_norm": 1.8097267543477071, "learning_rate": 8.376401053190658e-06, "loss": 0.7696, "step": 4470 }, { "epoch": 0.29, "grad_norm": 1.8511854489391466, "learning_rate": 8.375636457383282e-06, "loss": 0.7694, "step": 4471 }, { "epoch": 0.29, "grad_norm": 1.843877528486311, "learning_rate": 8.374871716498147e-06, "loss": 0.9408, "step": 4472 }, { "epoch": 0.29, "grad_norm": 1.8075219221726455, "learning_rate": 8.374106830568117e-06, "loss": 0.7224, "step": 4473 }, { "epoch": 0.29, "grad_norm": 1.02987688181577, "learning_rate": 8.373341799626065e-06, "loss": 0.6424, "step": 4474 }, { "epoch": 0.29, "grad_norm": 1.7903588248885123, "learning_rate": 8.372576623704872e-06, "loss": 0.882, "step": 4475 }, { "epoch": 0.29, "grad_norm": 1.728495159899056, "learning_rate": 8.37181130283742e-06, "loss": 0.7366, "step": 4476 }, { "epoch": 0.29, "grad_norm": 1.6048126447277786, "learning_rate": 8.371045837056603e-06, "loss": 0.8271, "step": 4477 }, { "epoch": 0.29, "grad_norm": 1.444058220084101, "learning_rate": 8.370280226395322e-06, "loss": 0.8784, "step": 4478 }, { "epoch": 0.29, "grad_norm": 1.490092687055353, "learning_rate": 8.369514470886478e-06, "loss": 0.72, "step": 4479 }, { "epoch": 0.29, "grad_norm": 1.5956030093377833, "learning_rate": 8.368748570562982e-06, "loss": 0.878, "step": 4480 }, { "epoch": 0.29, "grad_norm": 1.6228737995862446, "learning_rate": 8.36798252545775e-06, "loss": 0.7187, "step": 4481 }, { "epoch": 0.29, "grad_norm": 1.0747372935087705, "learning_rate": 8.367216335603707e-06, "loss": 0.6796, "step": 4482 }, { "epoch": 0.29, "grad_norm": 1.1448580674607687, "learning_rate": 8.366450001033784e-06, "loss": 0.594, "step": 4483 }, { "epoch": 0.29, "grad_norm": 1.715571862666746, "learning_rate": 8.36568352178091e-06, "loss": 0.7813, "step": 4484 }, { "epoch": 0.29, "grad_norm": 1.6915315248584137, "learning_rate": 8.364916897878033e-06, "loss": 0.7557, "step": 4485 }, { "epoch": 0.29, "grad_norm": 1.7471094762008814, "learning_rate": 8.364150129358098e-06, "loss": 0.843, "step": 4486 }, { "epoch": 0.29, "grad_norm": 1.1407371782700328, "learning_rate": 8.363383216254058e-06, "loss": 0.6088, "step": 4487 }, { "epoch": 0.29, "grad_norm": 1.3045921541294223, "learning_rate": 8.362616158598875e-06, "loss": 0.7265, "step": 4488 }, { "epoch": 0.29, "grad_norm": 1.8039141532961036, "learning_rate": 8.361848956425516e-06, "loss": 0.7967, "step": 4489 }, { "epoch": 0.29, "grad_norm": 1.4783221064905316, "learning_rate": 8.361081609766954e-06, "loss": 0.73, "step": 4490 }, { "epoch": 0.29, "grad_norm": 1.7574546903936206, "learning_rate": 8.360314118656165e-06, "loss": 0.7207, "step": 4491 }, { "epoch": 0.29, "grad_norm": 1.5553183709519636, "learning_rate": 8.359546483126137e-06, "loss": 0.6748, "step": 4492 }, { "epoch": 0.29, "grad_norm": 1.6050514345862659, "learning_rate": 8.358778703209862e-06, "loss": 0.8219, "step": 4493 }, { "epoch": 0.29, "grad_norm": 1.1791352678229372, "learning_rate": 8.358010778940336e-06, "loss": 0.7283, "step": 4494 }, { "epoch": 0.29, "grad_norm": 1.7884779545553735, "learning_rate": 8.357242710350561e-06, "loss": 0.8079, "step": 4495 }, { "epoch": 0.29, "grad_norm": 1.8841974790877007, "learning_rate": 8.35647449747355e-06, "loss": 0.6928, "step": 4496 }, { "epoch": 0.29, "grad_norm": 1.788416369826595, "learning_rate": 8.355706140342317e-06, "loss": 0.7654, "step": 4497 }, { "epoch": 0.29, "grad_norm": 1.642550474908607, "learning_rate": 8.354937638989887e-06, "loss": 0.7462, "step": 4498 }, { "epoch": 0.29, "grad_norm": 1.4605853633600292, "learning_rate": 8.354168993449285e-06, "loss": 0.6457, "step": 4499 }, { "epoch": 0.29, "grad_norm": 1.4510420941894386, "learning_rate": 8.35340020375355e-06, "loss": 0.7724, "step": 4500 }, { "epoch": 0.29, "grad_norm": 1.4653075241929134, "learning_rate": 8.352631269935719e-06, "loss": 0.6335, "step": 4501 }, { "epoch": 0.29, "grad_norm": 1.041823804000188, "learning_rate": 8.351862192028842e-06, "loss": 0.6763, "step": 4502 }, { "epoch": 0.29, "grad_norm": 1.6665375645485898, "learning_rate": 8.35109297006597e-06, "loss": 0.8844, "step": 4503 }, { "epoch": 0.29, "grad_norm": 1.5858694720927664, "learning_rate": 8.350323604080166e-06, "loss": 0.7656, "step": 4504 }, { "epoch": 0.29, "grad_norm": 2.133970602868547, "learning_rate": 8.349554094104491e-06, "loss": 0.7553, "step": 4505 }, { "epoch": 0.29, "grad_norm": 1.1979768131924187, "learning_rate": 8.34878444017202e-06, "loss": 0.6542, "step": 4506 }, { "epoch": 0.29, "grad_norm": 1.48316613992092, "learning_rate": 8.348014642315831e-06, "loss": 0.6723, "step": 4507 }, { "epoch": 0.29, "grad_norm": 1.599102622730587, "learning_rate": 8.347244700569008e-06, "loss": 0.8337, "step": 4508 }, { "epoch": 0.29, "grad_norm": 1.7026290193276308, "learning_rate": 8.346474614964642e-06, "loss": 0.6037, "step": 4509 }, { "epoch": 0.29, "grad_norm": 1.3969615111995017, "learning_rate": 8.345704385535826e-06, "loss": 0.8326, "step": 4510 }, { "epoch": 0.29, "grad_norm": 1.7857522296237471, "learning_rate": 8.34493401231567e-06, "loss": 0.8123, "step": 4511 }, { "epoch": 0.29, "grad_norm": 1.5376513502067737, "learning_rate": 8.344163495337276e-06, "loss": 0.718, "step": 4512 }, { "epoch": 0.29, "grad_norm": 1.8898877004221486, "learning_rate": 8.34339283463376e-06, "loss": 0.8085, "step": 4513 }, { "epoch": 0.29, "grad_norm": 1.589731447639949, "learning_rate": 8.34262203023825e-06, "loss": 0.8306, "step": 4514 }, { "epoch": 0.29, "grad_norm": 1.1442891073980526, "learning_rate": 8.341851082183868e-06, "loss": 0.6824, "step": 4515 }, { "epoch": 0.29, "grad_norm": 1.0817985624080066, "learning_rate": 8.341079990503747e-06, "loss": 0.5887, "step": 4516 }, { "epoch": 0.29, "grad_norm": 1.8000576422690377, "learning_rate": 8.340308755231027e-06, "loss": 0.7878, "step": 4517 }, { "epoch": 0.29, "grad_norm": 1.0488898843796055, "learning_rate": 8.339537376398858e-06, "loss": 0.6868, "step": 4518 }, { "epoch": 0.29, "grad_norm": 1.6140848464038342, "learning_rate": 8.338765854040391e-06, "loss": 0.7819, "step": 4519 }, { "epoch": 0.29, "grad_norm": 2.0093177642591606, "learning_rate": 8.337994188188783e-06, "loss": 0.8194, "step": 4520 }, { "epoch": 0.29, "grad_norm": 1.4869672762957233, "learning_rate": 8.337222378877196e-06, "loss": 0.78, "step": 4521 }, { "epoch": 0.29, "grad_norm": 1.65333820106473, "learning_rate": 8.336450426138807e-06, "loss": 0.8919, "step": 4522 }, { "epoch": 0.29, "grad_norm": 1.4759812682419835, "learning_rate": 8.33567833000679e-06, "loss": 0.725, "step": 4523 }, { "epoch": 0.29, "grad_norm": 1.6135927949850537, "learning_rate": 8.334906090514324e-06, "loss": 0.8784, "step": 4524 }, { "epoch": 0.29, "grad_norm": 1.667619759577265, "learning_rate": 8.334133707694603e-06, "loss": 0.7681, "step": 4525 }, { "epoch": 0.29, "grad_norm": 1.5128027600889382, "learning_rate": 8.333361181580822e-06, "loss": 0.8451, "step": 4526 }, { "epoch": 0.29, "grad_norm": 2.393770217886908, "learning_rate": 8.33258851220618e-06, "loss": 0.7116, "step": 4527 }, { "epoch": 0.29, "grad_norm": 1.6112454150981312, "learning_rate": 8.33181569960389e-06, "loss": 0.8302, "step": 4528 }, { "epoch": 0.29, "grad_norm": 1.813779697172448, "learning_rate": 8.33104274380716e-06, "loss": 0.7092, "step": 4529 }, { "epoch": 0.29, "grad_norm": 1.528097087027875, "learning_rate": 8.330269644849214e-06, "loss": 0.6863, "step": 4530 }, { "epoch": 0.29, "grad_norm": 1.4619698325539356, "learning_rate": 8.329496402763275e-06, "loss": 0.7935, "step": 4531 }, { "epoch": 0.29, "grad_norm": 1.7688669209235937, "learning_rate": 8.328723017582576e-06, "loss": 0.6953, "step": 4532 }, { "epoch": 0.29, "grad_norm": 2.1186056150830788, "learning_rate": 8.327949489340359e-06, "loss": 0.8869, "step": 4533 }, { "epoch": 0.29, "grad_norm": 1.057370089587332, "learning_rate": 8.327175818069864e-06, "loss": 0.6931, "step": 4534 }, { "epoch": 0.29, "grad_norm": 1.5298332957022334, "learning_rate": 8.326402003804344e-06, "loss": 0.7936, "step": 4535 }, { "epoch": 0.29, "grad_norm": 1.009658255348121, "learning_rate": 8.325628046577055e-06, "loss": 0.6036, "step": 4536 }, { "epoch": 0.29, "grad_norm": 1.1497591766850828, "learning_rate": 8.324853946421261e-06, "loss": 0.6791, "step": 4537 }, { "epoch": 0.29, "grad_norm": 1.829475732691563, "learning_rate": 8.324079703370232e-06, "loss": 0.7785, "step": 4538 }, { "epoch": 0.29, "grad_norm": 1.4679774598094013, "learning_rate": 8.323305317457241e-06, "loss": 0.7777, "step": 4539 }, { "epoch": 0.29, "grad_norm": 1.7677973940496832, "learning_rate": 8.32253078871557e-06, "loss": 0.8293, "step": 4540 }, { "epoch": 0.29, "grad_norm": 1.637849491355878, "learning_rate": 8.32175611717851e-06, "loss": 0.7557, "step": 4541 }, { "epoch": 0.29, "grad_norm": 1.582610243237984, "learning_rate": 8.32098130287935e-06, "loss": 0.6854, "step": 4542 }, { "epoch": 0.29, "grad_norm": 1.6066323322732852, "learning_rate": 8.320206345851393e-06, "loss": 0.7729, "step": 4543 }, { "epoch": 0.29, "grad_norm": 1.0820030366627897, "learning_rate": 8.319431246127942e-06, "loss": 0.6222, "step": 4544 }, { "epoch": 0.29, "grad_norm": 1.2374255125628204, "learning_rate": 8.318656003742314e-06, "loss": 0.6241, "step": 4545 }, { "epoch": 0.29, "grad_norm": 1.874645566105042, "learning_rate": 8.317880618727821e-06, "loss": 0.8837, "step": 4546 }, { "epoch": 0.29, "grad_norm": 1.927360605329629, "learning_rate": 8.317105091117795e-06, "loss": 0.7922, "step": 4547 }, { "epoch": 0.29, "grad_norm": 1.6033973517298348, "learning_rate": 8.316329420945559e-06, "loss": 0.7274, "step": 4548 }, { "epoch": 0.29, "grad_norm": 1.5543477139703867, "learning_rate": 8.315553608244453e-06, "loss": 0.7484, "step": 4549 }, { "epoch": 0.29, "grad_norm": 1.147856233105325, "learning_rate": 8.314777653047822e-06, "loss": 0.6651, "step": 4550 }, { "epoch": 0.29, "grad_norm": 1.7186737392657103, "learning_rate": 8.314001555389014e-06, "loss": 0.9586, "step": 4551 }, { "epoch": 0.29, "grad_norm": 0.9958246999995133, "learning_rate": 8.31322531530138e-06, "loss": 0.5363, "step": 4552 }, { "epoch": 0.29, "grad_norm": 1.4463092967128741, "learning_rate": 8.312448932818284e-06, "loss": 0.672, "step": 4553 }, { "epoch": 0.29, "grad_norm": 1.6525951357526925, "learning_rate": 8.311672407973093e-06, "loss": 0.7959, "step": 4554 }, { "epoch": 0.29, "grad_norm": 1.7093573471569217, "learning_rate": 8.310895740799181e-06, "loss": 1.1197, "step": 4555 }, { "epoch": 0.29, "grad_norm": 1.6903407371493497, "learning_rate": 8.310118931329928e-06, "loss": 0.6761, "step": 4556 }, { "epoch": 0.29, "grad_norm": 2.1563462191427405, "learning_rate": 8.30934197959872e-06, "loss": 0.7958, "step": 4557 }, { "epoch": 0.29, "grad_norm": 1.7568520622500203, "learning_rate": 8.308564885638946e-06, "loss": 0.9157, "step": 4558 }, { "epoch": 0.29, "grad_norm": 1.7888851529051681, "learning_rate": 8.307787649484005e-06, "loss": 0.7704, "step": 4559 }, { "epoch": 0.29, "grad_norm": 1.718913610851135, "learning_rate": 8.307010271167302e-06, "loss": 0.821, "step": 4560 }, { "epoch": 0.29, "grad_norm": 6.859526751860796, "learning_rate": 8.306232750722248e-06, "loss": 0.8049, "step": 4561 }, { "epoch": 0.29, "grad_norm": 1.7761394548056437, "learning_rate": 8.305455088182256e-06, "loss": 0.8346, "step": 4562 }, { "epoch": 0.29, "grad_norm": 1.2894159571927575, "learning_rate": 8.30467728358075e-06, "loss": 0.6211, "step": 4563 }, { "epoch": 0.29, "grad_norm": 1.7891458872346868, "learning_rate": 8.303899336951157e-06, "loss": 0.7502, "step": 4564 }, { "epoch": 0.29, "grad_norm": 1.9172215575370146, "learning_rate": 8.303121248326917e-06, "loss": 0.7395, "step": 4565 }, { "epoch": 0.29, "grad_norm": 1.6202676082134755, "learning_rate": 8.302343017741464e-06, "loss": 0.8019, "step": 4566 }, { "epoch": 0.29, "grad_norm": 2.0127196790850217, "learning_rate": 8.301564645228249e-06, "loss": 0.8056, "step": 4567 }, { "epoch": 0.29, "grad_norm": 1.7087629548639556, "learning_rate": 8.30078613082072e-06, "loss": 0.6803, "step": 4568 }, { "epoch": 0.29, "grad_norm": 1.806400730057532, "learning_rate": 8.300007474552343e-06, "loss": 0.7498, "step": 4569 }, { "epoch": 0.29, "grad_norm": 1.9006810878245302, "learning_rate": 8.299228676456575e-06, "loss": 0.7912, "step": 4570 }, { "epoch": 0.29, "grad_norm": 1.991173018158861, "learning_rate": 8.298449736566894e-06, "loss": 0.7419, "step": 4571 }, { "epoch": 0.29, "grad_norm": 1.5455453855567478, "learning_rate": 8.297670654916772e-06, "loss": 0.7193, "step": 4572 }, { "epoch": 0.29, "grad_norm": 1.0391782760732848, "learning_rate": 8.296891431539696e-06, "loss": 0.6793, "step": 4573 }, { "epoch": 0.29, "grad_norm": 1.1490833900875994, "learning_rate": 8.296112066469153e-06, "loss": 0.6206, "step": 4574 }, { "epoch": 0.29, "grad_norm": 2.084018433962913, "learning_rate": 8.29533255973864e-06, "loss": 0.7841, "step": 4575 }, { "epoch": 0.29, "grad_norm": 1.6834546830143002, "learning_rate": 8.29455291138166e-06, "loss": 0.7922, "step": 4576 }, { "epoch": 0.29, "grad_norm": 3.597665138111601, "learning_rate": 8.293773121431717e-06, "loss": 0.7355, "step": 4577 }, { "epoch": 0.29, "grad_norm": 1.564192375166879, "learning_rate": 8.292993189922326e-06, "loss": 0.7717, "step": 4578 }, { "epoch": 0.29, "grad_norm": 1.6778545684913049, "learning_rate": 8.292213116887008e-06, "loss": 0.8203, "step": 4579 }, { "epoch": 0.29, "grad_norm": 1.7603139786971573, "learning_rate": 8.291432902359289e-06, "loss": 0.7268, "step": 4580 }, { "epoch": 0.29, "grad_norm": 1.5955003963957073, "learning_rate": 8.290652546372698e-06, "loss": 0.737, "step": 4581 }, { "epoch": 0.29, "grad_norm": 1.492641702195323, "learning_rate": 8.289872048960776e-06, "loss": 0.6165, "step": 4582 }, { "epoch": 0.29, "grad_norm": 1.4674881099783268, "learning_rate": 8.289091410157067e-06, "loss": 0.7174, "step": 4583 }, { "epoch": 0.29, "grad_norm": 1.2894611460163263, "learning_rate": 8.288310629995119e-06, "loss": 0.6619, "step": 4584 }, { "epoch": 0.29, "grad_norm": 1.1102222409339066, "learning_rate": 8.28752970850849e-06, "loss": 0.6565, "step": 4585 }, { "epoch": 0.29, "grad_norm": 1.702210642157778, "learning_rate": 8.286748645730744e-06, "loss": 0.9572, "step": 4586 }, { "epoch": 0.29, "grad_norm": 1.7119904097600005, "learning_rate": 8.285967441695445e-06, "loss": 0.7128, "step": 4587 }, { "epoch": 0.29, "grad_norm": 1.4821840433205573, "learning_rate": 8.285186096436173e-06, "loss": 0.8188, "step": 4588 }, { "epoch": 0.29, "grad_norm": 1.7324581173369664, "learning_rate": 8.284404609986505e-06, "loss": 0.7455, "step": 4589 }, { "epoch": 0.29, "grad_norm": 1.7541065926311146, "learning_rate": 8.283622982380027e-06, "loss": 0.8434, "step": 4590 }, { "epoch": 0.29, "grad_norm": 1.5510236963316868, "learning_rate": 8.282841213650334e-06, "loss": 0.722, "step": 4591 }, { "epoch": 0.29, "grad_norm": 1.6885656084803362, "learning_rate": 8.282059303831022e-06, "loss": 0.7558, "step": 4592 }, { "epoch": 0.29, "grad_norm": 4.278182803096758, "learning_rate": 8.281277252955699e-06, "loss": 0.7135, "step": 4593 }, { "epoch": 0.29, "grad_norm": 2.2181689794062316, "learning_rate": 8.280495061057976e-06, "loss": 0.8971, "step": 4594 }, { "epoch": 0.29, "grad_norm": 1.5248265299993269, "learning_rate": 8.279712728171468e-06, "loss": 0.7411, "step": 4595 }, { "epoch": 0.29, "grad_norm": 1.9640020104893547, "learning_rate": 8.278930254329798e-06, "loss": 0.7536, "step": 4596 }, { "epoch": 0.29, "grad_norm": 1.6978715621766811, "learning_rate": 8.278147639566596e-06, "loss": 0.8781, "step": 4597 }, { "epoch": 0.29, "grad_norm": 2.019202529316623, "learning_rate": 8.277364883915496e-06, "loss": 0.7986, "step": 4598 }, { "epoch": 0.29, "grad_norm": 1.6903761666201556, "learning_rate": 8.27658198741014e-06, "loss": 0.7608, "step": 4599 }, { "epoch": 0.29, "grad_norm": 1.6317327875303234, "learning_rate": 8.275798950084176e-06, "loss": 0.7399, "step": 4600 }, { "epoch": 0.29, "grad_norm": 1.7115915426823771, "learning_rate": 8.275015771971255e-06, "loss": 0.9004, "step": 4601 }, { "epoch": 0.29, "grad_norm": 1.5603797944838458, "learning_rate": 8.27423245310504e-06, "loss": 0.6907, "step": 4602 }, { "epoch": 0.29, "grad_norm": 1.6990640343482066, "learning_rate": 8.273448993519194e-06, "loss": 0.6378, "step": 4603 }, { "epoch": 0.29, "grad_norm": 1.761548420553797, "learning_rate": 8.272665393247388e-06, "loss": 0.7993, "step": 4604 }, { "epoch": 0.29, "grad_norm": 1.5926880460641233, "learning_rate": 8.2718816523233e-06, "loss": 0.8227, "step": 4605 }, { "epoch": 0.29, "grad_norm": 1.6330532089152028, "learning_rate": 8.271097770780613e-06, "loss": 0.8349, "step": 4606 }, { "epoch": 0.29, "grad_norm": 2.1746368258031703, "learning_rate": 8.270313748653018e-06, "loss": 0.8243, "step": 4607 }, { "epoch": 0.29, "grad_norm": 2.4812641459150195, "learning_rate": 8.269529585974212e-06, "loss": 0.8066, "step": 4608 }, { "epoch": 0.3, "grad_norm": 1.4413465270262573, "learning_rate": 8.268745282777893e-06, "loss": 0.7023, "step": 4609 }, { "epoch": 0.3, "grad_norm": 1.4868678165020863, "learning_rate": 8.267960839097768e-06, "loss": 0.7088, "step": 4610 }, { "epoch": 0.3, "grad_norm": 1.4201550287944285, "learning_rate": 8.267176254967556e-06, "loss": 0.716, "step": 4611 }, { "epoch": 0.3, "grad_norm": 1.416942225757298, "learning_rate": 8.266391530420974e-06, "loss": 0.6856, "step": 4612 }, { "epoch": 0.3, "grad_norm": 1.709849253295992, "learning_rate": 8.265606665491746e-06, "loss": 0.7437, "step": 4613 }, { "epoch": 0.3, "grad_norm": 1.8681401759328273, "learning_rate": 8.264821660213607e-06, "loss": 0.6852, "step": 4614 }, { "epoch": 0.3, "grad_norm": 1.7167832436606223, "learning_rate": 8.264036514620292e-06, "loss": 0.7938, "step": 4615 }, { "epoch": 0.3, "grad_norm": 1.6101891070607228, "learning_rate": 8.263251228745547e-06, "loss": 0.7926, "step": 4616 }, { "epoch": 0.3, "grad_norm": 1.61897937983757, "learning_rate": 8.262465802623122e-06, "loss": 0.7872, "step": 4617 }, { "epoch": 0.3, "grad_norm": 1.0251202560523518, "learning_rate": 8.26168023628677e-06, "loss": 0.656, "step": 4618 }, { "epoch": 0.3, "grad_norm": 1.6620040014998316, "learning_rate": 8.260894529770258e-06, "loss": 0.6811, "step": 4619 }, { "epoch": 0.3, "grad_norm": 1.80793338392204, "learning_rate": 8.260108683107348e-06, "loss": 0.8603, "step": 4620 }, { "epoch": 0.3, "grad_norm": 1.6076594875456853, "learning_rate": 8.25932269633182e-06, "loss": 0.7566, "step": 4621 }, { "epoch": 0.3, "grad_norm": 1.6972888948274725, "learning_rate": 8.258536569477451e-06, "loss": 0.9217, "step": 4622 }, { "epoch": 0.3, "grad_norm": 1.051013756428394, "learning_rate": 8.257750302578027e-06, "loss": 0.5895, "step": 4623 }, { "epoch": 0.3, "grad_norm": 1.659835283129725, "learning_rate": 8.256963895667339e-06, "loss": 0.7391, "step": 4624 }, { "epoch": 0.3, "grad_norm": 1.9575298774917769, "learning_rate": 8.256177348779188e-06, "loss": 0.8645, "step": 4625 }, { "epoch": 0.3, "grad_norm": 1.5688016571656656, "learning_rate": 8.255390661947376e-06, "loss": 0.7457, "step": 4626 }, { "epoch": 0.3, "grad_norm": 1.5772366577411914, "learning_rate": 8.254603835205715e-06, "loss": 0.7588, "step": 4627 }, { "epoch": 0.3, "grad_norm": 1.5086563935044024, "learning_rate": 8.253816868588019e-06, "loss": 0.7418, "step": 4628 }, { "epoch": 0.3, "grad_norm": 1.9265394903308022, "learning_rate": 8.253029762128111e-06, "loss": 0.8576, "step": 4629 }, { "epoch": 0.3, "grad_norm": 1.590178808453041, "learning_rate": 8.252242515859821e-06, "loss": 0.7649, "step": 4630 }, { "epoch": 0.3, "grad_norm": 1.215321736204887, "learning_rate": 8.25145512981698e-06, "loss": 0.7345, "step": 4631 }, { "epoch": 0.3, "grad_norm": 1.8045905785705774, "learning_rate": 8.250667604033432e-06, "loss": 0.8125, "step": 4632 }, { "epoch": 0.3, "grad_norm": 1.5745698963820485, "learning_rate": 8.249879938543017e-06, "loss": 0.7931, "step": 4633 }, { "epoch": 0.3, "grad_norm": 1.6865790835188648, "learning_rate": 8.249092133379593e-06, "loss": 0.8415, "step": 4634 }, { "epoch": 0.3, "grad_norm": 1.96643792701391, "learning_rate": 8.248304188577018e-06, "loss": 0.8169, "step": 4635 }, { "epoch": 0.3, "grad_norm": 1.783246952398437, "learning_rate": 8.247516104169153e-06, "loss": 0.6991, "step": 4636 }, { "epoch": 0.3, "grad_norm": 1.7768090602682978, "learning_rate": 8.24672788018987e-06, "loss": 0.7663, "step": 4637 }, { "epoch": 0.3, "grad_norm": 1.7833765449671017, "learning_rate": 8.245939516673045e-06, "loss": 0.7466, "step": 4638 }, { "epoch": 0.3, "grad_norm": 1.4530006028692204, "learning_rate": 8.245151013652561e-06, "loss": 0.6899, "step": 4639 }, { "epoch": 0.3, "grad_norm": 1.7763103759900258, "learning_rate": 8.244362371162305e-06, "loss": 0.8529, "step": 4640 }, { "epoch": 0.3, "grad_norm": 1.562176139591041, "learning_rate": 8.24357358923617e-06, "loss": 0.7741, "step": 4641 }, { "epoch": 0.3, "grad_norm": 1.859324580654587, "learning_rate": 8.242784667908062e-06, "loss": 0.7962, "step": 4642 }, { "epoch": 0.3, "grad_norm": 1.6346767919204528, "learning_rate": 8.241995607211878e-06, "loss": 0.9037, "step": 4643 }, { "epoch": 0.3, "grad_norm": 1.794592973392829, "learning_rate": 8.24120640718154e-06, "loss": 0.8711, "step": 4644 }, { "epoch": 0.3, "grad_norm": 1.7657341588376385, "learning_rate": 8.240417067850957e-06, "loss": 0.9278, "step": 4645 }, { "epoch": 0.3, "grad_norm": 1.9121022006326236, "learning_rate": 8.23962758925406e-06, "loss": 0.9179, "step": 4646 }, { "epoch": 0.3, "grad_norm": 1.676760631122023, "learning_rate": 8.238837971424776e-06, "loss": 1.0135, "step": 4647 }, { "epoch": 0.3, "grad_norm": 1.1300749353507746, "learning_rate": 8.23804821439704e-06, "loss": 0.6656, "step": 4648 }, { "epoch": 0.3, "grad_norm": 1.7462766027316752, "learning_rate": 8.2372583182048e-06, "loss": 0.8137, "step": 4649 }, { "epoch": 0.3, "grad_norm": 1.558581363349382, "learning_rate": 8.236468282881997e-06, "loss": 0.7535, "step": 4650 }, { "epoch": 0.3, "grad_norm": 1.0481880511703905, "learning_rate": 8.235678108462589e-06, "loss": 0.6814, "step": 4651 }, { "epoch": 0.3, "grad_norm": 2.0682707422235156, "learning_rate": 8.234887794980532e-06, "loss": 0.8182, "step": 4652 }, { "epoch": 0.3, "grad_norm": 2.0084003972302082, "learning_rate": 8.2340973424698e-06, "loss": 0.7843, "step": 4653 }, { "epoch": 0.3, "grad_norm": 1.6065575126804068, "learning_rate": 8.233306750964357e-06, "loss": 0.778, "step": 4654 }, { "epoch": 0.3, "grad_norm": 1.6734948037262656, "learning_rate": 8.232516020498184e-06, "loss": 0.8158, "step": 4655 }, { "epoch": 0.3, "grad_norm": 1.7352334159417444, "learning_rate": 8.231725151105265e-06, "loss": 0.8646, "step": 4656 }, { "epoch": 0.3, "grad_norm": 1.5477460111600474, "learning_rate": 8.230934142819588e-06, "loss": 0.7212, "step": 4657 }, { "epoch": 0.3, "grad_norm": 1.5981372437174453, "learning_rate": 8.230142995675155e-06, "loss": 0.8379, "step": 4658 }, { "epoch": 0.3, "grad_norm": 1.7696136365975745, "learning_rate": 8.229351709705961e-06, "loss": 0.9038, "step": 4659 }, { "epoch": 0.3, "grad_norm": 2.184814379433402, "learning_rate": 8.228560284946015e-06, "loss": 0.742, "step": 4660 }, { "epoch": 0.3, "grad_norm": 2.2868634177317726, "learning_rate": 8.227768721429334e-06, "loss": 0.9138, "step": 4661 }, { "epoch": 0.3, "grad_norm": 1.210059643507474, "learning_rate": 8.226977019189936e-06, "loss": 0.6207, "step": 4662 }, { "epoch": 0.3, "grad_norm": 1.0540231888243001, "learning_rate": 8.226185178261846e-06, "loss": 0.5885, "step": 4663 }, { "epoch": 0.3, "grad_norm": 1.6538267256381332, "learning_rate": 8.225393198679096e-06, "loss": 0.7275, "step": 4664 }, { "epoch": 0.3, "grad_norm": 1.0972818100449138, "learning_rate": 8.224601080475723e-06, "loss": 0.6673, "step": 4665 }, { "epoch": 0.3, "grad_norm": 1.8303035411936177, "learning_rate": 8.223808823685773e-06, "loss": 0.9678, "step": 4666 }, { "epoch": 0.3, "grad_norm": 1.5036247613675908, "learning_rate": 8.223016428343294e-06, "loss": 0.76, "step": 4667 }, { "epoch": 0.3, "grad_norm": 2.1411776852320528, "learning_rate": 8.222223894482339e-06, "loss": 0.7639, "step": 4668 }, { "epoch": 0.3, "grad_norm": 1.570282548479257, "learning_rate": 8.221431222136976e-06, "loss": 0.7442, "step": 4669 }, { "epoch": 0.3, "grad_norm": 1.6224528959235358, "learning_rate": 8.220638411341264e-06, "loss": 0.7087, "step": 4670 }, { "epoch": 0.3, "grad_norm": 1.7038306954345412, "learning_rate": 8.219845462129284e-06, "loss": 0.895, "step": 4671 }, { "epoch": 0.3, "grad_norm": 1.7943718269297395, "learning_rate": 8.219052374535109e-06, "loss": 0.7735, "step": 4672 }, { "epoch": 0.3, "grad_norm": 1.5353691094303947, "learning_rate": 8.218259148592828e-06, "loss": 0.7451, "step": 4673 }, { "epoch": 0.3, "grad_norm": 1.5558813441551944, "learning_rate": 8.21746578433653e-06, "loss": 0.7268, "step": 4674 }, { "epoch": 0.3, "grad_norm": 1.887415792627772, "learning_rate": 8.216672281800317e-06, "loss": 0.7653, "step": 4675 }, { "epoch": 0.3, "grad_norm": 2.1956507138501626, "learning_rate": 8.215878641018287e-06, "loss": 0.7206, "step": 4676 }, { "epoch": 0.3, "grad_norm": 1.8556809362065767, "learning_rate": 8.21508486202455e-06, "loss": 0.8549, "step": 4677 }, { "epoch": 0.3, "grad_norm": 1.9597742052532119, "learning_rate": 8.214290944853221e-06, "loss": 0.9575, "step": 4678 }, { "epoch": 0.3, "grad_norm": 1.4742569915157508, "learning_rate": 8.213496889538422e-06, "loss": 0.6459, "step": 4679 }, { "epoch": 0.3, "grad_norm": 1.5485209682373062, "learning_rate": 8.212702696114279e-06, "loss": 0.7174, "step": 4680 }, { "epoch": 0.3, "grad_norm": 1.7727528336663616, "learning_rate": 8.211908364614924e-06, "loss": 0.7058, "step": 4681 }, { "epoch": 0.3, "grad_norm": 1.05702280576159, "learning_rate": 8.211113895074498e-06, "loss": 0.7213, "step": 4682 }, { "epoch": 0.3, "grad_norm": 1.681230876125267, "learning_rate": 8.210319287527143e-06, "loss": 0.744, "step": 4683 }, { "epoch": 0.3, "grad_norm": 1.9699632002943845, "learning_rate": 8.209524542007012e-06, "loss": 0.9628, "step": 4684 }, { "epoch": 0.3, "grad_norm": 1.6311492271073815, "learning_rate": 8.20872965854826e-06, "loss": 0.7377, "step": 4685 }, { "epoch": 0.3, "grad_norm": 1.1146660708522211, "learning_rate": 8.207934637185049e-06, "loss": 0.6426, "step": 4686 }, { "epoch": 0.3, "grad_norm": 1.747248099338355, "learning_rate": 8.207139477951549e-06, "loss": 0.8849, "step": 4687 }, { "epoch": 0.3, "grad_norm": 2.0044451099230582, "learning_rate": 8.206344180881933e-06, "loss": 0.9247, "step": 4688 }, { "epoch": 0.3, "grad_norm": 1.606697103277168, "learning_rate": 8.205548746010383e-06, "loss": 0.835, "step": 4689 }, { "epoch": 0.3, "grad_norm": 1.589091812435628, "learning_rate": 8.204753173371081e-06, "loss": 0.8675, "step": 4690 }, { "epoch": 0.3, "grad_norm": 1.613202482550026, "learning_rate": 8.203957462998225e-06, "loss": 0.8419, "step": 4691 }, { "epoch": 0.3, "grad_norm": 1.6183806599806534, "learning_rate": 8.203161614926007e-06, "loss": 0.8177, "step": 4692 }, { "epoch": 0.3, "grad_norm": 1.6609736183943382, "learning_rate": 8.202365629188634e-06, "loss": 0.7207, "step": 4693 }, { "epoch": 0.3, "grad_norm": 1.7397440648950866, "learning_rate": 8.201569505820315e-06, "loss": 0.7405, "step": 4694 }, { "epoch": 0.3, "grad_norm": 1.5304063367248162, "learning_rate": 8.200773244855267e-06, "loss": 0.869, "step": 4695 }, { "epoch": 0.3, "grad_norm": 1.2272506453680097, "learning_rate": 8.199976846327711e-06, "loss": 0.6678, "step": 4696 }, { "epoch": 0.3, "grad_norm": 1.5339749839124142, "learning_rate": 8.199180310271873e-06, "loss": 0.7471, "step": 4697 }, { "epoch": 0.3, "grad_norm": 1.7048772687109568, "learning_rate": 8.19838363672199e-06, "loss": 0.8412, "step": 4698 }, { "epoch": 0.3, "grad_norm": 1.9163994512852032, "learning_rate": 8.197586825712295e-06, "loss": 0.6611, "step": 4699 }, { "epoch": 0.3, "grad_norm": 1.3667250302965994, "learning_rate": 8.19678987727704e-06, "loss": 0.641, "step": 4700 }, { "epoch": 0.3, "grad_norm": 1.9045218170832992, "learning_rate": 8.195992791450475e-06, "loss": 1.0755, "step": 4701 }, { "epoch": 0.3, "grad_norm": 1.9892239519859818, "learning_rate": 8.195195568266853e-06, "loss": 0.8018, "step": 4702 }, { "epoch": 0.3, "grad_norm": 1.4943912105053274, "learning_rate": 8.19439820776044e-06, "loss": 0.5791, "step": 4703 }, { "epoch": 0.3, "grad_norm": 1.731655941167014, "learning_rate": 8.193600709965504e-06, "loss": 0.7909, "step": 4704 }, { "epoch": 0.3, "grad_norm": 1.5950313544103787, "learning_rate": 8.19280307491632e-06, "loss": 0.8145, "step": 4705 }, { "epoch": 0.3, "grad_norm": 1.540846598359827, "learning_rate": 8.19200530264717e-06, "loss": 0.6732, "step": 4706 }, { "epoch": 0.3, "grad_norm": 1.464265946572946, "learning_rate": 8.19120739319234e-06, "loss": 0.7665, "step": 4707 }, { "epoch": 0.3, "grad_norm": 1.9140339491666924, "learning_rate": 8.19040934658612e-06, "loss": 0.7611, "step": 4708 }, { "epoch": 0.3, "grad_norm": 1.9034246981885532, "learning_rate": 8.189611162862811e-06, "loss": 0.7812, "step": 4709 }, { "epoch": 0.3, "grad_norm": 1.1438307577300846, "learning_rate": 8.188812842056717e-06, "loss": 0.6789, "step": 4710 }, { "epoch": 0.3, "grad_norm": 1.6756629415778497, "learning_rate": 8.188014384202148e-06, "loss": 0.896, "step": 4711 }, { "epoch": 0.3, "grad_norm": 1.82106717789976, "learning_rate": 8.187215789333418e-06, "loss": 0.7997, "step": 4712 }, { "epoch": 0.3, "grad_norm": 1.151363005632731, "learning_rate": 8.186417057484851e-06, "loss": 0.6251, "step": 4713 }, { "epoch": 0.3, "grad_norm": 1.5896918804532938, "learning_rate": 8.185618188690776e-06, "loss": 0.7682, "step": 4714 }, { "epoch": 0.3, "grad_norm": 1.6691428695014652, "learning_rate": 8.184819182985524e-06, "loss": 0.7099, "step": 4715 }, { "epoch": 0.3, "grad_norm": 1.6073460924917364, "learning_rate": 8.184020040403437e-06, "loss": 0.7503, "step": 4716 }, { "epoch": 0.3, "grad_norm": 1.0482922009333997, "learning_rate": 8.183220760978858e-06, "loss": 0.6115, "step": 4717 }, { "epoch": 0.3, "grad_norm": 1.7009465031095197, "learning_rate": 8.18242134474614e-06, "loss": 0.7771, "step": 4718 }, { "epoch": 0.3, "grad_norm": 1.5651673272614135, "learning_rate": 8.18162179173964e-06, "loss": 0.6851, "step": 4719 }, { "epoch": 0.3, "grad_norm": 1.6831357736886676, "learning_rate": 8.180822101993719e-06, "loss": 0.7978, "step": 4720 }, { "epoch": 0.3, "grad_norm": 1.7427684985701823, "learning_rate": 8.18002227554275e-06, "loss": 0.7032, "step": 4721 }, { "epoch": 0.3, "grad_norm": 1.1419719325619895, "learning_rate": 8.179222312421104e-06, "loss": 0.6453, "step": 4722 }, { "epoch": 0.3, "grad_norm": 1.7466898712154557, "learning_rate": 8.178422212663166e-06, "loss": 0.6952, "step": 4723 }, { "epoch": 0.3, "grad_norm": 1.919874572986698, "learning_rate": 8.177621976303318e-06, "loss": 0.8835, "step": 4724 }, { "epoch": 0.3, "grad_norm": 1.6862023195795046, "learning_rate": 8.176821603375955e-06, "loss": 0.6729, "step": 4725 }, { "epoch": 0.3, "grad_norm": 1.5475646199949942, "learning_rate": 8.176021093915476e-06, "loss": 0.7563, "step": 4726 }, { "epoch": 0.3, "grad_norm": 1.7786632070880208, "learning_rate": 8.175220447956282e-06, "loss": 0.7519, "step": 4727 }, { "epoch": 0.3, "grad_norm": 2.237224590806993, "learning_rate": 8.174419665532787e-06, "loss": 0.7416, "step": 4728 }, { "epoch": 0.3, "grad_norm": 1.5848818438762864, "learning_rate": 8.173618746679406e-06, "loss": 0.8141, "step": 4729 }, { "epoch": 0.3, "grad_norm": 1.9740833571918135, "learning_rate": 8.172817691430556e-06, "loss": 0.7113, "step": 4730 }, { "epoch": 0.3, "grad_norm": 1.9925500579877793, "learning_rate": 8.172016499820672e-06, "loss": 0.7666, "step": 4731 }, { "epoch": 0.3, "grad_norm": 2.075300019397133, "learning_rate": 8.171215171884183e-06, "loss": 0.7705, "step": 4732 }, { "epoch": 0.3, "grad_norm": 1.5381518729088353, "learning_rate": 8.170413707655532e-06, "loss": 0.7181, "step": 4733 }, { "epoch": 0.3, "grad_norm": 1.6611242108303184, "learning_rate": 8.169612107169158e-06, "loss": 0.788, "step": 4734 }, { "epoch": 0.3, "grad_norm": 1.4995760476179492, "learning_rate": 8.168810370459519e-06, "loss": 0.6991, "step": 4735 }, { "epoch": 0.3, "grad_norm": 1.7353670847997993, "learning_rate": 8.168008497561066e-06, "loss": 0.8417, "step": 4736 }, { "epoch": 0.3, "grad_norm": 1.8225115318397433, "learning_rate": 8.167206488508268e-06, "loss": 0.8576, "step": 4737 }, { "epoch": 0.3, "grad_norm": 1.495291142572953, "learning_rate": 8.166404343335587e-06, "loss": 0.6653, "step": 4738 }, { "epoch": 0.3, "grad_norm": 1.6312582405320049, "learning_rate": 8.165602062077502e-06, "loss": 0.7514, "step": 4739 }, { "epoch": 0.3, "grad_norm": 1.5737373750675931, "learning_rate": 8.164799644768494e-06, "loss": 0.7807, "step": 4740 }, { "epoch": 0.3, "grad_norm": 1.4095474101612921, "learning_rate": 8.163997091443046e-06, "loss": 0.7849, "step": 4741 }, { "epoch": 0.3, "grad_norm": 1.642713396980053, "learning_rate": 8.16319440213565e-06, "loss": 0.8403, "step": 4742 }, { "epoch": 0.3, "grad_norm": 1.5742514967219199, "learning_rate": 8.162391576880808e-06, "loss": 0.7312, "step": 4743 }, { "epoch": 0.3, "grad_norm": 1.6904911303535382, "learning_rate": 8.16158861571302e-06, "loss": 0.7676, "step": 4744 }, { "epoch": 0.3, "grad_norm": 1.73988252301804, "learning_rate": 8.160785518666795e-06, "loss": 0.7167, "step": 4745 }, { "epoch": 0.3, "grad_norm": 1.7908164984292525, "learning_rate": 8.159982285776654e-06, "loss": 0.8033, "step": 4746 }, { "epoch": 0.3, "grad_norm": 1.770562500409325, "learning_rate": 8.159178917077112e-06, "loss": 0.934, "step": 4747 }, { "epoch": 0.3, "grad_norm": 1.6979871942759206, "learning_rate": 8.158375412602698e-06, "loss": 0.7927, "step": 4748 }, { "epoch": 0.3, "grad_norm": 1.9092778738994711, "learning_rate": 8.157571772387947e-06, "loss": 0.842, "step": 4749 }, { "epoch": 0.3, "grad_norm": 1.6171512896178812, "learning_rate": 8.156767996467394e-06, "loss": 0.8403, "step": 4750 }, { "epoch": 0.3, "grad_norm": 2.2824668719385, "learning_rate": 8.155964084875587e-06, "loss": 0.9081, "step": 4751 }, { "epoch": 0.3, "grad_norm": 1.1846067481977087, "learning_rate": 8.155160037647076e-06, "loss": 0.6831, "step": 4752 }, { "epoch": 0.3, "grad_norm": 1.7054537420521394, "learning_rate": 8.154355854816416e-06, "loss": 0.7824, "step": 4753 }, { "epoch": 0.3, "grad_norm": 1.5213619924019273, "learning_rate": 8.15355153641817e-06, "loss": 0.886, "step": 4754 }, { "epoch": 0.3, "grad_norm": 1.6521156960978352, "learning_rate": 8.152747082486905e-06, "loss": 0.6749, "step": 4755 }, { "epoch": 0.3, "grad_norm": 1.456819608450185, "learning_rate": 8.151942493057195e-06, "loss": 0.6661, "step": 4756 }, { "epoch": 0.3, "grad_norm": 1.5264683409250654, "learning_rate": 8.15113776816362e-06, "loss": 0.7196, "step": 4757 }, { "epoch": 0.3, "grad_norm": 1.5907964225798867, "learning_rate": 8.150332907840765e-06, "loss": 0.7605, "step": 4758 }, { "epoch": 0.3, "grad_norm": 1.678869996504765, "learning_rate": 8.14952791212322e-06, "loss": 0.8368, "step": 4759 }, { "epoch": 0.3, "grad_norm": 1.527322732446782, "learning_rate": 8.148722781045586e-06, "loss": 0.7732, "step": 4760 }, { "epoch": 0.3, "grad_norm": 1.6933968925201126, "learning_rate": 8.147917514642462e-06, "loss": 0.7869, "step": 4761 }, { "epoch": 0.3, "grad_norm": 1.0790741887843178, "learning_rate": 8.147112112948459e-06, "loss": 0.6688, "step": 4762 }, { "epoch": 0.3, "grad_norm": 1.7220162729499997, "learning_rate": 8.146306575998188e-06, "loss": 0.7171, "step": 4763 }, { "epoch": 0.3, "grad_norm": 1.7505730227015566, "learning_rate": 8.145500903826274e-06, "loss": 0.7452, "step": 4764 }, { "epoch": 0.3, "grad_norm": 1.751676186000328, "learning_rate": 8.14469509646734e-06, "loss": 0.8148, "step": 4765 }, { "epoch": 0.31, "grad_norm": 1.6204767948577041, "learning_rate": 8.143889153956019e-06, "loss": 0.7487, "step": 4766 }, { "epoch": 0.31, "grad_norm": 1.7930789731071672, "learning_rate": 8.143083076326947e-06, "loss": 0.8142, "step": 4767 }, { "epoch": 0.31, "grad_norm": 1.6393813458494948, "learning_rate": 8.14227686361477e-06, "loss": 0.8207, "step": 4768 }, { "epoch": 0.31, "grad_norm": 1.6247096539662784, "learning_rate": 8.141470515854137e-06, "loss": 0.7376, "step": 4769 }, { "epoch": 0.31, "grad_norm": 4.225248847747295, "learning_rate": 8.1406640330797e-06, "loss": 0.8118, "step": 4770 }, { "epoch": 0.31, "grad_norm": 1.7877879700641781, "learning_rate": 8.139857415326125e-06, "loss": 0.8275, "step": 4771 }, { "epoch": 0.31, "grad_norm": 1.5479221823335878, "learning_rate": 8.139050662628074e-06, "loss": 0.7433, "step": 4772 }, { "epoch": 0.31, "grad_norm": 1.8299417307441745, "learning_rate": 8.138243775020222e-06, "loss": 0.7644, "step": 4773 }, { "epoch": 0.31, "grad_norm": 2.1723876273061724, "learning_rate": 8.137436752537248e-06, "loss": 0.922, "step": 4774 }, { "epoch": 0.31, "grad_norm": 1.7193506477978653, "learning_rate": 8.136629595213834e-06, "loss": 0.7179, "step": 4775 }, { "epoch": 0.31, "grad_norm": 1.6581758047956379, "learning_rate": 8.135822303084671e-06, "loss": 0.7645, "step": 4776 }, { "epoch": 0.31, "grad_norm": 1.682614435077846, "learning_rate": 8.135014876184454e-06, "loss": 0.8139, "step": 4777 }, { "epoch": 0.31, "grad_norm": 1.2162755305894613, "learning_rate": 8.134207314547887e-06, "loss": 0.6496, "step": 4778 }, { "epoch": 0.31, "grad_norm": 1.3728165558756191, "learning_rate": 8.133399618209675e-06, "loss": 0.707, "step": 4779 }, { "epoch": 0.31, "grad_norm": 1.64329056686378, "learning_rate": 8.132591787204531e-06, "loss": 0.8679, "step": 4780 }, { "epoch": 0.31, "grad_norm": 1.0582169182009393, "learning_rate": 8.131783821567175e-06, "loss": 0.7764, "step": 4781 }, { "epoch": 0.31, "grad_norm": 2.1098116160754175, "learning_rate": 8.130975721332332e-06, "loss": 0.7786, "step": 4782 }, { "epoch": 0.31, "grad_norm": 1.6922385339011108, "learning_rate": 8.13016748653473e-06, "loss": 0.8412, "step": 4783 }, { "epoch": 0.31, "grad_norm": 1.6554960428447405, "learning_rate": 8.129359117209107e-06, "loss": 0.7698, "step": 4784 }, { "epoch": 0.31, "grad_norm": 1.6533250335680907, "learning_rate": 8.128550613390205e-06, "loss": 0.7342, "step": 4785 }, { "epoch": 0.31, "grad_norm": 1.7639495468260797, "learning_rate": 8.127741975112771e-06, "loss": 0.7045, "step": 4786 }, { "epoch": 0.31, "grad_norm": 1.8462104686511174, "learning_rate": 8.12693320241156e-06, "loss": 0.8423, "step": 4787 }, { "epoch": 0.31, "grad_norm": 1.0979087841366582, "learning_rate": 8.126124295321331e-06, "loss": 0.7009, "step": 4788 }, { "epoch": 0.31, "grad_norm": 1.6547607843526162, "learning_rate": 8.12531525387685e-06, "loss": 0.7379, "step": 4789 }, { "epoch": 0.31, "grad_norm": 1.620594354250753, "learning_rate": 8.124506078112883e-06, "loss": 0.8345, "step": 4790 }, { "epoch": 0.31, "grad_norm": 1.16348036997198, "learning_rate": 8.123696768064212e-06, "loss": 0.7522, "step": 4791 }, { "epoch": 0.31, "grad_norm": 1.6473653808307414, "learning_rate": 8.122887323765617e-06, "loss": 0.7565, "step": 4792 }, { "epoch": 0.31, "grad_norm": 1.7697468631844235, "learning_rate": 8.122077745251888e-06, "loss": 0.806, "step": 4793 }, { "epoch": 0.31, "grad_norm": 1.5262916776202538, "learning_rate": 8.12126803255782e-06, "loss": 0.6257, "step": 4794 }, { "epoch": 0.31, "grad_norm": 1.8388262555530988, "learning_rate": 8.120458185718206e-06, "loss": 0.7302, "step": 4795 }, { "epoch": 0.31, "grad_norm": 1.705038002794443, "learning_rate": 8.119648204767857e-06, "loss": 0.7353, "step": 4796 }, { "epoch": 0.31, "grad_norm": 1.524625263575999, "learning_rate": 8.118838089741585e-06, "loss": 0.8338, "step": 4797 }, { "epoch": 0.31, "grad_norm": 1.8198450661660721, "learning_rate": 8.118027840674205e-06, "loss": 0.9565, "step": 4798 }, { "epoch": 0.31, "grad_norm": 1.9673891288131704, "learning_rate": 8.117217457600541e-06, "loss": 0.8134, "step": 4799 }, { "epoch": 0.31, "grad_norm": 1.530280281675799, "learning_rate": 8.11640694055542e-06, "loss": 0.7457, "step": 4800 }, { "epoch": 0.31, "grad_norm": 1.0687382353268622, "learning_rate": 8.11559628957368e-06, "loss": 0.7043, "step": 4801 }, { "epoch": 0.31, "grad_norm": 2.0629811128666606, "learning_rate": 8.114785504690155e-06, "loss": 0.7253, "step": 4802 }, { "epoch": 0.31, "grad_norm": 1.9274276745758587, "learning_rate": 8.113974585939694e-06, "loss": 0.8931, "step": 4803 }, { "epoch": 0.31, "grad_norm": 1.8008451687841278, "learning_rate": 8.11316353335715e-06, "loss": 0.9006, "step": 4804 }, { "epoch": 0.31, "grad_norm": 1.680778234835611, "learning_rate": 8.112352346977378e-06, "loss": 0.8548, "step": 4805 }, { "epoch": 0.31, "grad_norm": 1.4838544746535816, "learning_rate": 8.111541026835243e-06, "loss": 0.711, "step": 4806 }, { "epoch": 0.31, "grad_norm": 1.6367160346970195, "learning_rate": 8.110729572965613e-06, "loss": 0.8179, "step": 4807 }, { "epoch": 0.31, "grad_norm": 1.7128918632271637, "learning_rate": 8.109917985403362e-06, "loss": 0.6799, "step": 4808 }, { "epoch": 0.31, "grad_norm": 1.5464181389072091, "learning_rate": 8.109106264183369e-06, "loss": 0.7668, "step": 4809 }, { "epoch": 0.31, "grad_norm": 1.6071069109385798, "learning_rate": 8.108294409340525e-06, "loss": 0.759, "step": 4810 }, { "epoch": 0.31, "grad_norm": 1.4163759529492104, "learning_rate": 8.107482420909719e-06, "loss": 0.6867, "step": 4811 }, { "epoch": 0.31, "grad_norm": 1.6919439542463377, "learning_rate": 8.106670298925845e-06, "loss": 0.7101, "step": 4812 }, { "epoch": 0.31, "grad_norm": 1.5896882141035944, "learning_rate": 8.105858043423811e-06, "loss": 0.783, "step": 4813 }, { "epoch": 0.31, "grad_norm": 1.5133963119600493, "learning_rate": 8.105045654438525e-06, "loss": 0.7182, "step": 4814 }, { "epoch": 0.31, "grad_norm": 1.5791613224977947, "learning_rate": 8.104233132004902e-06, "loss": 0.845, "step": 4815 }, { "epoch": 0.31, "grad_norm": 1.392179654113928, "learning_rate": 8.103420476157861e-06, "loss": 0.8782, "step": 4816 }, { "epoch": 0.31, "grad_norm": 1.118469789589783, "learning_rate": 8.10260768693233e-06, "loss": 0.7246, "step": 4817 }, { "epoch": 0.31, "grad_norm": 1.798861252432391, "learning_rate": 8.101794764363238e-06, "loss": 0.819, "step": 4818 }, { "epoch": 0.31, "grad_norm": 1.412758465542849, "learning_rate": 8.100981708485527e-06, "loss": 0.6725, "step": 4819 }, { "epoch": 0.31, "grad_norm": 1.6349584343594619, "learning_rate": 8.100168519334137e-06, "loss": 0.7919, "step": 4820 }, { "epoch": 0.31, "grad_norm": 2.570575557488305, "learning_rate": 8.09935519694402e-06, "loss": 0.7616, "step": 4821 }, { "epoch": 0.31, "grad_norm": 1.6350064655368266, "learning_rate": 8.098541741350126e-06, "loss": 0.7642, "step": 4822 }, { "epoch": 0.31, "grad_norm": 2.031468269342966, "learning_rate": 8.09772815258742e-06, "loss": 1.012, "step": 4823 }, { "epoch": 0.31, "grad_norm": 1.1418033214966832, "learning_rate": 8.096914430690868e-06, "loss": 0.7023, "step": 4824 }, { "epoch": 0.31, "grad_norm": 1.6040727491192914, "learning_rate": 8.096100575695443e-06, "loss": 0.7359, "step": 4825 }, { "epoch": 0.31, "grad_norm": 1.7085831422175568, "learning_rate": 8.09528658763612e-06, "loss": 0.9249, "step": 4826 }, { "epoch": 0.31, "grad_norm": 1.2268694757294771, "learning_rate": 8.094472466547882e-06, "loss": 0.7458, "step": 4827 }, { "epoch": 0.31, "grad_norm": 1.5361919952322698, "learning_rate": 8.09365821246572e-06, "loss": 0.7499, "step": 4828 }, { "epoch": 0.31, "grad_norm": 1.2565995233719354, "learning_rate": 8.09284382542463e-06, "loss": 0.758, "step": 4829 }, { "epoch": 0.31, "grad_norm": 1.668653705222062, "learning_rate": 8.092029305459612e-06, "loss": 0.8735, "step": 4830 }, { "epoch": 0.31, "grad_norm": 1.2076382549043607, "learning_rate": 8.09121465260567e-06, "loss": 0.6822, "step": 4831 }, { "epoch": 0.31, "grad_norm": 1.8545250019769584, "learning_rate": 8.090399866897818e-06, "loss": 0.7902, "step": 4832 }, { "epoch": 0.31, "grad_norm": 1.7515901442189978, "learning_rate": 8.089584948371074e-06, "loss": 0.8078, "step": 4833 }, { "epoch": 0.31, "grad_norm": 1.1436477376289593, "learning_rate": 8.088769897060461e-06, "loss": 0.7401, "step": 4834 }, { "epoch": 0.31, "grad_norm": 1.4581397483480005, "learning_rate": 8.087954713001007e-06, "loss": 0.6961, "step": 4835 }, { "epoch": 0.31, "grad_norm": 0.9364975116552803, "learning_rate": 8.08713939622775e-06, "loss": 0.6191, "step": 4836 }, { "epoch": 0.31, "grad_norm": 1.6403925884316477, "learning_rate": 8.086323946775727e-06, "loss": 0.8937, "step": 4837 }, { "epoch": 0.31, "grad_norm": 1.6796912465837095, "learning_rate": 8.085508364679989e-06, "loss": 0.8054, "step": 4838 }, { "epoch": 0.31, "grad_norm": 1.547779899452086, "learning_rate": 8.084692649975583e-06, "loss": 0.7272, "step": 4839 }, { "epoch": 0.31, "grad_norm": 1.1672626988560089, "learning_rate": 8.083876802697567e-06, "loss": 0.7988, "step": 4840 }, { "epoch": 0.31, "grad_norm": 1.6796258314029082, "learning_rate": 8.083060822881008e-06, "loss": 0.853, "step": 4841 }, { "epoch": 0.31, "grad_norm": 1.6592908130181294, "learning_rate": 8.082244710560973e-06, "loss": 0.7006, "step": 4842 }, { "epoch": 0.31, "grad_norm": 1.8102450520121, "learning_rate": 8.081428465772539e-06, "loss": 0.7749, "step": 4843 }, { "epoch": 0.31, "grad_norm": 1.7494555364916895, "learning_rate": 8.080612088550782e-06, "loss": 0.7976, "step": 4844 }, { "epoch": 0.31, "grad_norm": 1.8724063060801057, "learning_rate": 8.079795578930792e-06, "loss": 0.7573, "step": 4845 }, { "epoch": 0.31, "grad_norm": 1.7397835280824967, "learning_rate": 8.07897893694766e-06, "loss": 0.7009, "step": 4846 }, { "epoch": 0.31, "grad_norm": 1.0272979106756286, "learning_rate": 8.07816216263648e-06, "loss": 0.6556, "step": 4847 }, { "epoch": 0.31, "grad_norm": 1.5132719130381365, "learning_rate": 8.07734525603236e-06, "loss": 0.7076, "step": 4848 }, { "epoch": 0.31, "grad_norm": 1.9927653616828949, "learning_rate": 8.076528217170408e-06, "loss": 0.7408, "step": 4849 }, { "epoch": 0.31, "grad_norm": 1.7725065617345608, "learning_rate": 8.075711046085738e-06, "loss": 0.7996, "step": 4850 }, { "epoch": 0.31, "grad_norm": 1.928441833856673, "learning_rate": 8.07489374281347e-06, "loss": 0.8713, "step": 4851 }, { "epoch": 0.31, "grad_norm": 1.6828225171377873, "learning_rate": 8.07407630738873e-06, "loss": 0.8449, "step": 4852 }, { "epoch": 0.31, "grad_norm": 1.6180116794704655, "learning_rate": 8.07325873984665e-06, "loss": 0.6812, "step": 4853 }, { "epoch": 0.31, "grad_norm": 1.4955770627222662, "learning_rate": 8.072441040222367e-06, "loss": 0.811, "step": 4854 }, { "epoch": 0.31, "grad_norm": 1.53100309015893, "learning_rate": 8.071623208551023e-06, "loss": 0.7091, "step": 4855 }, { "epoch": 0.31, "grad_norm": 1.0902437681613664, "learning_rate": 8.07080524486777e-06, "loss": 0.618, "step": 4856 }, { "epoch": 0.31, "grad_norm": 1.6848718096328998, "learning_rate": 8.069987149207759e-06, "loss": 0.7031, "step": 4857 }, { "epoch": 0.31, "grad_norm": 1.956547314262503, "learning_rate": 8.069168921606151e-06, "loss": 0.832, "step": 4858 }, { "epoch": 0.31, "grad_norm": 1.7256145631932764, "learning_rate": 8.068350562098113e-06, "loss": 0.7759, "step": 4859 }, { "epoch": 0.31, "grad_norm": 1.8917624033096878, "learning_rate": 8.067532070718814e-06, "loss": 0.7628, "step": 4860 }, { "epoch": 0.31, "grad_norm": 1.529298914109627, "learning_rate": 8.066713447503434e-06, "loss": 0.6589, "step": 4861 }, { "epoch": 0.31, "grad_norm": 1.8599284484732668, "learning_rate": 8.065894692487153e-06, "loss": 0.8492, "step": 4862 }, { "epoch": 0.31, "grad_norm": 1.6062227323904514, "learning_rate": 8.065075805705161e-06, "loss": 0.7682, "step": 4863 }, { "epoch": 0.31, "grad_norm": 1.4873551427936424, "learning_rate": 8.064256787192651e-06, "loss": 0.6923, "step": 4864 }, { "epoch": 0.31, "grad_norm": 1.3838023073204555, "learning_rate": 8.063437636984824e-06, "loss": 0.5708, "step": 4865 }, { "epoch": 0.31, "grad_norm": 1.237031949478768, "learning_rate": 8.062618355116883e-06, "loss": 0.7553, "step": 4866 }, { "epoch": 0.31, "grad_norm": 1.4912026240529745, "learning_rate": 8.061798941624041e-06, "loss": 0.8116, "step": 4867 }, { "epoch": 0.31, "grad_norm": 1.8561755883826734, "learning_rate": 8.060979396541516e-06, "loss": 0.8415, "step": 4868 }, { "epoch": 0.31, "grad_norm": 1.9838101403260875, "learning_rate": 8.060159719904526e-06, "loss": 0.8017, "step": 4869 }, { "epoch": 0.31, "grad_norm": 1.6213813139189106, "learning_rate": 8.059339911748303e-06, "loss": 0.7932, "step": 4870 }, { "epoch": 0.31, "grad_norm": 1.5237716991216983, "learning_rate": 8.058519972108078e-06, "loss": 0.7868, "step": 4871 }, { "epoch": 0.31, "grad_norm": 1.5085323546222522, "learning_rate": 8.057699901019093e-06, "loss": 0.8077, "step": 4872 }, { "epoch": 0.31, "grad_norm": 1.6711933498406182, "learning_rate": 8.05687969851659e-06, "loss": 0.7205, "step": 4873 }, { "epoch": 0.31, "grad_norm": 1.8206844177674586, "learning_rate": 8.056059364635822e-06, "loss": 0.7946, "step": 4874 }, { "epoch": 0.31, "grad_norm": 1.5626932091933088, "learning_rate": 8.055238899412046e-06, "loss": 0.7561, "step": 4875 }, { "epoch": 0.31, "grad_norm": 1.8017281055354446, "learning_rate": 8.05441830288052e-06, "loss": 0.7696, "step": 4876 }, { "epoch": 0.31, "grad_norm": 1.9858457200715394, "learning_rate": 8.053597575076513e-06, "loss": 0.8182, "step": 4877 }, { "epoch": 0.31, "grad_norm": 1.445891503591154, "learning_rate": 8.052776716035298e-06, "loss": 0.7321, "step": 4878 }, { "epoch": 0.31, "grad_norm": 5.710709594290961, "learning_rate": 8.051955725792155e-06, "loss": 0.6618, "step": 4879 }, { "epoch": 0.31, "grad_norm": 1.9672818169791169, "learning_rate": 8.05113460438237e-06, "loss": 0.7925, "step": 4880 }, { "epoch": 0.31, "grad_norm": 1.4959759194788498, "learning_rate": 8.050313351841229e-06, "loss": 0.7616, "step": 4881 }, { "epoch": 0.31, "grad_norm": 1.6222845935566117, "learning_rate": 8.049491968204031e-06, "loss": 0.8404, "step": 4882 }, { "epoch": 0.31, "grad_norm": 1.6276205289340053, "learning_rate": 8.048670453506074e-06, "loss": 0.7869, "step": 4883 }, { "epoch": 0.31, "grad_norm": 1.110409580070419, "learning_rate": 8.04784880778267e-06, "loss": 0.6818, "step": 4884 }, { "epoch": 0.31, "grad_norm": 1.2741999018893306, "learning_rate": 8.047027031069126e-06, "loss": 0.7507, "step": 4885 }, { "epoch": 0.31, "grad_norm": 1.8538586953295755, "learning_rate": 8.046205123400764e-06, "loss": 0.9014, "step": 4886 }, { "epoch": 0.31, "grad_norm": 1.8920822353835287, "learning_rate": 8.045383084812907e-06, "loss": 0.7223, "step": 4887 }, { "epoch": 0.31, "grad_norm": 1.8857926229934994, "learning_rate": 8.044560915340884e-06, "loss": 0.775, "step": 4888 }, { "epoch": 0.31, "grad_norm": 1.8868597866207872, "learning_rate": 8.04373861502003e-06, "loss": 0.7293, "step": 4889 }, { "epoch": 0.31, "grad_norm": 1.1197607620662728, "learning_rate": 8.042916183885687e-06, "loss": 0.6297, "step": 4890 }, { "epoch": 0.31, "grad_norm": 2.057395849843335, "learning_rate": 8.0420936219732e-06, "loss": 0.7899, "step": 4891 }, { "epoch": 0.31, "grad_norm": 1.5086296071520393, "learning_rate": 8.04127092931792e-06, "loss": 0.7411, "step": 4892 }, { "epoch": 0.31, "grad_norm": 1.8575021583243394, "learning_rate": 8.040448105955209e-06, "loss": 0.9475, "step": 4893 }, { "epoch": 0.31, "grad_norm": 1.7530289790234737, "learning_rate": 8.039625151920424e-06, "loss": 0.8058, "step": 4894 }, { "epoch": 0.31, "grad_norm": 1.6383413284894863, "learning_rate": 8.03880206724894e-06, "loss": 0.8378, "step": 4895 }, { "epoch": 0.31, "grad_norm": 2.1983903498298676, "learning_rate": 8.037978851976126e-06, "loss": 0.7816, "step": 4896 }, { "epoch": 0.31, "grad_norm": 3.50865184952771, "learning_rate": 8.037155506137367e-06, "loss": 0.7206, "step": 4897 }, { "epoch": 0.31, "grad_norm": 1.8964754848314744, "learning_rate": 8.036332029768045e-06, "loss": 0.8518, "step": 4898 }, { "epoch": 0.31, "grad_norm": 1.6342731931269223, "learning_rate": 8.035508422903554e-06, "loss": 0.7252, "step": 4899 }, { "epoch": 0.31, "grad_norm": 1.448112651828251, "learning_rate": 8.034684685579288e-06, "loss": 0.9814, "step": 4900 }, { "epoch": 0.31, "grad_norm": 1.7603852039738197, "learning_rate": 8.033860817830651e-06, "loss": 0.7745, "step": 4901 }, { "epoch": 0.31, "grad_norm": 1.454715827375003, "learning_rate": 8.03303681969305e-06, "loss": 0.6776, "step": 4902 }, { "epoch": 0.31, "grad_norm": 2.083148459048739, "learning_rate": 8.0322126912019e-06, "loss": 0.9434, "step": 4903 }, { "epoch": 0.31, "grad_norm": 1.769702972691584, "learning_rate": 8.031388432392625e-06, "loss": 0.7852, "step": 4904 }, { "epoch": 0.31, "grad_norm": 1.7000961456157466, "learning_rate": 8.03056404330064e-06, "loss": 0.753, "step": 4905 }, { "epoch": 0.31, "grad_norm": 1.0634162255783497, "learning_rate": 8.029739523961381e-06, "loss": 0.6249, "step": 4906 }, { "epoch": 0.31, "grad_norm": 1.644928053734878, "learning_rate": 8.028914874410284e-06, "loss": 0.7043, "step": 4907 }, { "epoch": 0.31, "grad_norm": 1.5992496850034417, "learning_rate": 8.02809009468279e-06, "loss": 0.7349, "step": 4908 }, { "epoch": 0.31, "grad_norm": 1.8149497387883178, "learning_rate": 8.027265184814349e-06, "loss": 0.8016, "step": 4909 }, { "epoch": 0.31, "grad_norm": 1.4943696999323044, "learning_rate": 8.026440144840409e-06, "loss": 0.8076, "step": 4910 }, { "epoch": 0.31, "grad_norm": 1.6824384225524316, "learning_rate": 8.025614974796432e-06, "loss": 0.7108, "step": 4911 }, { "epoch": 0.31, "grad_norm": 1.5032772398472065, "learning_rate": 8.024789674717882e-06, "loss": 0.718, "step": 4912 }, { "epoch": 0.31, "grad_norm": 1.7834805224372436, "learning_rate": 8.023964244640225e-06, "loss": 0.772, "step": 4913 }, { "epoch": 0.31, "grad_norm": 1.711858270181343, "learning_rate": 8.023138684598942e-06, "loss": 0.8259, "step": 4914 }, { "epoch": 0.31, "grad_norm": 1.640731466473471, "learning_rate": 8.022312994629508e-06, "loss": 0.7916, "step": 4915 }, { "epoch": 0.31, "grad_norm": 1.3606284001861229, "learning_rate": 8.021487174767414e-06, "loss": 0.782, "step": 4916 }, { "epoch": 0.31, "grad_norm": 1.3592803313337736, "learning_rate": 8.020661225048149e-06, "loss": 0.7213, "step": 4917 }, { "epoch": 0.31, "grad_norm": 1.4771128881499804, "learning_rate": 8.019835145507212e-06, "loss": 0.7283, "step": 4918 }, { "epoch": 0.31, "grad_norm": 1.749650419671561, "learning_rate": 8.019008936180108e-06, "loss": 0.7247, "step": 4919 }, { "epoch": 0.31, "grad_norm": 1.5724773367402707, "learning_rate": 8.018182597102344e-06, "loss": 0.7439, "step": 4920 }, { "epoch": 0.31, "grad_norm": 1.5653563789378075, "learning_rate": 8.017356128309432e-06, "loss": 0.7392, "step": 4921 }, { "epoch": 0.32, "grad_norm": 1.8656255760529414, "learning_rate": 8.016529529836894e-06, "loss": 0.8595, "step": 4922 }, { "epoch": 0.32, "grad_norm": 1.802773320279468, "learning_rate": 8.015702801720255e-06, "loss": 0.7932, "step": 4923 }, { "epoch": 0.32, "grad_norm": 1.7879662752705274, "learning_rate": 8.014875943995048e-06, "loss": 0.949, "step": 4924 }, { "epoch": 0.32, "grad_norm": 1.7169302452550037, "learning_rate": 8.014048956696807e-06, "loss": 0.7974, "step": 4925 }, { "epoch": 0.32, "grad_norm": 1.907709915227307, "learning_rate": 8.013221839861076e-06, "loss": 0.6949, "step": 4926 }, { "epoch": 0.32, "grad_norm": 1.8662610722276851, "learning_rate": 8.012394593523403e-06, "loss": 0.7733, "step": 4927 }, { "epoch": 0.32, "grad_norm": 1.9804521372307438, "learning_rate": 8.011567217719339e-06, "loss": 0.6497, "step": 4928 }, { "epoch": 0.32, "grad_norm": 1.6159491219794382, "learning_rate": 8.010739712484443e-06, "loss": 0.7338, "step": 4929 }, { "epoch": 0.32, "grad_norm": 2.243692510443187, "learning_rate": 8.009912077854282e-06, "loss": 0.7528, "step": 4930 }, { "epoch": 0.32, "grad_norm": 1.9836252748662282, "learning_rate": 8.009084313864424e-06, "loss": 0.8997, "step": 4931 }, { "epoch": 0.32, "grad_norm": 1.7114161884055235, "learning_rate": 8.008256420550446e-06, "loss": 0.8425, "step": 4932 }, { "epoch": 0.32, "grad_norm": 1.7363017285000897, "learning_rate": 8.007428397947926e-06, "loss": 0.7586, "step": 4933 }, { "epoch": 0.32, "grad_norm": 1.5255627924612916, "learning_rate": 8.006600246092455e-06, "loss": 0.7013, "step": 4934 }, { "epoch": 0.32, "grad_norm": 1.5023340385898154, "learning_rate": 8.005771965019622e-06, "loss": 0.753, "step": 4935 }, { "epoch": 0.32, "grad_norm": 1.3889554498393688, "learning_rate": 8.004943554765028e-06, "loss": 0.6652, "step": 4936 }, { "epoch": 0.32, "grad_norm": 1.7051157186493033, "learning_rate": 8.004115015364273e-06, "loss": 0.7956, "step": 4937 }, { "epoch": 0.32, "grad_norm": 1.7334143064483258, "learning_rate": 8.003286346852967e-06, "loss": 0.7914, "step": 4938 }, { "epoch": 0.32, "grad_norm": 1.713472043237712, "learning_rate": 8.002457549266725e-06, "loss": 0.8826, "step": 4939 }, { "epoch": 0.32, "grad_norm": 1.5837912068687612, "learning_rate": 8.001628622641166e-06, "loss": 0.7084, "step": 4940 }, { "epoch": 0.32, "grad_norm": 1.5970018824011758, "learning_rate": 8.000799567011916e-06, "loss": 0.746, "step": 4941 }, { "epoch": 0.32, "grad_norm": 1.6648099046034912, "learning_rate": 7.999970382414606e-06, "loss": 0.6995, "step": 4942 }, { "epoch": 0.32, "grad_norm": 1.5781985163720025, "learning_rate": 7.999141068884873e-06, "loss": 0.7284, "step": 4943 }, { "epoch": 0.32, "grad_norm": 1.7661921817218011, "learning_rate": 7.998311626458359e-06, "loss": 0.8994, "step": 4944 }, { "epoch": 0.32, "grad_norm": 1.8655290053661526, "learning_rate": 7.997482055170712e-06, "loss": 0.8311, "step": 4945 }, { "epoch": 0.32, "grad_norm": 1.545546916944185, "learning_rate": 7.996652355057585e-06, "loss": 0.8188, "step": 4946 }, { "epoch": 0.32, "grad_norm": 1.5344110204451087, "learning_rate": 7.995822526154636e-06, "loss": 0.6795, "step": 4947 }, { "epoch": 0.32, "grad_norm": 2.213269291590156, "learning_rate": 7.99499256849753e-06, "loss": 0.7436, "step": 4948 }, { "epoch": 0.32, "grad_norm": 1.6803278999365467, "learning_rate": 7.994162482121936e-06, "loss": 0.7559, "step": 4949 }, { "epoch": 0.32, "grad_norm": 1.7943557399725216, "learning_rate": 7.993332267063533e-06, "loss": 0.6904, "step": 4950 }, { "epoch": 0.32, "grad_norm": 1.4759216173978686, "learning_rate": 7.992501923357996e-06, "loss": 0.7466, "step": 4951 }, { "epoch": 0.32, "grad_norm": 1.6852990534124874, "learning_rate": 7.991671451041017e-06, "loss": 0.6942, "step": 4952 }, { "epoch": 0.32, "grad_norm": 1.627915976840741, "learning_rate": 7.990840850148283e-06, "loss": 0.8379, "step": 4953 }, { "epoch": 0.32, "grad_norm": 1.1846384123282474, "learning_rate": 7.990010120715494e-06, "loss": 0.7465, "step": 4954 }, { "epoch": 0.32, "grad_norm": 1.6940958817982286, "learning_rate": 7.989179262778356e-06, "loss": 0.8244, "step": 4955 }, { "epoch": 0.32, "grad_norm": 1.760455642824546, "learning_rate": 7.98834827637257e-06, "loss": 0.7809, "step": 4956 }, { "epoch": 0.32, "grad_norm": 1.6196845117775927, "learning_rate": 7.987517161533858e-06, "loss": 0.7805, "step": 4957 }, { "epoch": 0.32, "grad_norm": 1.9492484172026976, "learning_rate": 7.986685918297934e-06, "loss": 0.8282, "step": 4958 }, { "epoch": 0.32, "grad_norm": 1.9889743469504249, "learning_rate": 7.985854546700526e-06, "loss": 0.7389, "step": 4959 }, { "epoch": 0.32, "grad_norm": 1.4343726454695802, "learning_rate": 7.985023046777363e-06, "loss": 0.8016, "step": 4960 }, { "epoch": 0.32, "grad_norm": 2.936670023252702, "learning_rate": 7.984191418564183e-06, "loss": 0.868, "step": 4961 }, { "epoch": 0.32, "grad_norm": 2.5767616352016223, "learning_rate": 7.983359662096725e-06, "loss": 0.6955, "step": 4962 }, { "epoch": 0.32, "grad_norm": 1.6890534216773145, "learning_rate": 7.982527777410738e-06, "loss": 0.7615, "step": 4963 }, { "epoch": 0.32, "grad_norm": 1.6464816358629761, "learning_rate": 7.981695764541975e-06, "loss": 0.8848, "step": 4964 }, { "epoch": 0.32, "grad_norm": 1.5885950569273972, "learning_rate": 7.980863623526195e-06, "loss": 0.8546, "step": 4965 }, { "epoch": 0.32, "grad_norm": 1.7378614614290884, "learning_rate": 7.980031354399156e-06, "loss": 0.7288, "step": 4966 }, { "epoch": 0.32, "grad_norm": 1.695407027168348, "learning_rate": 7.979198957196634e-06, "loss": 0.7631, "step": 4967 }, { "epoch": 0.32, "grad_norm": 1.58020338308687, "learning_rate": 7.9783664319544e-06, "loss": 0.7237, "step": 4968 }, { "epoch": 0.32, "grad_norm": 1.681598914257006, "learning_rate": 7.977533778708237e-06, "loss": 0.744, "step": 4969 }, { "epoch": 0.32, "grad_norm": 1.4735287176257343, "learning_rate": 7.976700997493926e-06, "loss": 0.819, "step": 4970 }, { "epoch": 0.32, "grad_norm": 1.711121291441388, "learning_rate": 7.975868088347263e-06, "loss": 0.7317, "step": 4971 }, { "epoch": 0.32, "grad_norm": 1.1507066769402676, "learning_rate": 7.975035051304042e-06, "loss": 0.7091, "step": 4972 }, { "epoch": 0.32, "grad_norm": 1.6749277293095268, "learning_rate": 7.974201886400066e-06, "loss": 0.8447, "step": 4973 }, { "epoch": 0.32, "grad_norm": 1.2588373460734683, "learning_rate": 7.973368593671142e-06, "loss": 0.6535, "step": 4974 }, { "epoch": 0.32, "grad_norm": 1.7073752255606045, "learning_rate": 7.972535173153087e-06, "loss": 0.6282, "step": 4975 }, { "epoch": 0.32, "grad_norm": 1.5014569496792418, "learning_rate": 7.971701624881714e-06, "loss": 0.7625, "step": 4976 }, { "epoch": 0.32, "grad_norm": 1.9040743981082284, "learning_rate": 7.970867948892849e-06, "loss": 0.768, "step": 4977 }, { "epoch": 0.32, "grad_norm": 1.5521730819864983, "learning_rate": 7.970034145222323e-06, "loss": 0.6393, "step": 4978 }, { "epoch": 0.32, "grad_norm": 1.048807321600004, "learning_rate": 7.96920021390597e-06, "loss": 0.5617, "step": 4979 }, { "epoch": 0.32, "grad_norm": 1.6207196647975748, "learning_rate": 7.96836615497963e-06, "loss": 0.681, "step": 4980 }, { "epoch": 0.32, "grad_norm": 1.566231044899986, "learning_rate": 7.967531968479152e-06, "loss": 0.8429, "step": 4981 }, { "epoch": 0.32, "grad_norm": 1.6926675940681744, "learning_rate": 7.966697654440384e-06, "loss": 0.7497, "step": 4982 }, { "epoch": 0.32, "grad_norm": 1.5894170982660558, "learning_rate": 7.965863212899185e-06, "loss": 0.8169, "step": 4983 }, { "epoch": 0.32, "grad_norm": 1.5722649397152837, "learning_rate": 7.96502864389142e-06, "loss": 0.715, "step": 4984 }, { "epoch": 0.32, "grad_norm": 1.6331758594335772, "learning_rate": 7.96419394745295e-06, "loss": 0.7304, "step": 4985 }, { "epoch": 0.32, "grad_norm": 1.437443997705726, "learning_rate": 7.963359123619654e-06, "loss": 0.7339, "step": 4986 }, { "epoch": 0.32, "grad_norm": 1.5183560089326016, "learning_rate": 7.962524172427412e-06, "loss": 0.7404, "step": 4987 }, { "epoch": 0.32, "grad_norm": 1.1563431722730821, "learning_rate": 7.961689093912103e-06, "loss": 0.6472, "step": 4988 }, { "epoch": 0.32, "grad_norm": 1.6226874923249188, "learning_rate": 7.960853888109624e-06, "loss": 0.8272, "step": 4989 }, { "epoch": 0.32, "grad_norm": 1.7275409828302353, "learning_rate": 7.960018555055863e-06, "loss": 0.7637, "step": 4990 }, { "epoch": 0.32, "grad_norm": 1.6460849503896737, "learning_rate": 7.959183094786726e-06, "loss": 0.7586, "step": 4991 }, { "epoch": 0.32, "grad_norm": 1.552207941203827, "learning_rate": 7.958347507338117e-06, "loss": 0.6429, "step": 4992 }, { "epoch": 0.32, "grad_norm": 1.1075280711055826, "learning_rate": 7.957511792745948e-06, "loss": 0.6829, "step": 4993 }, { "epoch": 0.32, "grad_norm": 1.7229642209107452, "learning_rate": 7.956675951046138e-06, "loss": 0.7371, "step": 4994 }, { "epoch": 0.32, "grad_norm": 1.6209475351374585, "learning_rate": 7.955839982274608e-06, "loss": 0.7061, "step": 4995 }, { "epoch": 0.32, "grad_norm": 1.9263101756097165, "learning_rate": 7.955003886467287e-06, "loss": 0.7276, "step": 4996 }, { "epoch": 0.32, "grad_norm": 1.0951465213133758, "learning_rate": 7.954167663660108e-06, "loss": 0.6448, "step": 4997 }, { "epoch": 0.32, "grad_norm": 1.6583900207003375, "learning_rate": 7.95333131388901e-06, "loss": 0.7153, "step": 4998 }, { "epoch": 0.32, "grad_norm": 1.0850254762826983, "learning_rate": 7.952494837189937e-06, "loss": 0.6182, "step": 4999 }, { "epoch": 0.32, "grad_norm": 1.2366452347350048, "learning_rate": 7.951658233598843e-06, "loss": 0.7174, "step": 5000 }, { "epoch": 0.32, "grad_norm": 1.6777366626656378, "learning_rate": 7.950821503151678e-06, "loss": 0.7712, "step": 5001 }, { "epoch": 0.32, "grad_norm": 1.9494073712746534, "learning_rate": 7.949984645884406e-06, "loss": 0.7152, "step": 5002 }, { "epoch": 0.32, "grad_norm": 2.645698748378012, "learning_rate": 7.949147661832992e-06, "loss": 0.7909, "step": 5003 }, { "epoch": 0.32, "grad_norm": 1.5728836431268263, "learning_rate": 7.948310551033408e-06, "loss": 0.7264, "step": 5004 }, { "epoch": 0.32, "grad_norm": 1.5849211497270859, "learning_rate": 7.947473313521635e-06, "loss": 0.7968, "step": 5005 }, { "epoch": 0.32, "grad_norm": 1.5653155127920593, "learning_rate": 7.94663594933365e-06, "loss": 0.7992, "step": 5006 }, { "epoch": 0.32, "grad_norm": 1.6107899304736144, "learning_rate": 7.945798458505442e-06, "loss": 0.7724, "step": 5007 }, { "epoch": 0.32, "grad_norm": 1.8052799182145232, "learning_rate": 7.94496084107301e-06, "loss": 0.8142, "step": 5008 }, { "epoch": 0.32, "grad_norm": 1.728076946729865, "learning_rate": 7.944123097072348e-06, "loss": 0.6645, "step": 5009 }, { "epoch": 0.32, "grad_norm": 1.6399051735399746, "learning_rate": 7.94328522653946e-06, "loss": 0.6851, "step": 5010 }, { "epoch": 0.32, "grad_norm": 1.2210147239257334, "learning_rate": 7.942447229510359e-06, "loss": 0.6692, "step": 5011 }, { "epoch": 0.32, "grad_norm": 1.7722751713811602, "learning_rate": 7.941609106021059e-06, "loss": 0.7258, "step": 5012 }, { "epoch": 0.32, "grad_norm": 1.6358166889032588, "learning_rate": 7.94077085610758e-06, "loss": 0.7976, "step": 5013 }, { "epoch": 0.32, "grad_norm": 1.9142599149691946, "learning_rate": 7.93993247980595e-06, "loss": 1.0842, "step": 5014 }, { "epoch": 0.32, "grad_norm": 1.815006657787516, "learning_rate": 7.9390939771522e-06, "loss": 0.7483, "step": 5015 }, { "epoch": 0.32, "grad_norm": 1.8785705058488822, "learning_rate": 7.938255348182366e-06, "loss": 0.7458, "step": 5016 }, { "epoch": 0.32, "grad_norm": 1.0961209943064791, "learning_rate": 7.93741659293249e-06, "loss": 0.6005, "step": 5017 }, { "epoch": 0.32, "grad_norm": 1.2243244354509721, "learning_rate": 7.936577711438624e-06, "loss": 0.8089, "step": 5018 }, { "epoch": 0.32, "grad_norm": 1.8146875656107126, "learning_rate": 7.935738703736817e-06, "loss": 0.7693, "step": 5019 }, { "epoch": 0.32, "grad_norm": 1.6997083968711668, "learning_rate": 7.93489956986313e-06, "loss": 0.7056, "step": 5020 }, { "epoch": 0.32, "grad_norm": 1.6692584767577565, "learning_rate": 7.934060309853627e-06, "loss": 0.8681, "step": 5021 }, { "epoch": 0.32, "grad_norm": 2.6073148066605962, "learning_rate": 7.933220923744375e-06, "loss": 0.7876, "step": 5022 }, { "epoch": 0.32, "grad_norm": 1.771254738571761, "learning_rate": 7.932381411571453e-06, "loss": 0.7684, "step": 5023 }, { "epoch": 0.32, "grad_norm": 1.8289247938456785, "learning_rate": 7.93154177337094e-06, "loss": 1.01, "step": 5024 }, { "epoch": 0.32, "grad_norm": 1.670477363634034, "learning_rate": 7.930702009178919e-06, "loss": 0.7297, "step": 5025 }, { "epoch": 0.32, "grad_norm": 1.499729378812331, "learning_rate": 7.929862119031486e-06, "loss": 0.7672, "step": 5026 }, { "epoch": 0.32, "grad_norm": 1.6111836703108613, "learning_rate": 7.929022102964736e-06, "loss": 0.6642, "step": 5027 }, { "epoch": 0.32, "grad_norm": 1.9242890187115904, "learning_rate": 7.928181961014769e-06, "loss": 0.8155, "step": 5028 }, { "epoch": 0.32, "grad_norm": 1.734253758824674, "learning_rate": 7.927341693217697e-06, "loss": 0.947, "step": 5029 }, { "epoch": 0.32, "grad_norm": 1.6585454478912312, "learning_rate": 7.926501299609627e-06, "loss": 0.7077, "step": 5030 }, { "epoch": 0.32, "grad_norm": 1.7166416302380734, "learning_rate": 7.925660780226683e-06, "loss": 0.7741, "step": 5031 }, { "epoch": 0.32, "grad_norm": 1.727770496886415, "learning_rate": 7.924820135104984e-06, "loss": 0.9165, "step": 5032 }, { "epoch": 0.32, "grad_norm": 1.4126799081069417, "learning_rate": 7.923979364280664e-06, "loss": 0.7609, "step": 5033 }, { "epoch": 0.32, "grad_norm": 1.9282162145087722, "learning_rate": 7.923138467789853e-06, "loss": 0.7789, "step": 5034 }, { "epoch": 0.32, "grad_norm": 1.0263892950312876, "learning_rate": 7.922297445668695e-06, "loss": 0.7644, "step": 5035 }, { "epoch": 0.32, "grad_norm": 1.5580006759171665, "learning_rate": 7.921456297953331e-06, "loss": 0.7738, "step": 5036 }, { "epoch": 0.32, "grad_norm": 1.7307444021736154, "learning_rate": 7.920615024679917e-06, "loss": 0.8103, "step": 5037 }, { "epoch": 0.32, "grad_norm": 2.1161663587209225, "learning_rate": 7.919773625884604e-06, "loss": 0.7698, "step": 5038 }, { "epoch": 0.32, "grad_norm": 1.6832167001693765, "learning_rate": 7.918932101603556e-06, "loss": 0.8264, "step": 5039 }, { "epoch": 0.32, "grad_norm": 1.1497788038364005, "learning_rate": 7.918090451872942e-06, "loss": 0.5546, "step": 5040 }, { "epoch": 0.32, "grad_norm": 1.6940308494849654, "learning_rate": 7.917248676728929e-06, "loss": 0.762, "step": 5041 }, { "epoch": 0.32, "grad_norm": 1.1993774235732266, "learning_rate": 7.9164067762077e-06, "loss": 0.6181, "step": 5042 }, { "epoch": 0.32, "grad_norm": 1.7523164114979117, "learning_rate": 7.915564750345436e-06, "loss": 0.6532, "step": 5043 }, { "epoch": 0.32, "grad_norm": 1.775350730438765, "learning_rate": 7.914722599178324e-06, "loss": 0.7751, "step": 5044 }, { "epoch": 0.32, "grad_norm": 1.7615164467515054, "learning_rate": 7.91388032274256e-06, "loss": 0.7681, "step": 5045 }, { "epoch": 0.32, "grad_norm": 1.698340169002557, "learning_rate": 7.913037921074342e-06, "loss": 0.8445, "step": 5046 }, { "epoch": 0.32, "grad_norm": 2.2936927882192157, "learning_rate": 7.912195394209877e-06, "loss": 0.7063, "step": 5047 }, { "epoch": 0.32, "grad_norm": 1.729041773102053, "learning_rate": 7.911352742185373e-06, "loss": 0.8491, "step": 5048 }, { "epoch": 0.32, "grad_norm": 1.562883486157276, "learning_rate": 7.910509965037045e-06, "loss": 0.631, "step": 5049 }, { "epoch": 0.32, "grad_norm": 1.7702611673538755, "learning_rate": 7.909667062801114e-06, "loss": 0.8469, "step": 5050 }, { "epoch": 0.32, "grad_norm": 1.8497589186228214, "learning_rate": 7.90882403551381e-06, "loss": 0.7954, "step": 5051 }, { "epoch": 0.32, "grad_norm": 1.645041359345051, "learning_rate": 7.907980883211357e-06, "loss": 0.9514, "step": 5052 }, { "epoch": 0.32, "grad_norm": 0.994999586211474, "learning_rate": 7.907137605929998e-06, "loss": 0.5952, "step": 5053 }, { "epoch": 0.32, "grad_norm": 1.9613852780349095, "learning_rate": 7.906294203705973e-06, "loss": 0.8436, "step": 5054 }, { "epoch": 0.32, "grad_norm": 1.56560990486662, "learning_rate": 7.90545067657553e-06, "loss": 0.701, "step": 5055 }, { "epoch": 0.32, "grad_norm": 2.503965131082921, "learning_rate": 7.904607024574923e-06, "loss": 0.8205, "step": 5056 }, { "epoch": 0.32, "grad_norm": 3.9283630875540045, "learning_rate": 7.903763247740407e-06, "loss": 0.7771, "step": 5057 }, { "epoch": 0.32, "grad_norm": 1.14216214478269, "learning_rate": 7.902919346108252e-06, "loss": 0.712, "step": 5058 }, { "epoch": 0.32, "grad_norm": 1.1702819789750103, "learning_rate": 7.902075319714722e-06, "loss": 0.796, "step": 5059 }, { "epoch": 0.32, "grad_norm": 1.704482140096139, "learning_rate": 7.901231168596092e-06, "loss": 0.7747, "step": 5060 }, { "epoch": 0.32, "grad_norm": 1.740285633038742, "learning_rate": 7.900386892788644e-06, "loss": 0.8155, "step": 5061 }, { "epoch": 0.32, "grad_norm": 0.9279367156175246, "learning_rate": 7.89954249232866e-06, "loss": 0.6237, "step": 5062 }, { "epoch": 0.32, "grad_norm": 1.481444175913986, "learning_rate": 7.898697967252433e-06, "loss": 0.6526, "step": 5063 }, { "epoch": 0.32, "grad_norm": 1.7011160276699406, "learning_rate": 7.89785331759626e-06, "loss": 0.736, "step": 5064 }, { "epoch": 0.32, "grad_norm": 1.5979414306990711, "learning_rate": 7.897008543396438e-06, "loss": 0.7889, "step": 5065 }, { "epoch": 0.32, "grad_norm": 1.5338095649611723, "learning_rate": 7.896163644689278e-06, "loss": 0.7364, "step": 5066 }, { "epoch": 0.32, "grad_norm": 1.7257732705513047, "learning_rate": 7.89531862151109e-06, "loss": 0.9004, "step": 5067 }, { "epoch": 0.32, "grad_norm": 1.6762208932025273, "learning_rate": 7.894473473898191e-06, "loss": 0.7747, "step": 5068 }, { "epoch": 0.32, "grad_norm": 1.0860213388349775, "learning_rate": 7.893628201886906e-06, "loss": 0.5004, "step": 5069 }, { "epoch": 0.32, "grad_norm": 1.0260804268638335, "learning_rate": 7.89278280551356e-06, "loss": 0.6172, "step": 5070 }, { "epoch": 0.32, "grad_norm": 1.6119592550466464, "learning_rate": 7.891937284814489e-06, "loss": 0.7064, "step": 5071 }, { "epoch": 0.32, "grad_norm": 1.5986922787096136, "learning_rate": 7.891091639826027e-06, "loss": 0.7291, "step": 5072 }, { "epoch": 0.32, "grad_norm": 1.6562289874366294, "learning_rate": 7.890245870584523e-06, "loss": 0.83, "step": 5073 }, { "epoch": 0.32, "grad_norm": 1.7320144304517873, "learning_rate": 7.889399977126327e-06, "loss": 0.7644, "step": 5074 }, { "epoch": 0.32, "grad_norm": 1.1801545237093105, "learning_rate": 7.888553959487788e-06, "loss": 0.6641, "step": 5075 }, { "epoch": 0.32, "grad_norm": 1.9975776883102043, "learning_rate": 7.887707817705272e-06, "loss": 0.7722, "step": 5076 }, { "epoch": 0.32, "grad_norm": 1.6811260668904882, "learning_rate": 7.886861551815139e-06, "loss": 0.7318, "step": 5077 }, { "epoch": 0.33, "grad_norm": 1.1299706318915428, "learning_rate": 7.886015161853766e-06, "loss": 0.6958, "step": 5078 }, { "epoch": 0.33, "grad_norm": 1.8690825993765936, "learning_rate": 7.885168647857523e-06, "loss": 0.7505, "step": 5079 }, { "epoch": 0.33, "grad_norm": 1.1173666645476308, "learning_rate": 7.884322009862796e-06, "loss": 0.6941, "step": 5080 }, { "epoch": 0.33, "grad_norm": 1.6384418706137949, "learning_rate": 7.88347524790597e-06, "loss": 0.8571, "step": 5081 }, { "epoch": 0.33, "grad_norm": 1.5435628557701153, "learning_rate": 7.882628362023435e-06, "loss": 0.7085, "step": 5082 }, { "epoch": 0.33, "grad_norm": 1.0496328469406215, "learning_rate": 7.881781352251591e-06, "loss": 0.727, "step": 5083 }, { "epoch": 0.33, "grad_norm": 1.7771501607646516, "learning_rate": 7.880934218626841e-06, "loss": 0.7826, "step": 5084 }, { "epoch": 0.33, "grad_norm": 2.0911099818592316, "learning_rate": 7.88008696118559e-06, "loss": 0.7591, "step": 5085 }, { "epoch": 0.33, "grad_norm": 2.0408978598288154, "learning_rate": 7.879239579964256e-06, "loss": 0.701, "step": 5086 }, { "epoch": 0.33, "grad_norm": 1.6370104155648753, "learning_rate": 7.878392074999252e-06, "loss": 0.797, "step": 5087 }, { "epoch": 0.33, "grad_norm": 1.5631488390031318, "learning_rate": 7.877544446327006e-06, "loss": 0.6429, "step": 5088 }, { "epoch": 0.33, "grad_norm": 1.455665086529218, "learning_rate": 7.876696693983947e-06, "loss": 0.67, "step": 5089 }, { "epoch": 0.33, "grad_norm": 2.676821315694578, "learning_rate": 7.875848818006508e-06, "loss": 0.863, "step": 5090 }, { "epoch": 0.33, "grad_norm": 1.7907870229634377, "learning_rate": 7.875000818431132e-06, "loss": 0.8886, "step": 5091 }, { "epoch": 0.33, "grad_norm": 1.5628986305313286, "learning_rate": 7.874152695294258e-06, "loss": 0.654, "step": 5092 }, { "epoch": 0.33, "grad_norm": 1.2197860549636468, "learning_rate": 7.873304448632345e-06, "loss": 0.7072, "step": 5093 }, { "epoch": 0.33, "grad_norm": 1.6890984904981616, "learning_rate": 7.87245607848184e-06, "loss": 0.8025, "step": 5094 }, { "epoch": 0.33, "grad_norm": 1.6247974581333637, "learning_rate": 7.871607584879211e-06, "loss": 0.7809, "step": 5095 }, { "epoch": 0.33, "grad_norm": 0.9647995820479437, "learning_rate": 7.870758967860923e-06, "loss": 0.7298, "step": 5096 }, { "epoch": 0.33, "grad_norm": 1.727138607165472, "learning_rate": 7.869910227463447e-06, "loss": 0.8884, "step": 5097 }, { "epoch": 0.33, "grad_norm": 2.065401854771935, "learning_rate": 7.869061363723256e-06, "loss": 0.6195, "step": 5098 }, { "epoch": 0.33, "grad_norm": 1.6707731399991652, "learning_rate": 7.86821237667684e-06, "loss": 0.7779, "step": 5099 }, { "epoch": 0.33, "grad_norm": 1.6325356540612073, "learning_rate": 7.867363266360682e-06, "loss": 0.763, "step": 5100 }, { "epoch": 0.33, "grad_norm": 1.621017475276033, "learning_rate": 7.866514032811276e-06, "loss": 0.8365, "step": 5101 }, { "epoch": 0.33, "grad_norm": 1.626206246611595, "learning_rate": 7.865664676065122e-06, "loss": 0.8211, "step": 5102 }, { "epoch": 0.33, "grad_norm": 1.0295901210839487, "learning_rate": 7.864815196158718e-06, "loss": 0.7884, "step": 5103 }, { "epoch": 0.33, "grad_norm": 1.3136403876496723, "learning_rate": 7.86396559312858e-06, "loss": 0.6288, "step": 5104 }, { "epoch": 0.33, "grad_norm": 1.744633311360438, "learning_rate": 7.863115867011217e-06, "loss": 0.795, "step": 5105 }, { "epoch": 0.33, "grad_norm": 1.4463805462306791, "learning_rate": 7.86226601784315e-06, "loss": 0.7689, "step": 5106 }, { "epoch": 0.33, "grad_norm": 1.7086892680027421, "learning_rate": 7.861416045660906e-06, "loss": 0.8377, "step": 5107 }, { "epoch": 0.33, "grad_norm": 1.5566742490889374, "learning_rate": 7.860565950501012e-06, "loss": 0.7442, "step": 5108 }, { "epoch": 0.33, "grad_norm": 1.5933879830408484, "learning_rate": 7.859715732400004e-06, "loss": 0.765, "step": 5109 }, { "epoch": 0.33, "grad_norm": 1.920681012531842, "learning_rate": 7.858865391394422e-06, "loss": 0.808, "step": 5110 }, { "epoch": 0.33, "grad_norm": 1.7102169004447063, "learning_rate": 7.858014927520815e-06, "loss": 0.7407, "step": 5111 }, { "epoch": 0.33, "grad_norm": 1.8241938948654552, "learning_rate": 7.85716434081573e-06, "loss": 0.7159, "step": 5112 }, { "epoch": 0.33, "grad_norm": 1.6007870000092155, "learning_rate": 7.856313631315726e-06, "loss": 0.7389, "step": 5113 }, { "epoch": 0.33, "grad_norm": 1.7844378661964766, "learning_rate": 7.855462799057364e-06, "loss": 0.761, "step": 5114 }, { "epoch": 0.33, "grad_norm": 1.7349119555455519, "learning_rate": 7.854611844077213e-06, "loss": 0.9289, "step": 5115 }, { "epoch": 0.33, "grad_norm": 1.628029991333196, "learning_rate": 7.853760766411841e-06, "loss": 0.6996, "step": 5116 }, { "epoch": 0.33, "grad_norm": 1.5706765150167625, "learning_rate": 7.852909566097828e-06, "loss": 0.7608, "step": 5117 }, { "epoch": 0.33, "grad_norm": 1.6186114968069372, "learning_rate": 7.852058243171757e-06, "loss": 0.8056, "step": 5118 }, { "epoch": 0.33, "grad_norm": 1.6001665602839343, "learning_rate": 7.851206797670217e-06, "loss": 0.7541, "step": 5119 }, { "epoch": 0.33, "grad_norm": 1.7287223836448073, "learning_rate": 7.850355229629797e-06, "loss": 0.646, "step": 5120 }, { "epoch": 0.33, "grad_norm": 1.9190335089232498, "learning_rate": 7.849503539087102e-06, "loss": 0.8737, "step": 5121 }, { "epoch": 0.33, "grad_norm": 1.5385179177714774, "learning_rate": 7.84865172607873e-06, "loss": 0.7729, "step": 5122 }, { "epoch": 0.33, "grad_norm": 1.774169674907433, "learning_rate": 7.847799790641295e-06, "loss": 0.8779, "step": 5123 }, { "epoch": 0.33, "grad_norm": 1.7620215479672776, "learning_rate": 7.846947732811408e-06, "loss": 0.6851, "step": 5124 }, { "epoch": 0.33, "grad_norm": 1.4567515721820872, "learning_rate": 7.846095552625688e-06, "loss": 0.7622, "step": 5125 }, { "epoch": 0.33, "grad_norm": 1.626839717640311, "learning_rate": 7.845243250120764e-06, "loss": 0.7025, "step": 5126 }, { "epoch": 0.33, "grad_norm": 1.2329462139664542, "learning_rate": 7.844390825333264e-06, "loss": 0.6828, "step": 5127 }, { "epoch": 0.33, "grad_norm": 1.7290266486851782, "learning_rate": 7.84353827829982e-06, "loss": 0.7788, "step": 5128 }, { "epoch": 0.33, "grad_norm": 1.1861891284771946, "learning_rate": 7.842685609057078e-06, "loss": 0.6736, "step": 5129 }, { "epoch": 0.33, "grad_norm": 2.3927087664007374, "learning_rate": 7.841832817641682e-06, "loss": 0.796, "step": 5130 }, { "epoch": 0.33, "grad_norm": 1.694965715520239, "learning_rate": 7.840979904090282e-06, "loss": 0.784, "step": 5131 }, { "epoch": 0.33, "grad_norm": 1.653472671478647, "learning_rate": 7.840126868439537e-06, "loss": 0.8181, "step": 5132 }, { "epoch": 0.33, "grad_norm": 2.952309998423196, "learning_rate": 7.839273710726107e-06, "loss": 0.7074, "step": 5133 }, { "epoch": 0.33, "grad_norm": 1.7275867333725081, "learning_rate": 7.838420430986658e-06, "loss": 0.7016, "step": 5134 }, { "epoch": 0.33, "grad_norm": 1.4803323389445961, "learning_rate": 7.837567029257864e-06, "loss": 0.811, "step": 5135 }, { "epoch": 0.33, "grad_norm": 2.1541083920013886, "learning_rate": 7.836713505576402e-06, "loss": 0.7917, "step": 5136 }, { "epoch": 0.33, "grad_norm": 1.652834322075595, "learning_rate": 7.835859859978952e-06, "loss": 0.7936, "step": 5137 }, { "epoch": 0.33, "grad_norm": 1.598033983980319, "learning_rate": 7.835006092502207e-06, "loss": 0.7184, "step": 5138 }, { "epoch": 0.33, "grad_norm": 1.778799110417206, "learning_rate": 7.834152203182854e-06, "loss": 0.8022, "step": 5139 }, { "epoch": 0.33, "grad_norm": 1.6648184866306424, "learning_rate": 7.833298192057598e-06, "loss": 0.7035, "step": 5140 }, { "epoch": 0.33, "grad_norm": 1.2270328885069013, "learning_rate": 7.832444059163137e-06, "loss": 0.6624, "step": 5141 }, { "epoch": 0.33, "grad_norm": 1.134088588377376, "learning_rate": 7.831589804536184e-06, "loss": 0.7206, "step": 5142 }, { "epoch": 0.33, "grad_norm": 1.1278610196522405, "learning_rate": 7.830735428213451e-06, "loss": 0.664, "step": 5143 }, { "epoch": 0.33, "grad_norm": 1.8216404457016644, "learning_rate": 7.829880930231657e-06, "loss": 0.831, "step": 5144 }, { "epoch": 0.33, "grad_norm": 1.5510680075548244, "learning_rate": 7.829026310627526e-06, "loss": 0.685, "step": 5145 }, { "epoch": 0.33, "grad_norm": 2.009389211430918, "learning_rate": 7.828171569437789e-06, "loss": 0.7745, "step": 5146 }, { "epoch": 0.33, "grad_norm": 3.3488682549515056, "learning_rate": 7.827316706699182e-06, "loss": 0.6874, "step": 5147 }, { "epoch": 0.33, "grad_norm": 1.5411203551290042, "learning_rate": 7.826461722448445e-06, "loss": 0.7848, "step": 5148 }, { "epoch": 0.33, "grad_norm": 1.607727785636411, "learning_rate": 7.825606616722319e-06, "loss": 0.7292, "step": 5149 }, { "epoch": 0.33, "grad_norm": 1.7734855364232476, "learning_rate": 7.82475138955756e-06, "loss": 0.7292, "step": 5150 }, { "epoch": 0.33, "grad_norm": 1.507562903956145, "learning_rate": 7.823896040990922e-06, "loss": 0.7643, "step": 5151 }, { "epoch": 0.33, "grad_norm": 1.4880570680794112, "learning_rate": 7.823040571059165e-06, "loss": 0.838, "step": 5152 }, { "epoch": 0.33, "grad_norm": 1.1546572693498685, "learning_rate": 7.822184979799057e-06, "loss": 0.5509, "step": 5153 }, { "epoch": 0.33, "grad_norm": 1.5865682850505123, "learning_rate": 7.821329267247369e-06, "loss": 0.7288, "step": 5154 }, { "epoch": 0.33, "grad_norm": 1.6960256946401053, "learning_rate": 7.820473433440876e-06, "loss": 0.764, "step": 5155 }, { "epoch": 0.33, "grad_norm": 1.774502760456078, "learning_rate": 7.819617478416364e-06, "loss": 0.8466, "step": 5156 }, { "epoch": 0.33, "grad_norm": 1.4960861353192618, "learning_rate": 7.818761402210616e-06, "loss": 0.7489, "step": 5157 }, { "epoch": 0.33, "grad_norm": 1.268965013714951, "learning_rate": 7.817905204860426e-06, "loss": 0.6014, "step": 5158 }, { "epoch": 0.33, "grad_norm": 1.5760335850504916, "learning_rate": 7.817048886402591e-06, "loss": 0.8145, "step": 5159 }, { "epoch": 0.33, "grad_norm": 1.5305188568109855, "learning_rate": 7.816192446873917e-06, "loss": 0.6722, "step": 5160 }, { "epoch": 0.33, "grad_norm": 1.5639701238554944, "learning_rate": 7.815335886311205e-06, "loss": 0.7943, "step": 5161 }, { "epoch": 0.33, "grad_norm": 1.8482089106351443, "learning_rate": 7.814479204751276e-06, "loss": 0.7463, "step": 5162 }, { "epoch": 0.33, "grad_norm": 1.9040227251889583, "learning_rate": 7.813622402230943e-06, "loss": 0.7628, "step": 5163 }, { "epoch": 0.33, "grad_norm": 1.4912297305966555, "learning_rate": 7.812765478787034e-06, "loss": 0.7601, "step": 5164 }, { "epoch": 0.33, "grad_norm": 1.501858659591542, "learning_rate": 7.811908434456372e-06, "loss": 0.7644, "step": 5165 }, { "epoch": 0.33, "grad_norm": 1.6358006956470843, "learning_rate": 7.811051269275795e-06, "loss": 0.7798, "step": 5166 }, { "epoch": 0.33, "grad_norm": 3.022432694188951, "learning_rate": 7.810193983282142e-06, "loss": 0.7226, "step": 5167 }, { "epoch": 0.33, "grad_norm": 1.112897933321924, "learning_rate": 7.809336576512257e-06, "loss": 0.6343, "step": 5168 }, { "epoch": 0.33, "grad_norm": 2.782004516731596, "learning_rate": 7.808479049002988e-06, "loss": 0.8185, "step": 5169 }, { "epoch": 0.33, "grad_norm": 1.550025630800704, "learning_rate": 7.80762140079119e-06, "loss": 0.7156, "step": 5170 }, { "epoch": 0.33, "grad_norm": 1.733290361672026, "learning_rate": 7.806763631913726e-06, "loss": 0.8222, "step": 5171 }, { "epoch": 0.33, "grad_norm": 1.7132332656519933, "learning_rate": 7.805905742407458e-06, "loss": 0.6939, "step": 5172 }, { "epoch": 0.33, "grad_norm": 1.7752507765595948, "learning_rate": 7.805047732309257e-06, "loss": 0.779, "step": 5173 }, { "epoch": 0.33, "grad_norm": 1.5046823604126238, "learning_rate": 7.804189601655999e-06, "loss": 0.6701, "step": 5174 }, { "epoch": 0.33, "grad_norm": 1.666313427862753, "learning_rate": 7.803331350484563e-06, "loss": 0.8328, "step": 5175 }, { "epoch": 0.33, "grad_norm": 1.0738942322678704, "learning_rate": 7.802472978831838e-06, "loss": 0.6597, "step": 5176 }, { "epoch": 0.33, "grad_norm": 1.603013256781222, "learning_rate": 7.801614486734712e-06, "loss": 0.7162, "step": 5177 }, { "epoch": 0.33, "grad_norm": 1.7052587628432385, "learning_rate": 7.800755874230084e-06, "loss": 0.7927, "step": 5178 }, { "epoch": 0.33, "grad_norm": 1.112860952136572, "learning_rate": 7.799897141354854e-06, "loss": 0.7093, "step": 5179 }, { "epoch": 0.33, "grad_norm": 1.6613898551003867, "learning_rate": 7.799038288145926e-06, "loss": 0.7086, "step": 5180 }, { "epoch": 0.33, "grad_norm": 1.6464002288084933, "learning_rate": 7.798179314640214e-06, "loss": 0.7839, "step": 5181 }, { "epoch": 0.33, "grad_norm": 1.5926547587986906, "learning_rate": 7.797320220874636e-06, "loss": 0.7246, "step": 5182 }, { "epoch": 0.33, "grad_norm": 1.6737291196976831, "learning_rate": 7.79646100688611e-06, "loss": 0.9035, "step": 5183 }, { "epoch": 0.33, "grad_norm": 1.783794358956057, "learning_rate": 7.795601672711571e-06, "loss": 0.8224, "step": 5184 }, { "epoch": 0.33, "grad_norm": 1.3516525781729731, "learning_rate": 7.794742218387943e-06, "loss": 0.7253, "step": 5185 }, { "epoch": 0.33, "grad_norm": 1.794200011586643, "learning_rate": 7.793882643952169e-06, "loss": 0.8226, "step": 5186 }, { "epoch": 0.33, "grad_norm": 1.6139946787997892, "learning_rate": 7.793022949441189e-06, "loss": 0.8752, "step": 5187 }, { "epoch": 0.33, "grad_norm": 1.6980356068071427, "learning_rate": 7.792163134891952e-06, "loss": 0.7564, "step": 5188 }, { "epoch": 0.33, "grad_norm": 2.164005995488857, "learning_rate": 7.79130320034141e-06, "loss": 0.7609, "step": 5189 }, { "epoch": 0.33, "grad_norm": 1.5533610392009716, "learning_rate": 7.790443145826522e-06, "loss": 0.7607, "step": 5190 }, { "epoch": 0.33, "grad_norm": 1.6444328997917985, "learning_rate": 7.789582971384252e-06, "loss": 0.7998, "step": 5191 }, { "epoch": 0.33, "grad_norm": 1.643123785430927, "learning_rate": 7.788722677051565e-06, "loss": 0.8536, "step": 5192 }, { "epoch": 0.33, "grad_norm": 1.1775740451588717, "learning_rate": 7.787862262865438e-06, "loss": 0.7087, "step": 5193 }, { "epoch": 0.33, "grad_norm": 1.0971161562510092, "learning_rate": 7.78700172886285e-06, "loss": 0.7289, "step": 5194 }, { "epoch": 0.33, "grad_norm": 1.0852254563282218, "learning_rate": 7.786141075080786e-06, "loss": 0.613, "step": 5195 }, { "epoch": 0.33, "grad_norm": 1.0936879018893255, "learning_rate": 7.78528030155623e-06, "loss": 0.6871, "step": 5196 }, { "epoch": 0.33, "grad_norm": 1.5358447743049086, "learning_rate": 7.784419408326181e-06, "loss": 0.785, "step": 5197 }, { "epoch": 0.33, "grad_norm": 1.487007363251542, "learning_rate": 7.783558395427636e-06, "loss": 0.786, "step": 5198 }, { "epoch": 0.33, "grad_norm": 1.5534343383103402, "learning_rate": 7.782697262897601e-06, "loss": 0.7095, "step": 5199 }, { "epoch": 0.33, "grad_norm": 1.55947963651134, "learning_rate": 7.781836010773082e-06, "loss": 0.7522, "step": 5200 }, { "epoch": 0.33, "grad_norm": 1.8317445677882107, "learning_rate": 7.7809746390911e-06, "loss": 0.7247, "step": 5201 }, { "epoch": 0.33, "grad_norm": 1.5342922603254652, "learning_rate": 7.78011314788867e-06, "loss": 0.7206, "step": 5202 }, { "epoch": 0.33, "grad_norm": 1.6198301547361442, "learning_rate": 7.77925153720282e-06, "loss": 0.8193, "step": 5203 }, { "epoch": 0.33, "grad_norm": 1.79873210019172, "learning_rate": 7.778389807070578e-06, "loss": 0.8231, "step": 5204 }, { "epoch": 0.33, "grad_norm": 1.6946946900475304, "learning_rate": 7.777527957528982e-06, "loss": 0.8556, "step": 5205 }, { "epoch": 0.33, "grad_norm": 1.0790176548734978, "learning_rate": 7.776665988615066e-06, "loss": 0.5321, "step": 5206 }, { "epoch": 0.33, "grad_norm": 1.3669952435595445, "learning_rate": 7.775803900365885e-06, "loss": 0.7133, "step": 5207 }, { "epoch": 0.33, "grad_norm": 2.826809711214662, "learning_rate": 7.774941692818484e-06, "loss": 0.7637, "step": 5208 }, { "epoch": 0.33, "grad_norm": 1.1051513134723863, "learning_rate": 7.77407936600992e-06, "loss": 0.6799, "step": 5209 }, { "epoch": 0.33, "grad_norm": 1.776967625950418, "learning_rate": 7.773216919977254e-06, "loss": 0.7406, "step": 5210 }, { "epoch": 0.33, "grad_norm": 1.7220341413497158, "learning_rate": 7.77235435475755e-06, "loss": 0.7392, "step": 5211 }, { "epoch": 0.33, "grad_norm": 1.6391545533744902, "learning_rate": 7.771491670387884e-06, "loss": 0.7265, "step": 5212 }, { "epoch": 0.33, "grad_norm": 1.4486369032037507, "learning_rate": 7.77062886690533e-06, "loss": 0.8602, "step": 5213 }, { "epoch": 0.33, "grad_norm": 1.6259673032502506, "learning_rate": 7.769765944346967e-06, "loss": 0.7742, "step": 5214 }, { "epoch": 0.33, "grad_norm": 1.5133929936258226, "learning_rate": 7.768902902749885e-06, "loss": 0.8018, "step": 5215 }, { "epoch": 0.33, "grad_norm": 1.6439064767438256, "learning_rate": 7.768039742151174e-06, "loss": 0.782, "step": 5216 }, { "epoch": 0.33, "grad_norm": 1.955778852098654, "learning_rate": 7.767176462587932e-06, "loss": 0.817, "step": 5217 }, { "epoch": 0.33, "grad_norm": 1.8522767414806378, "learning_rate": 7.766313064097261e-06, "loss": 0.8358, "step": 5218 }, { "epoch": 0.33, "grad_norm": 3.052985190119143, "learning_rate": 7.765449546716266e-06, "loss": 0.8576, "step": 5219 }, { "epoch": 0.33, "grad_norm": 1.851405872526366, "learning_rate": 7.76458591048206e-06, "loss": 0.778, "step": 5220 }, { "epoch": 0.33, "grad_norm": 1.6010057740952859, "learning_rate": 7.76372215543176e-06, "loss": 0.7149, "step": 5221 }, { "epoch": 0.33, "grad_norm": 2.1789544811208432, "learning_rate": 7.762858281602492e-06, "loss": 0.8019, "step": 5222 }, { "epoch": 0.33, "grad_norm": 1.561150773240336, "learning_rate": 7.76199428903138e-06, "loss": 0.815, "step": 5223 }, { "epoch": 0.33, "grad_norm": 1.7339237673167636, "learning_rate": 7.761130177755556e-06, "loss": 0.7501, "step": 5224 }, { "epoch": 0.33, "grad_norm": 1.58790326264441, "learning_rate": 7.76026594781216e-06, "loss": 0.9281, "step": 5225 }, { "epoch": 0.33, "grad_norm": 1.552540712088056, "learning_rate": 7.759401599238334e-06, "loss": 0.7181, "step": 5226 }, { "epoch": 0.33, "grad_norm": 1.7208878878730611, "learning_rate": 7.758537132071224e-06, "loss": 0.7466, "step": 5227 }, { "epoch": 0.33, "grad_norm": 1.6833209244017364, "learning_rate": 7.757672546347984e-06, "loss": 0.7125, "step": 5228 }, { "epoch": 0.33, "grad_norm": 0.9784062332215656, "learning_rate": 7.756807842105774e-06, "loss": 0.6505, "step": 5229 }, { "epoch": 0.33, "grad_norm": 10.135003557157228, "learning_rate": 7.755943019381756e-06, "loss": 0.9057, "step": 5230 }, { "epoch": 0.33, "grad_norm": 1.2171107143858046, "learning_rate": 7.755078078213099e-06, "loss": 0.7084, "step": 5231 }, { "epoch": 0.33, "grad_norm": 1.4618265688362073, "learning_rate": 7.754213018636973e-06, "loss": 0.6776, "step": 5232 }, { "epoch": 0.33, "grad_norm": 1.2927624432971896, "learning_rate": 7.75334784069056e-06, "loss": 0.725, "step": 5233 }, { "epoch": 0.34, "grad_norm": 1.7108385447958097, "learning_rate": 7.752482544411045e-06, "loss": 0.7304, "step": 5234 }, { "epoch": 0.34, "grad_norm": 1.5544016818103166, "learning_rate": 7.75161712983561e-06, "loss": 0.7816, "step": 5235 }, { "epoch": 0.34, "grad_norm": 2.0967873532292067, "learning_rate": 7.750751597001454e-06, "loss": 0.7058, "step": 5236 }, { "epoch": 0.34, "grad_norm": 1.3529044655452036, "learning_rate": 7.749885945945774e-06, "loss": 0.7505, "step": 5237 }, { "epoch": 0.34, "grad_norm": 1.7195483913972334, "learning_rate": 7.749020176705775e-06, "loss": 0.8128, "step": 5238 }, { "epoch": 0.34, "grad_norm": 1.4045148185622989, "learning_rate": 7.748154289318666e-06, "loss": 0.7862, "step": 5239 }, { "epoch": 0.34, "grad_norm": 1.6486194135416672, "learning_rate": 7.747288283821659e-06, "loss": 0.7375, "step": 5240 }, { "epoch": 0.34, "grad_norm": 1.5656564133266635, "learning_rate": 7.746422160251975e-06, "loss": 0.7713, "step": 5241 }, { "epoch": 0.34, "grad_norm": 1.137040605445626, "learning_rate": 7.745555918646836e-06, "loss": 0.587, "step": 5242 }, { "epoch": 0.34, "grad_norm": 1.7504791243165807, "learning_rate": 7.744689559043475e-06, "loss": 0.8014, "step": 5243 }, { "epoch": 0.34, "grad_norm": 1.0479817381727359, "learning_rate": 7.743823081479125e-06, "loss": 0.6103, "step": 5244 }, { "epoch": 0.34, "grad_norm": 1.8182195868374809, "learning_rate": 7.742956485991022e-06, "loss": 0.7475, "step": 5245 }, { "epoch": 0.34, "grad_norm": 1.1760711093216978, "learning_rate": 7.742089772616415e-06, "loss": 0.658, "step": 5246 }, { "epoch": 0.34, "grad_norm": 2.0046575477607287, "learning_rate": 7.74122294139255e-06, "loss": 0.7041, "step": 5247 }, { "epoch": 0.34, "grad_norm": 1.5278885535795155, "learning_rate": 7.740355992356682e-06, "loss": 0.6766, "step": 5248 }, { "epoch": 0.34, "grad_norm": 4.42356614423994, "learning_rate": 7.739488925546074e-06, "loss": 0.8087, "step": 5249 }, { "epoch": 0.34, "grad_norm": 1.0848657914840651, "learning_rate": 7.73862174099799e-06, "loss": 0.6435, "step": 5250 }, { "epoch": 0.34, "grad_norm": 1.5093629365681893, "learning_rate": 7.737754438749694e-06, "loss": 0.8384, "step": 5251 }, { "epoch": 0.34, "grad_norm": 1.202333615511314, "learning_rate": 7.736887018838467e-06, "loss": 0.705, "step": 5252 }, { "epoch": 0.34, "grad_norm": 1.6543806896967659, "learning_rate": 7.736019481301587e-06, "loss": 0.6906, "step": 5253 }, { "epoch": 0.34, "grad_norm": 1.1228362563469496, "learning_rate": 7.735151826176339e-06, "loss": 0.5898, "step": 5254 }, { "epoch": 0.34, "grad_norm": 1.626210459381413, "learning_rate": 7.734284053500015e-06, "loss": 0.8143, "step": 5255 }, { "epoch": 0.34, "grad_norm": 1.643978973636602, "learning_rate": 7.733416163309904e-06, "loss": 0.8232, "step": 5256 }, { "epoch": 0.34, "grad_norm": 1.7372479484221037, "learning_rate": 7.732548155643311e-06, "loss": 0.7006, "step": 5257 }, { "epoch": 0.34, "grad_norm": 1.5793751357968753, "learning_rate": 7.731680030537541e-06, "loss": 0.7353, "step": 5258 }, { "epoch": 0.34, "grad_norm": 1.7291305561966976, "learning_rate": 7.730811788029903e-06, "loss": 0.8885, "step": 5259 }, { "epoch": 0.34, "grad_norm": 1.5279301400591134, "learning_rate": 7.72994342815771e-06, "loss": 0.6151, "step": 5260 }, { "epoch": 0.34, "grad_norm": 1.1967533780846102, "learning_rate": 7.72907495095829e-06, "loss": 0.7008, "step": 5261 }, { "epoch": 0.34, "grad_norm": 1.5012919350829448, "learning_rate": 7.728206356468962e-06, "loss": 0.7641, "step": 5262 }, { "epoch": 0.34, "grad_norm": 1.6167319338027835, "learning_rate": 7.727337644727055e-06, "loss": 0.7682, "step": 5263 }, { "epoch": 0.34, "grad_norm": 1.2786529911551965, "learning_rate": 7.726468815769907e-06, "loss": 0.6784, "step": 5264 }, { "epoch": 0.34, "grad_norm": 1.6028560779798273, "learning_rate": 7.725599869634861e-06, "loss": 0.7887, "step": 5265 }, { "epoch": 0.34, "grad_norm": 1.6844886461457325, "learning_rate": 7.724730806359258e-06, "loss": 0.8343, "step": 5266 }, { "epoch": 0.34, "grad_norm": 1.7776056952229504, "learning_rate": 7.72386162598045e-06, "loss": 0.7212, "step": 5267 }, { "epoch": 0.34, "grad_norm": 1.6732465017952247, "learning_rate": 7.722992328535795e-06, "loss": 0.812, "step": 5268 }, { "epoch": 0.34, "grad_norm": 1.7631139751165101, "learning_rate": 7.722122914062649e-06, "loss": 0.8213, "step": 5269 }, { "epoch": 0.34, "grad_norm": 1.6078625508773603, "learning_rate": 7.721253382598382e-06, "loss": 0.751, "step": 5270 }, { "epoch": 0.34, "grad_norm": 2.3330510626366867, "learning_rate": 7.720383734180362e-06, "loss": 0.8664, "step": 5271 }, { "epoch": 0.34, "grad_norm": 1.5660788530168177, "learning_rate": 7.719513968845967e-06, "loss": 0.7036, "step": 5272 }, { "epoch": 0.34, "grad_norm": 1.6182097678803855, "learning_rate": 7.718644086632575e-06, "loss": 0.8246, "step": 5273 }, { "epoch": 0.34, "grad_norm": 1.4529698630800911, "learning_rate": 7.717774087577572e-06, "loss": 0.831, "step": 5274 }, { "epoch": 0.34, "grad_norm": 1.6565023461481883, "learning_rate": 7.71690397171835e-06, "loss": 0.7251, "step": 5275 }, { "epoch": 0.34, "grad_norm": 1.6896744246616036, "learning_rate": 7.716033739092304e-06, "loss": 0.9378, "step": 5276 }, { "epoch": 0.34, "grad_norm": 1.7437130200994342, "learning_rate": 7.715163389736834e-06, "loss": 0.8276, "step": 5277 }, { "epoch": 0.34, "grad_norm": 1.5600105409962859, "learning_rate": 7.714292923689347e-06, "loss": 0.7458, "step": 5278 }, { "epoch": 0.34, "grad_norm": 1.0656615483349605, "learning_rate": 7.713422340987258e-06, "loss": 0.6543, "step": 5279 }, { "epoch": 0.34, "grad_norm": 3.0123090276744233, "learning_rate": 7.712551641667975e-06, "loss": 0.7526, "step": 5280 }, { "epoch": 0.34, "grad_norm": 1.6686359908404784, "learning_rate": 7.711680825768922e-06, "loss": 0.6633, "step": 5281 }, { "epoch": 0.34, "grad_norm": 1.528239605308788, "learning_rate": 7.710809893327527e-06, "loss": 0.7436, "step": 5282 }, { "epoch": 0.34, "grad_norm": 1.06847502906923, "learning_rate": 7.709938844381217e-06, "loss": 0.6819, "step": 5283 }, { "epoch": 0.34, "grad_norm": 1.8473972334781747, "learning_rate": 7.70906767896743e-06, "loss": 0.9025, "step": 5284 }, { "epoch": 0.34, "grad_norm": 1.5472993906190429, "learning_rate": 7.708196397123607e-06, "loss": 0.7368, "step": 5285 }, { "epoch": 0.34, "grad_norm": 1.7734523521099952, "learning_rate": 7.707324998887193e-06, "loss": 1.0468, "step": 5286 }, { "epoch": 0.34, "grad_norm": 1.6138419695955883, "learning_rate": 7.706453484295643e-06, "loss": 0.7331, "step": 5287 }, { "epoch": 0.34, "grad_norm": 1.4686216703530857, "learning_rate": 7.705581853386405e-06, "loss": 0.8467, "step": 5288 }, { "epoch": 0.34, "grad_norm": 1.1199625942908964, "learning_rate": 7.704710106196947e-06, "loss": 0.6588, "step": 5289 }, { "epoch": 0.34, "grad_norm": 1.697271209891627, "learning_rate": 7.70383824276473e-06, "loss": 0.8499, "step": 5290 }, { "epoch": 0.34, "grad_norm": 1.554494965424848, "learning_rate": 7.702966263127227e-06, "loss": 0.8208, "step": 5291 }, { "epoch": 0.34, "grad_norm": 1.2542718075327317, "learning_rate": 7.702094167321915e-06, "loss": 0.7404, "step": 5292 }, { "epoch": 0.34, "grad_norm": 1.7490084147973033, "learning_rate": 7.701221955386273e-06, "loss": 0.7639, "step": 5293 }, { "epoch": 0.34, "grad_norm": 1.586559445103627, "learning_rate": 7.700349627357785e-06, "loss": 0.7423, "step": 5294 }, { "epoch": 0.34, "grad_norm": 1.9632007925737136, "learning_rate": 7.699477183273949e-06, "loss": 0.7786, "step": 5295 }, { "epoch": 0.34, "grad_norm": 2.0957153655607264, "learning_rate": 7.698604623172253e-06, "loss": 0.6643, "step": 5296 }, { "epoch": 0.34, "grad_norm": 1.7320570492348086, "learning_rate": 7.6977319470902e-06, "loss": 0.7878, "step": 5297 }, { "epoch": 0.34, "grad_norm": 0.9686256504145452, "learning_rate": 7.696859155065299e-06, "loss": 0.6891, "step": 5298 }, { "epoch": 0.34, "grad_norm": 1.7885662869604433, "learning_rate": 7.695986247135058e-06, "loss": 0.6916, "step": 5299 }, { "epoch": 0.34, "grad_norm": 2.0721303497853887, "learning_rate": 7.695113223336993e-06, "loss": 0.7006, "step": 5300 }, { "epoch": 0.34, "grad_norm": 1.7781679205150263, "learning_rate": 7.694240083708623e-06, "loss": 0.7837, "step": 5301 }, { "epoch": 0.34, "grad_norm": 3.713517361316411, "learning_rate": 7.693366828287478e-06, "loss": 0.8052, "step": 5302 }, { "epoch": 0.34, "grad_norm": 1.7293729854233548, "learning_rate": 7.692493457111085e-06, "loss": 0.7558, "step": 5303 }, { "epoch": 0.34, "grad_norm": 1.7692133554220058, "learning_rate": 7.69161997021698e-06, "loss": 0.7793, "step": 5304 }, { "epoch": 0.34, "grad_norm": 1.6732943542647913, "learning_rate": 7.690746367642707e-06, "loss": 0.7419, "step": 5305 }, { "epoch": 0.34, "grad_norm": 1.8591198975320318, "learning_rate": 7.68987264942581e-06, "loss": 0.7152, "step": 5306 }, { "epoch": 0.34, "grad_norm": 1.5839576849187829, "learning_rate": 7.688998815603837e-06, "loss": 0.8029, "step": 5307 }, { "epoch": 0.34, "grad_norm": 1.1963837732869358, "learning_rate": 7.688124866214345e-06, "loss": 0.7122, "step": 5308 }, { "epoch": 0.34, "grad_norm": 1.6721615035911253, "learning_rate": 7.687250801294893e-06, "loss": 0.804, "step": 5309 }, { "epoch": 0.34, "grad_norm": 1.4358379969645267, "learning_rate": 7.686376620883052e-06, "loss": 0.7439, "step": 5310 }, { "epoch": 0.34, "grad_norm": 1.193458254938523, "learning_rate": 7.685502325016384e-06, "loss": 0.7152, "step": 5311 }, { "epoch": 0.34, "grad_norm": 1.532685128466169, "learning_rate": 7.684627913732473e-06, "loss": 0.6697, "step": 5312 }, { "epoch": 0.34, "grad_norm": 1.6057279315151214, "learning_rate": 7.683753387068894e-06, "loss": 0.7266, "step": 5313 }, { "epoch": 0.34, "grad_norm": 1.3973613734237813, "learning_rate": 7.682878745063235e-06, "loss": 0.5685, "step": 5314 }, { "epoch": 0.34, "grad_norm": 1.7268803154943972, "learning_rate": 7.682003987753082e-06, "loss": 0.8753, "step": 5315 }, { "epoch": 0.34, "grad_norm": 1.755392267374136, "learning_rate": 7.681129115176036e-06, "loss": 0.7658, "step": 5316 }, { "epoch": 0.34, "grad_norm": 1.712141316012681, "learning_rate": 7.680254127369695e-06, "loss": 0.7627, "step": 5317 }, { "epoch": 0.34, "grad_norm": 1.734360132373459, "learning_rate": 7.679379024371663e-06, "loss": 0.7856, "step": 5318 }, { "epoch": 0.34, "grad_norm": 2.4732939555198206, "learning_rate": 7.67850380621955e-06, "loss": 0.7176, "step": 5319 }, { "epoch": 0.34, "grad_norm": 1.9728437670580359, "learning_rate": 7.67762847295097e-06, "loss": 0.7759, "step": 5320 }, { "epoch": 0.34, "grad_norm": 1.794913995145031, "learning_rate": 7.676753024603546e-06, "loss": 0.7461, "step": 5321 }, { "epoch": 0.34, "grad_norm": 1.6155418310393637, "learning_rate": 7.675877461214904e-06, "loss": 0.6626, "step": 5322 }, { "epoch": 0.34, "grad_norm": 2.179885118276009, "learning_rate": 7.675001782822669e-06, "loss": 0.7198, "step": 5323 }, { "epoch": 0.34, "grad_norm": 4.785711963614956, "learning_rate": 7.674125989464478e-06, "loss": 0.7631, "step": 5324 }, { "epoch": 0.34, "grad_norm": 1.666358064228459, "learning_rate": 7.673250081177974e-06, "loss": 0.7669, "step": 5325 }, { "epoch": 0.34, "grad_norm": 1.6217859595824997, "learning_rate": 7.672374058000795e-06, "loss": 0.7214, "step": 5326 }, { "epoch": 0.34, "grad_norm": 2.0207155011333624, "learning_rate": 7.671497919970598e-06, "loss": 0.6974, "step": 5327 }, { "epoch": 0.34, "grad_norm": 1.628253185730718, "learning_rate": 7.670621667125031e-06, "loss": 0.6989, "step": 5328 }, { "epoch": 0.34, "grad_norm": 1.3078156062507877, "learning_rate": 7.669745299501758e-06, "loss": 0.6527, "step": 5329 }, { "epoch": 0.34, "grad_norm": 1.678830658449658, "learning_rate": 7.668868817138442e-06, "loss": 0.6622, "step": 5330 }, { "epoch": 0.34, "grad_norm": 2.9036908502883034, "learning_rate": 7.667992220072751e-06, "loss": 0.7542, "step": 5331 }, { "epoch": 0.34, "grad_norm": 1.0991912014460827, "learning_rate": 7.667115508342362e-06, "loss": 0.6823, "step": 5332 }, { "epoch": 0.34, "grad_norm": 1.3476642912454975, "learning_rate": 7.666238681984952e-06, "loss": 0.706, "step": 5333 }, { "epoch": 0.34, "grad_norm": 1.7567197481512016, "learning_rate": 7.665361741038207e-06, "loss": 0.8287, "step": 5334 }, { "epoch": 0.34, "grad_norm": 1.583332028870951, "learning_rate": 7.664484685539814e-06, "loss": 0.7993, "step": 5335 }, { "epoch": 0.34, "grad_norm": 1.7093910498563025, "learning_rate": 7.663607515527469e-06, "loss": 0.7889, "step": 5336 }, { "epoch": 0.34, "grad_norm": 1.1487118180821978, "learning_rate": 7.662730231038869e-06, "loss": 0.6696, "step": 5337 }, { "epoch": 0.34, "grad_norm": 1.7448389090375163, "learning_rate": 7.66185283211172e-06, "loss": 0.7261, "step": 5338 }, { "epoch": 0.34, "grad_norm": 1.5065293860557072, "learning_rate": 7.66097531878373e-06, "loss": 0.8075, "step": 5339 }, { "epoch": 0.34, "grad_norm": 1.2707375013144757, "learning_rate": 7.66009769109261e-06, "loss": 0.6404, "step": 5340 }, { "epoch": 0.34, "grad_norm": 1.5015866784604426, "learning_rate": 7.659219949076082e-06, "loss": 0.8384, "step": 5341 }, { "epoch": 0.34, "grad_norm": 1.5838603926707115, "learning_rate": 7.658342092771869e-06, "loss": 0.8543, "step": 5342 }, { "epoch": 0.34, "grad_norm": 1.696159090859117, "learning_rate": 7.657464122217699e-06, "loss": 0.8681, "step": 5343 }, { "epoch": 0.34, "grad_norm": 1.8278112784144387, "learning_rate": 7.656586037451303e-06, "loss": 0.6723, "step": 5344 }, { "epoch": 0.34, "grad_norm": 1.193352700127546, "learning_rate": 7.655707838510423e-06, "loss": 0.8237, "step": 5345 }, { "epoch": 0.34, "grad_norm": 1.9211353748448505, "learning_rate": 7.6548295254328e-06, "loss": 0.8653, "step": 5346 }, { "epoch": 0.34, "grad_norm": 1.805612751516022, "learning_rate": 7.653951098256184e-06, "loss": 1.0001, "step": 5347 }, { "epoch": 0.34, "grad_norm": 1.5930894351542504, "learning_rate": 7.653072557018325e-06, "loss": 0.6166, "step": 5348 }, { "epoch": 0.34, "grad_norm": 1.8902034776573642, "learning_rate": 7.652193901756983e-06, "loss": 0.9354, "step": 5349 }, { "epoch": 0.34, "grad_norm": 1.8082154990216834, "learning_rate": 7.65131513250992e-06, "loss": 0.6756, "step": 5350 }, { "epoch": 0.34, "grad_norm": 1.5644613362648527, "learning_rate": 7.650436249314902e-06, "loss": 0.8664, "step": 5351 }, { "epoch": 0.34, "grad_norm": 1.6732780111001049, "learning_rate": 7.649557252209706e-06, "loss": 0.7926, "step": 5352 }, { "epoch": 0.34, "grad_norm": 1.5835236830480899, "learning_rate": 7.648678141232107e-06, "loss": 0.6725, "step": 5353 }, { "epoch": 0.34, "grad_norm": 1.8232740464857937, "learning_rate": 7.647798916419885e-06, "loss": 0.8628, "step": 5354 }, { "epoch": 0.34, "grad_norm": 1.5632812230020872, "learning_rate": 7.646919577810831e-06, "loss": 0.817, "step": 5355 }, { "epoch": 0.34, "grad_norm": 1.5126026506427048, "learning_rate": 7.646040125442736e-06, "loss": 0.7257, "step": 5356 }, { "epoch": 0.34, "grad_norm": 1.0132232198055673, "learning_rate": 7.645160559353392e-06, "loss": 0.6159, "step": 5357 }, { "epoch": 0.34, "grad_norm": 1.72212152716323, "learning_rate": 7.64428087958061e-06, "loss": 0.7668, "step": 5358 }, { "epoch": 0.34, "grad_norm": 1.4981609080361262, "learning_rate": 7.64340108616219e-06, "loss": 0.7417, "step": 5359 }, { "epoch": 0.34, "grad_norm": 3.4852150632208634, "learning_rate": 7.642521179135946e-06, "loss": 0.7416, "step": 5360 }, { "epoch": 0.34, "grad_norm": 1.4587715786564979, "learning_rate": 7.641641158539696e-06, "loss": 0.712, "step": 5361 }, { "epoch": 0.34, "grad_norm": 1.5458977480122724, "learning_rate": 7.64076102441126e-06, "loss": 0.7932, "step": 5362 }, { "epoch": 0.34, "grad_norm": 1.5897643297529696, "learning_rate": 7.639880776788464e-06, "loss": 0.7268, "step": 5363 }, { "epoch": 0.34, "grad_norm": 1.1795853509588028, "learning_rate": 7.639000415709138e-06, "loss": 0.6025, "step": 5364 }, { "epoch": 0.34, "grad_norm": 1.9094032678945054, "learning_rate": 7.63811994121112e-06, "loss": 0.6733, "step": 5365 }, { "epoch": 0.34, "grad_norm": 1.5976435215517812, "learning_rate": 7.63723935333225e-06, "loss": 0.821, "step": 5366 }, { "epoch": 0.34, "grad_norm": 1.7484073678426009, "learning_rate": 7.636358652110375e-06, "loss": 0.6777, "step": 5367 }, { "epoch": 0.34, "grad_norm": 1.7287077001826832, "learning_rate": 7.635477837583343e-06, "loss": 0.7245, "step": 5368 }, { "epoch": 0.34, "grad_norm": 1.8280783278004586, "learning_rate": 7.634596909789014e-06, "loss": 0.7766, "step": 5369 }, { "epoch": 0.34, "grad_norm": 2.0162413067673244, "learning_rate": 7.633715868765245e-06, "loss": 0.8341, "step": 5370 }, { "epoch": 0.34, "grad_norm": 1.5399455485350224, "learning_rate": 7.632834714549902e-06, "loss": 0.7936, "step": 5371 }, { "epoch": 0.34, "grad_norm": 1.5411674546349194, "learning_rate": 7.631953447180855e-06, "loss": 0.7173, "step": 5372 }, { "epoch": 0.34, "grad_norm": 1.546790681212396, "learning_rate": 7.63107206669598e-06, "loss": 0.8533, "step": 5373 }, { "epoch": 0.34, "grad_norm": 1.7469821931312313, "learning_rate": 7.630190573133156e-06, "loss": 0.6686, "step": 5374 }, { "epoch": 0.34, "grad_norm": 1.8526714364272048, "learning_rate": 7.629308966530268e-06, "loss": 0.8583, "step": 5375 }, { "epoch": 0.34, "grad_norm": 1.6583097056426352, "learning_rate": 7.628427246925205e-06, "loss": 0.73, "step": 5376 }, { "epoch": 0.34, "grad_norm": 1.565912433620099, "learning_rate": 7.6275454143558614e-06, "loss": 0.713, "step": 5377 }, { "epoch": 0.34, "grad_norm": 1.7948870831497636, "learning_rate": 7.6266634688601385e-06, "loss": 0.8128, "step": 5378 }, { "epoch": 0.34, "grad_norm": 1.537080591772565, "learning_rate": 7.6257814104759385e-06, "loss": 0.7951, "step": 5379 }, { "epoch": 0.34, "grad_norm": 1.8894126347027604, "learning_rate": 7.6248992392411705e-06, "loss": 0.8617, "step": 5380 }, { "epoch": 0.34, "grad_norm": 1.5363156422799307, "learning_rate": 7.62401695519375e-06, "loss": 0.8012, "step": 5381 }, { "epoch": 0.34, "grad_norm": 1.6723839896399781, "learning_rate": 7.623134558371594e-06, "loss": 0.7889, "step": 5382 }, { "epoch": 0.34, "grad_norm": 1.5846295506581525, "learning_rate": 7.6222520488126286e-06, "loss": 0.8195, "step": 5383 }, { "epoch": 0.34, "grad_norm": 1.6561958830122387, "learning_rate": 7.621369426554778e-06, "loss": 0.7874, "step": 5384 }, { "epoch": 0.34, "grad_norm": 1.5269389181498951, "learning_rate": 7.620486691635978e-06, "loss": 0.7706, "step": 5385 }, { "epoch": 0.34, "grad_norm": 1.6269943532745774, "learning_rate": 7.619603844094165e-06, "loss": 0.8046, "step": 5386 }, { "epoch": 0.34, "grad_norm": 1.5431628084105293, "learning_rate": 7.618720883967285e-06, "loss": 0.7388, "step": 5387 }, { "epoch": 0.34, "grad_norm": 1.463400575228471, "learning_rate": 7.6178378112932845e-06, "loss": 0.7924, "step": 5388 }, { "epoch": 0.34, "grad_norm": 1.5880175174256814, "learning_rate": 7.616954626110116e-06, "loss": 0.8237, "step": 5389 }, { "epoch": 0.34, "grad_norm": 1.833756737492772, "learning_rate": 7.616071328455735e-06, "loss": 0.8042, "step": 5390 }, { "epoch": 0.35, "grad_norm": 1.4941655905475317, "learning_rate": 7.615187918368107e-06, "loss": 0.7159, "step": 5391 }, { "epoch": 0.35, "grad_norm": 1.9376111598375412, "learning_rate": 7.614304395885197e-06, "loss": 0.8042, "step": 5392 }, { "epoch": 0.35, "grad_norm": 1.6520298129391475, "learning_rate": 7.613420761044976e-06, "loss": 0.6921, "step": 5393 }, { "epoch": 0.35, "grad_norm": 1.7966176142057073, "learning_rate": 7.612537013885424e-06, "loss": 0.7944, "step": 5394 }, { "epoch": 0.35, "grad_norm": 1.1154431829441258, "learning_rate": 7.611653154444521e-06, "loss": 0.6467, "step": 5395 }, { "epoch": 0.35, "grad_norm": 1.568551882544611, "learning_rate": 7.610769182760251e-06, "loss": 0.7645, "step": 5396 }, { "epoch": 0.35, "grad_norm": 1.6107538490250732, "learning_rate": 7.60988509887061e-06, "loss": 0.6262, "step": 5397 }, { "epoch": 0.35, "grad_norm": 1.5494350163612474, "learning_rate": 7.60900090281359e-06, "loss": 0.8177, "step": 5398 }, { "epoch": 0.35, "grad_norm": 1.4521558301184514, "learning_rate": 7.608116594627195e-06, "loss": 0.6408, "step": 5399 }, { "epoch": 0.35, "grad_norm": 2.8718883836979683, "learning_rate": 7.607232174349428e-06, "loss": 0.7418, "step": 5400 }, { "epoch": 0.35, "grad_norm": 1.0105082850103457, "learning_rate": 7.606347642018301e-06, "loss": 0.6331, "step": 5401 }, { "epoch": 0.35, "grad_norm": 1.6635111558201316, "learning_rate": 7.605462997671828e-06, "loss": 0.7927, "step": 5402 }, { "epoch": 0.35, "grad_norm": 1.6666583688054146, "learning_rate": 7.604578241348033e-06, "loss": 0.7885, "step": 5403 }, { "epoch": 0.35, "grad_norm": 1.3386609114737573, "learning_rate": 7.603693373084936e-06, "loss": 0.7148, "step": 5404 }, { "epoch": 0.35, "grad_norm": 1.669016219523583, "learning_rate": 7.602808392920568e-06, "loss": 0.7485, "step": 5405 }, { "epoch": 0.35, "grad_norm": 1.2245824671393983, "learning_rate": 7.601923300892966e-06, "loss": 0.6688, "step": 5406 }, { "epoch": 0.35, "grad_norm": 1.5185518590936928, "learning_rate": 7.601038097040169e-06, "loss": 0.8412, "step": 5407 }, { "epoch": 0.35, "grad_norm": 1.8690262033268301, "learning_rate": 7.600152781400218e-06, "loss": 0.7577, "step": 5408 }, { "epoch": 0.35, "grad_norm": 1.9940546476724592, "learning_rate": 7.599267354011166e-06, "loss": 0.8495, "step": 5409 }, { "epoch": 0.35, "grad_norm": 2.1838764960393284, "learning_rate": 7.598381814911063e-06, "loss": 0.8205, "step": 5410 }, { "epoch": 0.35, "grad_norm": 1.5887502970708156, "learning_rate": 7.597496164137972e-06, "loss": 0.8135, "step": 5411 }, { "epoch": 0.35, "grad_norm": 1.187547865116055, "learning_rate": 7.596610401729951e-06, "loss": 0.6528, "step": 5412 }, { "epoch": 0.35, "grad_norm": 2.2805312637825823, "learning_rate": 7.595724527725074e-06, "loss": 0.7818, "step": 5413 }, { "epoch": 0.35, "grad_norm": 1.725655314935426, "learning_rate": 7.594838542161409e-06, "loss": 0.7435, "step": 5414 }, { "epoch": 0.35, "grad_norm": 1.197120185931372, "learning_rate": 7.593952445077035e-06, "loss": 0.6983, "step": 5415 }, { "epoch": 0.35, "grad_norm": 1.5853878688509568, "learning_rate": 7.5930662365100385e-06, "loss": 0.6836, "step": 5416 }, { "epoch": 0.35, "grad_norm": 1.6299750410169278, "learning_rate": 7.592179916498503e-06, "loss": 0.7012, "step": 5417 }, { "epoch": 0.35, "grad_norm": 1.7572638308022748, "learning_rate": 7.591293485080522e-06, "loss": 0.874, "step": 5418 }, { "epoch": 0.35, "grad_norm": 1.1298233445005168, "learning_rate": 7.590406942294191e-06, "loss": 0.6742, "step": 5419 }, { "epoch": 0.35, "grad_norm": 1.487713250022731, "learning_rate": 7.589520288177614e-06, "loss": 0.8847, "step": 5420 }, { "epoch": 0.35, "grad_norm": 1.5538747725466129, "learning_rate": 7.588633522768896e-06, "loss": 0.6016, "step": 5421 }, { "epoch": 0.35, "grad_norm": 1.5847621273376993, "learning_rate": 7.587746646106147e-06, "loss": 0.798, "step": 5422 }, { "epoch": 0.35, "grad_norm": 1.0593157121062267, "learning_rate": 7.586859658227487e-06, "loss": 0.544, "step": 5423 }, { "epoch": 0.35, "grad_norm": 1.670869880789685, "learning_rate": 7.585972559171033e-06, "loss": 0.7809, "step": 5424 }, { "epoch": 0.35, "grad_norm": 1.674409993056436, "learning_rate": 7.585085348974912e-06, "loss": 0.6827, "step": 5425 }, { "epoch": 0.35, "grad_norm": 0.9996546887338782, "learning_rate": 7.584198027677256e-06, "loss": 0.7227, "step": 5426 }, { "epoch": 0.35, "grad_norm": 1.6444339804770698, "learning_rate": 7.5833105953161986e-06, "loss": 0.8074, "step": 5427 }, { "epoch": 0.35, "grad_norm": 1.6573191051337905, "learning_rate": 7.5824230519298816e-06, "loss": 0.7261, "step": 5428 }, { "epoch": 0.35, "grad_norm": 2.0678273208842954, "learning_rate": 7.581535397556445e-06, "loss": 0.7845, "step": 5429 }, { "epoch": 0.35, "grad_norm": 1.8461080699397792, "learning_rate": 7.5806476322340436e-06, "loss": 0.7397, "step": 5430 }, { "epoch": 0.35, "grad_norm": 1.585099793070582, "learning_rate": 7.579759756000829e-06, "loss": 0.7734, "step": 5431 }, { "epoch": 0.35, "grad_norm": 1.4392120086600266, "learning_rate": 7.578871768894962e-06, "loss": 0.7992, "step": 5432 }, { "epoch": 0.35, "grad_norm": 1.8493268302552535, "learning_rate": 7.577983670954604e-06, "loss": 0.8619, "step": 5433 }, { "epoch": 0.35, "grad_norm": 1.5469432919882946, "learning_rate": 7.577095462217926e-06, "loss": 0.745, "step": 5434 }, { "epoch": 0.35, "grad_norm": 1.6090055337980271, "learning_rate": 7.5762071427231e-06, "loss": 0.7744, "step": 5435 }, { "epoch": 0.35, "grad_norm": 1.4475732266549024, "learning_rate": 7.575318712508304e-06, "loss": 0.7372, "step": 5436 }, { "epoch": 0.35, "grad_norm": 2.125298534835551, "learning_rate": 7.5744301716117206e-06, "loss": 0.842, "step": 5437 }, { "epoch": 0.35, "grad_norm": 1.5002074881102148, "learning_rate": 7.573541520071539e-06, "loss": 0.6934, "step": 5438 }, { "epoch": 0.35, "grad_norm": 2.9746681742651, "learning_rate": 7.572652757925951e-06, "loss": 0.8629, "step": 5439 }, { "epoch": 0.35, "grad_norm": 1.8019499486160415, "learning_rate": 7.571763885213153e-06, "loss": 0.7695, "step": 5440 }, { "epoch": 0.35, "grad_norm": 1.473161789842885, "learning_rate": 7.570874901971347e-06, "loss": 0.7293, "step": 5441 }, { "epoch": 0.35, "grad_norm": 1.7641791222679448, "learning_rate": 7.569985808238739e-06, "loss": 0.8566, "step": 5442 }, { "epoch": 0.35, "grad_norm": 1.9289986675690374, "learning_rate": 7.569096604053543e-06, "loss": 0.9156, "step": 5443 }, { "epoch": 0.35, "grad_norm": 1.7229933926389893, "learning_rate": 7.56820728945397e-06, "loss": 0.8159, "step": 5444 }, { "epoch": 0.35, "grad_norm": 1.2612588967539764, "learning_rate": 7.5673178644782475e-06, "loss": 0.6955, "step": 5445 }, { "epoch": 0.35, "grad_norm": 1.6134874337385612, "learning_rate": 7.566428329164598e-06, "loss": 0.7077, "step": 5446 }, { "epoch": 0.35, "grad_norm": 1.6190185194551598, "learning_rate": 7.565538683551251e-06, "loss": 0.7977, "step": 5447 }, { "epoch": 0.35, "grad_norm": 1.7001284362432922, "learning_rate": 7.564648927676443e-06, "loss": 0.8417, "step": 5448 }, { "epoch": 0.35, "grad_norm": 2.686362654133293, "learning_rate": 7.563759061578412e-06, "loss": 0.738, "step": 5449 }, { "epoch": 0.35, "grad_norm": 1.6920962124129617, "learning_rate": 7.562869085295403e-06, "loss": 0.7336, "step": 5450 }, { "epoch": 0.35, "grad_norm": 1.8201843144416459, "learning_rate": 7.561978998865667e-06, "loss": 0.8195, "step": 5451 }, { "epoch": 0.35, "grad_norm": 1.7820763535343818, "learning_rate": 7.561088802327456e-06, "loss": 0.641, "step": 5452 }, { "epoch": 0.35, "grad_norm": 1.478097588368944, "learning_rate": 7.560198495719032e-06, "loss": 0.7147, "step": 5453 }, { "epoch": 0.35, "grad_norm": 1.6026363271590045, "learning_rate": 7.5593080790786554e-06, "loss": 0.8466, "step": 5454 }, { "epoch": 0.35, "grad_norm": 1.688202312454985, "learning_rate": 7.558417552444595e-06, "loss": 0.7523, "step": 5455 }, { "epoch": 0.35, "grad_norm": 1.993492677048482, "learning_rate": 7.557526915855126e-06, "loss": 0.7076, "step": 5456 }, { "epoch": 0.35, "grad_norm": 1.5689720647476555, "learning_rate": 7.556636169348524e-06, "loss": 0.8261, "step": 5457 }, { "epoch": 0.35, "grad_norm": 1.735767047613095, "learning_rate": 7.555745312963068e-06, "loss": 0.7989, "step": 5458 }, { "epoch": 0.35, "grad_norm": 1.9470197784968648, "learning_rate": 7.554854346737053e-06, "loss": 0.7374, "step": 5459 }, { "epoch": 0.35, "grad_norm": 1.277822695423221, "learning_rate": 7.553963270708764e-06, "loss": 0.7559, "step": 5460 }, { "epoch": 0.35, "grad_norm": 1.6162126872750187, "learning_rate": 7.5530720849165e-06, "loss": 0.7538, "step": 5461 }, { "epoch": 0.35, "grad_norm": 1.654560002409347, "learning_rate": 7.5521807893985645e-06, "loss": 0.7391, "step": 5462 }, { "epoch": 0.35, "grad_norm": 1.4752812789009382, "learning_rate": 7.551289384193261e-06, "loss": 0.7578, "step": 5463 }, { "epoch": 0.35, "grad_norm": 1.6200189247842458, "learning_rate": 7.550397869338902e-06, "loss": 0.7468, "step": 5464 }, { "epoch": 0.35, "grad_norm": 1.134543519208253, "learning_rate": 7.549506244873799e-06, "loss": 0.6956, "step": 5465 }, { "epoch": 0.35, "grad_norm": 1.727853136786551, "learning_rate": 7.548614510836276e-06, "loss": 0.7777, "step": 5466 }, { "epoch": 0.35, "grad_norm": 1.5336720925194192, "learning_rate": 7.547722667264656e-06, "loss": 0.7057, "step": 5467 }, { "epoch": 0.35, "grad_norm": 1.5970339130318936, "learning_rate": 7.546830714197271e-06, "loss": 0.7007, "step": 5468 }, { "epoch": 0.35, "grad_norm": 1.4265620652610327, "learning_rate": 7.545938651672453e-06, "loss": 0.6664, "step": 5469 }, { "epoch": 0.35, "grad_norm": 1.617836891484468, "learning_rate": 7.54504647972854e-06, "loss": 0.8116, "step": 5470 }, { "epoch": 0.35, "grad_norm": 1.4579381065832324, "learning_rate": 7.5441541984038795e-06, "loss": 0.6413, "step": 5471 }, { "epoch": 0.35, "grad_norm": 1.7004614265314786, "learning_rate": 7.543261807736815e-06, "loss": 0.8881, "step": 5472 }, { "epoch": 0.35, "grad_norm": 1.2853491778513726, "learning_rate": 7.542369307765705e-06, "loss": 0.6803, "step": 5473 }, { "epoch": 0.35, "grad_norm": 1.7332777880389145, "learning_rate": 7.541476698528902e-06, "loss": 0.9247, "step": 5474 }, { "epoch": 0.35, "grad_norm": 2.051734573088237, "learning_rate": 7.540583980064772e-06, "loss": 0.8611, "step": 5475 }, { "epoch": 0.35, "grad_norm": 1.4562844397384551, "learning_rate": 7.539691152411681e-06, "loss": 0.7356, "step": 5476 }, { "epoch": 0.35, "grad_norm": 1.6816442684614692, "learning_rate": 7.538798215608001e-06, "loss": 0.8697, "step": 5477 }, { "epoch": 0.35, "grad_norm": 1.5376572964199942, "learning_rate": 7.537905169692108e-06, "loss": 0.7433, "step": 5478 }, { "epoch": 0.35, "grad_norm": 1.9750448032016379, "learning_rate": 7.537012014702383e-06, "loss": 0.785, "step": 5479 }, { "epoch": 0.35, "grad_norm": 1.5920508119740926, "learning_rate": 7.536118750677212e-06, "loss": 0.7789, "step": 5480 }, { "epoch": 0.35, "grad_norm": 1.6132913009085206, "learning_rate": 7.535225377654989e-06, "loss": 0.8348, "step": 5481 }, { "epoch": 0.35, "grad_norm": 1.6372765676647083, "learning_rate": 7.5343318956741054e-06, "loss": 0.8122, "step": 5482 }, { "epoch": 0.35, "grad_norm": 1.7068403929156641, "learning_rate": 7.533438304772962e-06, "loss": 0.8183, "step": 5483 }, { "epoch": 0.35, "grad_norm": 1.617847879083708, "learning_rate": 7.5325446049899646e-06, "loss": 0.7483, "step": 5484 }, { "epoch": 0.35, "grad_norm": 1.5065511473586717, "learning_rate": 7.531650796363521e-06, "loss": 0.7841, "step": 5485 }, { "epoch": 0.35, "grad_norm": 1.156062919659018, "learning_rate": 7.530756878932045e-06, "loss": 0.6978, "step": 5486 }, { "epoch": 0.35, "grad_norm": 1.5808327705751442, "learning_rate": 7.529862852733958e-06, "loss": 0.7686, "step": 5487 }, { "epoch": 0.35, "grad_norm": 1.5779044634071147, "learning_rate": 7.528968717807678e-06, "loss": 0.7109, "step": 5488 }, { "epoch": 0.35, "grad_norm": 1.4258477286049347, "learning_rate": 7.52807447419164e-06, "loss": 0.6771, "step": 5489 }, { "epoch": 0.35, "grad_norm": 2.459470847827794, "learning_rate": 7.527180121924274e-06, "loss": 0.8067, "step": 5490 }, { "epoch": 0.35, "grad_norm": 1.1390559629978094, "learning_rate": 7.526285661044015e-06, "loss": 0.6671, "step": 5491 }, { "epoch": 0.35, "grad_norm": 1.8390118618800602, "learning_rate": 7.525391091589307e-06, "loss": 0.8022, "step": 5492 }, { "epoch": 0.35, "grad_norm": 1.5636757609036571, "learning_rate": 7.524496413598596e-06, "loss": 0.7528, "step": 5493 }, { "epoch": 0.35, "grad_norm": 1.6276928633339367, "learning_rate": 7.523601627110333e-06, "loss": 0.6505, "step": 5494 }, { "epoch": 0.35, "grad_norm": 1.7093952587132775, "learning_rate": 7.522706732162976e-06, "loss": 0.7215, "step": 5495 }, { "epoch": 0.35, "grad_norm": 1.9994440954115553, "learning_rate": 7.521811728794985e-06, "loss": 0.7384, "step": 5496 }, { "epoch": 0.35, "grad_norm": 1.5729440309192237, "learning_rate": 7.520916617044826e-06, "loss": 0.7516, "step": 5497 }, { "epoch": 0.35, "grad_norm": 1.7803451608471783, "learning_rate": 7.520021396950965e-06, "loss": 0.8173, "step": 5498 }, { "epoch": 0.35, "grad_norm": 1.5368669853596688, "learning_rate": 7.519126068551882e-06, "loss": 0.8234, "step": 5499 }, { "epoch": 0.35, "grad_norm": 1.1372233765917392, "learning_rate": 7.518230631886054e-06, "loss": 0.7591, "step": 5500 }, { "epoch": 0.35, "grad_norm": 1.6035677122428327, "learning_rate": 7.517335086991963e-06, "loss": 0.7585, "step": 5501 }, { "epoch": 0.35, "grad_norm": 1.9137286316484081, "learning_rate": 7.5164394339081006e-06, "loss": 0.7248, "step": 5502 }, { "epoch": 0.35, "grad_norm": 1.7724428037354254, "learning_rate": 7.51554367267296e-06, "loss": 0.6695, "step": 5503 }, { "epoch": 0.35, "grad_norm": 1.7588422837333828, "learning_rate": 7.514647803325038e-06, "loss": 0.7439, "step": 5504 }, { "epoch": 0.35, "grad_norm": 1.7449917469173168, "learning_rate": 7.513751825902837e-06, "loss": 0.7687, "step": 5505 }, { "epoch": 0.35, "grad_norm": 5.50376858427176, "learning_rate": 7.512855740444865e-06, "loss": 0.7567, "step": 5506 }, { "epoch": 0.35, "grad_norm": 1.6001044390282155, "learning_rate": 7.5119595469896325e-06, "loss": 0.8167, "step": 5507 }, { "epoch": 0.35, "grad_norm": 1.9557537622883756, "learning_rate": 7.511063245575656e-06, "loss": 0.6272, "step": 5508 }, { "epoch": 0.35, "grad_norm": 1.1216643407122031, "learning_rate": 7.51016683624146e-06, "loss": 0.7061, "step": 5509 }, { "epoch": 0.35, "grad_norm": 1.5535643217101107, "learning_rate": 7.5092703190255675e-06, "loss": 0.69, "step": 5510 }, { "epoch": 0.35, "grad_norm": 1.1585495162808817, "learning_rate": 7.50837369396651e-06, "loss": 0.4857, "step": 5511 }, { "epoch": 0.35, "grad_norm": 1.0939129270606935, "learning_rate": 7.507476961102822e-06, "loss": 0.6782, "step": 5512 }, { "epoch": 0.35, "grad_norm": 1.590833561881634, "learning_rate": 7.506580120473042e-06, "loss": 0.8995, "step": 5513 }, { "epoch": 0.35, "grad_norm": 1.093465845390339, "learning_rate": 7.505683172115714e-06, "loss": 0.6605, "step": 5514 }, { "epoch": 0.35, "grad_norm": 1.7248865030050093, "learning_rate": 7.504786116069391e-06, "loss": 0.7347, "step": 5515 }, { "epoch": 0.35, "grad_norm": 1.486920347268871, "learning_rate": 7.5038889523726225e-06, "loss": 0.7, "step": 5516 }, { "epoch": 0.35, "grad_norm": 1.5790609800053144, "learning_rate": 7.502991681063969e-06, "loss": 0.7327, "step": 5517 }, { "epoch": 0.35, "grad_norm": 1.4983463696147572, "learning_rate": 7.502094302181992e-06, "loss": 0.8197, "step": 5518 }, { "epoch": 0.35, "grad_norm": 1.9029701397629544, "learning_rate": 7.5011968157652615e-06, "loss": 0.75, "step": 5519 }, { "epoch": 0.35, "grad_norm": 1.6102887069136111, "learning_rate": 7.500299221852346e-06, "loss": 0.8082, "step": 5520 }, { "epoch": 0.35, "grad_norm": 1.6741699409631994, "learning_rate": 7.499401520481824e-06, "loss": 0.7656, "step": 5521 }, { "epoch": 0.35, "grad_norm": 1.0214135201397296, "learning_rate": 7.498503711692276e-06, "loss": 0.6283, "step": 5522 }, { "epoch": 0.35, "grad_norm": 1.849286924109776, "learning_rate": 7.497605795522289e-06, "loss": 0.7391, "step": 5523 }, { "epoch": 0.35, "grad_norm": 1.6279130603461018, "learning_rate": 7.4967077720104545e-06, "loss": 0.664, "step": 5524 }, { "epoch": 0.35, "grad_norm": 1.5930221789719068, "learning_rate": 7.495809641195364e-06, "loss": 0.7947, "step": 5525 }, { "epoch": 0.35, "grad_norm": 1.9548387860936003, "learning_rate": 7.49491140311562e-06, "loss": 0.7924, "step": 5526 }, { "epoch": 0.35, "grad_norm": 1.2397511764596325, "learning_rate": 7.494013057809827e-06, "loss": 0.6925, "step": 5527 }, { "epoch": 0.35, "grad_norm": 1.7441165413479935, "learning_rate": 7.493114605316594e-06, "loss": 0.8571, "step": 5528 }, { "epoch": 0.35, "grad_norm": 1.5093472259939962, "learning_rate": 7.492216045674532e-06, "loss": 0.7673, "step": 5529 }, { "epoch": 0.35, "grad_norm": 1.5146839336050464, "learning_rate": 7.491317378922263e-06, "loss": 0.8486, "step": 5530 }, { "epoch": 0.35, "grad_norm": 1.6613101665532357, "learning_rate": 7.490418605098408e-06, "loss": 0.7524, "step": 5531 }, { "epoch": 0.35, "grad_norm": 1.210128980496615, "learning_rate": 7.489519724241594e-06, "loss": 0.6088, "step": 5532 }, { "epoch": 0.35, "grad_norm": 1.4579237370196716, "learning_rate": 7.488620736390454e-06, "loss": 0.6703, "step": 5533 }, { "epoch": 0.35, "grad_norm": 1.6788979979828533, "learning_rate": 7.487721641583624e-06, "loss": 0.7171, "step": 5534 }, { "epoch": 0.35, "grad_norm": 1.0873364950401785, "learning_rate": 7.486822439859744e-06, "loss": 0.8222, "step": 5535 }, { "epoch": 0.35, "grad_norm": 1.5887909016445323, "learning_rate": 7.485923131257462e-06, "loss": 0.7513, "step": 5536 }, { "epoch": 0.35, "grad_norm": 1.5624897120502588, "learning_rate": 7.485023715815427e-06, "loss": 0.6461, "step": 5537 }, { "epoch": 0.35, "grad_norm": 1.0987502237889741, "learning_rate": 7.484124193572295e-06, "loss": 0.7224, "step": 5538 }, { "epoch": 0.35, "grad_norm": 1.717923859199252, "learning_rate": 7.483224564566725e-06, "loss": 0.775, "step": 5539 }, { "epoch": 0.35, "grad_norm": 1.7119583303679855, "learning_rate": 7.482324828837382e-06, "loss": 0.8281, "step": 5540 }, { "epoch": 0.35, "grad_norm": 1.4792965231753665, "learning_rate": 7.481424986422933e-06, "loss": 0.6656, "step": 5541 }, { "epoch": 0.35, "grad_norm": 1.4355508462000721, "learning_rate": 7.480525037362052e-06, "loss": 0.8536, "step": 5542 }, { "epoch": 0.35, "grad_norm": 1.6433812623484239, "learning_rate": 7.479624981693416e-06, "loss": 0.7986, "step": 5543 }, { "epoch": 0.35, "grad_norm": 1.8022045145589627, "learning_rate": 7.478724819455709e-06, "loss": 0.6646, "step": 5544 }, { "epoch": 0.35, "grad_norm": 1.3578564466655854, "learning_rate": 7.47782455068762e-06, "loss": 0.6949, "step": 5545 }, { "epoch": 0.35, "grad_norm": 1.6326310676023073, "learning_rate": 7.476924175427838e-06, "loss": 0.7281, "step": 5546 }, { "epoch": 0.36, "grad_norm": 1.5188970581661276, "learning_rate": 7.476023693715059e-06, "loss": 0.7578, "step": 5547 }, { "epoch": 0.36, "grad_norm": 1.1385202139434842, "learning_rate": 7.475123105587985e-06, "loss": 0.631, "step": 5548 }, { "epoch": 0.36, "grad_norm": 1.597889818550531, "learning_rate": 7.47422241108532e-06, "loss": 0.6945, "step": 5549 }, { "epoch": 0.36, "grad_norm": 1.1271199083885501, "learning_rate": 7.473321610245774e-06, "loss": 0.6707, "step": 5550 }, { "epoch": 0.36, "grad_norm": 1.7008652181948276, "learning_rate": 7.4724207031080645e-06, "loss": 0.8256, "step": 5551 }, { "epoch": 0.36, "grad_norm": 1.532315419866208, "learning_rate": 7.471519689710908e-06, "loss": 0.7936, "step": 5552 }, { "epoch": 0.36, "grad_norm": 1.471405055482869, "learning_rate": 7.4706185700930266e-06, "loss": 0.6357, "step": 5553 }, { "epoch": 0.36, "grad_norm": 1.2318594348030993, "learning_rate": 7.469717344293152e-06, "loss": 0.7069, "step": 5554 }, { "epoch": 0.36, "grad_norm": 1.848032687429573, "learning_rate": 7.4688160123500165e-06, "loss": 0.826, "step": 5555 }, { "epoch": 0.36, "grad_norm": 1.005023045808648, "learning_rate": 7.467914574302356e-06, "loss": 0.595, "step": 5556 }, { "epoch": 0.36, "grad_norm": 1.236131266423729, "learning_rate": 7.467013030188912e-06, "loss": 0.72, "step": 5557 }, { "epoch": 0.36, "grad_norm": 1.0350081301840874, "learning_rate": 7.466111380048432e-06, "loss": 0.6734, "step": 5558 }, { "epoch": 0.36, "grad_norm": 1.6571357202365007, "learning_rate": 7.465209623919668e-06, "loss": 0.6886, "step": 5559 }, { "epoch": 0.36, "grad_norm": 1.4729138380612745, "learning_rate": 7.464307761841374e-06, "loss": 0.7585, "step": 5560 }, { "epoch": 0.36, "grad_norm": 1.855771945843649, "learning_rate": 7.46340579385231e-06, "loss": 0.8684, "step": 5561 }, { "epoch": 0.36, "grad_norm": 1.6194957483190775, "learning_rate": 7.462503719991241e-06, "loss": 0.7416, "step": 5562 }, { "epoch": 0.36, "grad_norm": 1.8584101729413023, "learning_rate": 7.4616015402969375e-06, "loss": 0.8027, "step": 5563 }, { "epoch": 0.36, "grad_norm": 1.6650731365676534, "learning_rate": 7.460699254808172e-06, "loss": 0.7817, "step": 5564 }, { "epoch": 0.36, "grad_norm": 1.7766525511891784, "learning_rate": 7.459796863563723e-06, "loss": 0.7692, "step": 5565 }, { "epoch": 0.36, "grad_norm": 1.7273440963167137, "learning_rate": 7.458894366602374e-06, "loss": 0.8812, "step": 5566 }, { "epoch": 0.36, "grad_norm": 1.9365395075274157, "learning_rate": 7.457991763962912e-06, "loss": 0.8949, "step": 5567 }, { "epoch": 0.36, "grad_norm": 1.5850590667807023, "learning_rate": 7.457089055684129e-06, "loss": 0.7958, "step": 5568 }, { "epoch": 0.36, "grad_norm": 1.0531858065976685, "learning_rate": 7.456186241804821e-06, "loss": 0.5971, "step": 5569 }, { "epoch": 0.36, "grad_norm": 1.667409729679505, "learning_rate": 7.45528332236379e-06, "loss": 0.7431, "step": 5570 }, { "epoch": 0.36, "grad_norm": 1.9340699709353213, "learning_rate": 7.45438029739984e-06, "loss": 0.7508, "step": 5571 }, { "epoch": 0.36, "grad_norm": 1.7141177366221856, "learning_rate": 7.453477166951783e-06, "loss": 0.6922, "step": 5572 }, { "epoch": 0.36, "grad_norm": 1.621793176884108, "learning_rate": 7.4525739310584326e-06, "loss": 0.7582, "step": 5573 }, { "epoch": 0.36, "grad_norm": 1.7735374976759304, "learning_rate": 7.451670589758609e-06, "loss": 0.7424, "step": 5574 }, { "epoch": 0.36, "grad_norm": 1.7121710097192135, "learning_rate": 7.450767143091133e-06, "loss": 0.7494, "step": 5575 }, { "epoch": 0.36, "grad_norm": 1.5683632961055267, "learning_rate": 7.4498635910948365e-06, "loss": 0.8683, "step": 5576 }, { "epoch": 0.36, "grad_norm": 1.6615391638522912, "learning_rate": 7.448959933808552e-06, "loss": 0.7761, "step": 5577 }, { "epoch": 0.36, "grad_norm": 1.7539463663788155, "learning_rate": 7.448056171271114e-06, "loss": 0.9216, "step": 5578 }, { "epoch": 0.36, "grad_norm": 1.823415898975383, "learning_rate": 7.447152303521366e-06, "loss": 0.7433, "step": 5579 }, { "epoch": 0.36, "grad_norm": 1.6389249117469105, "learning_rate": 7.446248330598154e-06, "loss": 0.8448, "step": 5580 }, { "epoch": 0.36, "grad_norm": 1.627160779133411, "learning_rate": 7.445344252540327e-06, "loss": 0.8371, "step": 5581 }, { "epoch": 0.36, "grad_norm": 1.5409509288220187, "learning_rate": 7.444440069386746e-06, "loss": 0.7764, "step": 5582 }, { "epoch": 0.36, "grad_norm": 1.7421158205330671, "learning_rate": 7.443535781176265e-06, "loss": 0.841, "step": 5583 }, { "epoch": 0.36, "grad_norm": 1.4837054711552258, "learning_rate": 7.442631387947753e-06, "loss": 0.7853, "step": 5584 }, { "epoch": 0.36, "grad_norm": 1.6359270633443315, "learning_rate": 7.441726889740075e-06, "loss": 0.6843, "step": 5585 }, { "epoch": 0.36, "grad_norm": 1.6262790169655081, "learning_rate": 7.440822286592105e-06, "loss": 0.7886, "step": 5586 }, { "epoch": 0.36, "grad_norm": 1.7327810019527095, "learning_rate": 7.439917578542723e-06, "loss": 0.7787, "step": 5587 }, { "epoch": 0.36, "grad_norm": 1.5006910126976354, "learning_rate": 7.4390127656308116e-06, "loss": 0.8095, "step": 5588 }, { "epoch": 0.36, "grad_norm": 1.472327109236041, "learning_rate": 7.438107847895256e-06, "loss": 0.6904, "step": 5589 }, { "epoch": 0.36, "grad_norm": 1.646645441497063, "learning_rate": 7.437202825374948e-06, "loss": 0.7305, "step": 5590 }, { "epoch": 0.36, "grad_norm": 1.5433726874134088, "learning_rate": 7.436297698108783e-06, "loss": 0.7141, "step": 5591 }, { "epoch": 0.36, "grad_norm": 1.6330985341636268, "learning_rate": 7.435392466135665e-06, "loss": 0.7461, "step": 5592 }, { "epoch": 0.36, "grad_norm": 2.0157377518322375, "learning_rate": 7.434487129494494e-06, "loss": 0.946, "step": 5593 }, { "epoch": 0.36, "grad_norm": 0.969677716682077, "learning_rate": 7.433581688224181e-06, "loss": 0.6371, "step": 5594 }, { "epoch": 0.36, "grad_norm": 1.7469747170256917, "learning_rate": 7.4326761423636415e-06, "loss": 0.734, "step": 5595 }, { "epoch": 0.36, "grad_norm": 1.6100883773389791, "learning_rate": 7.431770491951794e-06, "loss": 0.7664, "step": 5596 }, { "epoch": 0.36, "grad_norm": 1.749993907600988, "learning_rate": 7.430864737027561e-06, "loss": 0.8598, "step": 5597 }, { "epoch": 0.36, "grad_norm": 1.8582611607172848, "learning_rate": 7.429958877629868e-06, "loss": 0.7148, "step": 5598 }, { "epoch": 0.36, "grad_norm": 1.5419933576156575, "learning_rate": 7.429052913797649e-06, "loss": 0.7686, "step": 5599 }, { "epoch": 0.36, "grad_norm": 1.1853518408621486, "learning_rate": 7.42814684556984e-06, "loss": 0.682, "step": 5600 }, { "epoch": 0.36, "grad_norm": 1.6220630544162755, "learning_rate": 7.4272406729853805e-06, "loss": 0.7703, "step": 5601 }, { "epoch": 0.36, "grad_norm": 1.1916620369740605, "learning_rate": 7.426334396083219e-06, "loss": 0.5905, "step": 5602 }, { "epoch": 0.36, "grad_norm": 1.3684818879285263, "learning_rate": 7.425428014902304e-06, "loss": 0.6716, "step": 5603 }, { "epoch": 0.36, "grad_norm": 1.576971107815369, "learning_rate": 7.4245215294815885e-06, "loss": 0.627, "step": 5604 }, { "epoch": 0.36, "grad_norm": 1.6460626177322015, "learning_rate": 7.423614939860033e-06, "loss": 0.7997, "step": 5605 }, { "epoch": 0.36, "grad_norm": 1.174884203587737, "learning_rate": 7.4227082460765995e-06, "loss": 0.586, "step": 5606 }, { "epoch": 0.36, "grad_norm": 1.0677426097206655, "learning_rate": 7.421801448170256e-06, "loss": 0.6602, "step": 5607 }, { "epoch": 0.36, "grad_norm": 1.5648397945333918, "learning_rate": 7.420894546179975e-06, "loss": 0.7806, "step": 5608 }, { "epoch": 0.36, "grad_norm": 1.1332419331822052, "learning_rate": 7.419987540144733e-06, "loss": 0.7227, "step": 5609 }, { "epoch": 0.36, "grad_norm": 1.5333232827349765, "learning_rate": 7.4190804301035136e-06, "loss": 0.6816, "step": 5610 }, { "epoch": 0.36, "grad_norm": 1.499454373829792, "learning_rate": 7.4181732160952995e-06, "loss": 0.7427, "step": 5611 }, { "epoch": 0.36, "grad_norm": 1.5263076533588915, "learning_rate": 7.417265898159082e-06, "loss": 0.7273, "step": 5612 }, { "epoch": 0.36, "grad_norm": 1.6736252398732356, "learning_rate": 7.416358476333856e-06, "loss": 0.7401, "step": 5613 }, { "epoch": 0.36, "grad_norm": 1.7667144486257176, "learning_rate": 7.4154509506586195e-06, "loss": 0.8809, "step": 5614 }, { "epoch": 0.36, "grad_norm": 1.8615887366892578, "learning_rate": 7.414543321172376e-06, "loss": 0.9483, "step": 5615 }, { "epoch": 0.36, "grad_norm": 1.7018205813073652, "learning_rate": 7.413635587914134e-06, "loss": 0.9116, "step": 5616 }, { "epoch": 0.36, "grad_norm": 1.555231165127083, "learning_rate": 7.412727750922908e-06, "loss": 0.7579, "step": 5617 }, { "epoch": 0.36, "grad_norm": 1.63370953130235, "learning_rate": 7.411819810237712e-06, "loss": 0.843, "step": 5618 }, { "epoch": 0.36, "grad_norm": 1.6276335773855213, "learning_rate": 7.4109117658975685e-06, "loss": 0.841, "step": 5619 }, { "epoch": 0.36, "grad_norm": 2.1323033773106785, "learning_rate": 7.4100036179415035e-06, "loss": 0.8333, "step": 5620 }, { "epoch": 0.36, "grad_norm": 1.5838657887432503, "learning_rate": 7.409095366408547e-06, "loss": 0.8183, "step": 5621 }, { "epoch": 0.36, "grad_norm": 1.606367555980552, "learning_rate": 7.408187011337734e-06, "loss": 0.7279, "step": 5622 }, { "epoch": 0.36, "grad_norm": 1.634642211157334, "learning_rate": 7.407278552768103e-06, "loss": 0.6629, "step": 5623 }, { "epoch": 0.36, "grad_norm": 1.219291555837024, "learning_rate": 7.4063699907387e-06, "loss": 0.7104, "step": 5624 }, { "epoch": 0.36, "grad_norm": 1.4730001078214363, "learning_rate": 7.40546132528857e-06, "loss": 0.8511, "step": 5625 }, { "epoch": 0.36, "grad_norm": 1.66516816929722, "learning_rate": 7.404552556456768e-06, "loss": 0.836, "step": 5626 }, { "epoch": 0.36, "grad_norm": 1.858644910274926, "learning_rate": 7.403643684282347e-06, "loss": 0.7137, "step": 5627 }, { "epoch": 0.36, "grad_norm": 1.751703210085078, "learning_rate": 7.402734708804373e-06, "loss": 0.774, "step": 5628 }, { "epoch": 0.36, "grad_norm": 1.6743002166951353, "learning_rate": 7.40182563006191e-06, "loss": 0.7868, "step": 5629 }, { "epoch": 0.36, "grad_norm": 1.9007671390705794, "learning_rate": 7.4009164480940275e-06, "loss": 0.8084, "step": 5630 }, { "epoch": 0.36, "grad_norm": 1.5750262020698604, "learning_rate": 7.4000071629398015e-06, "loss": 0.745, "step": 5631 }, { "epoch": 0.36, "grad_norm": 1.1980909847069714, "learning_rate": 7.399097774638312e-06, "loss": 0.6482, "step": 5632 }, { "epoch": 0.36, "grad_norm": 1.4982592970633104, "learning_rate": 7.398188283228641e-06, "loss": 0.6938, "step": 5633 }, { "epoch": 0.36, "grad_norm": 1.7138954354272404, "learning_rate": 7.397278688749876e-06, "loss": 0.7791, "step": 5634 }, { "epoch": 0.36, "grad_norm": 1.2352640941512067, "learning_rate": 7.39636899124111e-06, "loss": 0.6584, "step": 5635 }, { "epoch": 0.36, "grad_norm": 2.202775510479605, "learning_rate": 7.395459190741441e-06, "loss": 0.7491, "step": 5636 }, { "epoch": 0.36, "grad_norm": 1.6845590909628474, "learning_rate": 7.39454928728997e-06, "loss": 0.769, "step": 5637 }, { "epoch": 0.36, "grad_norm": 1.8501832116268613, "learning_rate": 7.3936392809258e-06, "loss": 0.7869, "step": 5638 }, { "epoch": 0.36, "grad_norm": 1.3948682737457085, "learning_rate": 7.392729171688047e-06, "loss": 0.7591, "step": 5639 }, { "epoch": 0.36, "grad_norm": 1.9284231066044846, "learning_rate": 7.39181895961582e-06, "loss": 0.8484, "step": 5640 }, { "epoch": 0.36, "grad_norm": 1.1256634947226365, "learning_rate": 7.39090864474824e-06, "loss": 0.6268, "step": 5641 }, { "epoch": 0.36, "grad_norm": 1.0915152015631673, "learning_rate": 7.389998227124431e-06, "loss": 0.6216, "step": 5642 }, { "epoch": 0.36, "grad_norm": 1.7051079057192082, "learning_rate": 7.389087706783518e-06, "loss": 0.7937, "step": 5643 }, { "epoch": 0.36, "grad_norm": 1.7835286067456522, "learning_rate": 7.3881770837646385e-06, "loss": 0.811, "step": 5644 }, { "epoch": 0.36, "grad_norm": 1.355157981693391, "learning_rate": 7.387266358106925e-06, "loss": 0.6344, "step": 5645 }, { "epoch": 0.36, "grad_norm": 1.6990823728286546, "learning_rate": 7.386355529849519e-06, "loss": 0.8814, "step": 5646 }, { "epoch": 0.36, "grad_norm": 1.5792992146616291, "learning_rate": 7.385444599031568e-06, "loss": 0.8273, "step": 5647 }, { "epoch": 0.36, "grad_norm": 1.5206236824520056, "learning_rate": 7.38453356569222e-06, "loss": 0.5925, "step": 5648 }, { "epoch": 0.36, "grad_norm": 1.8225891769139442, "learning_rate": 7.38362242987063e-06, "loss": 0.8565, "step": 5649 }, { "epoch": 0.36, "grad_norm": 1.5004996311987522, "learning_rate": 7.382711191605958e-06, "loss": 0.7202, "step": 5650 }, { "epoch": 0.36, "grad_norm": 1.6131460401722746, "learning_rate": 7.381799850937363e-06, "loss": 0.8035, "step": 5651 }, { "epoch": 0.36, "grad_norm": 1.4673426360733302, "learning_rate": 7.380888407904018e-06, "loss": 0.6414, "step": 5652 }, { "epoch": 0.36, "grad_norm": 1.7448448617921741, "learning_rate": 7.379976862545091e-06, "loss": 0.7552, "step": 5653 }, { "epoch": 0.36, "grad_norm": 1.4549242952149406, "learning_rate": 7.3790652148997595e-06, "loss": 0.9165, "step": 5654 }, { "epoch": 0.36, "grad_norm": 1.7353187097528935, "learning_rate": 7.378153465007203e-06, "loss": 0.707, "step": 5655 }, { "epoch": 0.36, "grad_norm": 1.4813288108806335, "learning_rate": 7.377241612906609e-06, "loss": 0.6942, "step": 5656 }, { "epoch": 0.36, "grad_norm": 1.6062050335864815, "learning_rate": 7.376329658637167e-06, "loss": 0.6484, "step": 5657 }, { "epoch": 0.36, "grad_norm": 1.5587575968559597, "learning_rate": 7.375417602238066e-06, "loss": 0.6456, "step": 5658 }, { "epoch": 0.36, "grad_norm": 1.537985153274391, "learning_rate": 7.374505443748512e-06, "loss": 0.792, "step": 5659 }, { "epoch": 0.36, "grad_norm": 1.7298535370815868, "learning_rate": 7.373593183207701e-06, "loss": 0.7017, "step": 5660 }, { "epoch": 0.36, "grad_norm": 1.5402016370601415, "learning_rate": 7.372680820654844e-06, "loss": 0.7393, "step": 5661 }, { "epoch": 0.36, "grad_norm": 1.7827001697129758, "learning_rate": 7.37176835612915e-06, "loss": 0.8223, "step": 5662 }, { "epoch": 0.36, "grad_norm": 1.344957037295358, "learning_rate": 7.3708557896698375e-06, "loss": 0.6039, "step": 5663 }, { "epoch": 0.36, "grad_norm": 1.5600660926608945, "learning_rate": 7.369943121316122e-06, "loss": 0.782, "step": 5664 }, { "epoch": 0.36, "grad_norm": 1.6626846570735545, "learning_rate": 7.3690303511072324e-06, "loss": 0.7811, "step": 5665 }, { "epoch": 0.36, "grad_norm": 1.1122527787784537, "learning_rate": 7.368117479082397e-06, "loss": 0.6136, "step": 5666 }, { "epoch": 0.36, "grad_norm": 1.5290761269289752, "learning_rate": 7.367204505280848e-06, "loss": 0.6973, "step": 5667 }, { "epoch": 0.36, "grad_norm": 1.7352139579688364, "learning_rate": 7.366291429741824e-06, "loss": 0.8142, "step": 5668 }, { "epoch": 0.36, "grad_norm": 1.4704330069337732, "learning_rate": 7.365378252504567e-06, "loss": 0.7209, "step": 5669 }, { "epoch": 0.36, "grad_norm": 1.5090770561753244, "learning_rate": 7.3644649736083216e-06, "loss": 0.6558, "step": 5670 }, { "epoch": 0.36, "grad_norm": 1.7669622718181566, "learning_rate": 7.363551593092342e-06, "loss": 0.8154, "step": 5671 }, { "epoch": 0.36, "grad_norm": 1.3266783496597514, "learning_rate": 7.362638110995879e-06, "loss": 0.6623, "step": 5672 }, { "epoch": 0.36, "grad_norm": 1.756245134929058, "learning_rate": 7.361724527358195e-06, "loss": 0.7902, "step": 5673 }, { "epoch": 0.36, "grad_norm": 1.4053687494306673, "learning_rate": 7.360810842218554e-06, "loss": 0.6412, "step": 5674 }, { "epoch": 0.36, "grad_norm": 1.6741226869591532, "learning_rate": 7.359897055616225e-06, "loss": 0.7237, "step": 5675 }, { "epoch": 0.36, "grad_norm": 1.0215396607458698, "learning_rate": 7.358983167590479e-06, "loss": 0.6225, "step": 5676 }, { "epoch": 0.36, "grad_norm": 1.1256604362884655, "learning_rate": 7.358069178180592e-06, "loss": 0.5923, "step": 5677 }, { "epoch": 0.36, "grad_norm": 1.4216304283899972, "learning_rate": 7.357155087425848e-06, "loss": 0.7537, "step": 5678 }, { "epoch": 0.36, "grad_norm": 1.5729847511283659, "learning_rate": 7.356240895365531e-06, "loss": 0.7383, "step": 5679 }, { "epoch": 0.36, "grad_norm": 1.1001701813881315, "learning_rate": 7.35532660203893e-06, "loss": 0.61, "step": 5680 }, { "epoch": 0.36, "grad_norm": 1.7809580535364828, "learning_rate": 7.354412207485343e-06, "loss": 0.8468, "step": 5681 }, { "epoch": 0.36, "grad_norm": 1.5761740792766457, "learning_rate": 7.353497711744067e-06, "loss": 0.787, "step": 5682 }, { "epoch": 0.36, "grad_norm": 1.7462862264329428, "learning_rate": 7.352583114854402e-06, "loss": 0.7808, "step": 5683 }, { "epoch": 0.36, "grad_norm": 1.6378631182020984, "learning_rate": 7.351668416855659e-06, "loss": 0.7573, "step": 5684 }, { "epoch": 0.36, "grad_norm": 1.6220033585027096, "learning_rate": 7.350753617787151e-06, "loss": 0.7224, "step": 5685 }, { "epoch": 0.36, "grad_norm": 1.676919250160219, "learning_rate": 7.349838717688191e-06, "loss": 0.7789, "step": 5686 }, { "epoch": 0.36, "grad_norm": 1.8228147052650157, "learning_rate": 7.348923716598099e-06, "loss": 0.8107, "step": 5687 }, { "epoch": 0.36, "grad_norm": 1.460750658846165, "learning_rate": 7.348008614556203e-06, "loss": 0.7803, "step": 5688 }, { "epoch": 0.36, "grad_norm": 1.6651383356876404, "learning_rate": 7.347093411601831e-06, "loss": 0.9925, "step": 5689 }, { "epoch": 0.36, "grad_norm": 1.8116745012330686, "learning_rate": 7.346178107774316e-06, "loss": 0.8364, "step": 5690 }, { "epoch": 0.36, "grad_norm": 1.6491835188477777, "learning_rate": 7.3452627031129964e-06, "loss": 0.6876, "step": 5691 }, { "epoch": 0.36, "grad_norm": 1.6450978435471195, "learning_rate": 7.344347197657212e-06, "loss": 0.7869, "step": 5692 }, { "epoch": 0.36, "grad_norm": 1.8528742392498316, "learning_rate": 7.3434315914463125e-06, "loss": 0.885, "step": 5693 }, { "epoch": 0.36, "grad_norm": 1.7298249365717766, "learning_rate": 7.342515884519646e-06, "loss": 0.8575, "step": 5694 }, { "epoch": 0.36, "grad_norm": 1.5165282915990794, "learning_rate": 7.341600076916571e-06, "loss": 0.7176, "step": 5695 }, { "epoch": 0.36, "grad_norm": 1.3604273123718846, "learning_rate": 7.340684168676444e-06, "loss": 0.7561, "step": 5696 }, { "epoch": 0.36, "grad_norm": 0.9979344856371529, "learning_rate": 7.33976815983863e-06, "loss": 0.686, "step": 5697 }, { "epoch": 0.36, "grad_norm": 1.5451175583010577, "learning_rate": 7.338852050442497e-06, "loss": 0.8096, "step": 5698 }, { "epoch": 0.36, "grad_norm": 1.63750769250105, "learning_rate": 7.3379358405274195e-06, "loss": 0.7725, "step": 5699 }, { "epoch": 0.36, "grad_norm": 1.5965073729442214, "learning_rate": 7.33701953013277e-06, "loss": 0.8385, "step": 5700 }, { "epoch": 0.36, "grad_norm": 1.180602539509325, "learning_rate": 7.33610311929793e-06, "loss": 0.5854, "step": 5701 }, { "epoch": 0.36, "grad_norm": 1.6073776193417157, "learning_rate": 7.33518660806229e-06, "loss": 0.7919, "step": 5702 }, { "epoch": 0.37, "grad_norm": 1.7572321372964925, "learning_rate": 7.334269996465236e-06, "loss": 0.8705, "step": 5703 }, { "epoch": 0.37, "grad_norm": 1.6513234056739754, "learning_rate": 7.333353284546162e-06, "loss": 0.7965, "step": 5704 }, { "epoch": 0.37, "grad_norm": 1.4822833799118662, "learning_rate": 7.332436472344468e-06, "loss": 0.6441, "step": 5705 }, { "epoch": 0.37, "grad_norm": 1.4149720917063275, "learning_rate": 7.331519559899554e-06, "loss": 0.6708, "step": 5706 }, { "epoch": 0.37, "grad_norm": 1.5945123031590185, "learning_rate": 7.330602547250828e-06, "loss": 0.6963, "step": 5707 }, { "epoch": 0.37, "grad_norm": 1.7457930336935414, "learning_rate": 7.329685434437703e-06, "loss": 0.8529, "step": 5708 }, { "epoch": 0.37, "grad_norm": 1.6423618758766145, "learning_rate": 7.3287682214995934e-06, "loss": 0.8244, "step": 5709 }, { "epoch": 0.37, "grad_norm": 1.5629865445884201, "learning_rate": 7.327850908475919e-06, "loss": 0.7418, "step": 5710 }, { "epoch": 0.37, "grad_norm": 1.5162633602382702, "learning_rate": 7.326933495406103e-06, "loss": 0.6698, "step": 5711 }, { "epoch": 0.37, "grad_norm": 1.619999505812879, "learning_rate": 7.326015982329576e-06, "loss": 0.7674, "step": 5712 }, { "epoch": 0.37, "grad_norm": 1.659998718892619, "learning_rate": 7.32509836928577e-06, "loss": 0.7409, "step": 5713 }, { "epoch": 0.37, "grad_norm": 1.5589437246592759, "learning_rate": 7.3241806563141216e-06, "loss": 0.6708, "step": 5714 }, { "epoch": 0.37, "grad_norm": 1.7161925432065057, "learning_rate": 7.323262843454071e-06, "loss": 0.7463, "step": 5715 }, { "epoch": 0.37, "grad_norm": 2.08528969898164, "learning_rate": 7.322344930745067e-06, "loss": 0.7107, "step": 5716 }, { "epoch": 0.37, "grad_norm": 1.7396568604762577, "learning_rate": 7.321426918226557e-06, "loss": 0.7268, "step": 5717 }, { "epoch": 0.37, "grad_norm": 1.4567736444802677, "learning_rate": 7.320508805937996e-06, "loss": 0.7734, "step": 5718 }, { "epoch": 0.37, "grad_norm": 1.6649235190797433, "learning_rate": 7.319590593918844e-06, "loss": 0.8407, "step": 5719 }, { "epoch": 0.37, "grad_norm": 1.2798204001871345, "learning_rate": 7.31867228220856e-06, "loss": 0.7228, "step": 5720 }, { "epoch": 0.37, "grad_norm": 1.5542262481954683, "learning_rate": 7.317753870846615e-06, "loss": 0.684, "step": 5721 }, { "epoch": 0.37, "grad_norm": 1.8281885962616513, "learning_rate": 7.316835359872477e-06, "loss": 0.7754, "step": 5722 }, { "epoch": 0.37, "grad_norm": 1.0980922347813762, "learning_rate": 7.315916749325626e-06, "loss": 0.6201, "step": 5723 }, { "epoch": 0.37, "grad_norm": 1.7566568718175406, "learning_rate": 7.314998039245539e-06, "loss": 0.708, "step": 5724 }, { "epoch": 0.37, "grad_norm": 1.7339842671551164, "learning_rate": 7.3140792296717004e-06, "loss": 0.7064, "step": 5725 }, { "epoch": 0.37, "grad_norm": 1.0268055910409912, "learning_rate": 7.3131603206436e-06, "loss": 0.6736, "step": 5726 }, { "epoch": 0.37, "grad_norm": 1.6493455676031537, "learning_rate": 7.312241312200727e-06, "loss": 0.7872, "step": 5727 }, { "epoch": 0.37, "grad_norm": 1.6300600948179507, "learning_rate": 7.311322204382583e-06, "loss": 0.82, "step": 5728 }, { "epoch": 0.37, "grad_norm": 1.0652100086136986, "learning_rate": 7.310402997228667e-06, "loss": 0.6123, "step": 5729 }, { "epoch": 0.37, "grad_norm": 1.8570526490934711, "learning_rate": 7.309483690778485e-06, "loss": 0.8498, "step": 5730 }, { "epoch": 0.37, "grad_norm": 1.5428260890248642, "learning_rate": 7.308564285071547e-06, "loss": 0.695, "step": 5731 }, { "epoch": 0.37, "grad_norm": 1.6453393447659541, "learning_rate": 7.307644780147367e-06, "loss": 0.7466, "step": 5732 }, { "epoch": 0.37, "grad_norm": 1.7146256761748848, "learning_rate": 7.306725176045464e-06, "loss": 0.7923, "step": 5733 }, { "epoch": 0.37, "grad_norm": 1.512041785863264, "learning_rate": 7.30580547280536e-06, "loss": 0.8329, "step": 5734 }, { "epoch": 0.37, "grad_norm": 2.718130581550925, "learning_rate": 7.304885670466581e-06, "loss": 0.8227, "step": 5735 }, { "epoch": 0.37, "grad_norm": 1.7321787314838977, "learning_rate": 7.303965769068659e-06, "loss": 0.7058, "step": 5736 }, { "epoch": 0.37, "grad_norm": 1.6825496062441734, "learning_rate": 7.3030457686511305e-06, "loss": 0.8466, "step": 5737 }, { "epoch": 0.37, "grad_norm": 1.7653821442989863, "learning_rate": 7.302125669253533e-06, "loss": 0.7414, "step": 5738 }, { "epoch": 0.37, "grad_norm": 2.1864603042741435, "learning_rate": 7.3012054709154124e-06, "loss": 0.8762, "step": 5739 }, { "epoch": 0.37, "grad_norm": 1.6767789921781044, "learning_rate": 7.3002851736763165e-06, "loss": 0.8352, "step": 5740 }, { "epoch": 0.37, "grad_norm": 1.8693773322718856, "learning_rate": 7.299364777575797e-06, "loss": 0.8073, "step": 5741 }, { "epoch": 0.37, "grad_norm": 1.6578188505746672, "learning_rate": 7.298444282653412e-06, "loss": 0.6647, "step": 5742 }, { "epoch": 0.37, "grad_norm": 1.616101616060232, "learning_rate": 7.29752368894872e-06, "loss": 0.7999, "step": 5743 }, { "epoch": 0.37, "grad_norm": 1.670633376008691, "learning_rate": 7.296602996501288e-06, "loss": 0.7413, "step": 5744 }, { "epoch": 0.37, "grad_norm": 1.6425908114802972, "learning_rate": 7.295682205350685e-06, "loss": 0.7429, "step": 5745 }, { "epoch": 0.37, "grad_norm": 1.5450107577105352, "learning_rate": 7.294761315536485e-06, "loss": 0.7337, "step": 5746 }, { "epoch": 0.37, "grad_norm": 1.6138638437647626, "learning_rate": 7.293840327098265e-06, "loss": 0.6532, "step": 5747 }, { "epoch": 0.37, "grad_norm": 1.6752509063752739, "learning_rate": 7.292919240075609e-06, "loss": 0.6832, "step": 5748 }, { "epoch": 0.37, "grad_norm": 1.6189659018144853, "learning_rate": 7.291998054508102e-06, "loss": 0.8058, "step": 5749 }, { "epoch": 0.37, "grad_norm": 1.610703780061905, "learning_rate": 7.291076770435333e-06, "loss": 0.6658, "step": 5750 }, { "epoch": 0.37, "grad_norm": 1.6120096789779061, "learning_rate": 7.2901553878969e-06, "loss": 0.8054, "step": 5751 }, { "epoch": 0.37, "grad_norm": 1.5809047421057794, "learning_rate": 7.2892339069324e-06, "loss": 0.7822, "step": 5752 }, { "epoch": 0.37, "grad_norm": 1.6222552392915044, "learning_rate": 7.288312327581439e-06, "loss": 0.8, "step": 5753 }, { "epoch": 0.37, "grad_norm": 1.86001218842813, "learning_rate": 7.287390649883621e-06, "loss": 0.9455, "step": 5754 }, { "epoch": 0.37, "grad_norm": 1.5946968987092232, "learning_rate": 7.286468873878559e-06, "loss": 0.7768, "step": 5755 }, { "epoch": 0.37, "grad_norm": 2.354400217817018, "learning_rate": 7.285546999605871e-06, "loss": 0.7571, "step": 5756 }, { "epoch": 0.37, "grad_norm": 1.5178281167433219, "learning_rate": 7.2846250271051735e-06, "loss": 0.7391, "step": 5757 }, { "epoch": 0.37, "grad_norm": 1.718501191645186, "learning_rate": 7.283702956416092e-06, "loss": 0.7983, "step": 5758 }, { "epoch": 0.37, "grad_norm": 1.5031102889647356, "learning_rate": 7.282780787578258e-06, "loss": 0.6873, "step": 5759 }, { "epoch": 0.37, "grad_norm": 2.1529747813462534, "learning_rate": 7.281858520631304e-06, "loss": 0.8273, "step": 5760 }, { "epoch": 0.37, "grad_norm": 1.6635713845988733, "learning_rate": 7.280936155614864e-06, "loss": 0.7502, "step": 5761 }, { "epoch": 0.37, "grad_norm": 1.5245703143474412, "learning_rate": 7.280013692568582e-06, "loss": 0.7288, "step": 5762 }, { "epoch": 0.37, "grad_norm": 1.149899153572121, "learning_rate": 7.2790911315321015e-06, "loss": 0.705, "step": 5763 }, { "epoch": 0.37, "grad_norm": 2.1537058763752013, "learning_rate": 7.278168472545072e-06, "loss": 0.7424, "step": 5764 }, { "epoch": 0.37, "grad_norm": 1.6713827817717515, "learning_rate": 7.2772457156471496e-06, "loss": 0.9613, "step": 5765 }, { "epoch": 0.37, "grad_norm": 1.4985337894425423, "learning_rate": 7.276322860877992e-06, "loss": 0.7403, "step": 5766 }, { "epoch": 0.37, "grad_norm": 1.7877048326491563, "learning_rate": 7.275399908277261e-06, "loss": 0.722, "step": 5767 }, { "epoch": 0.37, "grad_norm": 1.5559041100284425, "learning_rate": 7.274476857884622e-06, "loss": 0.7768, "step": 5768 }, { "epoch": 0.37, "grad_norm": 2.0194370309206775, "learning_rate": 7.273553709739749e-06, "loss": 0.7637, "step": 5769 }, { "epoch": 0.37, "grad_norm": 1.5078269344037418, "learning_rate": 7.272630463882314e-06, "loss": 0.6711, "step": 5770 }, { "epoch": 0.37, "grad_norm": 1.5515866644742249, "learning_rate": 7.271707120351997e-06, "loss": 0.7987, "step": 5771 }, { "epoch": 0.37, "grad_norm": 1.517987291809831, "learning_rate": 7.2707836791884815e-06, "loss": 0.8135, "step": 5772 }, { "epoch": 0.37, "grad_norm": 1.549639803704944, "learning_rate": 7.269860140431455e-06, "loss": 0.7394, "step": 5773 }, { "epoch": 0.37, "grad_norm": 1.839193063467491, "learning_rate": 7.268936504120609e-06, "loss": 0.8217, "step": 5774 }, { "epoch": 0.37, "grad_norm": 1.5470720712665624, "learning_rate": 7.268012770295641e-06, "loss": 0.7916, "step": 5775 }, { "epoch": 0.37, "grad_norm": 1.1542387429354828, "learning_rate": 7.2670889389962486e-06, "loss": 0.6557, "step": 5776 }, { "epoch": 0.37, "grad_norm": 1.7397487584335634, "learning_rate": 7.266165010262138e-06, "loss": 0.8778, "step": 5777 }, { "epoch": 0.37, "grad_norm": 1.7011397696113282, "learning_rate": 7.265240984133017e-06, "loss": 0.8309, "step": 5778 }, { "epoch": 0.37, "grad_norm": 1.6490948817880176, "learning_rate": 7.264316860648598e-06, "loss": 0.7906, "step": 5779 }, { "epoch": 0.37, "grad_norm": 1.7571842978348673, "learning_rate": 7.263392639848599e-06, "loss": 0.7023, "step": 5780 }, { "epoch": 0.37, "grad_norm": 1.8006469952991944, "learning_rate": 7.26246832177274e-06, "loss": 0.8383, "step": 5781 }, { "epoch": 0.37, "grad_norm": 1.2239891563552932, "learning_rate": 7.2615439064607475e-06, "loss": 0.7052, "step": 5782 }, { "epoch": 0.37, "grad_norm": 1.6688836603796433, "learning_rate": 7.2606193939523496e-06, "loss": 0.7356, "step": 5783 }, { "epoch": 0.37, "grad_norm": 1.7575931850595665, "learning_rate": 7.25969478428728e-06, "loss": 0.7811, "step": 5784 }, { "epoch": 0.37, "grad_norm": 1.6585884557209483, "learning_rate": 7.258770077505276e-06, "loss": 0.7579, "step": 5785 }, { "epoch": 0.37, "grad_norm": 1.0824331819356263, "learning_rate": 7.257845273646082e-06, "loss": 0.7079, "step": 5786 }, { "epoch": 0.37, "grad_norm": 1.1871984731578922, "learning_rate": 7.256920372749441e-06, "loss": 0.6412, "step": 5787 }, { "epoch": 0.37, "grad_norm": 1.0737778185053046, "learning_rate": 7.255995374855106e-06, "loss": 0.5833, "step": 5788 }, { "epoch": 0.37, "grad_norm": 1.6121121813499069, "learning_rate": 7.255070280002829e-06, "loss": 0.8472, "step": 5789 }, { "epoch": 0.37, "grad_norm": 1.6283429736565338, "learning_rate": 7.2541450882323714e-06, "loss": 0.7103, "step": 5790 }, { "epoch": 0.37, "grad_norm": 1.8635888968266128, "learning_rate": 7.253219799583495e-06, "loss": 0.8124, "step": 5791 }, { "epoch": 0.37, "grad_norm": 1.5287268683442758, "learning_rate": 7.252294414095965e-06, "loss": 0.709, "step": 5792 }, { "epoch": 0.37, "grad_norm": 1.6357701159911389, "learning_rate": 7.251368931809554e-06, "loss": 0.7412, "step": 5793 }, { "epoch": 0.37, "grad_norm": 1.5389053999074043, "learning_rate": 7.250443352764036e-06, "loss": 0.7151, "step": 5794 }, { "epoch": 0.37, "grad_norm": 1.672909149075235, "learning_rate": 7.249517676999192e-06, "loss": 0.53, "step": 5795 }, { "epoch": 0.37, "grad_norm": 1.7460962442294128, "learning_rate": 7.248591904554807e-06, "loss": 0.8006, "step": 5796 }, { "epoch": 0.37, "grad_norm": 1.5993245205156543, "learning_rate": 7.247666035470666e-06, "loss": 0.7194, "step": 5797 }, { "epoch": 0.37, "grad_norm": 1.634821383853191, "learning_rate": 7.2467400697865616e-06, "loss": 0.8339, "step": 5798 }, { "epoch": 0.37, "grad_norm": 1.630054313221848, "learning_rate": 7.24581400754229e-06, "loss": 0.6872, "step": 5799 }, { "epoch": 0.37, "grad_norm": 1.7986220222108418, "learning_rate": 7.244887848777651e-06, "loss": 0.7533, "step": 5800 }, { "epoch": 0.37, "grad_norm": 1.8263622075954224, "learning_rate": 7.24396159353245e-06, "loss": 0.7788, "step": 5801 }, { "epoch": 0.37, "grad_norm": 1.665550641863339, "learning_rate": 7.2430352418464944e-06, "loss": 0.6928, "step": 5802 }, { "epoch": 0.37, "grad_norm": 1.5299944259868694, "learning_rate": 7.242108793759597e-06, "loss": 0.7703, "step": 5803 }, { "epoch": 0.37, "grad_norm": 1.8624736990824229, "learning_rate": 7.2411822493115765e-06, "loss": 0.8078, "step": 5804 }, { "epoch": 0.37, "grad_norm": 1.5605768975028957, "learning_rate": 7.240255608542252e-06, "loss": 0.7536, "step": 5805 }, { "epoch": 0.37, "grad_norm": 1.6034145968211817, "learning_rate": 7.239328871491449e-06, "loss": 0.8629, "step": 5806 }, { "epoch": 0.37, "grad_norm": 1.7309765176152883, "learning_rate": 7.238402038198995e-06, "loss": 0.8155, "step": 5807 }, { "epoch": 0.37, "grad_norm": 0.9424669939553046, "learning_rate": 7.237475108704726e-06, "loss": 0.6922, "step": 5808 }, { "epoch": 0.37, "grad_norm": 1.8307149576726325, "learning_rate": 7.236548083048478e-06, "loss": 0.7003, "step": 5809 }, { "epoch": 0.37, "grad_norm": 1.7309161124082706, "learning_rate": 7.235620961270093e-06, "loss": 0.7762, "step": 5810 }, { "epoch": 0.37, "grad_norm": 1.5075943457698266, "learning_rate": 7.234693743409418e-06, "loss": 0.7826, "step": 5811 }, { "epoch": 0.37, "grad_norm": 1.7364140495203406, "learning_rate": 7.233766429506299e-06, "loss": 0.8612, "step": 5812 }, { "epoch": 0.37, "grad_norm": 1.8236847398837832, "learning_rate": 7.232839019600595e-06, "loss": 0.7421, "step": 5813 }, { "epoch": 0.37, "grad_norm": 1.5963949630061633, "learning_rate": 7.231911513732162e-06, "loss": 0.8231, "step": 5814 }, { "epoch": 0.37, "grad_norm": 1.5417236179300131, "learning_rate": 7.230983911940861e-06, "loss": 0.7518, "step": 5815 }, { "epoch": 0.37, "grad_norm": 1.2188439272409846, "learning_rate": 7.230056214266559e-06, "loss": 0.693, "step": 5816 }, { "epoch": 0.37, "grad_norm": 1.2445925193389518, "learning_rate": 7.229128420749127e-06, "loss": 0.6444, "step": 5817 }, { "epoch": 0.37, "grad_norm": 1.8281338618822265, "learning_rate": 7.228200531428441e-06, "loss": 0.7841, "step": 5818 }, { "epoch": 0.37, "grad_norm": 1.6643151790225985, "learning_rate": 7.227272546344377e-06, "loss": 0.7855, "step": 5819 }, { "epoch": 0.37, "grad_norm": 1.5072608626519515, "learning_rate": 7.226344465536821e-06, "loss": 0.6838, "step": 5820 }, { "epoch": 0.37, "grad_norm": 1.5964054970169828, "learning_rate": 7.225416289045655e-06, "loss": 0.8019, "step": 5821 }, { "epoch": 0.37, "grad_norm": 1.866583955831165, "learning_rate": 7.2244880169107745e-06, "loss": 0.6672, "step": 5822 }, { "epoch": 0.37, "grad_norm": 1.5634390632264241, "learning_rate": 7.2235596491720724e-06, "loss": 0.8176, "step": 5823 }, { "epoch": 0.37, "grad_norm": 1.8305193816169207, "learning_rate": 7.2226311858694506e-06, "loss": 0.7603, "step": 5824 }, { "epoch": 0.37, "grad_norm": 1.7269296074711507, "learning_rate": 7.22170262704281e-06, "loss": 0.734, "step": 5825 }, { "epoch": 0.37, "grad_norm": 1.682295515909925, "learning_rate": 7.2207739727320605e-06, "loss": 0.6543, "step": 5826 }, { "epoch": 0.37, "grad_norm": 1.6503755679814056, "learning_rate": 7.21984522297711e-06, "loss": 0.8156, "step": 5827 }, { "epoch": 0.37, "grad_norm": 1.6022419096496532, "learning_rate": 7.218916377817877e-06, "loss": 0.7966, "step": 5828 }, { "epoch": 0.37, "grad_norm": 1.5851667046141737, "learning_rate": 7.217987437294281e-06, "loss": 0.7653, "step": 5829 }, { "epoch": 0.37, "grad_norm": 1.894907556593096, "learning_rate": 7.217058401446245e-06, "loss": 1.0541, "step": 5830 }, { "epoch": 0.37, "grad_norm": 1.7663093357917832, "learning_rate": 7.216129270313698e-06, "loss": 0.725, "step": 5831 }, { "epoch": 0.37, "grad_norm": 1.2555896126920245, "learning_rate": 7.215200043936571e-06, "loss": 0.6899, "step": 5832 }, { "epoch": 0.37, "grad_norm": 1.6570570058027712, "learning_rate": 7.214270722354802e-06, "loss": 0.6826, "step": 5833 }, { "epoch": 0.37, "grad_norm": 1.8951519336413623, "learning_rate": 7.21334130560833e-06, "loss": 0.6691, "step": 5834 }, { "epoch": 0.37, "grad_norm": 1.6502116361185604, "learning_rate": 7.2124117937371e-06, "loss": 0.7286, "step": 5835 }, { "epoch": 0.37, "grad_norm": 1.8705294860414368, "learning_rate": 7.211482186781058e-06, "loss": 0.8108, "step": 5836 }, { "epoch": 0.37, "grad_norm": 1.6305407411989352, "learning_rate": 7.21055248478016e-06, "loss": 0.6478, "step": 5837 }, { "epoch": 0.37, "grad_norm": 1.6171252171649382, "learning_rate": 7.20962268777436e-06, "loss": 0.708, "step": 5838 }, { "epoch": 0.37, "grad_norm": 1.5277844380068988, "learning_rate": 7.208692795803622e-06, "loss": 0.7901, "step": 5839 }, { "epoch": 0.37, "grad_norm": 1.7445740518045643, "learning_rate": 7.207762808907908e-06, "loss": 0.6974, "step": 5840 }, { "epoch": 0.37, "grad_norm": 2.0475072325605046, "learning_rate": 7.206832727127186e-06, "loss": 0.8177, "step": 5841 }, { "epoch": 0.37, "grad_norm": 1.5945376642414884, "learning_rate": 7.205902550501433e-06, "loss": 0.7537, "step": 5842 }, { "epoch": 0.37, "grad_norm": 1.7296611278875633, "learning_rate": 7.204972279070623e-06, "loss": 0.5949, "step": 5843 }, { "epoch": 0.37, "grad_norm": 1.6192632756557952, "learning_rate": 7.204041912874736e-06, "loss": 0.7095, "step": 5844 }, { "epoch": 0.37, "grad_norm": 1.5920587967116817, "learning_rate": 7.203111451953761e-06, "loss": 0.6835, "step": 5845 }, { "epoch": 0.37, "grad_norm": 1.793724467339687, "learning_rate": 7.202180896347684e-06, "loss": 0.902, "step": 5846 }, { "epoch": 0.37, "grad_norm": 1.533165218963351, "learning_rate": 7.201250246096501e-06, "loss": 0.8541, "step": 5847 }, { "epoch": 0.37, "grad_norm": 1.5354631747799528, "learning_rate": 7.200319501240206e-06, "loss": 0.722, "step": 5848 }, { "epoch": 0.37, "grad_norm": 1.5862829400692373, "learning_rate": 7.1993886618188025e-06, "loss": 0.7253, "step": 5849 }, { "epoch": 0.37, "grad_norm": 1.6570458137810677, "learning_rate": 7.198457727872297e-06, "loss": 0.8341, "step": 5850 }, { "epoch": 0.37, "grad_norm": 1.7207845666911754, "learning_rate": 7.1975266994406965e-06, "loss": 0.866, "step": 5851 }, { "epoch": 0.37, "grad_norm": 1.59358844129683, "learning_rate": 7.196595576564017e-06, "loss": 0.6138, "step": 5852 }, { "epoch": 0.37, "grad_norm": 1.4926909165125088, "learning_rate": 7.195664359282275e-06, "loss": 0.6536, "step": 5853 }, { "epoch": 0.37, "grad_norm": 1.8959166658292184, "learning_rate": 7.194733047635494e-06, "loss": 0.8487, "step": 5854 }, { "epoch": 0.37, "grad_norm": 1.7332348775864013, "learning_rate": 7.193801641663697e-06, "loss": 0.8804, "step": 5855 }, { "epoch": 0.37, "grad_norm": 1.7115088816024981, "learning_rate": 7.192870141406916e-06, "loss": 0.7039, "step": 5856 }, { "epoch": 0.37, "grad_norm": 1.7826039109614158, "learning_rate": 7.191938546905183e-06, "loss": 0.7465, "step": 5857 }, { "epoch": 0.37, "grad_norm": 1.758683291290345, "learning_rate": 7.191006858198538e-06, "loss": 0.7694, "step": 5858 }, { "epoch": 0.38, "grad_norm": 1.7036480224195711, "learning_rate": 7.190075075327021e-06, "loss": 0.8157, "step": 5859 }, { "epoch": 0.38, "grad_norm": 1.6373040257873026, "learning_rate": 7.1891431983306805e-06, "loss": 0.7925, "step": 5860 }, { "epoch": 0.38, "grad_norm": 1.7737442007307744, "learning_rate": 7.188211227249565e-06, "loss": 0.855, "step": 5861 }, { "epoch": 0.38, "grad_norm": 1.857770265194684, "learning_rate": 7.1872791621237305e-06, "loss": 0.8261, "step": 5862 }, { "epoch": 0.38, "grad_norm": 1.756824163623485, "learning_rate": 7.186347002993233e-06, "loss": 0.6975, "step": 5863 }, { "epoch": 0.38, "grad_norm": 1.6909674999135136, "learning_rate": 7.185414749898134e-06, "loss": 0.7571, "step": 5864 }, { "epoch": 0.38, "grad_norm": 1.6127830604973696, "learning_rate": 7.184482402878501e-06, "loss": 0.6942, "step": 5865 }, { "epoch": 0.38, "grad_norm": 1.817659421483684, "learning_rate": 7.183549961974406e-06, "loss": 0.7796, "step": 5866 }, { "epoch": 0.38, "grad_norm": 1.5833780265491886, "learning_rate": 7.182617427225922e-06, "loss": 0.6139, "step": 5867 }, { "epoch": 0.38, "grad_norm": 1.5744154345699302, "learning_rate": 7.1816847986731256e-06, "loss": 0.7039, "step": 5868 }, { "epoch": 0.38, "grad_norm": 1.631668566311495, "learning_rate": 7.180752076356102e-06, "loss": 0.7591, "step": 5869 }, { "epoch": 0.38, "grad_norm": 1.695614058263291, "learning_rate": 7.179819260314937e-06, "loss": 0.8948, "step": 5870 }, { "epoch": 0.38, "grad_norm": 1.0541897760976375, "learning_rate": 7.178886350589721e-06, "loss": 0.6846, "step": 5871 }, { "epoch": 0.38, "grad_norm": 2.0427914834307144, "learning_rate": 7.177953347220546e-06, "loss": 0.7307, "step": 5872 }, { "epoch": 0.38, "grad_norm": 1.5639506884134509, "learning_rate": 7.177020250247515e-06, "loss": 0.8394, "step": 5873 }, { "epoch": 0.38, "grad_norm": 1.5534851169926294, "learning_rate": 7.176087059710728e-06, "loss": 0.7813, "step": 5874 }, { "epoch": 0.38, "grad_norm": 1.478546381868069, "learning_rate": 7.17515377565029e-06, "loss": 0.7238, "step": 5875 }, { "epoch": 0.38, "grad_norm": 4.8213661143576205, "learning_rate": 7.174220398106315e-06, "loss": 0.7275, "step": 5876 }, { "epoch": 0.38, "grad_norm": 1.676030092876962, "learning_rate": 7.173286927118914e-06, "loss": 0.7399, "step": 5877 }, { "epoch": 0.38, "grad_norm": 3.232698197074359, "learning_rate": 7.17235336272821e-06, "loss": 0.7835, "step": 5878 }, { "epoch": 0.38, "grad_norm": 2.4415921600805874, "learning_rate": 7.171419704974321e-06, "loss": 0.738, "step": 5879 }, { "epoch": 0.38, "grad_norm": 1.0591802912531567, "learning_rate": 7.170485953897377e-06, "loss": 0.7102, "step": 5880 }, { "epoch": 0.38, "grad_norm": 1.6389714531493362, "learning_rate": 7.169552109537507e-06, "loss": 0.7968, "step": 5881 }, { "epoch": 0.38, "grad_norm": 1.9154097627160824, "learning_rate": 7.168618171934848e-06, "loss": 0.7945, "step": 5882 }, { "epoch": 0.38, "grad_norm": 1.2027787151823885, "learning_rate": 7.167684141129536e-06, "loss": 0.7264, "step": 5883 }, { "epoch": 0.38, "grad_norm": 1.4588671911601672, "learning_rate": 7.166750017161715e-06, "loss": 0.7134, "step": 5884 }, { "epoch": 0.38, "grad_norm": 1.618615683347289, "learning_rate": 7.165815800071529e-06, "loss": 0.721, "step": 5885 }, { "epoch": 0.38, "grad_norm": 1.0921968710184782, "learning_rate": 7.164881489899131e-06, "loss": 0.5677, "step": 5886 }, { "epoch": 0.38, "grad_norm": 1.614266827211718, "learning_rate": 7.163947086684677e-06, "loss": 0.6939, "step": 5887 }, { "epoch": 0.38, "grad_norm": 1.179583365740277, "learning_rate": 7.1630125904683245e-06, "loss": 0.6555, "step": 5888 }, { "epoch": 0.38, "grad_norm": 1.9735826822309093, "learning_rate": 7.1620780012902356e-06, "loss": 0.9054, "step": 5889 }, { "epoch": 0.38, "grad_norm": 1.6431951393731206, "learning_rate": 7.161143319190577e-06, "loss": 0.7844, "step": 5890 }, { "epoch": 0.38, "grad_norm": 1.82202731361571, "learning_rate": 7.160208544209521e-06, "loss": 0.8086, "step": 5891 }, { "epoch": 0.38, "grad_norm": 1.5951957590466852, "learning_rate": 7.159273676387241e-06, "loss": 0.7151, "step": 5892 }, { "epoch": 0.38, "grad_norm": 1.5492843572817676, "learning_rate": 7.158338715763912e-06, "loss": 0.7298, "step": 5893 }, { "epoch": 0.38, "grad_norm": 1.7668543354153596, "learning_rate": 7.157403662379725e-06, "loss": 0.855, "step": 5894 }, { "epoch": 0.38, "grad_norm": 1.6774617059914245, "learning_rate": 7.156468516274859e-06, "loss": 0.771, "step": 5895 }, { "epoch": 0.38, "grad_norm": 1.5581358870702198, "learning_rate": 7.155533277489508e-06, "loss": 0.8171, "step": 5896 }, { "epoch": 0.38, "grad_norm": 2.085189637312533, "learning_rate": 7.154597946063867e-06, "loss": 0.754, "step": 5897 }, { "epoch": 0.38, "grad_norm": 1.5958411402884782, "learning_rate": 7.153662522038134e-06, "loss": 0.8091, "step": 5898 }, { "epoch": 0.38, "grad_norm": 1.5448091034330411, "learning_rate": 7.152727005452511e-06, "loss": 0.706, "step": 5899 }, { "epoch": 0.38, "grad_norm": 3.2011399846267605, "learning_rate": 7.151791396347203e-06, "loss": 0.7475, "step": 5900 }, { "epoch": 0.38, "grad_norm": 1.9989797792830684, "learning_rate": 7.1508556947624245e-06, "loss": 0.7415, "step": 5901 }, { "epoch": 0.38, "grad_norm": 1.910914532968909, "learning_rate": 7.149919900738387e-06, "loss": 0.787, "step": 5902 }, { "epoch": 0.38, "grad_norm": 1.9970179693031833, "learning_rate": 7.14898401431531e-06, "loss": 0.696, "step": 5903 }, { "epoch": 0.38, "grad_norm": 1.4813200132695423, "learning_rate": 7.1480480355334155e-06, "loss": 1.0643, "step": 5904 }, { "epoch": 0.38, "grad_norm": 1.3863804940893192, "learning_rate": 7.14711196443293e-06, "loss": 0.6823, "step": 5905 }, { "epoch": 0.38, "grad_norm": 1.5280265422214216, "learning_rate": 7.146175801054084e-06, "loss": 0.667, "step": 5906 }, { "epoch": 0.38, "grad_norm": 1.744230863266303, "learning_rate": 7.145239545437113e-06, "loss": 0.7086, "step": 5907 }, { "epoch": 0.38, "grad_norm": 1.1939182097680499, "learning_rate": 7.144303197622251e-06, "loss": 0.675, "step": 5908 }, { "epoch": 0.38, "grad_norm": 1.879015299121106, "learning_rate": 7.143366757649746e-06, "loss": 0.8132, "step": 5909 }, { "epoch": 0.38, "grad_norm": 1.0256557136940825, "learning_rate": 7.142430225559841e-06, "loss": 0.6537, "step": 5910 }, { "epoch": 0.38, "grad_norm": 1.6446854000381563, "learning_rate": 7.141493601392787e-06, "loss": 0.7546, "step": 5911 }, { "epoch": 0.38, "grad_norm": 1.6736528104686073, "learning_rate": 7.1405568851888384e-06, "loss": 0.7011, "step": 5912 }, { "epoch": 0.38, "grad_norm": 1.2544915374633958, "learning_rate": 7.139620076988252e-06, "loss": 0.6521, "step": 5913 }, { "epoch": 0.38, "grad_norm": 1.4826873142259456, "learning_rate": 7.138683176831289e-06, "loss": 0.6934, "step": 5914 }, { "epoch": 0.38, "grad_norm": 1.5273278393520429, "learning_rate": 7.137746184758218e-06, "loss": 0.7758, "step": 5915 }, { "epoch": 0.38, "grad_norm": 1.6218352962921212, "learning_rate": 7.13680910080931e-06, "loss": 0.8276, "step": 5916 }, { "epoch": 0.38, "grad_norm": 1.6383494362675726, "learning_rate": 7.135871925024835e-06, "loss": 0.7014, "step": 5917 }, { "epoch": 0.38, "grad_norm": 1.7772830508334367, "learning_rate": 7.134934657445074e-06, "loss": 0.7071, "step": 5918 }, { "epoch": 0.38, "grad_norm": 1.57095836242197, "learning_rate": 7.133997298110308e-06, "loss": 0.7777, "step": 5919 }, { "epoch": 0.38, "grad_norm": 1.6178794554270013, "learning_rate": 7.133059847060821e-06, "loss": 0.7505, "step": 5920 }, { "epoch": 0.38, "grad_norm": 1.0745513551917496, "learning_rate": 7.1321223043369034e-06, "loss": 0.6227, "step": 5921 }, { "epoch": 0.38, "grad_norm": 1.172835640261111, "learning_rate": 7.13118466997885e-06, "loss": 0.6252, "step": 5922 }, { "epoch": 0.38, "grad_norm": 1.460869390441956, "learning_rate": 7.130246944026958e-06, "loss": 0.7856, "step": 5923 }, { "epoch": 0.38, "grad_norm": 1.7554142004965836, "learning_rate": 7.129309126521528e-06, "loss": 0.7155, "step": 5924 }, { "epoch": 0.38, "grad_norm": 1.6513560324916718, "learning_rate": 7.128371217502868e-06, "loss": 0.7, "step": 5925 }, { "epoch": 0.38, "grad_norm": 1.6344988593661527, "learning_rate": 7.127433217011283e-06, "loss": 0.7555, "step": 5926 }, { "epoch": 0.38, "grad_norm": 1.6589199257969782, "learning_rate": 7.12649512508709e-06, "loss": 0.7587, "step": 5927 }, { "epoch": 0.38, "grad_norm": 1.483352238379884, "learning_rate": 7.125556941770604e-06, "loss": 0.7647, "step": 5928 }, { "epoch": 0.38, "grad_norm": 1.5350622225262989, "learning_rate": 7.1246186671021475e-06, "loss": 0.9006, "step": 5929 }, { "epoch": 0.38, "grad_norm": 1.560645336187597, "learning_rate": 7.123680301122044e-06, "loss": 0.7888, "step": 5930 }, { "epoch": 0.38, "grad_norm": 1.7315056169679064, "learning_rate": 7.122741843870626e-06, "loss": 0.6599, "step": 5931 }, { "epoch": 0.38, "grad_norm": 1.5894538975782593, "learning_rate": 7.121803295388223e-06, "loss": 0.6013, "step": 5932 }, { "epoch": 0.38, "grad_norm": 1.589901818714016, "learning_rate": 7.120864655715172e-06, "loss": 0.7389, "step": 5933 }, { "epoch": 0.38, "grad_norm": 1.6582258905990217, "learning_rate": 7.119925924891815e-06, "loss": 0.7937, "step": 5934 }, { "epoch": 0.38, "grad_norm": 1.455774074966249, "learning_rate": 7.118987102958498e-06, "loss": 0.7862, "step": 5935 }, { "epoch": 0.38, "grad_norm": 1.4933918134503352, "learning_rate": 7.1180481899555655e-06, "loss": 0.7665, "step": 5936 }, { "epoch": 0.38, "grad_norm": 1.6088474289729706, "learning_rate": 7.117109185923374e-06, "loss": 0.698, "step": 5937 }, { "epoch": 0.38, "grad_norm": 1.9225925470940066, "learning_rate": 7.1161700909022776e-06, "loss": 0.939, "step": 5938 }, { "epoch": 0.38, "grad_norm": 1.719943783945957, "learning_rate": 7.115230904932639e-06, "loss": 0.8439, "step": 5939 }, { "epoch": 0.38, "grad_norm": 4.39842816732842, "learning_rate": 7.1142916280548195e-06, "loss": 0.7373, "step": 5940 }, { "epoch": 0.38, "grad_norm": 1.2012259690350784, "learning_rate": 7.113352260309189e-06, "loss": 0.6657, "step": 5941 }, { "epoch": 0.38, "grad_norm": 1.7019838549150312, "learning_rate": 7.112412801736117e-06, "loss": 0.7095, "step": 5942 }, { "epoch": 0.38, "grad_norm": 1.560164558559922, "learning_rate": 7.111473252375983e-06, "loss": 0.7547, "step": 5943 }, { "epoch": 0.38, "grad_norm": 1.4875039867909619, "learning_rate": 7.110533612269166e-06, "loss": 0.8005, "step": 5944 }, { "epoch": 0.38, "grad_norm": 1.4568122327770545, "learning_rate": 7.109593881456048e-06, "loss": 0.769, "step": 5945 }, { "epoch": 0.38, "grad_norm": 1.9544866788484603, "learning_rate": 7.108654059977019e-06, "loss": 0.6793, "step": 5946 }, { "epoch": 0.38, "grad_norm": 1.5561391437731213, "learning_rate": 7.10771414787247e-06, "loss": 0.8017, "step": 5947 }, { "epoch": 0.38, "grad_norm": 1.6856005096649427, "learning_rate": 7.106774145182796e-06, "loss": 0.7845, "step": 5948 }, { "epoch": 0.38, "grad_norm": 2.072204736167412, "learning_rate": 7.105834051948395e-06, "loss": 0.861, "step": 5949 }, { "epoch": 0.38, "grad_norm": 1.9065341966787157, "learning_rate": 7.10489386820967e-06, "loss": 0.6893, "step": 5950 }, { "epoch": 0.38, "grad_norm": 1.6608868674150568, "learning_rate": 7.1039535940070305e-06, "loss": 0.7063, "step": 5951 }, { "epoch": 0.38, "grad_norm": 2.077031835081406, "learning_rate": 7.103013229380887e-06, "loss": 0.8012, "step": 5952 }, { "epoch": 0.38, "grad_norm": 1.6915050766203539, "learning_rate": 7.102072774371654e-06, "loss": 0.7429, "step": 5953 }, { "epoch": 0.38, "grad_norm": 1.558182209972778, "learning_rate": 7.1011322290197515e-06, "loss": 0.7046, "step": 5954 }, { "epoch": 0.38, "grad_norm": 1.0925985203491535, "learning_rate": 7.1001915933655994e-06, "loss": 0.5542, "step": 5955 }, { "epoch": 0.38, "grad_norm": 1.1484271092689524, "learning_rate": 7.099250867449626e-06, "loss": 0.7129, "step": 5956 }, { "epoch": 0.38, "grad_norm": 1.736080818430388, "learning_rate": 7.098310051312261e-06, "loss": 0.8598, "step": 5957 }, { "epoch": 0.38, "grad_norm": 1.5976323619599044, "learning_rate": 7.09736914499394e-06, "loss": 0.7388, "step": 5958 }, { "epoch": 0.38, "grad_norm": 1.639157680722172, "learning_rate": 7.096428148535101e-06, "loss": 0.7058, "step": 5959 }, { "epoch": 0.38, "grad_norm": 1.671525563124907, "learning_rate": 7.095487061976183e-06, "loss": 0.6478, "step": 5960 }, { "epoch": 0.38, "grad_norm": 1.9737496136717672, "learning_rate": 7.094545885357636e-06, "loss": 0.8423, "step": 5961 }, { "epoch": 0.38, "grad_norm": 1.1098405487491056, "learning_rate": 7.093604618719907e-06, "loss": 0.6664, "step": 5962 }, { "epoch": 0.38, "grad_norm": 1.702742234760001, "learning_rate": 7.092663262103452e-06, "loss": 0.7209, "step": 5963 }, { "epoch": 0.38, "grad_norm": 1.0888616135794806, "learning_rate": 7.091721815548727e-06, "loss": 0.5716, "step": 5964 }, { "epoch": 0.38, "grad_norm": 2.073560080889434, "learning_rate": 7.0907802790961925e-06, "loss": 0.7467, "step": 5965 }, { "epoch": 0.38, "grad_norm": 1.799855820723636, "learning_rate": 7.089838652786316e-06, "loss": 0.7374, "step": 5966 }, { "epoch": 0.38, "grad_norm": 1.2594050898855, "learning_rate": 7.088896936659566e-06, "loss": 0.6599, "step": 5967 }, { "epoch": 0.38, "grad_norm": 1.8421273157166251, "learning_rate": 7.087955130756414e-06, "loss": 0.7865, "step": 5968 }, { "epoch": 0.38, "grad_norm": 2.4620404269544904, "learning_rate": 7.087013235117339e-06, "loss": 0.7461, "step": 5969 }, { "epoch": 0.38, "grad_norm": 1.5290007684202005, "learning_rate": 7.08607124978282e-06, "loss": 0.7637, "step": 5970 }, { "epoch": 0.38, "grad_norm": 2.5627622624993536, "learning_rate": 7.0851291747933415e-06, "loss": 0.7026, "step": 5971 }, { "epoch": 0.38, "grad_norm": 1.7129033922841632, "learning_rate": 7.084187010189393e-06, "loss": 0.8135, "step": 5972 }, { "epoch": 0.38, "grad_norm": 1.7201464329454719, "learning_rate": 7.083244756011466e-06, "loss": 0.7836, "step": 5973 }, { "epoch": 0.38, "grad_norm": 1.500204712830942, "learning_rate": 7.082302412300057e-06, "loss": 0.7698, "step": 5974 }, { "epoch": 0.38, "grad_norm": 1.6212100525916104, "learning_rate": 7.081359979095667e-06, "loss": 0.6768, "step": 5975 }, { "epoch": 0.38, "grad_norm": 2.042201171780482, "learning_rate": 7.080417456438798e-06, "loss": 0.7924, "step": 5976 }, { "epoch": 0.38, "grad_norm": 1.6318014727828507, "learning_rate": 7.079474844369958e-06, "loss": 0.8055, "step": 5977 }, { "epoch": 0.38, "grad_norm": 1.0644035694045493, "learning_rate": 7.0785321429296585e-06, "loss": 0.7298, "step": 5978 }, { "epoch": 0.38, "grad_norm": 1.653239482835797, "learning_rate": 7.077589352158415e-06, "loss": 0.6925, "step": 5979 }, { "epoch": 0.38, "grad_norm": 1.7245705816329995, "learning_rate": 7.0766464720967466e-06, "loss": 0.9745, "step": 5980 }, { "epoch": 0.38, "grad_norm": 1.4940399276631664, "learning_rate": 7.075703502785178e-06, "loss": 0.7813, "step": 5981 }, { "epoch": 0.38, "grad_norm": 1.6080823234406187, "learning_rate": 7.0747604442642324e-06, "loss": 0.7666, "step": 5982 }, { "epoch": 0.38, "grad_norm": 1.7370253517509553, "learning_rate": 7.073817296574444e-06, "loss": 0.7449, "step": 5983 }, { "epoch": 0.38, "grad_norm": 0.9564698248993988, "learning_rate": 7.072874059756346e-06, "loss": 0.5909, "step": 5984 }, { "epoch": 0.38, "grad_norm": 1.7802720452480414, "learning_rate": 7.071930733850476e-06, "loss": 0.7338, "step": 5985 }, { "epoch": 0.38, "grad_norm": 1.0497505079078477, "learning_rate": 7.070987318897377e-06, "loss": 0.6207, "step": 5986 }, { "epoch": 0.38, "grad_norm": 1.5343405359766535, "learning_rate": 7.070043814937595e-06, "loss": 0.752, "step": 5987 }, { "epoch": 0.38, "grad_norm": 1.9546680746740488, "learning_rate": 7.069100222011678e-06, "loss": 0.7931, "step": 5988 }, { "epoch": 0.38, "grad_norm": 1.5634706918912245, "learning_rate": 7.068156540160182e-06, "loss": 0.8222, "step": 5989 }, { "epoch": 0.38, "grad_norm": 1.665631143603118, "learning_rate": 7.0672127694236655e-06, "loss": 0.7392, "step": 5990 }, { "epoch": 0.38, "grad_norm": 1.614891869108096, "learning_rate": 7.066268909842687e-06, "loss": 0.7306, "step": 5991 }, { "epoch": 0.38, "grad_norm": 1.651732897908317, "learning_rate": 7.065324961457812e-06, "loss": 0.663, "step": 5992 }, { "epoch": 0.38, "grad_norm": 1.736836548160492, "learning_rate": 7.06438092430961e-06, "loss": 0.7715, "step": 5993 }, { "epoch": 0.38, "grad_norm": 2.535621324994064, "learning_rate": 7.0634367984386545e-06, "loss": 0.8794, "step": 5994 }, { "epoch": 0.38, "grad_norm": 1.5718362112652415, "learning_rate": 7.062492583885521e-06, "loss": 0.8158, "step": 5995 }, { "epoch": 0.38, "grad_norm": 1.7593423487503042, "learning_rate": 7.061548280690791e-06, "loss": 0.771, "step": 5996 }, { "epoch": 0.38, "grad_norm": 1.054726608445796, "learning_rate": 7.060603888895046e-06, "loss": 0.7195, "step": 5997 }, { "epoch": 0.38, "grad_norm": 1.1127523478779202, "learning_rate": 7.059659408538876e-06, "loss": 0.572, "step": 5998 }, { "epoch": 0.38, "grad_norm": 1.743668131726937, "learning_rate": 7.058714839662874e-06, "loss": 0.6885, "step": 5999 }, { "epoch": 0.38, "grad_norm": 1.6326319287838913, "learning_rate": 7.057770182307633e-06, "loss": 0.7486, "step": 6000 }, { "epoch": 0.38, "grad_norm": 1.5791387348276813, "learning_rate": 7.056825436513754e-06, "loss": 0.7173, "step": 6001 }, { "epoch": 0.38, "grad_norm": 1.6310983668739008, "learning_rate": 7.055880602321839e-06, "loss": 0.6915, "step": 6002 }, { "epoch": 0.38, "grad_norm": 1.9313695018315524, "learning_rate": 7.054935679772497e-06, "loss": 0.7981, "step": 6003 }, { "epoch": 0.38, "grad_norm": 1.6238051705807004, "learning_rate": 7.0539906689063364e-06, "loss": 0.8026, "step": 6004 }, { "epoch": 0.38, "grad_norm": 1.1048090498197356, "learning_rate": 7.053045569763973e-06, "loss": 0.6542, "step": 6005 }, { "epoch": 0.38, "grad_norm": 1.6530757537090586, "learning_rate": 7.052100382386026e-06, "loss": 0.729, "step": 6006 }, { "epoch": 0.38, "grad_norm": 1.0794928077735608, "learning_rate": 7.051155106813114e-06, "loss": 0.6383, "step": 6007 }, { "epoch": 0.38, "grad_norm": 1.7676115950484106, "learning_rate": 7.050209743085867e-06, "loss": 0.7935, "step": 6008 }, { "epoch": 0.38, "grad_norm": 1.2028528124802877, "learning_rate": 7.049264291244915e-06, "loss": 0.7556, "step": 6009 }, { "epoch": 0.38, "grad_norm": 1.6732099910632185, "learning_rate": 7.048318751330889e-06, "loss": 0.7332, "step": 6010 }, { "epoch": 0.38, "grad_norm": 1.6192651030648053, "learning_rate": 7.047373123384426e-06, "loss": 0.739, "step": 6011 }, { "epoch": 0.38, "grad_norm": 1.6446605516228212, "learning_rate": 7.04642740744617e-06, "loss": 0.8058, "step": 6012 }, { "epoch": 0.38, "grad_norm": 1.0270165003332596, "learning_rate": 7.045481603556763e-06, "loss": 0.6972, "step": 6013 }, { "epoch": 0.38, "grad_norm": 1.5855506606785144, "learning_rate": 7.044535711756855e-06, "loss": 0.6932, "step": 6014 }, { "epoch": 0.38, "grad_norm": 1.5108572628717847, "learning_rate": 7.043589732087098e-06, "loss": 0.6937, "step": 6015 }, { "epoch": 0.39, "grad_norm": 1.114394131678884, "learning_rate": 7.042643664588149e-06, "loss": 0.6842, "step": 6016 }, { "epoch": 0.39, "grad_norm": 2.0661878218409364, "learning_rate": 7.041697509300667e-06, "loss": 0.7495, "step": 6017 }, { "epoch": 0.39, "grad_norm": 1.7321135812443356, "learning_rate": 7.0407512662653174e-06, "loss": 0.7233, "step": 6018 }, { "epoch": 0.39, "grad_norm": 1.6625061023736991, "learning_rate": 7.039804935522766e-06, "loss": 0.7408, "step": 6019 }, { "epoch": 0.39, "grad_norm": 1.1125178530297395, "learning_rate": 7.038858517113684e-06, "loss": 0.6394, "step": 6020 }, { "epoch": 0.39, "grad_norm": 1.5668910562990896, "learning_rate": 7.037912011078749e-06, "loss": 0.8148, "step": 6021 }, { "epoch": 0.39, "grad_norm": 1.5153461610528394, "learning_rate": 7.036965417458635e-06, "loss": 0.7557, "step": 6022 }, { "epoch": 0.39, "grad_norm": 1.6482105229066597, "learning_rate": 7.03601873629403e-06, "loss": 0.673, "step": 6023 }, { "epoch": 0.39, "grad_norm": 1.1326852603253421, "learning_rate": 7.035071967625617e-06, "loss": 0.5759, "step": 6024 }, { "epoch": 0.39, "grad_norm": 1.7532817800099385, "learning_rate": 7.0341251114940864e-06, "loss": 0.77, "step": 6025 }, { "epoch": 0.39, "grad_norm": 1.5845749250877705, "learning_rate": 7.0331781679401345e-06, "loss": 0.7769, "step": 6026 }, { "epoch": 0.39, "grad_norm": 1.6046129319176066, "learning_rate": 7.032231137004457e-06, "loss": 0.792, "step": 6027 }, { "epoch": 0.39, "grad_norm": 1.60927332107341, "learning_rate": 7.031284018727756e-06, "loss": 0.7287, "step": 6028 }, { "epoch": 0.39, "grad_norm": 1.527178835143813, "learning_rate": 7.030336813150734e-06, "loss": 0.8617, "step": 6029 }, { "epoch": 0.39, "grad_norm": 1.632188502990274, "learning_rate": 7.029389520314103e-06, "loss": 0.6625, "step": 6030 }, { "epoch": 0.39, "grad_norm": 1.7746815643022085, "learning_rate": 7.028442140258576e-06, "loss": 0.7606, "step": 6031 }, { "epoch": 0.39, "grad_norm": 1.6416713160557383, "learning_rate": 7.027494673024867e-06, "loss": 0.6619, "step": 6032 }, { "epoch": 0.39, "grad_norm": 1.7480254386966427, "learning_rate": 7.026547118653697e-06, "loss": 0.7234, "step": 6033 }, { "epoch": 0.39, "grad_norm": 1.7000031708005963, "learning_rate": 7.0255994771857906e-06, "loss": 0.7746, "step": 6034 }, { "epoch": 0.39, "grad_norm": 1.7805266160459337, "learning_rate": 7.024651748661875e-06, "loss": 0.7579, "step": 6035 }, { "epoch": 0.39, "grad_norm": 1.27636114652636, "learning_rate": 7.023703933122683e-06, "loss": 0.6542, "step": 6036 }, { "epoch": 0.39, "grad_norm": 2.6756802698698685, "learning_rate": 7.022756030608946e-06, "loss": 0.721, "step": 6037 }, { "epoch": 0.39, "grad_norm": 1.7233175235455083, "learning_rate": 7.0218080411614065e-06, "loss": 0.7891, "step": 6038 }, { "epoch": 0.39, "grad_norm": 1.6487892606262582, "learning_rate": 7.0208599648208054e-06, "loss": 0.7541, "step": 6039 }, { "epoch": 0.39, "grad_norm": 2.3138795154411103, "learning_rate": 7.01991180162789e-06, "loss": 0.7081, "step": 6040 }, { "epoch": 0.39, "grad_norm": 2.036798770725653, "learning_rate": 7.01896355162341e-06, "loss": 0.8079, "step": 6041 }, { "epoch": 0.39, "grad_norm": 1.7854452544318815, "learning_rate": 7.018015214848119e-06, "loss": 0.8928, "step": 6042 }, { "epoch": 0.39, "grad_norm": 1.9743597335869176, "learning_rate": 7.017066791342773e-06, "loss": 0.8018, "step": 6043 }, { "epoch": 0.39, "grad_norm": 1.771929688782078, "learning_rate": 7.016118281148134e-06, "loss": 0.891, "step": 6044 }, { "epoch": 0.39, "grad_norm": 1.803592013185319, "learning_rate": 7.01516968430497e-06, "loss": 0.9008, "step": 6045 }, { "epoch": 0.39, "grad_norm": 1.3516552995324238, "learning_rate": 7.014221000854047e-06, "loss": 0.641, "step": 6046 }, { "epoch": 0.39, "grad_norm": 1.7170076919324715, "learning_rate": 7.013272230836139e-06, "loss": 0.7413, "step": 6047 }, { "epoch": 0.39, "grad_norm": 1.6592358889743213, "learning_rate": 7.01232337429202e-06, "loss": 0.665, "step": 6048 }, { "epoch": 0.39, "grad_norm": 1.4732332277432403, "learning_rate": 7.01137443126247e-06, "loss": 0.7783, "step": 6049 }, { "epoch": 0.39, "grad_norm": 1.7402514137942928, "learning_rate": 7.010425401788273e-06, "loss": 0.7901, "step": 6050 }, { "epoch": 0.39, "grad_norm": 1.1416840005765525, "learning_rate": 7.009476285910218e-06, "loss": 0.7231, "step": 6051 }, { "epoch": 0.39, "grad_norm": 8.13205712204547, "learning_rate": 7.008527083669094e-06, "loss": 0.6753, "step": 6052 }, { "epoch": 0.39, "grad_norm": 2.209633253813717, "learning_rate": 7.007577795105697e-06, "loss": 0.8125, "step": 6053 }, { "epoch": 0.39, "grad_norm": 1.6673897409040326, "learning_rate": 7.0066284202608245e-06, "loss": 0.817, "step": 6054 }, { "epoch": 0.39, "grad_norm": 1.6722176246075904, "learning_rate": 7.005678959175279e-06, "loss": 0.6842, "step": 6055 }, { "epoch": 0.39, "grad_norm": 1.7968559418100916, "learning_rate": 7.0047294118898675e-06, "loss": 0.9215, "step": 6056 }, { "epoch": 0.39, "grad_norm": 2.1057428286636846, "learning_rate": 7.003779778445398e-06, "loss": 0.7411, "step": 6057 }, { "epoch": 0.39, "grad_norm": 1.694886328493761, "learning_rate": 7.0028300588826825e-06, "loss": 0.8249, "step": 6058 }, { "epoch": 0.39, "grad_norm": 1.9039599356806145, "learning_rate": 7.001880253242541e-06, "loss": 0.7443, "step": 6059 }, { "epoch": 0.39, "grad_norm": 1.6284062870572895, "learning_rate": 7.000930361565792e-06, "loss": 0.9065, "step": 6060 }, { "epoch": 0.39, "grad_norm": 1.55236944299828, "learning_rate": 6.999980383893261e-06, "loss": 0.6442, "step": 6061 }, { "epoch": 0.39, "grad_norm": 1.227911430958016, "learning_rate": 6.999030320265775e-06, "loss": 0.7862, "step": 6062 }, { "epoch": 0.39, "grad_norm": 1.1486362389463751, "learning_rate": 6.998080170724167e-06, "loss": 0.7602, "step": 6063 }, { "epoch": 0.39, "grad_norm": 1.9469821020043334, "learning_rate": 6.997129935309272e-06, "loss": 1.1325, "step": 6064 }, { "epoch": 0.39, "grad_norm": 1.5649801990080456, "learning_rate": 6.996179614061929e-06, "loss": 0.7589, "step": 6065 }, { "epoch": 0.39, "grad_norm": 1.052016301324147, "learning_rate": 6.99522920702298e-06, "loss": 0.6139, "step": 6066 }, { "epoch": 0.39, "grad_norm": 1.6713484695158574, "learning_rate": 6.9942787142332735e-06, "loss": 0.7977, "step": 6067 }, { "epoch": 0.39, "grad_norm": 1.5205722072698165, "learning_rate": 6.993328135733658e-06, "loss": 0.6585, "step": 6068 }, { "epoch": 0.39, "grad_norm": 1.4522830468922685, "learning_rate": 6.992377471564987e-06, "loss": 0.7741, "step": 6069 }, { "epoch": 0.39, "grad_norm": 1.543570954705882, "learning_rate": 6.9914267217681195e-06, "loss": 0.7713, "step": 6070 }, { "epoch": 0.39, "grad_norm": 1.7835502072305298, "learning_rate": 6.990475886383915e-06, "loss": 0.759, "step": 6071 }, { "epoch": 0.39, "grad_norm": 1.5849906852316096, "learning_rate": 6.98952496545324e-06, "loss": 0.6422, "step": 6072 }, { "epoch": 0.39, "grad_norm": 1.8532248118500516, "learning_rate": 6.988573959016963e-06, "loss": 0.6684, "step": 6073 }, { "epoch": 0.39, "grad_norm": 1.09791662990496, "learning_rate": 6.987622867115956e-06, "loss": 0.7453, "step": 6074 }, { "epoch": 0.39, "grad_norm": 1.6664281520782405, "learning_rate": 6.9866716897910945e-06, "loss": 0.6696, "step": 6075 }, { "epoch": 0.39, "grad_norm": 1.6530999108846598, "learning_rate": 6.985720427083258e-06, "loss": 0.841, "step": 6076 }, { "epoch": 0.39, "grad_norm": 2.139598716729011, "learning_rate": 6.984769079033331e-06, "loss": 0.8022, "step": 6077 }, { "epoch": 0.39, "grad_norm": 2.7799398536494366, "learning_rate": 6.983817645682199e-06, "loss": 0.8811, "step": 6078 }, { "epoch": 0.39, "grad_norm": 1.7830617028323965, "learning_rate": 6.982866127070753e-06, "loss": 0.8732, "step": 6079 }, { "epoch": 0.39, "grad_norm": 1.731259847788506, "learning_rate": 6.981914523239888e-06, "loss": 0.6869, "step": 6080 }, { "epoch": 0.39, "grad_norm": 1.539178808310402, "learning_rate": 6.9809628342305e-06, "loss": 0.7354, "step": 6081 }, { "epoch": 0.39, "grad_norm": 1.5775629049345499, "learning_rate": 6.980011060083493e-06, "loss": 0.8699, "step": 6082 }, { "epoch": 0.39, "grad_norm": 1.0422465945583512, "learning_rate": 6.9790592008397705e-06, "loss": 0.5884, "step": 6083 }, { "epoch": 0.39, "grad_norm": 1.695896667427238, "learning_rate": 6.978107256540243e-06, "loss": 0.7049, "step": 6084 }, { "epoch": 0.39, "grad_norm": 1.7511218559770605, "learning_rate": 6.9771552272258226e-06, "loss": 0.7727, "step": 6085 }, { "epoch": 0.39, "grad_norm": 0.9968803369348591, "learning_rate": 6.976203112937423e-06, "loss": 0.6769, "step": 6086 }, { "epoch": 0.39, "grad_norm": 2.3790888816906737, "learning_rate": 6.975250913715968e-06, "loss": 0.7966, "step": 6087 }, { "epoch": 0.39, "grad_norm": 1.5745095262683255, "learning_rate": 6.97429862960238e-06, "loss": 0.7037, "step": 6088 }, { "epoch": 0.39, "grad_norm": 1.4519122740068342, "learning_rate": 6.973346260637583e-06, "loss": 0.8441, "step": 6089 }, { "epoch": 0.39, "grad_norm": 1.0240051894511604, "learning_rate": 6.972393806862512e-06, "loss": 0.6691, "step": 6090 }, { "epoch": 0.39, "grad_norm": 1.5571702132674152, "learning_rate": 6.9714412683181e-06, "loss": 0.6768, "step": 6091 }, { "epoch": 0.39, "grad_norm": 1.616079084813874, "learning_rate": 6.970488645045284e-06, "loss": 0.8043, "step": 6092 }, { "epoch": 0.39, "grad_norm": 1.0558440532626807, "learning_rate": 6.969535937085006e-06, "loss": 0.652, "step": 6093 }, { "epoch": 0.39, "grad_norm": 1.728681554474812, "learning_rate": 6.968583144478214e-06, "loss": 0.8075, "step": 6094 }, { "epoch": 0.39, "grad_norm": 1.6180938343163849, "learning_rate": 6.967630267265854e-06, "loss": 0.6641, "step": 6095 }, { "epoch": 0.39, "grad_norm": 1.6761646271843453, "learning_rate": 6.966677305488879e-06, "loss": 0.6715, "step": 6096 }, { "epoch": 0.39, "grad_norm": 1.6273173412453286, "learning_rate": 6.965724259188246e-06, "loss": 0.8165, "step": 6097 }, { "epoch": 0.39, "grad_norm": 1.6912468030325642, "learning_rate": 6.9647711284049166e-06, "loss": 0.8059, "step": 6098 }, { "epoch": 0.39, "grad_norm": 1.5648800200800344, "learning_rate": 6.96381791317985e-06, "loss": 0.6589, "step": 6099 }, { "epoch": 0.39, "grad_norm": 1.0674337570040273, "learning_rate": 6.962864613554018e-06, "loss": 0.6175, "step": 6100 }, { "epoch": 0.39, "grad_norm": 1.7070555482957408, "learning_rate": 6.961911229568388e-06, "loss": 0.7711, "step": 6101 }, { "epoch": 0.39, "grad_norm": 1.0350952824300867, "learning_rate": 6.9609577612639375e-06, "loss": 0.6882, "step": 6102 }, { "epoch": 0.39, "grad_norm": 1.7076841629420925, "learning_rate": 6.9600042086816424e-06, "loss": 0.6712, "step": 6103 }, { "epoch": 0.39, "grad_norm": 1.594639132866691, "learning_rate": 6.959050571862485e-06, "loss": 0.7786, "step": 6104 }, { "epoch": 0.39, "grad_norm": 1.5989820169385403, "learning_rate": 6.958096850847451e-06, "loss": 0.705, "step": 6105 }, { "epoch": 0.39, "grad_norm": 1.1581299083771857, "learning_rate": 6.957143045677528e-06, "loss": 0.6601, "step": 6106 }, { "epoch": 0.39, "grad_norm": 1.487628473707143, "learning_rate": 6.956189156393709e-06, "loss": 0.7177, "step": 6107 }, { "epoch": 0.39, "grad_norm": 1.887901332979519, "learning_rate": 6.95523518303699e-06, "loss": 0.6761, "step": 6108 }, { "epoch": 0.39, "grad_norm": 1.6979407848497265, "learning_rate": 6.954281125648373e-06, "loss": 0.668, "step": 6109 }, { "epoch": 0.39, "grad_norm": 1.5805836803789213, "learning_rate": 6.95332698426886e-06, "loss": 0.782, "step": 6110 }, { "epoch": 0.39, "grad_norm": 1.734780132096487, "learning_rate": 6.952372758939457e-06, "loss": 0.7586, "step": 6111 }, { "epoch": 0.39, "grad_norm": 1.6636731714918338, "learning_rate": 6.951418449701176e-06, "loss": 0.7467, "step": 6112 }, { "epoch": 0.39, "grad_norm": 1.4931046635443321, "learning_rate": 6.9504640565950295e-06, "loss": 0.8714, "step": 6113 }, { "epoch": 0.39, "grad_norm": 1.4588032965343096, "learning_rate": 6.949509579662037e-06, "loss": 0.6871, "step": 6114 }, { "epoch": 0.39, "grad_norm": 1.6534486349164546, "learning_rate": 6.948555018943219e-06, "loss": 0.6719, "step": 6115 }, { "epoch": 0.39, "grad_norm": 2.324759491596308, "learning_rate": 6.947600374479602e-06, "loss": 0.8765, "step": 6116 }, { "epoch": 0.39, "grad_norm": 1.5155411294228673, "learning_rate": 6.946645646312212e-06, "loss": 0.6732, "step": 6117 }, { "epoch": 0.39, "grad_norm": 1.7004663328994392, "learning_rate": 6.945690834482082e-06, "loss": 0.7111, "step": 6118 }, { "epoch": 0.39, "grad_norm": 1.5098039992879448, "learning_rate": 6.944735939030249e-06, "loss": 0.6614, "step": 6119 }, { "epoch": 0.39, "grad_norm": 1.1332037279898128, "learning_rate": 6.943780959997753e-06, "loss": 0.6681, "step": 6120 }, { "epoch": 0.39, "grad_norm": 1.823296782610319, "learning_rate": 6.942825897425633e-06, "loss": 0.9024, "step": 6121 }, { "epoch": 0.39, "grad_norm": 1.229385899830729, "learning_rate": 6.94187075135494e-06, "loss": 0.6536, "step": 6122 }, { "epoch": 0.39, "grad_norm": 1.5871855801792631, "learning_rate": 6.940915521826723e-06, "loss": 0.7754, "step": 6123 }, { "epoch": 0.39, "grad_norm": 1.5302658349036218, "learning_rate": 6.939960208882035e-06, "loss": 0.7321, "step": 6124 }, { "epoch": 0.39, "grad_norm": 1.4484485557664128, "learning_rate": 6.939004812561934e-06, "loss": 0.6855, "step": 6125 }, { "epoch": 0.39, "grad_norm": 1.9515905635064488, "learning_rate": 6.938049332907481e-06, "loss": 0.9383, "step": 6126 }, { "epoch": 0.39, "grad_norm": 1.664356749997062, "learning_rate": 6.937093769959737e-06, "loss": 0.7992, "step": 6127 }, { "epoch": 0.39, "grad_norm": 1.5774997917332128, "learning_rate": 6.9361381237597766e-06, "loss": 0.7281, "step": 6128 }, { "epoch": 0.39, "grad_norm": 1.1351083687753374, "learning_rate": 6.9351823943486654e-06, "loss": 0.6414, "step": 6129 }, { "epoch": 0.39, "grad_norm": 1.6846297168942697, "learning_rate": 6.934226581767484e-06, "loss": 0.8895, "step": 6130 }, { "epoch": 0.39, "grad_norm": 1.4760838328722032, "learning_rate": 6.933270686057308e-06, "loss": 0.6095, "step": 6131 }, { "epoch": 0.39, "grad_norm": 1.6116060887457706, "learning_rate": 6.932314707259218e-06, "loss": 0.7084, "step": 6132 }, { "epoch": 0.39, "grad_norm": 1.6139914657856054, "learning_rate": 6.931358645414304e-06, "loss": 0.8284, "step": 6133 }, { "epoch": 0.39, "grad_norm": 1.514781633146093, "learning_rate": 6.930402500563653e-06, "loss": 0.8294, "step": 6134 }, { "epoch": 0.39, "grad_norm": 3.6231897054507813, "learning_rate": 6.929446272748361e-06, "loss": 0.7292, "step": 6135 }, { "epoch": 0.39, "grad_norm": 1.941751345431335, "learning_rate": 6.928489962009519e-06, "loss": 0.7251, "step": 6136 }, { "epoch": 0.39, "grad_norm": 1.0643160673172176, "learning_rate": 6.927533568388232e-06, "loss": 0.596, "step": 6137 }, { "epoch": 0.39, "grad_norm": 1.1786632843105462, "learning_rate": 6.926577091925604e-06, "loss": 0.6355, "step": 6138 }, { "epoch": 0.39, "grad_norm": 1.0941319548165283, "learning_rate": 6.92562053266274e-06, "loss": 0.7062, "step": 6139 }, { "epoch": 0.39, "grad_norm": 2.1906718391429627, "learning_rate": 6.924663890640752e-06, "loss": 0.7597, "step": 6140 }, { "epoch": 0.39, "grad_norm": 1.8751297844980859, "learning_rate": 6.923707165900753e-06, "loss": 0.6468, "step": 6141 }, { "epoch": 0.39, "grad_norm": 1.7934796504135369, "learning_rate": 6.922750358483865e-06, "loss": 0.7675, "step": 6142 }, { "epoch": 0.39, "grad_norm": 1.709672053633803, "learning_rate": 6.921793468431204e-06, "loss": 0.7816, "step": 6143 }, { "epoch": 0.39, "grad_norm": 1.6782761418444578, "learning_rate": 6.920836495783899e-06, "loss": 0.7355, "step": 6144 }, { "epoch": 0.39, "grad_norm": 1.762275613211287, "learning_rate": 6.9198794405830776e-06, "loss": 0.7199, "step": 6145 }, { "epoch": 0.39, "grad_norm": 1.5146248166883982, "learning_rate": 6.918922302869873e-06, "loss": 0.6898, "step": 6146 }, { "epoch": 0.39, "grad_norm": 1.574695805461041, "learning_rate": 6.917965082685418e-06, "loss": 0.9707, "step": 6147 }, { "epoch": 0.39, "grad_norm": 1.5950962742180237, "learning_rate": 6.917007780070856e-06, "loss": 0.6573, "step": 6148 }, { "epoch": 0.39, "grad_norm": 1.5490645374920764, "learning_rate": 6.9160503950673276e-06, "loss": 0.7885, "step": 6149 }, { "epoch": 0.39, "grad_norm": 1.5956617742977017, "learning_rate": 6.915092927715979e-06, "loss": 0.7206, "step": 6150 }, { "epoch": 0.39, "grad_norm": 1.63960240077228, "learning_rate": 6.914135378057959e-06, "loss": 0.8667, "step": 6151 }, { "epoch": 0.39, "grad_norm": 1.1973543931398454, "learning_rate": 6.9131777461344255e-06, "loss": 0.5988, "step": 6152 }, { "epoch": 0.39, "grad_norm": 1.6741843878951588, "learning_rate": 6.912220031986531e-06, "loss": 0.8115, "step": 6153 }, { "epoch": 0.39, "grad_norm": 1.6559243943162554, "learning_rate": 6.911262235655437e-06, "loss": 0.7532, "step": 6154 }, { "epoch": 0.39, "grad_norm": 1.6039066443974062, "learning_rate": 6.910304357182308e-06, "loss": 0.7748, "step": 6155 }, { "epoch": 0.39, "grad_norm": 1.6383634611676814, "learning_rate": 6.909346396608313e-06, "loss": 0.8341, "step": 6156 }, { "epoch": 0.39, "grad_norm": 1.6826690703537037, "learning_rate": 6.908388353974622e-06, "loss": 0.7394, "step": 6157 }, { "epoch": 0.39, "grad_norm": 1.2186663203346548, "learning_rate": 6.907430229322409e-06, "loss": 0.662, "step": 6158 }, { "epoch": 0.39, "grad_norm": 1.9295392100865603, "learning_rate": 6.906472022692854e-06, "loss": 0.8693, "step": 6159 }, { "epoch": 0.39, "grad_norm": 0.9609982984482096, "learning_rate": 6.9055137341271365e-06, "loss": 0.6362, "step": 6160 }, { "epoch": 0.39, "grad_norm": 1.7758839359945329, "learning_rate": 6.904555363666443e-06, "loss": 0.784, "step": 6161 }, { "epoch": 0.39, "grad_norm": 1.5105757930787558, "learning_rate": 6.903596911351962e-06, "loss": 0.7522, "step": 6162 }, { "epoch": 0.39, "grad_norm": 1.718365983216789, "learning_rate": 6.902638377224886e-06, "loss": 0.7005, "step": 6163 }, { "epoch": 0.39, "grad_norm": 1.6349389447832472, "learning_rate": 6.901679761326409e-06, "loss": 0.6983, "step": 6164 }, { "epoch": 0.39, "grad_norm": 1.6415498348967283, "learning_rate": 6.900721063697733e-06, "loss": 0.6099, "step": 6165 }, { "epoch": 0.39, "grad_norm": 1.7991226715565254, "learning_rate": 6.899762284380059e-06, "loss": 0.8353, "step": 6166 }, { "epoch": 0.39, "grad_norm": 1.5832024691922153, "learning_rate": 6.898803423414595e-06, "loss": 0.8825, "step": 6167 }, { "epoch": 0.39, "grad_norm": 1.493422592152948, "learning_rate": 6.89784448084255e-06, "loss": 0.6805, "step": 6168 }, { "epoch": 0.39, "grad_norm": 1.670091470337444, "learning_rate": 6.896885456705137e-06, "loss": 0.7467, "step": 6169 }, { "epoch": 0.39, "grad_norm": 1.9732172276663478, "learning_rate": 6.895926351043573e-06, "loss": 0.7522, "step": 6170 }, { "epoch": 0.39, "grad_norm": 1.5275842761431606, "learning_rate": 6.894967163899077e-06, "loss": 0.7074, "step": 6171 }, { "epoch": 0.4, "grad_norm": 1.2210084340294722, "learning_rate": 6.894007895312875e-06, "loss": 0.549, "step": 6172 }, { "epoch": 0.4, "grad_norm": 2.351974943908842, "learning_rate": 6.893048545326193e-06, "loss": 0.7838, "step": 6173 }, { "epoch": 0.4, "grad_norm": 1.5529343268401907, "learning_rate": 6.892089113980262e-06, "loss": 0.7994, "step": 6174 }, { "epoch": 0.4, "grad_norm": 1.6148195397328209, "learning_rate": 6.891129601316319e-06, "loss": 0.7302, "step": 6175 }, { "epoch": 0.4, "grad_norm": 1.6962913105661523, "learning_rate": 6.890170007375598e-06, "loss": 0.8753, "step": 6176 }, { "epoch": 0.4, "grad_norm": 1.7523674242560614, "learning_rate": 6.889210332199343e-06, "loss": 0.7199, "step": 6177 }, { "epoch": 0.4, "grad_norm": 1.7454897261654025, "learning_rate": 6.888250575828797e-06, "loss": 0.8163, "step": 6178 }, { "epoch": 0.4, "grad_norm": 1.7162367287762386, "learning_rate": 6.887290738305208e-06, "loss": 0.7717, "step": 6179 }, { "epoch": 0.4, "grad_norm": 1.7501004711941521, "learning_rate": 6.88633081966983e-06, "loss": 0.7712, "step": 6180 }, { "epoch": 0.4, "grad_norm": 1.6555222903632247, "learning_rate": 6.885370819963917e-06, "loss": 0.7536, "step": 6181 }, { "epoch": 0.4, "grad_norm": 1.2392978418749587, "learning_rate": 6.884410739228727e-06, "loss": 0.6537, "step": 6182 }, { "epoch": 0.4, "grad_norm": 2.242143473546505, "learning_rate": 6.883450577505524e-06, "loss": 0.8283, "step": 6183 }, { "epoch": 0.4, "grad_norm": 1.6077516408577683, "learning_rate": 6.882490334835572e-06, "loss": 0.7915, "step": 6184 }, { "epoch": 0.4, "grad_norm": 1.574146900758901, "learning_rate": 6.881530011260142e-06, "loss": 0.7111, "step": 6185 }, { "epoch": 0.4, "grad_norm": 1.1384458712272751, "learning_rate": 6.880569606820504e-06, "loss": 0.6246, "step": 6186 }, { "epoch": 0.4, "grad_norm": 1.7564871134914422, "learning_rate": 6.879609121557938e-06, "loss": 0.8415, "step": 6187 }, { "epoch": 0.4, "grad_norm": 1.186528472763982, "learning_rate": 6.878648555513721e-06, "loss": 0.6462, "step": 6188 }, { "epoch": 0.4, "grad_norm": 1.7835350712612514, "learning_rate": 6.877687908729137e-06, "loss": 0.7016, "step": 6189 }, { "epoch": 0.4, "grad_norm": 1.6284447093999914, "learning_rate": 6.876727181245472e-06, "loss": 0.7454, "step": 6190 }, { "epoch": 0.4, "grad_norm": 1.6463618989269704, "learning_rate": 6.875766373104016e-06, "loss": 0.7233, "step": 6191 }, { "epoch": 0.4, "grad_norm": 1.7165410045658607, "learning_rate": 6.874805484346062e-06, "loss": 0.7821, "step": 6192 }, { "epoch": 0.4, "grad_norm": 1.826203079746553, "learning_rate": 6.873844515012909e-06, "loss": 0.946, "step": 6193 }, { "epoch": 0.4, "grad_norm": 1.8656874893077082, "learning_rate": 6.872883465145855e-06, "loss": 0.7592, "step": 6194 }, { "epoch": 0.4, "grad_norm": 1.8136365815596256, "learning_rate": 6.871922334786206e-06, "loss": 0.7828, "step": 6195 }, { "epoch": 0.4, "grad_norm": 1.6754894300581902, "learning_rate": 6.870961123975269e-06, "loss": 0.6912, "step": 6196 }, { "epoch": 0.4, "grad_norm": 1.1927006949219232, "learning_rate": 6.8699998327543545e-06, "loss": 0.6592, "step": 6197 }, { "epoch": 0.4, "grad_norm": 1.9010892327343727, "learning_rate": 6.869038461164776e-06, "loss": 0.7566, "step": 6198 }, { "epoch": 0.4, "grad_norm": 1.6890505633449169, "learning_rate": 6.868077009247852e-06, "loss": 0.7252, "step": 6199 }, { "epoch": 0.4, "grad_norm": 1.4702374695388991, "learning_rate": 6.867115477044902e-06, "loss": 0.6788, "step": 6200 }, { "epoch": 0.4, "grad_norm": 1.7167272171989558, "learning_rate": 6.866153864597254e-06, "loss": 0.7725, "step": 6201 }, { "epoch": 0.4, "grad_norm": 1.0094449866079387, "learning_rate": 6.865192171946234e-06, "loss": 0.7045, "step": 6202 }, { "epoch": 0.4, "grad_norm": 1.6434063036795583, "learning_rate": 6.864230399133172e-06, "loss": 0.7745, "step": 6203 }, { "epoch": 0.4, "grad_norm": 1.6920015342895534, "learning_rate": 6.863268546199408e-06, "loss": 0.7995, "step": 6204 }, { "epoch": 0.4, "grad_norm": 1.2621109853771406, "learning_rate": 6.862306613186275e-06, "loss": 0.6547, "step": 6205 }, { "epoch": 0.4, "grad_norm": 2.0265176713523387, "learning_rate": 6.861344600135118e-06, "loss": 0.8136, "step": 6206 }, { "epoch": 0.4, "grad_norm": 1.5233890386041768, "learning_rate": 6.86038250708728e-06, "loss": 0.7592, "step": 6207 }, { "epoch": 0.4, "grad_norm": 1.7655920036377988, "learning_rate": 6.859420334084111e-06, "loss": 0.7162, "step": 6208 }, { "epoch": 0.4, "grad_norm": 1.8263112193913607, "learning_rate": 6.858458081166964e-06, "loss": 0.7831, "step": 6209 }, { "epoch": 0.4, "grad_norm": 2.032337913559392, "learning_rate": 6.857495748377193e-06, "loss": 0.8395, "step": 6210 }, { "epoch": 0.4, "grad_norm": 1.0527719097217176, "learning_rate": 6.856533335756159e-06, "loss": 0.5705, "step": 6211 }, { "epoch": 0.4, "grad_norm": 1.7646069382870877, "learning_rate": 6.855570843345223e-06, "loss": 0.7548, "step": 6212 }, { "epoch": 0.4, "grad_norm": 1.4203611932837334, "learning_rate": 6.854608271185752e-06, "loss": 0.7083, "step": 6213 }, { "epoch": 0.4, "grad_norm": 1.680920620924377, "learning_rate": 6.853645619319114e-06, "loss": 0.8158, "step": 6214 }, { "epoch": 0.4, "grad_norm": 2.306461219455333, "learning_rate": 6.852682887786681e-06, "loss": 0.7072, "step": 6215 }, { "epoch": 0.4, "grad_norm": 1.8855042074759973, "learning_rate": 6.851720076629832e-06, "loss": 0.7409, "step": 6216 }, { "epoch": 0.4, "grad_norm": 1.6270334734768124, "learning_rate": 6.850757185889945e-06, "loss": 0.6882, "step": 6217 }, { "epoch": 0.4, "grad_norm": 0.9738996977469698, "learning_rate": 6.849794215608403e-06, "loss": 0.6201, "step": 6218 }, { "epoch": 0.4, "grad_norm": 2.2632221411737428, "learning_rate": 6.848831165826591e-06, "loss": 0.7836, "step": 6219 }, { "epoch": 0.4, "grad_norm": 1.493513719704982, "learning_rate": 6.847868036585903e-06, "loss": 0.6932, "step": 6220 }, { "epoch": 0.4, "grad_norm": 1.239728182970323, "learning_rate": 6.846904827927728e-06, "loss": 0.678, "step": 6221 }, { "epoch": 0.4, "grad_norm": 1.172572338639198, "learning_rate": 6.845941539893465e-06, "loss": 0.7759, "step": 6222 }, { "epoch": 0.4, "grad_norm": 1.7711034095837472, "learning_rate": 6.844978172524514e-06, "loss": 0.6808, "step": 6223 }, { "epoch": 0.4, "grad_norm": 1.9957807658378512, "learning_rate": 6.844014725862277e-06, "loss": 0.8299, "step": 6224 }, { "epoch": 0.4, "grad_norm": 1.7577964190342206, "learning_rate": 6.843051199948162e-06, "loss": 0.6785, "step": 6225 }, { "epoch": 0.4, "grad_norm": 2.401075258340459, "learning_rate": 6.84208759482358e-06, "loss": 0.8442, "step": 6226 }, { "epoch": 0.4, "grad_norm": 1.766859121392551, "learning_rate": 6.841123910529943e-06, "loss": 0.6759, "step": 6227 }, { "epoch": 0.4, "grad_norm": 1.6426645799385593, "learning_rate": 6.840160147108669e-06, "loss": 0.6783, "step": 6228 }, { "epoch": 0.4, "grad_norm": 1.562045075686714, "learning_rate": 6.839196304601179e-06, "loss": 0.7828, "step": 6229 }, { "epoch": 0.4, "grad_norm": 2.4023256921493776, "learning_rate": 6.838232383048896e-06, "loss": 0.7949, "step": 6230 }, { "epoch": 0.4, "grad_norm": 1.5860159783253696, "learning_rate": 6.837268382493248e-06, "loss": 0.7315, "step": 6231 }, { "epoch": 0.4, "grad_norm": 1.9622150174332529, "learning_rate": 6.836304302975667e-06, "loss": 0.8175, "step": 6232 }, { "epoch": 0.4, "grad_norm": 1.8982462506141164, "learning_rate": 6.835340144537584e-06, "loss": 0.8242, "step": 6233 }, { "epoch": 0.4, "grad_norm": 1.787582551355132, "learning_rate": 6.83437590722044e-06, "loss": 0.8584, "step": 6234 }, { "epoch": 0.4, "grad_norm": 1.6015688943975877, "learning_rate": 6.833411591065673e-06, "loss": 0.9168, "step": 6235 }, { "epoch": 0.4, "grad_norm": 1.8134970790599083, "learning_rate": 6.832447196114728e-06, "loss": 0.7361, "step": 6236 }, { "epoch": 0.4, "grad_norm": 1.6451719417133495, "learning_rate": 6.831482722409053e-06, "loss": 0.7487, "step": 6237 }, { "epoch": 0.4, "grad_norm": 1.756300345111116, "learning_rate": 6.830518169990098e-06, "loss": 0.7751, "step": 6238 }, { "epoch": 0.4, "grad_norm": 1.5952401670246656, "learning_rate": 6.829553538899321e-06, "loss": 0.7638, "step": 6239 }, { "epoch": 0.4, "grad_norm": 1.797277198804149, "learning_rate": 6.828588829178175e-06, "loss": 0.7363, "step": 6240 }, { "epoch": 0.4, "grad_norm": 1.9808485996752647, "learning_rate": 6.8276240408681245e-06, "loss": 0.722, "step": 6241 }, { "epoch": 0.4, "grad_norm": 1.5273052458576077, "learning_rate": 6.826659174010635e-06, "loss": 0.6562, "step": 6242 }, { "epoch": 0.4, "grad_norm": 1.6544057881024226, "learning_rate": 6.825694228647169e-06, "loss": 0.7105, "step": 6243 }, { "epoch": 0.4, "grad_norm": 1.6057825130156131, "learning_rate": 6.824729204819203e-06, "loss": 0.603, "step": 6244 }, { "epoch": 0.4, "grad_norm": 1.0409473684515895, "learning_rate": 6.823764102568211e-06, "loss": 0.5699, "step": 6245 }, { "epoch": 0.4, "grad_norm": 1.2757557769159473, "learning_rate": 6.82279892193567e-06, "loss": 0.7602, "step": 6246 }, { "epoch": 0.4, "grad_norm": 1.6006003298359632, "learning_rate": 6.82183366296306e-06, "loss": 0.8065, "step": 6247 }, { "epoch": 0.4, "grad_norm": 1.7888052112377906, "learning_rate": 6.820868325691867e-06, "loss": 0.778, "step": 6248 }, { "epoch": 0.4, "grad_norm": 1.5545712337515856, "learning_rate": 6.819902910163582e-06, "loss": 0.6738, "step": 6249 }, { "epoch": 0.4, "grad_norm": 3.458280330025777, "learning_rate": 6.818937416419693e-06, "loss": 0.8363, "step": 6250 }, { "epoch": 0.4, "grad_norm": 1.8011445083216546, "learning_rate": 6.817971844501695e-06, "loss": 0.832, "step": 6251 }, { "epoch": 0.4, "grad_norm": 1.3845875561446868, "learning_rate": 6.817006194451088e-06, "loss": 0.7799, "step": 6252 }, { "epoch": 0.4, "grad_norm": 2.1279461943790143, "learning_rate": 6.8160404663093725e-06, "loss": 0.6766, "step": 6253 }, { "epoch": 0.4, "grad_norm": 2.03691136850807, "learning_rate": 6.815074660118055e-06, "loss": 0.6942, "step": 6254 }, { "epoch": 0.4, "grad_norm": 1.6931133019707931, "learning_rate": 6.814108775918642e-06, "loss": 0.7369, "step": 6255 }, { "epoch": 0.4, "grad_norm": 1.7180640186962133, "learning_rate": 6.813142813752645e-06, "loss": 0.8275, "step": 6256 }, { "epoch": 0.4, "grad_norm": 1.7023375670643155, "learning_rate": 6.812176773661579e-06, "loss": 0.7101, "step": 6257 }, { "epoch": 0.4, "grad_norm": 2.268234971191666, "learning_rate": 6.8112106556869635e-06, "loss": 0.7423, "step": 6258 }, { "epoch": 0.4, "grad_norm": 1.536251530868423, "learning_rate": 6.810244459870322e-06, "loss": 0.7549, "step": 6259 }, { "epoch": 0.4, "grad_norm": 1.7045815973462053, "learning_rate": 6.809278186253177e-06, "loss": 0.6533, "step": 6260 }, { "epoch": 0.4, "grad_norm": 1.5679573058536067, "learning_rate": 6.808311834877057e-06, "loss": 0.7698, "step": 6261 }, { "epoch": 0.4, "grad_norm": 2.256872573657108, "learning_rate": 6.807345405783494e-06, "loss": 0.761, "step": 6262 }, { "epoch": 0.4, "grad_norm": 1.7823211973278057, "learning_rate": 6.806378899014023e-06, "loss": 0.8228, "step": 6263 }, { "epoch": 0.4, "grad_norm": 1.3936715641720214, "learning_rate": 6.805412314610181e-06, "loss": 0.7076, "step": 6264 }, { "epoch": 0.4, "grad_norm": 1.6999372870366969, "learning_rate": 6.804445652613514e-06, "loss": 0.863, "step": 6265 }, { "epoch": 0.4, "grad_norm": 1.575737540731734, "learning_rate": 6.803478913065563e-06, "loss": 0.7159, "step": 6266 }, { "epoch": 0.4, "grad_norm": 1.8121775113279284, "learning_rate": 6.802512096007879e-06, "loss": 0.7242, "step": 6267 }, { "epoch": 0.4, "grad_norm": 1.5739873654748961, "learning_rate": 6.801545201482012e-06, "loss": 0.6906, "step": 6268 }, { "epoch": 0.4, "grad_norm": 2.6955291436539515, "learning_rate": 6.800578229529519e-06, "loss": 0.6793, "step": 6269 }, { "epoch": 0.4, "grad_norm": 1.5224397578388704, "learning_rate": 6.799611180191956e-06, "loss": 0.7232, "step": 6270 }, { "epoch": 0.4, "grad_norm": 1.6255117509899177, "learning_rate": 6.798644053510886e-06, "loss": 0.7579, "step": 6271 }, { "epoch": 0.4, "grad_norm": 1.4703318012946727, "learning_rate": 6.797676849527875e-06, "loss": 0.7371, "step": 6272 }, { "epoch": 0.4, "grad_norm": 1.573151556851721, "learning_rate": 6.796709568284488e-06, "loss": 0.7526, "step": 6273 }, { "epoch": 0.4, "grad_norm": 1.8292414895849687, "learning_rate": 6.795742209822302e-06, "loss": 0.7413, "step": 6274 }, { "epoch": 0.4, "grad_norm": 1.8110746025779247, "learning_rate": 6.794774774182887e-06, "loss": 0.7881, "step": 6275 }, { "epoch": 0.4, "grad_norm": 1.6440060262528744, "learning_rate": 6.793807261407825e-06, "loss": 0.6842, "step": 6276 }, { "epoch": 0.4, "grad_norm": 1.5545490385374536, "learning_rate": 6.792839671538696e-06, "loss": 0.7564, "step": 6277 }, { "epoch": 0.4, "grad_norm": 1.5579278577348745, "learning_rate": 6.791872004617086e-06, "loss": 0.7296, "step": 6278 }, { "epoch": 0.4, "grad_norm": 1.4934336248850848, "learning_rate": 6.790904260684581e-06, "loss": 0.7025, "step": 6279 }, { "epoch": 0.4, "grad_norm": 1.6227319167165102, "learning_rate": 6.789936439782774e-06, "loss": 0.735, "step": 6280 }, { "epoch": 0.4, "grad_norm": 1.5426895006027908, "learning_rate": 6.788968541953262e-06, "loss": 0.7944, "step": 6281 }, { "epoch": 0.4, "grad_norm": 1.0239674825656182, "learning_rate": 6.7880005672376394e-06, "loss": 0.693, "step": 6282 }, { "epoch": 0.4, "grad_norm": 1.6725547559026708, "learning_rate": 6.787032515677509e-06, "loss": 0.8676, "step": 6283 }, { "epoch": 0.4, "grad_norm": 1.4791314014928352, "learning_rate": 6.786064387314477e-06, "loss": 0.8449, "step": 6284 }, { "epoch": 0.4, "grad_norm": 2.0785999247369844, "learning_rate": 6.785096182190152e-06, "loss": 0.7277, "step": 6285 }, { "epoch": 0.4, "grad_norm": 1.6063025802388882, "learning_rate": 6.7841279003461425e-06, "loss": 0.6943, "step": 6286 }, { "epoch": 0.4, "grad_norm": 1.7517619897047692, "learning_rate": 6.783159541824065e-06, "loss": 0.8386, "step": 6287 }, { "epoch": 0.4, "grad_norm": 1.143593961275908, "learning_rate": 6.782191106665536e-06, "loss": 0.5974, "step": 6288 }, { "epoch": 0.4, "grad_norm": 1.7054602563693868, "learning_rate": 6.781222594912182e-06, "loss": 0.6928, "step": 6289 }, { "epoch": 0.4, "grad_norm": 1.7278140335243495, "learning_rate": 6.780254006605621e-06, "loss": 0.8929, "step": 6290 }, { "epoch": 0.4, "grad_norm": 1.5273682508594173, "learning_rate": 6.779285341787484e-06, "loss": 0.7407, "step": 6291 }, { "epoch": 0.4, "grad_norm": 1.7451524402321714, "learning_rate": 6.778316600499401e-06, "loss": 0.6783, "step": 6292 }, { "epoch": 0.4, "grad_norm": 1.7794379901835038, "learning_rate": 6.7773477827830085e-06, "loss": 0.7441, "step": 6293 }, { "epoch": 0.4, "grad_norm": 1.7650187272857163, "learning_rate": 6.776378888679942e-06, "loss": 0.7987, "step": 6294 }, { "epoch": 0.4, "grad_norm": 1.4213958761226249, "learning_rate": 6.775409918231843e-06, "loss": 0.6682, "step": 6295 }, { "epoch": 0.4, "grad_norm": 1.6867783869363944, "learning_rate": 6.774440871480359e-06, "loss": 0.7085, "step": 6296 }, { "epoch": 0.4, "grad_norm": 1.6376026843818148, "learning_rate": 6.773471748467133e-06, "loss": 0.7796, "step": 6297 }, { "epoch": 0.4, "grad_norm": 1.6769926328978912, "learning_rate": 6.772502549233819e-06, "loss": 0.7316, "step": 6298 }, { "epoch": 0.4, "grad_norm": 1.5980198822503218, "learning_rate": 6.771533273822069e-06, "loss": 0.7854, "step": 6299 }, { "epoch": 0.4, "grad_norm": 1.5741556017538008, "learning_rate": 6.770563922273541e-06, "loss": 0.7201, "step": 6300 }, { "epoch": 0.4, "grad_norm": 1.531937810224232, "learning_rate": 6.769594494629898e-06, "loss": 0.7056, "step": 6301 }, { "epoch": 0.4, "grad_norm": 0.9987596792118582, "learning_rate": 6.7686249909328e-06, "loss": 0.6957, "step": 6302 }, { "epoch": 0.4, "grad_norm": 1.6099958284769393, "learning_rate": 6.767655411223917e-06, "loss": 0.7663, "step": 6303 }, { "epoch": 0.4, "grad_norm": 1.6329092229958322, "learning_rate": 6.766685755544919e-06, "loss": 0.8129, "step": 6304 }, { "epoch": 0.4, "grad_norm": 1.6599060489802284, "learning_rate": 6.76571602393748e-06, "loss": 0.7616, "step": 6305 }, { "epoch": 0.4, "grad_norm": 1.8346068793405994, "learning_rate": 6.764746216443277e-06, "loss": 0.8364, "step": 6306 }, { "epoch": 0.4, "grad_norm": 2.0564409145987566, "learning_rate": 6.7637763331039885e-06, "loss": 0.8528, "step": 6307 }, { "epoch": 0.4, "grad_norm": 1.7238802230760615, "learning_rate": 6.7628063739612985e-06, "loss": 0.6218, "step": 6308 }, { "epoch": 0.4, "grad_norm": 1.728573279466462, "learning_rate": 6.761836339056896e-06, "loss": 0.8605, "step": 6309 }, { "epoch": 0.4, "grad_norm": 1.4776140623612701, "learning_rate": 6.76086622843247e-06, "loss": 0.6201, "step": 6310 }, { "epoch": 0.4, "grad_norm": 1.6412596911165989, "learning_rate": 6.759896042129713e-06, "loss": 0.8214, "step": 6311 }, { "epoch": 0.4, "grad_norm": 2.3043782094237257, "learning_rate": 6.758925780190322e-06, "loss": 0.7269, "step": 6312 }, { "epoch": 0.4, "grad_norm": 1.7485127229747166, "learning_rate": 6.757955442655998e-06, "loss": 0.8537, "step": 6313 }, { "epoch": 0.4, "grad_norm": 1.8564666172969138, "learning_rate": 6.756985029568443e-06, "loss": 0.7592, "step": 6314 }, { "epoch": 0.4, "grad_norm": 1.7064024893185692, "learning_rate": 6.756014540969362e-06, "loss": 0.6638, "step": 6315 }, { "epoch": 0.4, "grad_norm": 1.601797428692721, "learning_rate": 6.755043976900467e-06, "loss": 0.7046, "step": 6316 }, { "epoch": 0.4, "grad_norm": 1.9626264569372165, "learning_rate": 6.754073337403469e-06, "loss": 0.7713, "step": 6317 }, { "epoch": 0.4, "grad_norm": 1.555091247542353, "learning_rate": 6.753102622520087e-06, "loss": 0.7633, "step": 6318 }, { "epoch": 0.4, "grad_norm": 1.7734476884413106, "learning_rate": 6.752131832292036e-06, "loss": 0.7685, "step": 6319 }, { "epoch": 0.4, "grad_norm": 1.7537982559602563, "learning_rate": 6.751160966761041e-06, "loss": 0.8422, "step": 6320 }, { "epoch": 0.4, "grad_norm": 1.0333669067665057, "learning_rate": 6.750190025968827e-06, "loss": 0.6538, "step": 6321 }, { "epoch": 0.4, "grad_norm": 1.88669741621802, "learning_rate": 6.749219009957122e-06, "loss": 0.8826, "step": 6322 }, { "epoch": 0.4, "grad_norm": 2.005879830134969, "learning_rate": 6.748247918767662e-06, "loss": 0.8675, "step": 6323 }, { "epoch": 0.4, "grad_norm": 1.6075474427047052, "learning_rate": 6.74727675244218e-06, "loss": 0.8022, "step": 6324 }, { "epoch": 0.4, "grad_norm": 1.622512064410187, "learning_rate": 6.746305511022414e-06, "loss": 0.7243, "step": 6325 }, { "epoch": 0.4, "grad_norm": 1.8577870105820664, "learning_rate": 6.745334194550106e-06, "loss": 0.8357, "step": 6326 }, { "epoch": 0.4, "grad_norm": 1.1368802915879936, "learning_rate": 6.744362803067003e-06, "loss": 0.6647, "step": 6327 }, { "epoch": 0.41, "grad_norm": 1.0729488653847012, "learning_rate": 6.7433913366148515e-06, "loss": 0.6617, "step": 6328 }, { "epoch": 0.41, "grad_norm": 1.6588690600794005, "learning_rate": 6.742419795235403e-06, "loss": 0.7811, "step": 6329 }, { "epoch": 0.41, "grad_norm": 1.608802317015918, "learning_rate": 6.741448178970413e-06, "loss": 0.7425, "step": 6330 }, { "epoch": 0.41, "grad_norm": 1.52075581675915, "learning_rate": 6.74047648786164e-06, "loss": 0.6035, "step": 6331 }, { "epoch": 0.41, "grad_norm": 1.9096158485584036, "learning_rate": 6.739504721950845e-06, "loss": 0.7775, "step": 6332 }, { "epoch": 0.41, "grad_norm": 1.6227210132481473, "learning_rate": 6.738532881279794e-06, "loss": 0.6441, "step": 6333 }, { "epoch": 0.41, "grad_norm": 1.7087409124592359, "learning_rate": 6.7375609658902505e-06, "loss": 0.7788, "step": 6334 }, { "epoch": 0.41, "grad_norm": 1.7663078998431627, "learning_rate": 6.73658897582399e-06, "loss": 0.7965, "step": 6335 }, { "epoch": 0.41, "grad_norm": 1.6088117799204447, "learning_rate": 6.735616911122782e-06, "loss": 0.7804, "step": 6336 }, { "epoch": 0.41, "grad_norm": 1.6942252956180384, "learning_rate": 6.734644771828407e-06, "loss": 0.7112, "step": 6337 }, { "epoch": 0.41, "grad_norm": 1.8404777236376757, "learning_rate": 6.733672557982645e-06, "loss": 0.8788, "step": 6338 }, { "epoch": 0.41, "grad_norm": 1.7438093005754942, "learning_rate": 6.7327002696272795e-06, "loss": 0.8368, "step": 6339 }, { "epoch": 0.41, "grad_norm": 1.9574788012897066, "learning_rate": 6.7317279068040965e-06, "loss": 0.7904, "step": 6340 }, { "epoch": 0.41, "grad_norm": 1.7803505527733945, "learning_rate": 6.730755469554888e-06, "loss": 0.8547, "step": 6341 }, { "epoch": 0.41, "grad_norm": 1.6874389207964258, "learning_rate": 6.729782957921446e-06, "loss": 0.8696, "step": 6342 }, { "epoch": 0.41, "grad_norm": 1.5012656990925175, "learning_rate": 6.728810371945567e-06, "loss": 0.7902, "step": 6343 }, { "epoch": 0.41, "grad_norm": 1.1697539529491443, "learning_rate": 6.72783771166905e-06, "loss": 0.6571, "step": 6344 }, { "epoch": 0.41, "grad_norm": 1.569074237899523, "learning_rate": 6.7268649771337e-06, "loss": 0.8283, "step": 6345 }, { "epoch": 0.41, "grad_norm": 1.6534478237472778, "learning_rate": 6.725892168381323e-06, "loss": 0.7492, "step": 6346 }, { "epoch": 0.41, "grad_norm": 1.572927263521729, "learning_rate": 6.724919285453726e-06, "loss": 0.8019, "step": 6347 }, { "epoch": 0.41, "grad_norm": 2.1372168755308056, "learning_rate": 6.723946328392722e-06, "loss": 0.8007, "step": 6348 }, { "epoch": 0.41, "grad_norm": 1.5129809746163947, "learning_rate": 6.722973297240128e-06, "loss": 0.8017, "step": 6349 }, { "epoch": 0.41, "grad_norm": 1.4851999994582008, "learning_rate": 6.722000192037761e-06, "loss": 0.7219, "step": 6350 }, { "epoch": 0.41, "grad_norm": 1.8639130735422633, "learning_rate": 6.721027012827444e-06, "loss": 0.7654, "step": 6351 }, { "epoch": 0.41, "grad_norm": 1.638783205515493, "learning_rate": 6.720053759651005e-06, "loss": 0.7415, "step": 6352 }, { "epoch": 0.41, "grad_norm": 1.5691959990742717, "learning_rate": 6.719080432550269e-06, "loss": 0.6744, "step": 6353 }, { "epoch": 0.41, "grad_norm": 1.1234053580278938, "learning_rate": 6.718107031567067e-06, "loss": 0.6328, "step": 6354 }, { "epoch": 0.41, "grad_norm": 1.6446417355274905, "learning_rate": 6.7171335567432365e-06, "loss": 0.7455, "step": 6355 }, { "epoch": 0.41, "grad_norm": 1.674148458763892, "learning_rate": 6.716160008120613e-06, "loss": 0.8106, "step": 6356 }, { "epoch": 0.41, "grad_norm": 1.7481557990581251, "learning_rate": 6.7151863857410375e-06, "loss": 0.7485, "step": 6357 }, { "epoch": 0.41, "grad_norm": 1.8918161224749181, "learning_rate": 6.714212689646355e-06, "loss": 0.7201, "step": 6358 }, { "epoch": 0.41, "grad_norm": 2.5257939356216523, "learning_rate": 6.713238919878415e-06, "loss": 0.7537, "step": 6359 }, { "epoch": 0.41, "grad_norm": 1.035794444122516, "learning_rate": 6.712265076479067e-06, "loss": 0.6253, "step": 6360 }, { "epoch": 0.41, "grad_norm": 0.9969776372212619, "learning_rate": 6.711291159490162e-06, "loss": 0.554, "step": 6361 }, { "epoch": 0.41, "grad_norm": 1.7140459409022804, "learning_rate": 6.71031716895356e-06, "loss": 0.6521, "step": 6362 }, { "epoch": 0.41, "grad_norm": 1.6318754466798062, "learning_rate": 6.709343104911119e-06, "loss": 0.689, "step": 6363 }, { "epoch": 0.41, "grad_norm": 2.2896099526252156, "learning_rate": 6.708368967404704e-06, "loss": 0.741, "step": 6364 }, { "epoch": 0.41, "grad_norm": 1.4226294725318993, "learning_rate": 6.70739475647618e-06, "loss": 0.6678, "step": 6365 }, { "epoch": 0.41, "grad_norm": 1.522941902374311, "learning_rate": 6.706420472167417e-06, "loss": 0.8101, "step": 6366 }, { "epoch": 0.41, "grad_norm": 1.7978598757919473, "learning_rate": 6.705446114520289e-06, "loss": 0.8394, "step": 6367 }, { "epoch": 0.41, "grad_norm": 1.5791855511589596, "learning_rate": 6.704471683576669e-06, "loss": 0.854, "step": 6368 }, { "epoch": 0.41, "grad_norm": 1.7573090055571277, "learning_rate": 6.7034971793784385e-06, "loss": 0.9504, "step": 6369 }, { "epoch": 0.41, "grad_norm": 1.3932138018723639, "learning_rate": 6.702522601967479e-06, "loss": 0.7117, "step": 6370 }, { "epoch": 0.41, "grad_norm": 1.0499807968459596, "learning_rate": 6.701547951385675e-06, "loss": 0.5935, "step": 6371 }, { "epoch": 0.41, "grad_norm": 1.8976147272042208, "learning_rate": 6.700573227674916e-06, "loss": 0.7253, "step": 6372 }, { "epoch": 0.41, "grad_norm": 1.2705534933547296, "learning_rate": 6.699598430877092e-06, "loss": 0.6543, "step": 6373 }, { "epoch": 0.41, "grad_norm": 1.642987289538627, "learning_rate": 6.6986235610341e-06, "loss": 0.7192, "step": 6374 }, { "epoch": 0.41, "grad_norm": 1.7919746080521926, "learning_rate": 6.697648618187836e-06, "loss": 0.9968, "step": 6375 }, { "epoch": 0.41, "grad_norm": 1.770515317652257, "learning_rate": 6.696673602380203e-06, "loss": 0.7717, "step": 6376 }, { "epoch": 0.41, "grad_norm": 1.6755450362295068, "learning_rate": 6.6956985136531015e-06, "loss": 0.7483, "step": 6377 }, { "epoch": 0.41, "grad_norm": 1.8630436637779386, "learning_rate": 6.694723352048442e-06, "loss": 0.8126, "step": 6378 }, { "epoch": 0.41, "grad_norm": 1.6857546209529506, "learning_rate": 6.693748117608134e-06, "loss": 0.7207, "step": 6379 }, { "epoch": 0.41, "grad_norm": 1.158476585032959, "learning_rate": 6.69277281037409e-06, "loss": 0.5934, "step": 6380 }, { "epoch": 0.41, "grad_norm": 1.4896384346874296, "learning_rate": 6.69179743038823e-06, "loss": 0.6992, "step": 6381 }, { "epoch": 0.41, "grad_norm": 1.747596759760348, "learning_rate": 6.69082197769247e-06, "loss": 0.6471, "step": 6382 }, { "epoch": 0.41, "grad_norm": 1.2131833358512982, "learning_rate": 6.6898464523287354e-06, "loss": 0.741, "step": 6383 }, { "epoch": 0.41, "grad_norm": 0.965053087435027, "learning_rate": 6.68887085433895e-06, "loss": 0.6652, "step": 6384 }, { "epoch": 0.41, "grad_norm": 1.2725491614113957, "learning_rate": 6.687895183765043e-06, "loss": 0.7287, "step": 6385 }, { "epoch": 0.41, "grad_norm": 1.7433538213458135, "learning_rate": 6.686919440648949e-06, "loss": 0.82, "step": 6386 }, { "epoch": 0.41, "grad_norm": 1.0836103730961066, "learning_rate": 6.685943625032602e-06, "loss": 0.6878, "step": 6387 }, { "epoch": 0.41, "grad_norm": 1.5025715547988794, "learning_rate": 6.684967736957941e-06, "loss": 0.7559, "step": 6388 }, { "epoch": 0.41, "grad_norm": 1.1094813910811478, "learning_rate": 6.683991776466907e-06, "loss": 0.6911, "step": 6389 }, { "epoch": 0.41, "grad_norm": 1.7973827675785348, "learning_rate": 6.683015743601445e-06, "loss": 0.7663, "step": 6390 }, { "epoch": 0.41, "grad_norm": 1.6566238949279728, "learning_rate": 6.682039638403503e-06, "loss": 0.7371, "step": 6391 }, { "epoch": 0.41, "grad_norm": 1.962612401182092, "learning_rate": 6.681063460915033e-06, "loss": 0.7801, "step": 6392 }, { "epoch": 0.41, "grad_norm": 1.7178304307649865, "learning_rate": 6.6800872111779854e-06, "loss": 0.8693, "step": 6393 }, { "epoch": 0.41, "grad_norm": 1.6407477788865996, "learning_rate": 6.679110889234322e-06, "loss": 0.8413, "step": 6394 }, { "epoch": 0.41, "grad_norm": 1.6572137637549003, "learning_rate": 6.678134495125999e-06, "loss": 0.6138, "step": 6395 }, { "epoch": 0.41, "grad_norm": 1.0923169029560655, "learning_rate": 6.677158028894983e-06, "loss": 0.6226, "step": 6396 }, { "epoch": 0.41, "grad_norm": 1.6774938284978245, "learning_rate": 6.676181490583238e-06, "loss": 0.7357, "step": 6397 }, { "epoch": 0.41, "grad_norm": 2.023378544547136, "learning_rate": 6.675204880232735e-06, "loss": 0.7201, "step": 6398 }, { "epoch": 0.41, "grad_norm": 1.705575548894952, "learning_rate": 6.674228197885448e-06, "loss": 0.7219, "step": 6399 }, { "epoch": 0.41, "grad_norm": 1.7900279723770096, "learning_rate": 6.6732514435833485e-06, "loss": 0.7519, "step": 6400 }, { "epoch": 0.41, "grad_norm": 1.693453850657707, "learning_rate": 6.67227461736842e-06, "loss": 0.7636, "step": 6401 }, { "epoch": 0.41, "grad_norm": 1.6557129786426192, "learning_rate": 6.671297719282641e-06, "loss": 0.709, "step": 6402 }, { "epoch": 0.41, "grad_norm": 1.7270231304637373, "learning_rate": 6.670320749367998e-06, "loss": 0.83, "step": 6403 }, { "epoch": 0.41, "grad_norm": 1.5371377274851374, "learning_rate": 6.6693437076664795e-06, "loss": 0.7837, "step": 6404 }, { "epoch": 0.41, "grad_norm": 1.5555248791189138, "learning_rate": 6.668366594220076e-06, "loss": 0.8079, "step": 6405 }, { "epoch": 0.41, "grad_norm": 1.837390858938125, "learning_rate": 6.667389409070782e-06, "loss": 0.8969, "step": 6406 }, { "epoch": 0.41, "grad_norm": 1.7960658394614926, "learning_rate": 6.666412152260595e-06, "loss": 0.6852, "step": 6407 }, { "epoch": 0.41, "grad_norm": 1.7919611348966107, "learning_rate": 6.665434823831515e-06, "loss": 0.8274, "step": 6408 }, { "epoch": 0.41, "grad_norm": 1.6268153396876663, "learning_rate": 6.664457423825547e-06, "loss": 0.8532, "step": 6409 }, { "epoch": 0.41, "grad_norm": 1.489008897907892, "learning_rate": 6.663479952284695e-06, "loss": 0.8109, "step": 6410 }, { "epoch": 0.41, "grad_norm": 1.0752829889536397, "learning_rate": 6.662502409250971e-06, "loss": 0.6821, "step": 6411 }, { "epoch": 0.41, "grad_norm": 3.622321765292955, "learning_rate": 6.661524794766387e-06, "loss": 0.8016, "step": 6412 }, { "epoch": 0.41, "grad_norm": 1.0988337127640435, "learning_rate": 6.660547108872959e-06, "loss": 0.6603, "step": 6413 }, { "epoch": 0.41, "grad_norm": 1.8336065115888005, "learning_rate": 6.659569351612704e-06, "loss": 0.9084, "step": 6414 }, { "epoch": 0.41, "grad_norm": 2.0729024856298945, "learning_rate": 6.6585915230276445e-06, "loss": 0.7803, "step": 6415 }, { "epoch": 0.41, "grad_norm": 1.7836553718573425, "learning_rate": 6.657613623159808e-06, "loss": 0.9298, "step": 6416 }, { "epoch": 0.41, "grad_norm": 1.2422028053324161, "learning_rate": 6.656635652051222e-06, "loss": 0.6913, "step": 6417 }, { "epoch": 0.41, "grad_norm": 1.76276493728413, "learning_rate": 6.655657609743917e-06, "loss": 0.7452, "step": 6418 }, { "epoch": 0.41, "grad_norm": 1.6898677627503789, "learning_rate": 6.654679496279925e-06, "loss": 0.7441, "step": 6419 }, { "epoch": 0.41, "grad_norm": 1.528756331179182, "learning_rate": 6.653701311701288e-06, "loss": 0.725, "step": 6420 }, { "epoch": 0.41, "grad_norm": 1.6026984177263726, "learning_rate": 6.6527230560500415e-06, "loss": 0.8783, "step": 6421 }, { "epoch": 0.41, "grad_norm": 1.869992466902335, "learning_rate": 6.65174472936823e-06, "loss": 0.8798, "step": 6422 }, { "epoch": 0.41, "grad_norm": 1.5793259399886548, "learning_rate": 6.6507663316979025e-06, "loss": 0.8463, "step": 6423 }, { "epoch": 0.41, "grad_norm": 1.5493976275093113, "learning_rate": 6.649787863081107e-06, "loss": 0.6735, "step": 6424 }, { "epoch": 0.41, "grad_norm": 1.6445560573909377, "learning_rate": 6.648809323559895e-06, "loss": 0.8254, "step": 6425 }, { "epoch": 0.41, "grad_norm": 2.4156731218877296, "learning_rate": 6.6478307131763235e-06, "loss": 0.7285, "step": 6426 }, { "epoch": 0.41, "grad_norm": 1.8656081372237874, "learning_rate": 6.64685203197245e-06, "loss": 0.962, "step": 6427 }, { "epoch": 0.41, "grad_norm": 1.5757256609614558, "learning_rate": 6.645873279990337e-06, "loss": 0.6698, "step": 6428 }, { "epoch": 0.41, "grad_norm": 2.09438609183353, "learning_rate": 6.644894457272048e-06, "loss": 0.8305, "step": 6429 }, { "epoch": 0.41, "grad_norm": 2.906695971042945, "learning_rate": 6.643915563859652e-06, "loss": 0.855, "step": 6430 }, { "epoch": 0.41, "grad_norm": 1.4859091919590315, "learning_rate": 6.64293659979522e-06, "loss": 0.8686, "step": 6431 }, { "epoch": 0.41, "grad_norm": 1.6828503623164022, "learning_rate": 6.641957565120824e-06, "loss": 0.9294, "step": 6432 }, { "epoch": 0.41, "grad_norm": 2.046426539153946, "learning_rate": 6.640978459878543e-06, "loss": 0.909, "step": 6433 }, { "epoch": 0.41, "grad_norm": 1.1149785451117293, "learning_rate": 6.639999284110457e-06, "loss": 0.6199, "step": 6434 }, { "epoch": 0.41, "grad_norm": 2.2933670736208613, "learning_rate": 6.639020037858647e-06, "loss": 0.7851, "step": 6435 }, { "epoch": 0.41, "grad_norm": 1.5510668418738576, "learning_rate": 6.638040721165199e-06, "loss": 0.715, "step": 6436 }, { "epoch": 0.41, "grad_norm": 1.6222431760106077, "learning_rate": 6.637061334072204e-06, "loss": 0.7686, "step": 6437 }, { "epoch": 0.41, "grad_norm": 1.207116781598777, "learning_rate": 6.636081876621752e-06, "loss": 0.7293, "step": 6438 }, { "epoch": 0.41, "grad_norm": 1.922890322775199, "learning_rate": 6.635102348855939e-06, "loss": 0.7145, "step": 6439 }, { "epoch": 0.41, "grad_norm": 1.7770662175978709, "learning_rate": 6.634122750816863e-06, "loss": 0.8355, "step": 6440 }, { "epoch": 0.41, "grad_norm": 2.0000549421310225, "learning_rate": 6.6331430825466245e-06, "loss": 0.7596, "step": 6441 }, { "epoch": 0.41, "grad_norm": 1.5840331152161367, "learning_rate": 6.632163344087328e-06, "loss": 0.8973, "step": 6442 }, { "epoch": 0.41, "grad_norm": 1.3930212972484504, "learning_rate": 6.631183535481082e-06, "loss": 0.6547, "step": 6443 }, { "epoch": 0.41, "grad_norm": 1.0428108713356614, "learning_rate": 6.630203656769994e-06, "loss": 0.6555, "step": 6444 }, { "epoch": 0.41, "grad_norm": 1.6149805350094442, "learning_rate": 6.629223707996177e-06, "loss": 0.8386, "step": 6445 }, { "epoch": 0.41, "grad_norm": 1.5455350206145633, "learning_rate": 6.628243689201752e-06, "loss": 0.7916, "step": 6446 }, { "epoch": 0.41, "grad_norm": 1.5449858885756877, "learning_rate": 6.627263600428833e-06, "loss": 0.7357, "step": 6447 }, { "epoch": 0.41, "grad_norm": 1.6264982454006587, "learning_rate": 6.6262834417195434e-06, "loss": 0.7672, "step": 6448 }, { "epoch": 0.41, "grad_norm": 1.5386963261538, "learning_rate": 6.625303213116009e-06, "loss": 0.6466, "step": 6449 }, { "epoch": 0.41, "grad_norm": 2.1886038273289916, "learning_rate": 6.624322914660357e-06, "loss": 0.7094, "step": 6450 }, { "epoch": 0.41, "grad_norm": 1.8667268337935363, "learning_rate": 6.62334254639472e-06, "loss": 0.7973, "step": 6451 }, { "epoch": 0.41, "grad_norm": 1.6857654900121903, "learning_rate": 6.622362108361231e-06, "loss": 0.8928, "step": 6452 }, { "epoch": 0.41, "grad_norm": 1.742137460635731, "learning_rate": 6.621381600602028e-06, "loss": 0.7634, "step": 6453 }, { "epoch": 0.41, "grad_norm": 1.7207254107184309, "learning_rate": 6.620401023159251e-06, "loss": 0.6884, "step": 6454 }, { "epoch": 0.41, "grad_norm": 1.505674241751728, "learning_rate": 6.619420376075043e-06, "loss": 0.6911, "step": 6455 }, { "epoch": 0.41, "grad_norm": 2.218940486529801, "learning_rate": 6.618439659391551e-06, "loss": 0.7793, "step": 6456 }, { "epoch": 0.41, "grad_norm": 1.9431234970241176, "learning_rate": 6.617458873150922e-06, "loss": 0.7077, "step": 6457 }, { "epoch": 0.41, "grad_norm": 1.900514573397476, "learning_rate": 6.616478017395309e-06, "loss": 0.8091, "step": 6458 }, { "epoch": 0.41, "grad_norm": 2.071003343715569, "learning_rate": 6.6154970921668685e-06, "loss": 0.6966, "step": 6459 }, { "epoch": 0.41, "grad_norm": 1.709637968001026, "learning_rate": 6.614516097507756e-06, "loss": 0.7058, "step": 6460 }, { "epoch": 0.41, "grad_norm": 1.2741729238532071, "learning_rate": 6.613535033460135e-06, "loss": 0.6685, "step": 6461 }, { "epoch": 0.41, "grad_norm": 1.5196466377182194, "learning_rate": 6.6125539000661694e-06, "loss": 0.7183, "step": 6462 }, { "epoch": 0.41, "grad_norm": 2.0380345423375754, "learning_rate": 6.611572697368026e-06, "loss": 0.8101, "step": 6463 }, { "epoch": 0.41, "grad_norm": 1.1953836858041134, "learning_rate": 6.610591425407875e-06, "loss": 0.6283, "step": 6464 }, { "epoch": 0.41, "grad_norm": 1.6407881619764841, "learning_rate": 6.6096100842278865e-06, "loss": 0.7216, "step": 6465 }, { "epoch": 0.41, "grad_norm": 1.6804996108108192, "learning_rate": 6.60862867387024e-06, "loss": 0.7562, "step": 6466 }, { "epoch": 0.41, "grad_norm": 1.0591318201985263, "learning_rate": 6.607647194377113e-06, "loss": 0.6464, "step": 6467 }, { "epoch": 0.41, "grad_norm": 1.5604427851426776, "learning_rate": 6.606665645790689e-06, "loss": 0.8237, "step": 6468 }, { "epoch": 0.41, "grad_norm": 1.6688067156616906, "learning_rate": 6.60568402815315e-06, "loss": 0.7067, "step": 6469 }, { "epoch": 0.41, "grad_norm": 1.5324152981189176, "learning_rate": 6.604702341506686e-06, "loss": 0.7189, "step": 6470 }, { "epoch": 0.41, "grad_norm": 1.6094168004857161, "learning_rate": 6.603720585893487e-06, "loss": 0.8327, "step": 6471 }, { "epoch": 0.41, "grad_norm": 1.5972567675226375, "learning_rate": 6.602738761355747e-06, "loss": 0.7733, "step": 6472 }, { "epoch": 0.41, "grad_norm": 1.8192052076947793, "learning_rate": 6.601756867935664e-06, "loss": 0.8239, "step": 6473 }, { "epoch": 0.41, "grad_norm": 1.9517114989494893, "learning_rate": 6.600774905675436e-06, "loss": 0.7155, "step": 6474 }, { "epoch": 0.41, "grad_norm": 1.5985732728744138, "learning_rate": 6.599792874617267e-06, "loss": 0.7188, "step": 6475 }, { "epoch": 0.41, "grad_norm": 1.8199754481606008, "learning_rate": 6.598810774803361e-06, "loss": 0.7851, "step": 6476 }, { "epoch": 0.41, "grad_norm": 1.585791965995891, "learning_rate": 6.597828606275928e-06, "loss": 0.798, "step": 6477 }, { "epoch": 0.41, "grad_norm": 1.5948986041317101, "learning_rate": 6.5968463690771775e-06, "loss": 0.7802, "step": 6478 }, { "epoch": 0.41, "grad_norm": 2.0879178890335557, "learning_rate": 6.595864063249326e-06, "loss": 0.6984, "step": 6479 }, { "epoch": 0.41, "grad_norm": 1.894428440893173, "learning_rate": 6.5948816888345915e-06, "loss": 0.7275, "step": 6480 }, { "epoch": 0.41, "grad_norm": 1.0165014496284686, "learning_rate": 6.593899245875193e-06, "loss": 0.6302, "step": 6481 }, { "epoch": 0.41, "grad_norm": 1.66519407853807, "learning_rate": 6.592916734413354e-06, "loss": 0.7007, "step": 6482 }, { "epoch": 0.41, "grad_norm": 1.6371215137411836, "learning_rate": 6.591934154491301e-06, "loss": 0.8333, "step": 6483 }, { "epoch": 0.42, "grad_norm": 1.6633921848719047, "learning_rate": 6.590951506151263e-06, "loss": 0.7489, "step": 6484 }, { "epoch": 0.42, "grad_norm": 1.1602030241724615, "learning_rate": 6.589968789435472e-06, "loss": 0.5592, "step": 6485 }, { "epoch": 0.42, "grad_norm": 1.2257059828428332, "learning_rate": 6.5889860043861644e-06, "loss": 0.641, "step": 6486 }, { "epoch": 0.42, "grad_norm": 1.7522603641513845, "learning_rate": 6.588003151045577e-06, "loss": 0.8177, "step": 6487 }, { "epoch": 0.42, "grad_norm": 1.637111823607234, "learning_rate": 6.58702022945595e-06, "loss": 0.7054, "step": 6488 }, { "epoch": 0.42, "grad_norm": 1.6113199449323563, "learning_rate": 6.586037239659529e-06, "loss": 0.6973, "step": 6489 }, { "epoch": 0.42, "grad_norm": 1.9142428231476782, "learning_rate": 6.5850541816985595e-06, "loss": 0.5632, "step": 6490 }, { "epoch": 0.42, "grad_norm": 2.0640198327777872, "learning_rate": 6.584071055615293e-06, "loss": 0.747, "step": 6491 }, { "epoch": 0.42, "grad_norm": 1.8427733019302042, "learning_rate": 6.5830878614519815e-06, "loss": 0.6687, "step": 6492 }, { "epoch": 0.42, "grad_norm": 1.6814018858007784, "learning_rate": 6.582104599250878e-06, "loss": 0.6953, "step": 6493 }, { "epoch": 0.42, "grad_norm": 1.8351349112838682, "learning_rate": 6.581121269054244e-06, "loss": 0.7126, "step": 6494 }, { "epoch": 0.42, "grad_norm": 1.521602807845381, "learning_rate": 6.580137870904342e-06, "loss": 0.581, "step": 6495 }, { "epoch": 0.42, "grad_norm": 1.2065510562639574, "learning_rate": 6.5791544048434346e-06, "loss": 0.7705, "step": 6496 }, { "epoch": 0.42, "grad_norm": 1.882567738973289, "learning_rate": 6.578170870913787e-06, "loss": 0.7017, "step": 6497 }, { "epoch": 0.42, "grad_norm": 1.5647711049148807, "learning_rate": 6.577187269157672e-06, "loss": 0.7607, "step": 6498 }, { "epoch": 0.42, "grad_norm": 1.617330241129649, "learning_rate": 6.576203599617363e-06, "loss": 0.7537, "step": 6499 }, { "epoch": 0.42, "grad_norm": 1.8602746599617956, "learning_rate": 6.575219862335136e-06, "loss": 0.7384, "step": 6500 }, { "epoch": 0.42, "grad_norm": 1.8269385107774416, "learning_rate": 6.574236057353268e-06, "loss": 0.7859, "step": 6501 }, { "epoch": 0.42, "grad_norm": 1.7825099720833766, "learning_rate": 6.573252184714043e-06, "loss": 0.8406, "step": 6502 }, { "epoch": 0.42, "grad_norm": 1.5370212624503248, "learning_rate": 6.572268244459745e-06, "loss": 0.6947, "step": 6503 }, { "epoch": 0.42, "grad_norm": 1.7351330228445303, "learning_rate": 6.5712842366326625e-06, "loss": 0.8065, "step": 6504 }, { "epoch": 0.42, "grad_norm": 1.5488218781499792, "learning_rate": 6.570300161275084e-06, "loss": 0.7413, "step": 6505 }, { "epoch": 0.42, "grad_norm": 1.7527842771791173, "learning_rate": 6.569316018429304e-06, "loss": 0.7539, "step": 6506 }, { "epoch": 0.42, "grad_norm": 1.920581249233248, "learning_rate": 6.568331808137619e-06, "loss": 0.7758, "step": 6507 }, { "epoch": 0.42, "grad_norm": 1.6095093604131134, "learning_rate": 6.567347530442328e-06, "loss": 0.7318, "step": 6508 }, { "epoch": 0.42, "grad_norm": 1.4931523403843792, "learning_rate": 6.5663631853857355e-06, "loss": 0.7049, "step": 6509 }, { "epoch": 0.42, "grad_norm": 2.948182726378901, "learning_rate": 6.565378773010144e-06, "loss": 0.7924, "step": 6510 }, { "epoch": 0.42, "grad_norm": 1.9321136704111717, "learning_rate": 6.564394293357861e-06, "loss": 0.7883, "step": 6511 }, { "epoch": 0.42, "grad_norm": 1.2832401362216561, "learning_rate": 6.5634097464711995e-06, "loss": 0.5423, "step": 6512 }, { "epoch": 0.42, "grad_norm": 1.661668968011036, "learning_rate": 6.562425132392473e-06, "loss": 0.8196, "step": 6513 }, { "epoch": 0.42, "grad_norm": 1.775419345100872, "learning_rate": 6.5614404511639964e-06, "loss": 0.8184, "step": 6514 }, { "epoch": 0.42, "grad_norm": 1.5556784051528827, "learning_rate": 6.560455702828089e-06, "loss": 0.7885, "step": 6515 }, { "epoch": 0.42, "grad_norm": 1.782707923790627, "learning_rate": 6.559470887427076e-06, "loss": 0.7315, "step": 6516 }, { "epoch": 0.42, "grad_norm": 1.6120662079323003, "learning_rate": 6.558486005003281e-06, "loss": 0.7814, "step": 6517 }, { "epoch": 0.42, "grad_norm": 1.6149532679928431, "learning_rate": 6.557501055599032e-06, "loss": 0.7929, "step": 6518 }, { "epoch": 0.42, "grad_norm": 1.6042000701152914, "learning_rate": 6.556516039256662e-06, "loss": 0.6651, "step": 6519 }, { "epoch": 0.42, "grad_norm": 1.207360773285148, "learning_rate": 6.555530956018502e-06, "loss": 0.6821, "step": 6520 }, { "epoch": 0.42, "grad_norm": 2.304261070788727, "learning_rate": 6.554545805926891e-06, "loss": 0.6704, "step": 6521 }, { "epoch": 0.42, "grad_norm": 1.6571835247777944, "learning_rate": 6.553560589024166e-06, "loss": 0.7867, "step": 6522 }, { "epoch": 0.42, "grad_norm": 0.9560674135278038, "learning_rate": 6.552575305352672e-06, "loss": 0.5885, "step": 6523 }, { "epoch": 0.42, "grad_norm": 2.057829084737305, "learning_rate": 6.551589954954754e-06, "loss": 0.7507, "step": 6524 }, { "epoch": 0.42, "grad_norm": 1.7866007413716403, "learning_rate": 6.55060453787276e-06, "loss": 0.8747, "step": 6525 }, { "epoch": 0.42, "grad_norm": 1.491405370634791, "learning_rate": 6.549619054149041e-06, "loss": 0.6862, "step": 6526 }, { "epoch": 0.42, "grad_norm": 2.036967743691026, "learning_rate": 6.548633503825953e-06, "loss": 0.8127, "step": 6527 }, { "epoch": 0.42, "grad_norm": 2.2285054595647726, "learning_rate": 6.5476478869458496e-06, "loss": 0.832, "step": 6528 }, { "epoch": 0.42, "grad_norm": 1.1753650735205061, "learning_rate": 6.546662203551092e-06, "loss": 0.5911, "step": 6529 }, { "epoch": 0.42, "grad_norm": 1.0124710608424117, "learning_rate": 6.545676453684043e-06, "loss": 0.6829, "step": 6530 }, { "epoch": 0.42, "grad_norm": 1.565468769742194, "learning_rate": 6.544690637387068e-06, "loss": 0.7842, "step": 6531 }, { "epoch": 0.42, "grad_norm": 1.5590034451286885, "learning_rate": 6.543704754702536e-06, "loss": 0.7961, "step": 6532 }, { "epoch": 0.42, "grad_norm": 1.4557679290520609, "learning_rate": 6.5427188056728165e-06, "loss": 0.7144, "step": 6533 }, { "epoch": 0.42, "grad_norm": 1.7142575622712326, "learning_rate": 6.541732790340285e-06, "loss": 0.7955, "step": 6534 }, { "epoch": 0.42, "grad_norm": 2.0234216848976394, "learning_rate": 6.540746708747317e-06, "loss": 0.7219, "step": 6535 }, { "epoch": 0.42, "grad_norm": 1.6513540733622414, "learning_rate": 6.539760560936292e-06, "loss": 0.6831, "step": 6536 }, { "epoch": 0.42, "grad_norm": 1.6513106472335362, "learning_rate": 6.538774346949597e-06, "loss": 0.7089, "step": 6537 }, { "epoch": 0.42, "grad_norm": 1.5331750442994436, "learning_rate": 6.537788066829611e-06, "loss": 0.7152, "step": 6538 }, { "epoch": 0.42, "grad_norm": 1.1506491121178986, "learning_rate": 6.536801720618727e-06, "loss": 0.6831, "step": 6539 }, { "epoch": 0.42, "grad_norm": 2.0309197890098263, "learning_rate": 6.5358153083593345e-06, "loss": 0.8998, "step": 6540 }, { "epoch": 0.42, "grad_norm": 1.613466673722333, "learning_rate": 6.534828830093827e-06, "loss": 0.7004, "step": 6541 }, { "epoch": 0.42, "grad_norm": 1.2107302215113727, "learning_rate": 6.5338422858646e-06, "loss": 0.6309, "step": 6542 }, { "epoch": 0.42, "grad_norm": 1.780470926166601, "learning_rate": 6.532855675714055e-06, "loss": 0.6776, "step": 6543 }, { "epoch": 0.42, "grad_norm": 1.1048354125368012, "learning_rate": 6.531868999684594e-06, "loss": 0.7358, "step": 6544 }, { "epoch": 0.42, "grad_norm": 1.5880533502715666, "learning_rate": 6.5308822578186225e-06, "loss": 0.6679, "step": 6545 }, { "epoch": 0.42, "grad_norm": 1.5306006719018124, "learning_rate": 6.529895450158549e-06, "loss": 0.7681, "step": 6546 }, { "epoch": 0.42, "grad_norm": 1.619525307990875, "learning_rate": 6.528908576746784e-06, "loss": 0.7351, "step": 6547 }, { "epoch": 0.42, "grad_norm": 1.7719669434336012, "learning_rate": 6.527921637625741e-06, "loss": 0.6753, "step": 6548 }, { "epoch": 0.42, "grad_norm": 2.0872511110685035, "learning_rate": 6.526934632837835e-06, "loss": 0.7197, "step": 6549 }, { "epoch": 0.42, "grad_norm": 1.7640913493787003, "learning_rate": 6.5259475624254875e-06, "loss": 0.8851, "step": 6550 }, { "epoch": 0.42, "grad_norm": 1.6644198592267856, "learning_rate": 6.5249604264311216e-06, "loss": 0.7952, "step": 6551 }, { "epoch": 0.42, "grad_norm": 1.725550182044188, "learning_rate": 6.5239732248971595e-06, "loss": 0.6887, "step": 6552 }, { "epoch": 0.42, "grad_norm": 1.4477693026692626, "learning_rate": 6.522985957866032e-06, "loss": 0.5691, "step": 6553 }, { "epoch": 0.42, "grad_norm": 1.8219597251239206, "learning_rate": 6.521998625380167e-06, "loss": 0.6965, "step": 6554 }, { "epoch": 0.42, "grad_norm": 1.7656030276079393, "learning_rate": 6.521011227482e-06, "loss": 0.7572, "step": 6555 }, { "epoch": 0.42, "grad_norm": 1.8397848478872327, "learning_rate": 6.520023764213968e-06, "loss": 0.7821, "step": 6556 }, { "epoch": 0.42, "grad_norm": 1.689411408699623, "learning_rate": 6.519036235618505e-06, "loss": 0.7605, "step": 6557 }, { "epoch": 0.42, "grad_norm": 1.8371450340434194, "learning_rate": 6.518048641738059e-06, "loss": 0.7311, "step": 6558 }, { "epoch": 0.42, "grad_norm": 2.034745619148353, "learning_rate": 6.517060982615071e-06, "loss": 0.7933, "step": 6559 }, { "epoch": 0.42, "grad_norm": 1.5325454628984012, "learning_rate": 6.51607325829199e-06, "loss": 0.7092, "step": 6560 }, { "epoch": 0.42, "grad_norm": 1.76609624893382, "learning_rate": 6.5150854688112686e-06, "loss": 0.6834, "step": 6561 }, { "epoch": 0.42, "grad_norm": 1.1265507183676817, "learning_rate": 6.514097614215353e-06, "loss": 0.8326, "step": 6562 }, { "epoch": 0.42, "grad_norm": 1.5761149093341882, "learning_rate": 6.513109694546707e-06, "loss": 0.768, "step": 6563 }, { "epoch": 0.42, "grad_norm": 1.6877486806775615, "learning_rate": 6.512121709847785e-06, "loss": 0.8026, "step": 6564 }, { "epoch": 0.42, "grad_norm": 1.2019011040600138, "learning_rate": 6.511133660161047e-06, "loss": 0.6791, "step": 6565 }, { "epoch": 0.42, "grad_norm": 1.8253011678142703, "learning_rate": 6.510145545528963e-06, "loss": 0.7242, "step": 6566 }, { "epoch": 0.42, "grad_norm": 1.78701502698466, "learning_rate": 6.5091573659939945e-06, "loss": 0.8516, "step": 6567 }, { "epoch": 0.42, "grad_norm": 1.6901977959879162, "learning_rate": 6.508169121598615e-06, "loss": 0.7743, "step": 6568 }, { "epoch": 0.42, "grad_norm": 1.331822877867265, "learning_rate": 6.507180812385295e-06, "loss": 0.6671, "step": 6569 }, { "epoch": 0.42, "grad_norm": 1.108446665405529, "learning_rate": 6.506192438396512e-06, "loss": 0.5781, "step": 6570 }, { "epoch": 0.42, "grad_norm": 1.7411933213171027, "learning_rate": 6.50520399967474e-06, "loss": 0.7804, "step": 6571 }, { "epoch": 0.42, "grad_norm": 1.721588912388923, "learning_rate": 6.504215496262464e-06, "loss": 0.8366, "step": 6572 }, { "epoch": 0.42, "grad_norm": 1.494580078015409, "learning_rate": 6.503226928202167e-06, "loss": 0.838, "step": 6573 }, { "epoch": 0.42, "grad_norm": 3.4652218292479056, "learning_rate": 6.502238295536336e-06, "loss": 0.9111, "step": 6574 }, { "epoch": 0.42, "grad_norm": 1.512645639261613, "learning_rate": 6.5012495983074605e-06, "loss": 0.6662, "step": 6575 }, { "epoch": 0.42, "grad_norm": 1.517879508655737, "learning_rate": 6.500260836558031e-06, "loss": 0.7233, "step": 6576 }, { "epoch": 0.42, "grad_norm": 1.6266311508309523, "learning_rate": 6.499272010330543e-06, "loss": 0.8423, "step": 6577 }, { "epoch": 0.42, "grad_norm": 1.6367459784635265, "learning_rate": 6.4982831196674945e-06, "loss": 0.7301, "step": 6578 }, { "epoch": 0.42, "grad_norm": 1.6207838697713002, "learning_rate": 6.497294164611385e-06, "loss": 0.9069, "step": 6579 }, { "epoch": 0.42, "grad_norm": 1.494228406868283, "learning_rate": 6.496305145204719e-06, "loss": 0.6234, "step": 6580 }, { "epoch": 0.42, "grad_norm": 1.595218746084694, "learning_rate": 6.495316061490003e-06, "loss": 0.787, "step": 6581 }, { "epoch": 0.42, "grad_norm": 1.6389599711235958, "learning_rate": 6.494326913509744e-06, "loss": 0.7362, "step": 6582 }, { "epoch": 0.42, "grad_norm": 1.4729281540521881, "learning_rate": 6.493337701306454e-06, "loss": 0.7254, "step": 6583 }, { "epoch": 0.42, "grad_norm": 1.107338193603369, "learning_rate": 6.492348424922648e-06, "loss": 0.7247, "step": 6584 }, { "epoch": 0.42, "grad_norm": 1.5161854494827596, "learning_rate": 6.4913590844008436e-06, "loss": 1.0536, "step": 6585 }, { "epoch": 0.42, "grad_norm": 1.575812361224069, "learning_rate": 6.490369679783557e-06, "loss": 0.7368, "step": 6586 }, { "epoch": 0.42, "grad_norm": 1.9470711372949667, "learning_rate": 6.489380211113316e-06, "loss": 0.7643, "step": 6587 }, { "epoch": 0.42, "grad_norm": 1.5910487267818023, "learning_rate": 6.488390678432641e-06, "loss": 0.6601, "step": 6588 }, { "epoch": 0.42, "grad_norm": 1.6945741832853665, "learning_rate": 6.487401081784063e-06, "loss": 0.8167, "step": 6589 }, { "epoch": 0.42, "grad_norm": 1.3484616263843052, "learning_rate": 6.486411421210112e-06, "loss": 0.7276, "step": 6590 }, { "epoch": 0.42, "grad_norm": 1.9898343715547884, "learning_rate": 6.485421696753321e-06, "loss": 0.811, "step": 6591 }, { "epoch": 0.42, "grad_norm": 1.0957336606925172, "learning_rate": 6.484431908456228e-06, "loss": 0.6885, "step": 6592 }, { "epoch": 0.42, "grad_norm": 1.0191500167422913, "learning_rate": 6.4834420563613685e-06, "loss": 0.6939, "step": 6593 }, { "epoch": 0.42, "grad_norm": 2.179525271274707, "learning_rate": 6.482452140511288e-06, "loss": 0.8026, "step": 6594 }, { "epoch": 0.42, "grad_norm": 1.3863101210523512, "learning_rate": 6.481462160948531e-06, "loss": 0.7089, "step": 6595 }, { "epoch": 0.42, "grad_norm": 1.6608046801818086, "learning_rate": 6.480472117715642e-06, "loss": 0.7231, "step": 6596 }, { "epoch": 0.42, "grad_norm": 1.578748029641274, "learning_rate": 6.479482010855172e-06, "loss": 0.8751, "step": 6597 }, { "epoch": 0.42, "grad_norm": 2.05596370807156, "learning_rate": 6.478491840409675e-06, "loss": 0.8592, "step": 6598 }, { "epoch": 0.42, "grad_norm": 1.7658427793238323, "learning_rate": 6.477501606421703e-06, "loss": 0.8381, "step": 6599 }, { "epoch": 0.42, "grad_norm": 1.6810280695043525, "learning_rate": 6.47651130893382e-06, "loss": 0.8018, "step": 6600 }, { "epoch": 0.42, "grad_norm": 1.5889539075877774, "learning_rate": 6.4755209479885806e-06, "loss": 0.7076, "step": 6601 }, { "epoch": 0.42, "grad_norm": 1.984217186529552, "learning_rate": 6.474530523628553e-06, "loss": 0.837, "step": 6602 }, { "epoch": 0.42, "grad_norm": 1.4651088388050952, "learning_rate": 6.473540035896301e-06, "loss": 0.7021, "step": 6603 }, { "epoch": 0.42, "grad_norm": 1.9418867109922113, "learning_rate": 6.472549484834395e-06, "loss": 0.8875, "step": 6604 }, { "epoch": 0.42, "grad_norm": 1.6611939464176713, "learning_rate": 6.471558870485407e-06, "loss": 0.7414, "step": 6605 }, { "epoch": 0.42, "grad_norm": 1.582092900148672, "learning_rate": 6.470568192891911e-06, "loss": 0.8039, "step": 6606 }, { "epoch": 0.42, "grad_norm": 1.5787270415888295, "learning_rate": 6.469577452096483e-06, "loss": 0.7465, "step": 6607 }, { "epoch": 0.42, "grad_norm": 1.5981539687292492, "learning_rate": 6.468586648141704e-06, "loss": 0.9958, "step": 6608 }, { "epoch": 0.42, "grad_norm": 1.4114910818305049, "learning_rate": 6.467595781070158e-06, "loss": 0.7009, "step": 6609 }, { "epoch": 0.42, "grad_norm": 1.5827407621977851, "learning_rate": 6.466604850924427e-06, "loss": 0.8061, "step": 6610 }, { "epoch": 0.42, "grad_norm": 1.997937971213964, "learning_rate": 6.4656138577471036e-06, "loss": 0.6585, "step": 6611 }, { "epoch": 0.42, "grad_norm": 3.314501108465161, "learning_rate": 6.464622801580776e-06, "loss": 0.7302, "step": 6612 }, { "epoch": 0.42, "grad_norm": 5.177117923503868, "learning_rate": 6.463631682468038e-06, "loss": 0.6941, "step": 6613 }, { "epoch": 0.42, "grad_norm": 1.5563170436195493, "learning_rate": 6.462640500451484e-06, "loss": 0.6908, "step": 6614 }, { "epoch": 0.42, "grad_norm": 1.7540783383746266, "learning_rate": 6.461649255573716e-06, "loss": 0.8267, "step": 6615 }, { "epoch": 0.42, "grad_norm": 1.5751499116791854, "learning_rate": 6.460657947877335e-06, "loss": 0.7039, "step": 6616 }, { "epoch": 0.42, "grad_norm": 1.4228716614406751, "learning_rate": 6.459666577404944e-06, "loss": 0.6719, "step": 6617 }, { "epoch": 0.42, "grad_norm": 1.7609254887359425, "learning_rate": 6.45867514419915e-06, "loss": 0.769, "step": 6618 }, { "epoch": 0.42, "grad_norm": 1.7721136982332655, "learning_rate": 6.457683648302565e-06, "loss": 0.6885, "step": 6619 }, { "epoch": 0.42, "grad_norm": 1.0530524774781915, "learning_rate": 6.456692089757799e-06, "loss": 0.63, "step": 6620 }, { "epoch": 0.42, "grad_norm": 1.7601249688658225, "learning_rate": 6.455700468607469e-06, "loss": 0.7415, "step": 6621 }, { "epoch": 0.42, "grad_norm": 1.5119214618492576, "learning_rate": 6.454708784894189e-06, "loss": 0.6195, "step": 6622 }, { "epoch": 0.42, "grad_norm": 1.7469396380462892, "learning_rate": 6.453717038660584e-06, "loss": 0.7568, "step": 6623 }, { "epoch": 0.42, "grad_norm": 1.6327747273302988, "learning_rate": 6.452725229949275e-06, "loss": 1.0219, "step": 6624 }, { "epoch": 0.42, "grad_norm": 1.6591342742080848, "learning_rate": 6.451733358802889e-06, "loss": 0.7071, "step": 6625 }, { "epoch": 0.42, "grad_norm": 1.5755297832344786, "learning_rate": 6.450741425264052e-06, "loss": 0.717, "step": 6626 }, { "epoch": 0.42, "grad_norm": 1.6282762561883808, "learning_rate": 6.449749429375398e-06, "loss": 0.7634, "step": 6627 }, { "epoch": 0.42, "grad_norm": 1.5438146520880298, "learning_rate": 6.4487573711795604e-06, "loss": 0.6705, "step": 6628 }, { "epoch": 0.42, "grad_norm": 1.7024987406902419, "learning_rate": 6.4477652507191744e-06, "loss": 0.8084, "step": 6629 }, { "epoch": 0.42, "grad_norm": 1.1981627946176003, "learning_rate": 6.44677306803688e-06, "loss": 0.7565, "step": 6630 }, { "epoch": 0.42, "grad_norm": 1.6341222605212304, "learning_rate": 6.44578082317532e-06, "loss": 0.6613, "step": 6631 }, { "epoch": 0.42, "grad_norm": 1.6793697723221024, "learning_rate": 6.444788516177138e-06, "loss": 0.7323, "step": 6632 }, { "epoch": 0.42, "grad_norm": 1.349408873872307, "learning_rate": 6.443796147084982e-06, "loss": 0.6453, "step": 6633 }, { "epoch": 0.42, "grad_norm": 1.6339889390801996, "learning_rate": 6.442803715941501e-06, "loss": 0.7604, "step": 6634 }, { "epoch": 0.42, "grad_norm": 1.6804605386273017, "learning_rate": 6.441811222789347e-06, "loss": 0.6979, "step": 6635 }, { "epoch": 0.42, "grad_norm": 1.8109255594796208, "learning_rate": 6.440818667671178e-06, "loss": 0.705, "step": 6636 }, { "epoch": 0.42, "grad_norm": 1.816475506035272, "learning_rate": 6.439826050629649e-06, "loss": 0.6163, "step": 6637 }, { "epoch": 0.42, "grad_norm": 1.87506537867898, "learning_rate": 6.4388333717074226e-06, "loss": 0.6777, "step": 6638 }, { "epoch": 0.42, "grad_norm": 1.1307410815734187, "learning_rate": 6.4378406309471605e-06, "loss": 0.7177, "step": 6639 }, { "epoch": 0.43, "grad_norm": 1.531881153411719, "learning_rate": 6.43684782839153e-06, "loss": 0.7225, "step": 6640 }, { "epoch": 0.43, "grad_norm": 1.7016918304371738, "learning_rate": 6.435854964083199e-06, "loss": 0.6223, "step": 6641 }, { "epoch": 0.43, "grad_norm": 1.9004237445206953, "learning_rate": 6.434862038064839e-06, "loss": 0.9287, "step": 6642 }, { "epoch": 0.43, "grad_norm": 1.6496922947313857, "learning_rate": 6.433869050379122e-06, "loss": 0.8547, "step": 6643 }, { "epoch": 0.43, "grad_norm": 1.7091946404060872, "learning_rate": 6.432876001068729e-06, "loss": 0.6748, "step": 6644 }, { "epoch": 0.43, "grad_norm": 1.4835112658125262, "learning_rate": 6.431882890176334e-06, "loss": 0.6383, "step": 6645 }, { "epoch": 0.43, "grad_norm": 1.5625055355174537, "learning_rate": 6.430889717744622e-06, "loss": 0.7173, "step": 6646 }, { "epoch": 0.43, "grad_norm": 1.1743844776927082, "learning_rate": 6.429896483816277e-06, "loss": 0.7065, "step": 6647 }, { "epoch": 0.43, "grad_norm": 1.5801429721901588, "learning_rate": 6.4289031884339855e-06, "loss": 0.7605, "step": 6648 }, { "epoch": 0.43, "grad_norm": 1.4716694752437842, "learning_rate": 6.427909831640438e-06, "loss": 0.8204, "step": 6649 }, { "epoch": 0.43, "grad_norm": 1.0510262029679096, "learning_rate": 6.426916413478326e-06, "loss": 0.6177, "step": 6650 }, { "epoch": 0.43, "grad_norm": 1.5232575085627607, "learning_rate": 6.425922933990344e-06, "loss": 0.9483, "step": 6651 }, { "epoch": 0.43, "grad_norm": 1.6255025198300541, "learning_rate": 6.424929393219192e-06, "loss": 0.7266, "step": 6652 }, { "epoch": 0.43, "grad_norm": 1.8644208616907358, "learning_rate": 6.423935791207568e-06, "loss": 0.7185, "step": 6653 }, { "epoch": 0.43, "grad_norm": 1.6694031888266774, "learning_rate": 6.422942127998175e-06, "loss": 0.8116, "step": 6654 }, { "epoch": 0.43, "grad_norm": 1.8208253455190189, "learning_rate": 6.421948403633721e-06, "loss": 0.689, "step": 6655 }, { "epoch": 0.43, "grad_norm": 1.5983500794445193, "learning_rate": 6.420954618156912e-06, "loss": 0.6911, "step": 6656 }, { "epoch": 0.43, "grad_norm": 1.664041509571603, "learning_rate": 6.4199607716104605e-06, "loss": 0.7977, "step": 6657 }, { "epoch": 0.43, "grad_norm": 1.5569499138971303, "learning_rate": 6.418966864037076e-06, "loss": 0.7148, "step": 6658 }, { "epoch": 0.43, "grad_norm": 1.080545472807263, "learning_rate": 6.41797289547948e-06, "loss": 0.6707, "step": 6659 }, { "epoch": 0.43, "grad_norm": 1.4501536376895636, "learning_rate": 6.416978865980388e-06, "loss": 0.8524, "step": 6660 }, { "epoch": 0.43, "grad_norm": 1.3358598085210203, "learning_rate": 6.415984775582521e-06, "loss": 0.7018, "step": 6661 }, { "epoch": 0.43, "grad_norm": 1.9792053881402492, "learning_rate": 6.414990624328604e-06, "loss": 0.789, "step": 6662 }, { "epoch": 0.43, "grad_norm": 1.821576472637808, "learning_rate": 6.413996412261363e-06, "loss": 0.7996, "step": 6663 }, { "epoch": 0.43, "grad_norm": 1.6158565951232635, "learning_rate": 6.413002139423527e-06, "loss": 0.7854, "step": 6664 }, { "epoch": 0.43, "grad_norm": 1.8701302850380006, "learning_rate": 6.412007805857828e-06, "loss": 0.7822, "step": 6665 }, { "epoch": 0.43, "grad_norm": 1.535209087156244, "learning_rate": 6.411013411607002e-06, "loss": 0.772, "step": 6666 }, { "epoch": 0.43, "grad_norm": 1.0817193525531055, "learning_rate": 6.410018956713784e-06, "loss": 0.6164, "step": 6667 }, { "epoch": 0.43, "grad_norm": 1.019567478773173, "learning_rate": 6.409024441220915e-06, "loss": 0.6577, "step": 6668 }, { "epoch": 0.43, "grad_norm": 1.0810693263130229, "learning_rate": 6.408029865171135e-06, "loss": 0.647, "step": 6669 }, { "epoch": 0.43, "grad_norm": 1.4901374715558555, "learning_rate": 6.40703522860719e-06, "loss": 0.7409, "step": 6670 }, { "epoch": 0.43, "grad_norm": 1.5549107093701169, "learning_rate": 6.406040531571828e-06, "loss": 0.7201, "step": 6671 }, { "epoch": 0.43, "grad_norm": 1.1103127935969006, "learning_rate": 6.405045774107798e-06, "loss": 0.7038, "step": 6672 }, { "epoch": 0.43, "grad_norm": 1.227449419596621, "learning_rate": 6.404050956257853e-06, "loss": 0.5858, "step": 6673 }, { "epoch": 0.43, "grad_norm": 1.6021835543804173, "learning_rate": 6.403056078064749e-06, "loss": 0.7377, "step": 6674 }, { "epoch": 0.43, "grad_norm": 1.8223848835198058, "learning_rate": 6.402061139571243e-06, "loss": 0.7806, "step": 6675 }, { "epoch": 0.43, "grad_norm": 1.3753750277790013, "learning_rate": 6.401066140820095e-06, "loss": 0.747, "step": 6676 }, { "epoch": 0.43, "grad_norm": 1.754806206723207, "learning_rate": 6.400071081854068e-06, "loss": 0.7705, "step": 6677 }, { "epoch": 0.43, "grad_norm": 1.1104434408112673, "learning_rate": 6.3990759627159285e-06, "loss": 0.7453, "step": 6678 }, { "epoch": 0.43, "grad_norm": 1.9854552607850837, "learning_rate": 6.398080783448443e-06, "loss": 0.8191, "step": 6679 }, { "epoch": 0.43, "grad_norm": 1.6450592009390477, "learning_rate": 6.397085544094383e-06, "loss": 0.6876, "step": 6680 }, { "epoch": 0.43, "grad_norm": 1.6288907256563363, "learning_rate": 6.396090244696523e-06, "loss": 0.8489, "step": 6681 }, { "epoch": 0.43, "grad_norm": 1.6973089121220657, "learning_rate": 6.395094885297637e-06, "loss": 0.8114, "step": 6682 }, { "epoch": 0.43, "grad_norm": 1.7589000975805655, "learning_rate": 6.394099465940505e-06, "loss": 0.6741, "step": 6683 }, { "epoch": 0.43, "grad_norm": 1.5484395887048106, "learning_rate": 6.393103986667908e-06, "loss": 0.6439, "step": 6684 }, { "epoch": 0.43, "grad_norm": 1.5215393130582178, "learning_rate": 6.3921084475226295e-06, "loss": 0.6104, "step": 6685 }, { "epoch": 0.43, "grad_norm": 1.4434705878841534, "learning_rate": 6.3911128485474515e-06, "loss": 0.7395, "step": 6686 }, { "epoch": 0.43, "grad_norm": 1.665586707995063, "learning_rate": 6.39011718978517e-06, "loss": 0.7567, "step": 6687 }, { "epoch": 0.43, "grad_norm": 1.7267015852748517, "learning_rate": 6.389121471278572e-06, "loss": 0.7219, "step": 6688 }, { "epoch": 0.43, "grad_norm": 1.4952389280196177, "learning_rate": 6.388125693070452e-06, "loss": 0.7009, "step": 6689 }, { "epoch": 0.43, "grad_norm": 2.8059718548135746, "learning_rate": 6.387129855203606e-06, "loss": 0.775, "step": 6690 }, { "epoch": 0.43, "grad_norm": 1.6480490684211384, "learning_rate": 6.386133957720833e-06, "loss": 1.0525, "step": 6691 }, { "epoch": 0.43, "grad_norm": 1.5031105056507972, "learning_rate": 6.385138000664937e-06, "loss": 0.7554, "step": 6692 }, { "epoch": 0.43, "grad_norm": 2.123164604996875, "learning_rate": 6.384141984078719e-06, "loss": 0.7662, "step": 6693 }, { "epoch": 0.43, "grad_norm": 1.6224293385249178, "learning_rate": 6.3831459080049865e-06, "loss": 0.7668, "step": 6694 }, { "epoch": 0.43, "grad_norm": 2.2604760536560304, "learning_rate": 6.38214977248655e-06, "loss": 0.7793, "step": 6695 }, { "epoch": 0.43, "grad_norm": 1.491495600929566, "learning_rate": 6.381153577566222e-06, "loss": 0.7627, "step": 6696 }, { "epoch": 0.43, "grad_norm": 1.6930780671358783, "learning_rate": 6.380157323286813e-06, "loss": 0.8056, "step": 6697 }, { "epoch": 0.43, "grad_norm": 1.2124889752563308, "learning_rate": 6.3791610096911435e-06, "loss": 0.6319, "step": 6698 }, { "epoch": 0.43, "grad_norm": 1.4822681091397043, "learning_rate": 6.378164636822033e-06, "loss": 1.0285, "step": 6699 }, { "epoch": 0.43, "grad_norm": 1.766730610697495, "learning_rate": 6.3771682047223e-06, "loss": 0.8037, "step": 6700 }, { "epoch": 0.43, "grad_norm": 1.0525538945810924, "learning_rate": 6.376171713434771e-06, "loss": 0.6368, "step": 6701 }, { "epoch": 0.43, "grad_norm": 1.524545661583296, "learning_rate": 6.375175163002275e-06, "loss": 0.6407, "step": 6702 }, { "epoch": 0.43, "grad_norm": 1.74755054292325, "learning_rate": 6.3741785534676404e-06, "loss": 0.7711, "step": 6703 }, { "epoch": 0.43, "grad_norm": 1.851035550267048, "learning_rate": 6.373181884873699e-06, "loss": 0.6607, "step": 6704 }, { "epoch": 0.43, "grad_norm": 1.5431057446643877, "learning_rate": 6.372185157263287e-06, "loss": 0.755, "step": 6705 }, { "epoch": 0.43, "grad_norm": 1.149141398860906, "learning_rate": 6.3711883706792375e-06, "loss": 0.6512, "step": 6706 }, { "epoch": 0.43, "grad_norm": 1.668328120774233, "learning_rate": 6.370191525164394e-06, "loss": 0.649, "step": 6707 }, { "epoch": 0.43, "grad_norm": 1.7733509040068767, "learning_rate": 6.369194620761598e-06, "loss": 0.7702, "step": 6708 }, { "epoch": 0.43, "grad_norm": 1.7858444301540661, "learning_rate": 6.368197657513695e-06, "loss": 0.8125, "step": 6709 }, { "epoch": 0.43, "grad_norm": 1.800047621003558, "learning_rate": 6.367200635463531e-06, "loss": 0.8417, "step": 6710 }, { "epoch": 0.43, "grad_norm": 1.7585099026772046, "learning_rate": 6.366203554653957e-06, "loss": 0.7768, "step": 6711 }, { "epoch": 0.43, "grad_norm": 1.6676440496157114, "learning_rate": 6.365206415127825e-06, "loss": 0.7135, "step": 6712 }, { "epoch": 0.43, "grad_norm": 1.0861187066324849, "learning_rate": 6.36420921692799e-06, "loss": 0.7416, "step": 6713 }, { "epoch": 0.43, "grad_norm": 1.5673149995333788, "learning_rate": 6.363211960097309e-06, "loss": 0.801, "step": 6714 }, { "epoch": 0.43, "grad_norm": 1.655534396976137, "learning_rate": 6.362214644678641e-06, "loss": 0.8004, "step": 6715 }, { "epoch": 0.43, "grad_norm": 1.252708771211258, "learning_rate": 6.361217270714854e-06, "loss": 0.7333, "step": 6716 }, { "epoch": 0.43, "grad_norm": 1.579528012937374, "learning_rate": 6.360219838248806e-06, "loss": 0.6934, "step": 6717 }, { "epoch": 0.43, "grad_norm": 1.7245053502253398, "learning_rate": 6.359222347323368e-06, "loss": 0.8226, "step": 6718 }, { "epoch": 0.43, "grad_norm": 1.59760072823176, "learning_rate": 6.358224797981409e-06, "loss": 0.7342, "step": 6719 }, { "epoch": 0.43, "grad_norm": 1.7373277310424242, "learning_rate": 6.357227190265804e-06, "loss": 0.7746, "step": 6720 }, { "epoch": 0.43, "grad_norm": 1.9248720387172527, "learning_rate": 6.356229524219425e-06, "loss": 0.7174, "step": 6721 }, { "epoch": 0.43, "grad_norm": 1.5767128014984757, "learning_rate": 6.355231799885151e-06, "loss": 0.7461, "step": 6722 }, { "epoch": 0.43, "grad_norm": 1.5698540562184549, "learning_rate": 6.354234017305863e-06, "loss": 0.7844, "step": 6723 }, { "epoch": 0.43, "grad_norm": 1.7455117483297877, "learning_rate": 6.353236176524441e-06, "loss": 0.8153, "step": 6724 }, { "epoch": 0.43, "grad_norm": 1.4273026413913141, "learning_rate": 6.352238277583773e-06, "loss": 0.7577, "step": 6725 }, { "epoch": 0.43, "grad_norm": 1.5289990332409844, "learning_rate": 6.351240320526744e-06, "loss": 0.7503, "step": 6726 }, { "epoch": 0.43, "grad_norm": 1.1872911047766883, "learning_rate": 6.350242305396247e-06, "loss": 0.7581, "step": 6727 }, { "epoch": 0.43, "grad_norm": 1.7018656450882275, "learning_rate": 6.349244232235172e-06, "loss": 0.7537, "step": 6728 }, { "epoch": 0.43, "grad_norm": 1.621876624132327, "learning_rate": 6.348246101086414e-06, "loss": 0.7585, "step": 6729 }, { "epoch": 0.43, "grad_norm": 1.5956557963037168, "learning_rate": 6.347247911992873e-06, "loss": 0.7571, "step": 6730 }, { "epoch": 0.43, "grad_norm": 1.1981821878803023, "learning_rate": 6.346249664997448e-06, "loss": 0.5778, "step": 6731 }, { "epoch": 0.43, "grad_norm": 1.7423116105656813, "learning_rate": 6.345251360143041e-06, "loss": 0.8615, "step": 6732 }, { "epoch": 0.43, "grad_norm": 1.6046492432935877, "learning_rate": 6.344252997472556e-06, "loss": 0.7341, "step": 6733 }, { "epoch": 0.43, "grad_norm": 1.5510809624702022, "learning_rate": 6.343254577028903e-06, "loss": 0.7127, "step": 6734 }, { "epoch": 0.43, "grad_norm": 1.4400806476375914, "learning_rate": 6.342256098854992e-06, "loss": 0.7334, "step": 6735 }, { "epoch": 0.43, "grad_norm": 1.6869808602722602, "learning_rate": 6.341257562993732e-06, "loss": 0.8262, "step": 6736 }, { "epoch": 0.43, "grad_norm": 1.461350750712655, "learning_rate": 6.34025896948804e-06, "loss": 0.7173, "step": 6737 }, { "epoch": 0.43, "grad_norm": 1.6914812279669207, "learning_rate": 6.339260318380835e-06, "loss": 0.7875, "step": 6738 }, { "epoch": 0.43, "grad_norm": 1.4840381038757902, "learning_rate": 6.338261609715037e-06, "loss": 0.7019, "step": 6739 }, { "epoch": 0.43, "grad_norm": 1.6269469135441688, "learning_rate": 6.337262843533566e-06, "loss": 0.7633, "step": 6740 }, { "epoch": 0.43, "grad_norm": 1.5329959110939042, "learning_rate": 6.336264019879348e-06, "loss": 0.6929, "step": 6741 }, { "epoch": 0.43, "grad_norm": 1.7358162850903065, "learning_rate": 6.33526513879531e-06, "loss": 0.7192, "step": 6742 }, { "epoch": 0.43, "grad_norm": 1.7578640772908383, "learning_rate": 6.334266200324381e-06, "loss": 0.7788, "step": 6743 }, { "epoch": 0.43, "grad_norm": 1.5515686593055156, "learning_rate": 6.333267204509497e-06, "loss": 0.6697, "step": 6744 }, { "epoch": 0.43, "grad_norm": 1.5186049323393507, "learning_rate": 6.332268151393589e-06, "loss": 0.6179, "step": 6745 }, { "epoch": 0.43, "grad_norm": 1.5418674362944376, "learning_rate": 6.331269041019596e-06, "loss": 0.849, "step": 6746 }, { "epoch": 0.43, "grad_norm": 1.666899602130408, "learning_rate": 6.330269873430455e-06, "loss": 0.7522, "step": 6747 }, { "epoch": 0.43, "grad_norm": 1.5044355359850645, "learning_rate": 6.329270648669111e-06, "loss": 0.7019, "step": 6748 }, { "epoch": 0.43, "grad_norm": 0.9796778538916266, "learning_rate": 6.3282713667785086e-06, "loss": 0.6533, "step": 6749 }, { "epoch": 0.43, "grad_norm": 1.3418161704737386, "learning_rate": 6.327272027801592e-06, "loss": 0.6717, "step": 6750 }, { "epoch": 0.43, "grad_norm": 1.7813040295334006, "learning_rate": 6.326272631781314e-06, "loss": 0.7983, "step": 6751 }, { "epoch": 0.43, "grad_norm": 1.768804313541908, "learning_rate": 6.3252731787606256e-06, "loss": 0.7709, "step": 6752 }, { "epoch": 0.43, "grad_norm": 1.8157749890850072, "learning_rate": 6.32427366878248e-06, "loss": 0.6774, "step": 6753 }, { "epoch": 0.43, "grad_norm": 1.7080670332525651, "learning_rate": 6.323274101889836e-06, "loss": 0.8043, "step": 6754 }, { "epoch": 0.43, "grad_norm": 1.4991474011606993, "learning_rate": 6.322274478125651e-06, "loss": 0.6257, "step": 6755 }, { "epoch": 0.43, "grad_norm": 1.2490520298054235, "learning_rate": 6.321274797532886e-06, "loss": 0.7828, "step": 6756 }, { "epoch": 0.43, "grad_norm": 1.4413475985835795, "learning_rate": 6.320275060154508e-06, "loss": 0.6736, "step": 6757 }, { "epoch": 0.43, "grad_norm": 1.5697990666281911, "learning_rate": 6.319275266033481e-06, "loss": 0.638, "step": 6758 }, { "epoch": 0.43, "grad_norm": 1.545214567660786, "learning_rate": 6.318275415212777e-06, "loss": 0.7482, "step": 6759 }, { "epoch": 0.43, "grad_norm": 1.7415119437726607, "learning_rate": 6.317275507735364e-06, "loss": 0.899, "step": 6760 }, { "epoch": 0.43, "grad_norm": 1.4634044443326981, "learning_rate": 6.316275543644219e-06, "loss": 0.724, "step": 6761 }, { "epoch": 0.43, "grad_norm": 1.6014555966674309, "learning_rate": 6.315275522982317e-06, "loss": 0.8107, "step": 6762 }, { "epoch": 0.43, "grad_norm": 1.7134169033975193, "learning_rate": 6.314275445792637e-06, "loss": 0.6849, "step": 6763 }, { "epoch": 0.43, "grad_norm": 1.5766031046574351, "learning_rate": 6.313275312118159e-06, "loss": 0.7216, "step": 6764 }, { "epoch": 0.43, "grad_norm": 1.636617502185943, "learning_rate": 6.312275122001867e-06, "loss": 0.8064, "step": 6765 }, { "epoch": 0.43, "grad_norm": 2.2522096915961254, "learning_rate": 6.311274875486748e-06, "loss": 0.723, "step": 6766 }, { "epoch": 0.43, "grad_norm": 1.6899870319585113, "learning_rate": 6.310274572615792e-06, "loss": 0.7805, "step": 6767 }, { "epoch": 0.43, "grad_norm": 1.3151226164392271, "learning_rate": 6.309274213431987e-06, "loss": 0.684, "step": 6768 }, { "epoch": 0.43, "grad_norm": 1.165941421983151, "learning_rate": 6.308273797978328e-06, "loss": 0.6258, "step": 6769 }, { "epoch": 0.43, "grad_norm": 1.6607016943467252, "learning_rate": 6.307273326297811e-06, "loss": 0.7169, "step": 6770 }, { "epoch": 0.43, "grad_norm": 1.5805879761386774, "learning_rate": 6.30627279843343e-06, "loss": 0.767, "step": 6771 }, { "epoch": 0.43, "grad_norm": 1.8225366064157327, "learning_rate": 6.305272214428192e-06, "loss": 0.656, "step": 6772 }, { "epoch": 0.43, "grad_norm": 1.7104555253796028, "learning_rate": 6.304271574325096e-06, "loss": 0.8752, "step": 6773 }, { "epoch": 0.43, "grad_norm": 1.594539785388071, "learning_rate": 6.303270878167148e-06, "loss": 0.7817, "step": 6774 }, { "epoch": 0.43, "grad_norm": 1.4387904177770487, "learning_rate": 6.3022701259973565e-06, "loss": 0.5419, "step": 6775 }, { "epoch": 0.43, "grad_norm": 1.382271472587081, "learning_rate": 6.301269317858733e-06, "loss": 0.7061, "step": 6776 }, { "epoch": 0.43, "grad_norm": 2.307339311773177, "learning_rate": 6.300268453794287e-06, "loss": 0.773, "step": 6777 }, { "epoch": 0.43, "grad_norm": 1.6410192582471985, "learning_rate": 6.299267533847035e-06, "loss": 0.6875, "step": 6778 }, { "epoch": 0.43, "grad_norm": 1.6130873565729105, "learning_rate": 6.298266558059995e-06, "loss": 0.6205, "step": 6779 }, { "epoch": 0.43, "grad_norm": 1.4096882054609425, "learning_rate": 6.297265526476186e-06, "loss": 0.6078, "step": 6780 }, { "epoch": 0.43, "grad_norm": 1.715878961427221, "learning_rate": 6.296264439138631e-06, "loss": 0.7921, "step": 6781 }, { "epoch": 0.43, "grad_norm": 1.7764961767580103, "learning_rate": 6.295263296090355e-06, "loss": 0.7387, "step": 6782 }, { "epoch": 0.43, "grad_norm": 1.6090419435058931, "learning_rate": 6.294262097374383e-06, "loss": 0.6364, "step": 6783 }, { "epoch": 0.43, "grad_norm": 1.6198480074647588, "learning_rate": 6.293260843033745e-06, "loss": 0.6285, "step": 6784 }, { "epoch": 0.43, "grad_norm": 0.9801721647174241, "learning_rate": 6.292259533111474e-06, "loss": 0.6487, "step": 6785 }, { "epoch": 0.43, "grad_norm": 1.88486054068257, "learning_rate": 6.291258167650605e-06, "loss": 0.871, "step": 6786 }, { "epoch": 0.43, "grad_norm": 1.8799207947863177, "learning_rate": 6.2902567466941725e-06, "loss": 0.8051, "step": 6787 }, { "epoch": 0.43, "grad_norm": 1.5793903726473848, "learning_rate": 6.289255270285215e-06, "loss": 1.0307, "step": 6788 }, { "epoch": 0.43, "grad_norm": 1.742572132914128, "learning_rate": 6.288253738466777e-06, "loss": 0.7216, "step": 6789 }, { "epoch": 0.43, "grad_norm": 1.7321336356152017, "learning_rate": 6.2872521512819e-06, "loss": 0.7682, "step": 6790 }, { "epoch": 0.43, "grad_norm": 1.5501704946581596, "learning_rate": 6.286250508773631e-06, "loss": 0.7098, "step": 6791 }, { "epoch": 0.43, "grad_norm": 1.6368073342512017, "learning_rate": 6.285248810985015e-06, "loss": 0.8209, "step": 6792 }, { "epoch": 0.43, "grad_norm": 2.537837523108455, "learning_rate": 6.284247057959107e-06, "loss": 0.7661, "step": 6793 }, { "epoch": 0.43, "grad_norm": 1.7567270008306535, "learning_rate": 6.28324524973896e-06, "loss": 0.8961, "step": 6794 }, { "epoch": 0.43, "grad_norm": 1.7429718658149302, "learning_rate": 6.282243386367628e-06, "loss": 0.9016, "step": 6795 }, { "epoch": 0.43, "grad_norm": 1.6200094241134109, "learning_rate": 6.281241467888171e-06, "loss": 0.7698, "step": 6796 }, { "epoch": 0.44, "grad_norm": 1.1938744639401344, "learning_rate": 6.280239494343647e-06, "loss": 0.6297, "step": 6797 }, { "epoch": 0.44, "grad_norm": 1.7052435876019088, "learning_rate": 6.2792374657771195e-06, "loss": 0.8565, "step": 6798 }, { "epoch": 0.44, "grad_norm": 1.5537223580542048, "learning_rate": 6.278235382231654e-06, "loss": 0.7003, "step": 6799 }, { "epoch": 0.44, "grad_norm": 1.459222122377263, "learning_rate": 6.277233243750317e-06, "loss": 0.6077, "step": 6800 }, { "epoch": 0.44, "grad_norm": 1.6301487771663996, "learning_rate": 6.27623105037618e-06, "loss": 0.7447, "step": 6801 }, { "epoch": 0.44, "grad_norm": 1.5819433952933424, "learning_rate": 6.275228802152313e-06, "loss": 0.7583, "step": 6802 }, { "epoch": 0.44, "grad_norm": 1.005515856473313, "learning_rate": 6.274226499121793e-06, "loss": 0.6384, "step": 6803 }, { "epoch": 0.44, "grad_norm": 1.535138652257406, "learning_rate": 6.273224141327695e-06, "loss": 0.6938, "step": 6804 }, { "epoch": 0.44, "grad_norm": 1.7921025628605216, "learning_rate": 6.272221728813099e-06, "loss": 0.8879, "step": 6805 }, { "epoch": 0.44, "grad_norm": 2.264171150081156, "learning_rate": 6.2712192616210866e-06, "loss": 0.8186, "step": 6806 }, { "epoch": 0.44, "grad_norm": 1.5647146622852208, "learning_rate": 6.270216739794741e-06, "loss": 0.7877, "step": 6807 }, { "epoch": 0.44, "grad_norm": 1.4487761415733635, "learning_rate": 6.26921416337715e-06, "loss": 0.5979, "step": 6808 }, { "epoch": 0.44, "grad_norm": 1.6215492871438346, "learning_rate": 6.2682115324114e-06, "loss": 0.778, "step": 6809 }, { "epoch": 0.44, "grad_norm": 1.6341406874592668, "learning_rate": 6.267208846940584e-06, "loss": 0.7492, "step": 6810 }, { "epoch": 0.44, "grad_norm": 1.109656617636538, "learning_rate": 6.266206107007793e-06, "loss": 0.5915, "step": 6811 }, { "epoch": 0.44, "grad_norm": 1.6922723023094184, "learning_rate": 6.265203312656126e-06, "loss": 0.7414, "step": 6812 }, { "epoch": 0.44, "grad_norm": 1.8441443324191462, "learning_rate": 6.264200463928679e-06, "loss": 0.7512, "step": 6813 }, { "epoch": 0.44, "grad_norm": 1.7490154758031276, "learning_rate": 6.263197560868553e-06, "loss": 0.8144, "step": 6814 }, { "epoch": 0.44, "grad_norm": 9.37082683784363, "learning_rate": 6.2621946035188474e-06, "loss": 0.7453, "step": 6815 }, { "epoch": 0.44, "grad_norm": 1.9244130346077302, "learning_rate": 6.261191591922673e-06, "loss": 0.7431, "step": 6816 }, { "epoch": 0.44, "grad_norm": 1.6134083070042935, "learning_rate": 6.260188526123133e-06, "loss": 0.8043, "step": 6817 }, { "epoch": 0.44, "grad_norm": 1.509335923897447, "learning_rate": 6.259185406163338e-06, "loss": 0.7579, "step": 6818 }, { "epoch": 0.44, "grad_norm": 1.6100802940953776, "learning_rate": 6.2581822320864e-06, "loss": 0.7409, "step": 6819 }, { "epoch": 0.44, "grad_norm": 1.7309519649320109, "learning_rate": 6.257179003935435e-06, "loss": 0.7743, "step": 6820 }, { "epoch": 0.44, "grad_norm": 1.499828750526965, "learning_rate": 6.256175721753556e-06, "loss": 0.7437, "step": 6821 }, { "epoch": 0.44, "grad_norm": 1.7704350635917885, "learning_rate": 6.255172385583884e-06, "loss": 0.7901, "step": 6822 }, { "epoch": 0.44, "grad_norm": 1.5194688496665054, "learning_rate": 6.254168995469541e-06, "loss": 0.6557, "step": 6823 }, { "epoch": 0.44, "grad_norm": 1.296926807614399, "learning_rate": 6.253165551453652e-06, "loss": 0.6542, "step": 6824 }, { "epoch": 0.44, "grad_norm": 1.4901795234996265, "learning_rate": 6.252162053579338e-06, "loss": 0.611, "step": 6825 }, { "epoch": 0.44, "grad_norm": 1.8119227404207976, "learning_rate": 6.251158501889732e-06, "loss": 0.8133, "step": 6826 }, { "epoch": 0.44, "grad_norm": 1.7979796312066778, "learning_rate": 6.250154896427962e-06, "loss": 0.8261, "step": 6827 }, { "epoch": 0.44, "grad_norm": 2.106886672050227, "learning_rate": 6.249151237237161e-06, "loss": 0.7841, "step": 6828 }, { "epoch": 0.44, "grad_norm": 1.849914662098529, "learning_rate": 6.2481475243604654e-06, "loss": 0.6579, "step": 6829 }, { "epoch": 0.44, "grad_norm": 1.1860802773384551, "learning_rate": 6.24714375784101e-06, "loss": 0.6857, "step": 6830 }, { "epoch": 0.44, "grad_norm": 2.3840458845899755, "learning_rate": 6.246139937721939e-06, "loss": 0.731, "step": 6831 }, { "epoch": 0.44, "grad_norm": 1.6550299201975498, "learning_rate": 6.245136064046391e-06, "loss": 0.7201, "step": 6832 }, { "epoch": 0.44, "grad_norm": 1.8400198921697348, "learning_rate": 6.244132136857511e-06, "loss": 0.8152, "step": 6833 }, { "epoch": 0.44, "grad_norm": 1.6150349912867257, "learning_rate": 6.243128156198447e-06, "loss": 0.6489, "step": 6834 }, { "epoch": 0.44, "grad_norm": 1.691841068121404, "learning_rate": 6.242124122112347e-06, "loss": 0.9012, "step": 6835 }, { "epoch": 0.44, "grad_norm": 1.051170716823803, "learning_rate": 6.241120034642361e-06, "loss": 0.6395, "step": 6836 }, { "epoch": 0.44, "grad_norm": 1.2773082454785847, "learning_rate": 6.240115893831644e-06, "loss": 0.6275, "step": 6837 }, { "epoch": 0.44, "grad_norm": 1.6695539416178085, "learning_rate": 6.239111699723353e-06, "loss": 0.7267, "step": 6838 }, { "epoch": 0.44, "grad_norm": 1.684263694175303, "learning_rate": 6.238107452360643e-06, "loss": 0.907, "step": 6839 }, { "epoch": 0.44, "grad_norm": 1.6375652035411354, "learning_rate": 6.2371031517866785e-06, "loss": 0.9336, "step": 6840 }, { "epoch": 0.44, "grad_norm": 1.5922963228349407, "learning_rate": 6.236098798044619e-06, "loss": 0.7581, "step": 6841 }, { "epoch": 0.44, "grad_norm": 2.237530272901415, "learning_rate": 6.235094391177631e-06, "loss": 0.782, "step": 6842 }, { "epoch": 0.44, "grad_norm": 1.8587192515701005, "learning_rate": 6.2340899312288795e-06, "loss": 0.699, "step": 6843 }, { "epoch": 0.44, "grad_norm": 1.5936428914433611, "learning_rate": 6.233085418241538e-06, "loss": 0.7753, "step": 6844 }, { "epoch": 0.44, "grad_norm": 1.833096025249523, "learning_rate": 6.232080852258776e-06, "loss": 0.6305, "step": 6845 }, { "epoch": 0.44, "grad_norm": 1.5150225841718084, "learning_rate": 6.231076233323767e-06, "loss": 0.7573, "step": 6846 }, { "epoch": 0.44, "grad_norm": 1.7694505250549273, "learning_rate": 6.23007156147969e-06, "loss": 0.8588, "step": 6847 }, { "epoch": 0.44, "grad_norm": 1.5802568613055659, "learning_rate": 6.229066836769721e-06, "loss": 0.8141, "step": 6848 }, { "epoch": 0.44, "grad_norm": 1.1896927524899377, "learning_rate": 6.228062059237041e-06, "loss": 0.7215, "step": 6849 }, { "epoch": 0.44, "grad_norm": 2.0538654852572225, "learning_rate": 6.227057228924836e-06, "loss": 0.7183, "step": 6850 }, { "epoch": 0.44, "grad_norm": 1.8436364923524795, "learning_rate": 6.226052345876288e-06, "loss": 0.8273, "step": 6851 }, { "epoch": 0.44, "grad_norm": 3.0459958635829567, "learning_rate": 6.225047410134588e-06, "loss": 0.7138, "step": 6852 }, { "epoch": 0.44, "grad_norm": 2.746149300948341, "learning_rate": 6.224042421742924e-06, "loss": 0.7027, "step": 6853 }, { "epoch": 0.44, "grad_norm": 1.897395382516836, "learning_rate": 6.223037380744489e-06, "loss": 0.724, "step": 6854 }, { "epoch": 0.44, "grad_norm": 1.9187138904304957, "learning_rate": 6.222032287182477e-06, "loss": 0.8516, "step": 6855 }, { "epoch": 0.44, "grad_norm": 1.6547104559809014, "learning_rate": 6.221027141100084e-06, "loss": 0.7604, "step": 6856 }, { "epoch": 0.44, "grad_norm": 1.604929983119433, "learning_rate": 6.220021942540512e-06, "loss": 0.6098, "step": 6857 }, { "epoch": 0.44, "grad_norm": 2.017239984862663, "learning_rate": 6.2190166915469586e-06, "loss": 0.765, "step": 6858 }, { "epoch": 0.44, "grad_norm": 1.2541502742612396, "learning_rate": 6.21801138816263e-06, "loss": 0.5509, "step": 6859 }, { "epoch": 0.44, "grad_norm": 0.9662323798276211, "learning_rate": 6.217006032430732e-06, "loss": 0.5374, "step": 6860 }, { "epoch": 0.44, "grad_norm": 1.7412554231884347, "learning_rate": 6.2160006243944726e-06, "loss": 0.908, "step": 6861 }, { "epoch": 0.44, "grad_norm": 1.5517377031294657, "learning_rate": 6.214995164097062e-06, "loss": 0.6616, "step": 6862 }, { "epoch": 0.44, "grad_norm": 1.8311552215690672, "learning_rate": 6.213989651581711e-06, "loss": 0.7851, "step": 6863 }, { "epoch": 0.44, "grad_norm": 1.4652255494407824, "learning_rate": 6.212984086891635e-06, "loss": 0.8126, "step": 6864 }, { "epoch": 0.44, "grad_norm": 1.8820400136846622, "learning_rate": 6.211978470070052e-06, "loss": 0.7463, "step": 6865 }, { "epoch": 0.44, "grad_norm": 3.2911242249299635, "learning_rate": 6.210972801160182e-06, "loss": 0.6375, "step": 6866 }, { "epoch": 0.44, "grad_norm": 1.9630377539881882, "learning_rate": 6.209967080205244e-06, "loss": 0.7269, "step": 6867 }, { "epoch": 0.44, "grad_norm": 1.7591569003996965, "learning_rate": 6.208961307248466e-06, "loss": 0.7993, "step": 6868 }, { "epoch": 0.44, "grad_norm": 1.6718778135414567, "learning_rate": 6.20795548233307e-06, "loss": 0.6778, "step": 6869 }, { "epoch": 0.44, "grad_norm": 1.8357014764507944, "learning_rate": 6.206949605502286e-06, "loss": 0.795, "step": 6870 }, { "epoch": 0.44, "grad_norm": 1.6105881649446905, "learning_rate": 6.205943676799344e-06, "loss": 0.6504, "step": 6871 }, { "epoch": 0.44, "grad_norm": 1.749048001592789, "learning_rate": 6.204937696267475e-06, "loss": 0.6268, "step": 6872 }, { "epoch": 0.44, "grad_norm": 1.1613862199077185, "learning_rate": 6.203931663949918e-06, "loss": 0.7674, "step": 6873 }, { "epoch": 0.44, "grad_norm": 1.1621845620330367, "learning_rate": 6.202925579889908e-06, "loss": 0.7404, "step": 6874 }, { "epoch": 0.44, "grad_norm": 1.7745430437445657, "learning_rate": 6.201919444130684e-06, "loss": 0.7118, "step": 6875 }, { "epoch": 0.44, "grad_norm": 1.0621586216573085, "learning_rate": 6.200913256715486e-06, "loss": 0.634, "step": 6876 }, { "epoch": 0.44, "grad_norm": 1.5777491364628817, "learning_rate": 6.199907017687562e-06, "loss": 0.8041, "step": 6877 }, { "epoch": 0.44, "grad_norm": 1.196258770317886, "learning_rate": 6.198900727090155e-06, "loss": 0.6501, "step": 6878 }, { "epoch": 0.44, "grad_norm": 2.5226617133616176, "learning_rate": 6.197894384966513e-06, "loss": 0.7807, "step": 6879 }, { "epoch": 0.44, "grad_norm": 1.8340041035659145, "learning_rate": 6.1968879913598874e-06, "loss": 0.7608, "step": 6880 }, { "epoch": 0.44, "grad_norm": 1.6106774552217047, "learning_rate": 6.195881546313533e-06, "loss": 0.6263, "step": 6881 }, { "epoch": 0.44, "grad_norm": 1.760122224123128, "learning_rate": 6.194875049870701e-06, "loss": 0.6784, "step": 6882 }, { "epoch": 0.44, "grad_norm": 1.6690841138414871, "learning_rate": 6.193868502074651e-06, "loss": 0.7355, "step": 6883 }, { "epoch": 0.44, "grad_norm": 1.1531132631930066, "learning_rate": 6.192861902968641e-06, "loss": 0.6072, "step": 6884 }, { "epoch": 0.44, "grad_norm": 1.6252902248302121, "learning_rate": 6.191855252595933e-06, "loss": 0.7896, "step": 6885 }, { "epoch": 0.44, "grad_norm": 1.5670855774709922, "learning_rate": 6.1908485509997905e-06, "loss": 0.7343, "step": 6886 }, { "epoch": 0.44, "grad_norm": 1.6097323587229124, "learning_rate": 6.189841798223479e-06, "loss": 0.6862, "step": 6887 }, { "epoch": 0.44, "grad_norm": 1.932056932891044, "learning_rate": 6.188834994310268e-06, "loss": 0.691, "step": 6888 }, { "epoch": 0.44, "grad_norm": 1.7687847370862002, "learning_rate": 6.1878281393034275e-06, "loss": 0.733, "step": 6889 }, { "epoch": 0.44, "grad_norm": 1.581162160904567, "learning_rate": 6.18682123324623e-06, "loss": 0.7312, "step": 6890 }, { "epoch": 0.44, "grad_norm": 1.6494863991209687, "learning_rate": 6.1858142761819484e-06, "loss": 0.787, "step": 6891 }, { "epoch": 0.44, "grad_norm": 1.7120933067667679, "learning_rate": 6.184807268153862e-06, "loss": 0.8028, "step": 6892 }, { "epoch": 0.44, "grad_norm": 1.5712612042529186, "learning_rate": 6.1838002092052465e-06, "loss": 0.671, "step": 6893 }, { "epoch": 0.44, "grad_norm": 1.7339407860699096, "learning_rate": 6.182793099379387e-06, "loss": 0.7662, "step": 6894 }, { "epoch": 0.44, "grad_norm": 3.2921891249025927, "learning_rate": 6.181785938719566e-06, "loss": 0.6833, "step": 6895 }, { "epoch": 0.44, "grad_norm": 1.7817418319024079, "learning_rate": 6.180778727269067e-06, "loss": 0.8071, "step": 6896 }, { "epoch": 0.44, "grad_norm": 1.5622563993335097, "learning_rate": 6.179771465071182e-06, "loss": 0.6826, "step": 6897 }, { "epoch": 0.44, "grad_norm": 2.3962522379841498, "learning_rate": 6.178764152169198e-06, "loss": 0.8746, "step": 6898 }, { "epoch": 0.44, "grad_norm": 1.8426296979981043, "learning_rate": 6.177756788606406e-06, "loss": 0.821, "step": 6899 }, { "epoch": 0.44, "grad_norm": 1.7830391988462029, "learning_rate": 6.176749374426103e-06, "loss": 0.8161, "step": 6900 }, { "epoch": 0.44, "grad_norm": 1.5884304543792764, "learning_rate": 6.175741909671584e-06, "loss": 0.8107, "step": 6901 }, { "epoch": 0.44, "grad_norm": 1.586833520882367, "learning_rate": 6.174734394386149e-06, "loss": 0.8449, "step": 6902 }, { "epoch": 0.44, "grad_norm": 1.60236854603766, "learning_rate": 6.173726828613098e-06, "loss": 0.802, "step": 6903 }, { "epoch": 0.44, "grad_norm": 1.5657617155769925, "learning_rate": 6.172719212395734e-06, "loss": 0.7068, "step": 6904 }, { "epoch": 0.44, "grad_norm": 1.733434243271957, "learning_rate": 6.171711545777363e-06, "loss": 0.7658, "step": 6905 }, { "epoch": 0.44, "grad_norm": 1.7521904017002308, "learning_rate": 6.170703828801292e-06, "loss": 0.8216, "step": 6906 }, { "epoch": 0.44, "grad_norm": 1.1510465529599616, "learning_rate": 6.169696061510831e-06, "loss": 0.7128, "step": 6907 }, { "epoch": 0.44, "grad_norm": 1.6505977340254834, "learning_rate": 6.168688243949288e-06, "loss": 0.8084, "step": 6908 }, { "epoch": 0.44, "grad_norm": 1.7711035188782218, "learning_rate": 6.167680376159983e-06, "loss": 0.9209, "step": 6909 }, { "epoch": 0.44, "grad_norm": 1.9503998955884585, "learning_rate": 6.166672458186228e-06, "loss": 0.7613, "step": 6910 }, { "epoch": 0.44, "grad_norm": 1.3417604644987842, "learning_rate": 6.165664490071343e-06, "loss": 0.6827, "step": 6911 }, { "epoch": 0.44, "grad_norm": 1.6883621380510752, "learning_rate": 6.164656471858648e-06, "loss": 0.7725, "step": 6912 }, { "epoch": 0.44, "grad_norm": 1.5851266566299358, "learning_rate": 6.163648403591462e-06, "loss": 0.619, "step": 6913 }, { "epoch": 0.44, "grad_norm": 1.7441530844173116, "learning_rate": 6.162640285313116e-06, "loss": 0.6758, "step": 6914 }, { "epoch": 0.44, "grad_norm": 1.099566134776045, "learning_rate": 6.161632117066932e-06, "loss": 0.5889, "step": 6915 }, { "epoch": 0.44, "grad_norm": 1.0948902257704376, "learning_rate": 6.1606238988962405e-06, "loss": 0.6299, "step": 6916 }, { "epoch": 0.44, "grad_norm": 1.7318811666966087, "learning_rate": 6.1596156308443746e-06, "loss": 0.7788, "step": 6917 }, { "epoch": 0.44, "grad_norm": 1.1554178852861847, "learning_rate": 6.158607312954664e-06, "loss": 0.5865, "step": 6918 }, { "epoch": 0.44, "grad_norm": 1.5841159167109693, "learning_rate": 6.157598945270447e-06, "loss": 0.7385, "step": 6919 }, { "epoch": 0.44, "grad_norm": 1.66345129225343, "learning_rate": 6.156590527835058e-06, "loss": 0.7417, "step": 6920 }, { "epoch": 0.44, "grad_norm": 3.700670835954799, "learning_rate": 6.1555820606918384e-06, "loss": 0.7884, "step": 6921 }, { "epoch": 0.44, "grad_norm": 1.68465433718627, "learning_rate": 6.15457354388413e-06, "loss": 0.7466, "step": 6922 }, { "epoch": 0.44, "grad_norm": 1.5620955393781888, "learning_rate": 6.153564977455278e-06, "loss": 0.7685, "step": 6923 }, { "epoch": 0.44, "grad_norm": 1.8505749161100506, "learning_rate": 6.152556361448627e-06, "loss": 0.7794, "step": 6924 }, { "epoch": 0.44, "grad_norm": 2.07540806319285, "learning_rate": 6.151547695907525e-06, "loss": 0.8513, "step": 6925 }, { "epoch": 0.44, "grad_norm": 2.033881508486801, "learning_rate": 6.150538980875323e-06, "loss": 0.8416, "step": 6926 }, { "epoch": 0.44, "grad_norm": 1.9575509619620204, "learning_rate": 6.149530216395374e-06, "loss": 0.7082, "step": 6927 }, { "epoch": 0.44, "grad_norm": 1.2665942704243716, "learning_rate": 6.148521402511031e-06, "loss": 0.721, "step": 6928 }, { "epoch": 0.44, "grad_norm": 1.1691253900659246, "learning_rate": 6.1475125392656506e-06, "loss": 0.7257, "step": 6929 }, { "epoch": 0.44, "grad_norm": 1.4613407500494864, "learning_rate": 6.146503626702593e-06, "loss": 0.6904, "step": 6930 }, { "epoch": 0.44, "grad_norm": 1.7376425536673838, "learning_rate": 6.1454946648652204e-06, "loss": 0.7579, "step": 6931 }, { "epoch": 0.44, "grad_norm": 1.9013844688906136, "learning_rate": 6.144485653796891e-06, "loss": 0.7951, "step": 6932 }, { "epoch": 0.44, "grad_norm": 1.1705509850764322, "learning_rate": 6.143476593540976e-06, "loss": 0.6602, "step": 6933 }, { "epoch": 0.44, "grad_norm": 1.6167325062107643, "learning_rate": 6.142467484140838e-06, "loss": 0.7526, "step": 6934 }, { "epoch": 0.44, "grad_norm": 1.772564026166895, "learning_rate": 6.1414583256398494e-06, "loss": 0.6645, "step": 6935 }, { "epoch": 0.44, "grad_norm": 1.493942370890187, "learning_rate": 6.14044911808138e-06, "loss": 0.7863, "step": 6936 }, { "epoch": 0.44, "grad_norm": 1.622408968760355, "learning_rate": 6.139439861508804e-06, "loss": 0.7249, "step": 6937 }, { "epoch": 0.44, "grad_norm": 1.6708599250032714, "learning_rate": 6.138430555965497e-06, "loss": 0.9295, "step": 6938 }, { "epoch": 0.44, "grad_norm": 1.16689141834658, "learning_rate": 6.137421201494837e-06, "loss": 0.5902, "step": 6939 }, { "epoch": 0.44, "grad_norm": 1.1266373895575672, "learning_rate": 6.1364117981402035e-06, "loss": 0.6552, "step": 6940 }, { "epoch": 0.44, "grad_norm": 1.7143737676669883, "learning_rate": 6.135402345944979e-06, "loss": 0.7501, "step": 6941 }, { "epoch": 0.44, "grad_norm": 1.5072125921859147, "learning_rate": 6.134392844952547e-06, "loss": 0.7559, "step": 6942 }, { "epoch": 0.44, "grad_norm": 1.542389400157854, "learning_rate": 6.1333832952062945e-06, "loss": 0.6623, "step": 6943 }, { "epoch": 0.44, "grad_norm": 2.2192239105565217, "learning_rate": 6.132373696749609e-06, "loss": 0.7862, "step": 6944 }, { "epoch": 0.44, "grad_norm": 1.6738338083579434, "learning_rate": 6.1313640496258834e-06, "loss": 0.8153, "step": 6945 }, { "epoch": 0.44, "grad_norm": 1.7825524605742802, "learning_rate": 6.130354353878507e-06, "loss": 0.7791, "step": 6946 }, { "epoch": 0.44, "grad_norm": 1.737554576923618, "learning_rate": 6.129344609550876e-06, "loss": 0.732, "step": 6947 }, { "epoch": 0.44, "grad_norm": 2.0786504247130644, "learning_rate": 6.128334816686387e-06, "loss": 0.8586, "step": 6948 }, { "epoch": 0.44, "grad_norm": 1.9375139915300854, "learning_rate": 6.127324975328437e-06, "loss": 0.7975, "step": 6949 }, { "epoch": 0.44, "grad_norm": 1.589417583504712, "learning_rate": 6.1263150855204286e-06, "loss": 0.8402, "step": 6950 }, { "epoch": 0.44, "grad_norm": 1.6394899997184953, "learning_rate": 6.125305147305764e-06, "loss": 0.7296, "step": 6951 }, { "epoch": 0.44, "grad_norm": 1.7409114865791957, "learning_rate": 6.124295160727851e-06, "loss": 0.7259, "step": 6952 }, { "epoch": 0.45, "grad_norm": 1.5637197589202498, "learning_rate": 6.1232851258300944e-06, "loss": 0.7589, "step": 6953 }, { "epoch": 0.45, "grad_norm": 1.5817476140036835, "learning_rate": 6.122275042655902e-06, "loss": 0.7493, "step": 6954 }, { "epoch": 0.45, "grad_norm": 1.6259401823082171, "learning_rate": 6.121264911248688e-06, "loss": 0.7452, "step": 6955 }, { "epoch": 0.45, "grad_norm": 1.752755852264356, "learning_rate": 6.120254731651864e-06, "loss": 0.7894, "step": 6956 }, { "epoch": 0.45, "grad_norm": 1.0574388533755845, "learning_rate": 6.1192445039088435e-06, "loss": 0.6921, "step": 6957 }, { "epoch": 0.45, "grad_norm": 1.728206046838139, "learning_rate": 6.1182342280630466e-06, "loss": 0.8232, "step": 6958 }, { "epoch": 0.45, "grad_norm": 2.0204307763808966, "learning_rate": 6.117223904157893e-06, "loss": 0.7509, "step": 6959 }, { "epoch": 0.45, "grad_norm": 1.6245760711152948, "learning_rate": 6.1162135322368045e-06, "loss": 0.8372, "step": 6960 }, { "epoch": 0.45, "grad_norm": 1.1800658744097765, "learning_rate": 6.115203112343203e-06, "loss": 0.6759, "step": 6961 }, { "epoch": 0.45, "grad_norm": 1.5748465070375435, "learning_rate": 6.114192644520516e-06, "loss": 0.6354, "step": 6962 }, { "epoch": 0.45, "grad_norm": 1.2422998980003483, "learning_rate": 6.11318212881217e-06, "loss": 0.7878, "step": 6963 }, { "epoch": 0.45, "grad_norm": 1.0991411978711068, "learning_rate": 6.112171565261594e-06, "loss": 0.6238, "step": 6964 }, { "epoch": 0.45, "grad_norm": 1.7286646068852172, "learning_rate": 6.111160953912222e-06, "loss": 0.7973, "step": 6965 }, { "epoch": 0.45, "grad_norm": 1.9225075611135924, "learning_rate": 6.110150294807487e-06, "loss": 0.8366, "step": 6966 }, { "epoch": 0.45, "grad_norm": 1.6131964007773456, "learning_rate": 6.1091395879908255e-06, "loss": 0.7136, "step": 6967 }, { "epoch": 0.45, "grad_norm": 1.587252373918241, "learning_rate": 6.108128833505675e-06, "loss": 0.7541, "step": 6968 }, { "epoch": 0.45, "grad_norm": 1.6317668233226248, "learning_rate": 6.107118031395475e-06, "loss": 0.6952, "step": 6969 }, { "epoch": 0.45, "grad_norm": 1.3331487600858487, "learning_rate": 6.106107181703669e-06, "loss": 0.5492, "step": 6970 }, { "epoch": 0.45, "grad_norm": 1.7235205918332035, "learning_rate": 6.1050962844737005e-06, "loss": 0.7077, "step": 6971 }, { "epoch": 0.45, "grad_norm": 1.8935731062523091, "learning_rate": 6.104085339749015e-06, "loss": 0.7837, "step": 6972 }, { "epoch": 0.45, "grad_norm": 1.9451096059427515, "learning_rate": 6.103074347573062e-06, "loss": 0.7281, "step": 6973 }, { "epoch": 0.45, "grad_norm": 2.366225303053287, "learning_rate": 6.102063307989293e-06, "loss": 0.7296, "step": 6974 }, { "epoch": 0.45, "grad_norm": 1.5898720280860568, "learning_rate": 6.1010522210411575e-06, "loss": 0.6881, "step": 6975 }, { "epoch": 0.45, "grad_norm": 1.6476734443189516, "learning_rate": 6.100041086772111e-06, "loss": 0.6814, "step": 6976 }, { "epoch": 0.45, "grad_norm": 1.710388329674516, "learning_rate": 6.0990299052256105e-06, "loss": 0.8194, "step": 6977 }, { "epoch": 0.45, "grad_norm": 1.7714802559161293, "learning_rate": 6.098018676445114e-06, "loss": 0.7684, "step": 6978 }, { "epoch": 0.45, "grad_norm": 1.6041099477074183, "learning_rate": 6.097007400474081e-06, "loss": 0.7466, "step": 6979 }, { "epoch": 0.45, "grad_norm": 1.806440611504776, "learning_rate": 6.095996077355976e-06, "loss": 0.7114, "step": 6980 }, { "epoch": 0.45, "grad_norm": 1.2811568320936697, "learning_rate": 6.094984707134263e-06, "loss": 0.7121, "step": 6981 }, { "epoch": 0.45, "grad_norm": 1.0658546017480353, "learning_rate": 6.093973289852409e-06, "loss": 0.6404, "step": 6982 }, { "epoch": 0.45, "grad_norm": 3.4507882845679974, "learning_rate": 6.092961825553881e-06, "loss": 0.6602, "step": 6983 }, { "epoch": 0.45, "grad_norm": 1.5053112824822947, "learning_rate": 6.091950314282149e-06, "loss": 0.7044, "step": 6984 }, { "epoch": 0.45, "grad_norm": 1.6739193876004588, "learning_rate": 6.090938756080688e-06, "loss": 0.7258, "step": 6985 }, { "epoch": 0.45, "grad_norm": 1.4682328632605508, "learning_rate": 6.089927150992971e-06, "loss": 0.8876, "step": 6986 }, { "epoch": 0.45, "grad_norm": 2.2483156357408984, "learning_rate": 6.088915499062475e-06, "loss": 0.8307, "step": 6987 }, { "epoch": 0.45, "grad_norm": 1.5525562931407113, "learning_rate": 6.08790380033268e-06, "loss": 0.7833, "step": 6988 }, { "epoch": 0.45, "grad_norm": 1.7989565559243574, "learning_rate": 6.0868920548470654e-06, "loss": 0.7922, "step": 6989 }, { "epoch": 0.45, "grad_norm": 1.63559325338075, "learning_rate": 6.0858802626491155e-06, "loss": 0.7427, "step": 6990 }, { "epoch": 0.45, "grad_norm": 2.5582234071482945, "learning_rate": 6.084868423782312e-06, "loss": 0.7651, "step": 6991 }, { "epoch": 0.45, "grad_norm": 2.1420105342418765, "learning_rate": 6.0838565382901435e-06, "loss": 0.8141, "step": 6992 }, { "epoch": 0.45, "grad_norm": 2.4818553497738662, "learning_rate": 6.082844606216098e-06, "loss": 0.7757, "step": 6993 }, { "epoch": 0.45, "grad_norm": 1.953816565280967, "learning_rate": 6.0818326276036675e-06, "loss": 0.7878, "step": 6994 }, { "epoch": 0.45, "grad_norm": 1.817764590893989, "learning_rate": 6.080820602496345e-06, "loss": 0.7611, "step": 6995 }, { "epoch": 0.45, "grad_norm": 1.7012989541735264, "learning_rate": 6.079808530937621e-06, "loss": 0.682, "step": 6996 }, { "epoch": 0.45, "grad_norm": 1.6533722545784648, "learning_rate": 6.078796412970997e-06, "loss": 0.8409, "step": 6997 }, { "epoch": 0.45, "grad_norm": 1.6905579014057766, "learning_rate": 6.077784248639971e-06, "loss": 0.7612, "step": 6998 }, { "epoch": 0.45, "grad_norm": 1.3247062528406157, "learning_rate": 6.076772037988042e-06, "loss": 0.7647, "step": 6999 }, { "epoch": 0.45, "grad_norm": 1.6254140237130872, "learning_rate": 6.075759781058713e-06, "loss": 0.8165, "step": 7000 }, { "epoch": 0.45, "grad_norm": 1.1597232389093766, "learning_rate": 6.07474747789549e-06, "loss": 0.7492, "step": 7001 }, { "epoch": 0.45, "grad_norm": 1.8172974805786049, "learning_rate": 6.073735128541878e-06, "loss": 0.7443, "step": 7002 }, { "epoch": 0.45, "grad_norm": 2.0869838322369523, "learning_rate": 6.072722733041387e-06, "loss": 0.6454, "step": 7003 }, { "epoch": 0.45, "grad_norm": 1.8164976360689014, "learning_rate": 6.071710291437527e-06, "loss": 0.8447, "step": 7004 }, { "epoch": 0.45, "grad_norm": 1.8020328442398095, "learning_rate": 6.07069780377381e-06, "loss": 1.0632, "step": 7005 }, { "epoch": 0.45, "grad_norm": 1.9256386178918152, "learning_rate": 6.069685270093751e-06, "loss": 0.7582, "step": 7006 }, { "epoch": 0.45, "grad_norm": 1.508805621759005, "learning_rate": 6.068672690440868e-06, "loss": 0.7481, "step": 7007 }, { "epoch": 0.45, "grad_norm": 1.8321659913160946, "learning_rate": 6.067660064858677e-06, "loss": 0.7601, "step": 7008 }, { "epoch": 0.45, "grad_norm": 1.726454771774996, "learning_rate": 6.066647393390701e-06, "loss": 0.747, "step": 7009 }, { "epoch": 0.45, "grad_norm": 1.8158069505175718, "learning_rate": 6.0656346760804605e-06, "loss": 0.7015, "step": 7010 }, { "epoch": 0.45, "grad_norm": 1.4688153798671675, "learning_rate": 6.064621912971483e-06, "loss": 0.8305, "step": 7011 }, { "epoch": 0.45, "grad_norm": 1.7304638501999106, "learning_rate": 6.063609104107291e-06, "loss": 0.6954, "step": 7012 }, { "epoch": 0.45, "grad_norm": 1.7310175620608592, "learning_rate": 6.062596249531414e-06, "loss": 0.8109, "step": 7013 }, { "epoch": 0.45, "grad_norm": 2.3785452733967536, "learning_rate": 6.061583349287383e-06, "loss": 0.7046, "step": 7014 }, { "epoch": 0.45, "grad_norm": 1.647951805108666, "learning_rate": 6.060570403418731e-06, "loss": 0.7483, "step": 7015 }, { "epoch": 0.45, "grad_norm": 1.6812759013854708, "learning_rate": 6.0595574119689915e-06, "loss": 0.7158, "step": 7016 }, { "epoch": 0.45, "grad_norm": 1.6078558983987643, "learning_rate": 6.058544374981701e-06, "loss": 0.7206, "step": 7017 }, { "epoch": 0.45, "grad_norm": 1.9260380422791075, "learning_rate": 6.057531292500398e-06, "loss": 0.8852, "step": 7018 }, { "epoch": 0.45, "grad_norm": 1.702472929022917, "learning_rate": 6.056518164568622e-06, "loss": 0.7804, "step": 7019 }, { "epoch": 0.45, "grad_norm": 1.0613297921743978, "learning_rate": 6.055504991229916e-06, "loss": 0.6005, "step": 7020 }, { "epoch": 0.45, "grad_norm": 1.6567399687574185, "learning_rate": 6.054491772527822e-06, "loss": 0.7126, "step": 7021 }, { "epoch": 0.45, "grad_norm": 1.7785905608625114, "learning_rate": 6.053478508505888e-06, "loss": 0.7756, "step": 7022 }, { "epoch": 0.45, "grad_norm": 1.2814608590190868, "learning_rate": 6.052465199207661e-06, "loss": 0.6373, "step": 7023 }, { "epoch": 0.45, "grad_norm": 1.1342693521914684, "learning_rate": 6.051451844676691e-06, "loss": 0.6415, "step": 7024 }, { "epoch": 0.45, "grad_norm": 1.820093632404195, "learning_rate": 6.050438444956531e-06, "loss": 0.728, "step": 7025 }, { "epoch": 0.45, "grad_norm": 1.5760056666361084, "learning_rate": 6.049425000090734e-06, "loss": 0.7086, "step": 7026 }, { "epoch": 0.45, "grad_norm": 1.521669240655106, "learning_rate": 6.048411510122855e-06, "loss": 0.7233, "step": 7027 }, { "epoch": 0.45, "grad_norm": 1.7723336112673536, "learning_rate": 6.047397975096454e-06, "loss": 0.8041, "step": 7028 }, { "epoch": 0.45, "grad_norm": 1.8642825162926362, "learning_rate": 6.046384395055086e-06, "loss": 0.744, "step": 7029 }, { "epoch": 0.45, "grad_norm": 1.584307960508003, "learning_rate": 6.045370770042318e-06, "loss": 0.7617, "step": 7030 }, { "epoch": 0.45, "grad_norm": 0.9835518452274576, "learning_rate": 6.04435710010171e-06, "loss": 0.5864, "step": 7031 }, { "epoch": 0.45, "grad_norm": 1.553640501289419, "learning_rate": 6.0433433852768285e-06, "loss": 0.6988, "step": 7032 }, { "epoch": 0.45, "grad_norm": 1.5076293007735768, "learning_rate": 6.042329625611239e-06, "loss": 0.6489, "step": 7033 }, { "epoch": 0.45, "grad_norm": 1.9562896797379652, "learning_rate": 6.041315821148514e-06, "loss": 0.7419, "step": 7034 }, { "epoch": 0.45, "grad_norm": 1.2732479833329147, "learning_rate": 6.040301971932223e-06, "loss": 0.6827, "step": 7035 }, { "epoch": 0.45, "grad_norm": 1.3860525207310268, "learning_rate": 6.0392880780059395e-06, "loss": 0.7472, "step": 7036 }, { "epoch": 0.45, "grad_norm": 1.6382413639410403, "learning_rate": 6.038274139413238e-06, "loss": 0.6747, "step": 7037 }, { "epoch": 0.45, "grad_norm": 1.5890646727608788, "learning_rate": 6.0372601561976955e-06, "loss": 0.749, "step": 7038 }, { "epoch": 0.45, "grad_norm": 1.845153464081311, "learning_rate": 6.036246128402892e-06, "loss": 0.6528, "step": 7039 }, { "epoch": 0.45, "grad_norm": 1.4000997742445214, "learning_rate": 6.0352320560724066e-06, "loss": 0.6471, "step": 7040 }, { "epoch": 0.45, "grad_norm": 1.228055950073123, "learning_rate": 6.034217939249823e-06, "loss": 0.6684, "step": 7041 }, { "epoch": 0.45, "grad_norm": 1.6794392289023212, "learning_rate": 6.033203777978724e-06, "loss": 0.7534, "step": 7042 }, { "epoch": 0.45, "grad_norm": 1.5030400440712635, "learning_rate": 6.0321895723027e-06, "loss": 0.7023, "step": 7043 }, { "epoch": 0.45, "grad_norm": 1.6210741088726883, "learning_rate": 6.031175322265335e-06, "loss": 0.6925, "step": 7044 }, { "epoch": 0.45, "grad_norm": 1.625545414158288, "learning_rate": 6.030161027910223e-06, "loss": 0.7897, "step": 7045 }, { "epoch": 0.45, "grad_norm": 1.127933342458784, "learning_rate": 6.029146689280954e-06, "loss": 0.6752, "step": 7046 }, { "epoch": 0.45, "grad_norm": 1.5414636159117616, "learning_rate": 6.028132306421124e-06, "loss": 0.7226, "step": 7047 }, { "epoch": 0.45, "grad_norm": 1.615988718593534, "learning_rate": 6.027117879374327e-06, "loss": 0.6786, "step": 7048 }, { "epoch": 0.45, "grad_norm": 1.5547441985297639, "learning_rate": 6.026103408184162e-06, "loss": 0.7117, "step": 7049 }, { "epoch": 0.45, "grad_norm": 1.8248587867760222, "learning_rate": 6.025088892894227e-06, "loss": 0.9348, "step": 7050 }, { "epoch": 0.45, "grad_norm": 2.0143421980214575, "learning_rate": 6.0240743335481265e-06, "loss": 0.6995, "step": 7051 }, { "epoch": 0.45, "grad_norm": 1.6504899806309008, "learning_rate": 6.023059730189464e-06, "loss": 0.798, "step": 7052 }, { "epoch": 0.45, "grad_norm": 1.676789948937666, "learning_rate": 6.0220450828618424e-06, "loss": 0.6666, "step": 7053 }, { "epoch": 0.45, "grad_norm": 1.4985709398450338, "learning_rate": 6.021030391608872e-06, "loss": 0.7806, "step": 7054 }, { "epoch": 0.45, "grad_norm": 1.5565346755008067, "learning_rate": 6.0200156564741606e-06, "loss": 0.6652, "step": 7055 }, { "epoch": 0.45, "grad_norm": 1.1540606444062247, "learning_rate": 6.019000877501321e-06, "loss": 0.5512, "step": 7056 }, { "epoch": 0.45, "grad_norm": 1.5456239768775626, "learning_rate": 6.017986054733962e-06, "loss": 1.0437, "step": 7057 }, { "epoch": 0.45, "grad_norm": 1.6649580398731196, "learning_rate": 6.016971188215703e-06, "loss": 1.0978, "step": 7058 }, { "epoch": 0.45, "grad_norm": 3.2586304543110347, "learning_rate": 6.0159562779901605e-06, "loss": 0.7216, "step": 7059 }, { "epoch": 0.45, "grad_norm": 1.0499518224889586, "learning_rate": 6.0149413241009504e-06, "loss": 0.642, "step": 7060 }, { "epoch": 0.45, "grad_norm": 1.3210571983612256, "learning_rate": 6.013926326591695e-06, "loss": 0.7467, "step": 7061 }, { "epoch": 0.45, "grad_norm": 1.7946629556595746, "learning_rate": 6.012911285506016e-06, "loss": 0.7966, "step": 7062 }, { "epoch": 0.45, "grad_norm": 1.2032844803697953, "learning_rate": 6.0118962008875395e-06, "loss": 0.6254, "step": 7063 }, { "epoch": 0.45, "grad_norm": 1.5541425861303308, "learning_rate": 6.010881072779891e-06, "loss": 0.7804, "step": 7064 }, { "epoch": 0.45, "grad_norm": 1.1022328402758548, "learning_rate": 6.009865901226697e-06, "loss": 0.6458, "step": 7065 }, { "epoch": 0.45, "grad_norm": 1.8191239156720342, "learning_rate": 6.008850686271589e-06, "loss": 0.8853, "step": 7066 }, { "epoch": 0.45, "grad_norm": 1.7431480430922048, "learning_rate": 6.007835427958199e-06, "loss": 0.7367, "step": 7067 }, { "epoch": 0.45, "grad_norm": 1.5649066935177005, "learning_rate": 6.006820126330159e-06, "loss": 0.7043, "step": 7068 }, { "epoch": 0.45, "grad_norm": 3.309354492864207, "learning_rate": 6.005804781431106e-06, "loss": 0.7397, "step": 7069 }, { "epoch": 0.45, "grad_norm": 1.883596582882439, "learning_rate": 6.0047893933046765e-06, "loss": 0.7837, "step": 7070 }, { "epoch": 0.45, "grad_norm": 1.7927872409456833, "learning_rate": 6.0037739619945114e-06, "loss": 0.7579, "step": 7071 }, { "epoch": 0.45, "grad_norm": 1.516095919841051, "learning_rate": 6.002758487544249e-06, "loss": 0.6912, "step": 7072 }, { "epoch": 0.45, "grad_norm": 2.667227499186474, "learning_rate": 6.001742969997535e-06, "loss": 0.8024, "step": 7073 }, { "epoch": 0.45, "grad_norm": 1.8351962170068543, "learning_rate": 6.000727409398013e-06, "loss": 0.8507, "step": 7074 }, { "epoch": 0.45, "grad_norm": 1.711428491791812, "learning_rate": 5.99971180578933e-06, "loss": 0.7547, "step": 7075 }, { "epoch": 0.45, "grad_norm": 1.327077219700842, "learning_rate": 5.998696159215134e-06, "loss": 0.7644, "step": 7076 }, { "epoch": 0.45, "grad_norm": 1.2728892670128382, "learning_rate": 5.997680469719076e-06, "loss": 0.6687, "step": 7077 }, { "epoch": 0.45, "grad_norm": 1.7419139727608037, "learning_rate": 5.996664737344808e-06, "loss": 0.7641, "step": 7078 }, { "epoch": 0.45, "grad_norm": 1.8328265093159528, "learning_rate": 5.995648962135983e-06, "loss": 0.7391, "step": 7079 }, { "epoch": 0.45, "grad_norm": 1.5636073811931965, "learning_rate": 5.994633144136257e-06, "loss": 0.7247, "step": 7080 }, { "epoch": 0.45, "grad_norm": 1.6883234140261016, "learning_rate": 5.993617283389289e-06, "loss": 0.6824, "step": 7081 }, { "epoch": 0.45, "grad_norm": 1.9068246248606078, "learning_rate": 5.9926013799387396e-06, "loss": 0.7255, "step": 7082 }, { "epoch": 0.45, "grad_norm": 1.4332686998741218, "learning_rate": 5.991585433828267e-06, "loss": 0.6851, "step": 7083 }, { "epoch": 0.45, "grad_norm": 1.6207501039789, "learning_rate": 5.990569445101537e-06, "loss": 0.7945, "step": 7084 }, { "epoch": 0.45, "grad_norm": 1.217808597758691, "learning_rate": 5.9895534138022136e-06, "loss": 0.6005, "step": 7085 }, { "epoch": 0.45, "grad_norm": 1.6937018303933737, "learning_rate": 5.988537339973963e-06, "loss": 0.7355, "step": 7086 }, { "epoch": 0.45, "grad_norm": 1.5143610681161264, "learning_rate": 5.9875212236604564e-06, "loss": 0.77, "step": 7087 }, { "epoch": 0.45, "grad_norm": 1.589707255790641, "learning_rate": 5.986505064905361e-06, "loss": 0.6958, "step": 7088 }, { "epoch": 0.45, "grad_norm": 1.6045971838901345, "learning_rate": 5.985488863752351e-06, "loss": 0.8218, "step": 7089 }, { "epoch": 0.45, "grad_norm": 1.6738780599794074, "learning_rate": 5.984472620245101e-06, "loss": 0.7369, "step": 7090 }, { "epoch": 0.45, "grad_norm": 1.5875521508660448, "learning_rate": 5.983456334427286e-06, "loss": 0.7936, "step": 7091 }, { "epoch": 0.45, "grad_norm": 1.6005232848615285, "learning_rate": 5.982440006342586e-06, "loss": 0.7263, "step": 7092 }, { "epoch": 0.45, "grad_norm": 1.0597338518804293, "learning_rate": 5.9814236360346765e-06, "loss": 0.6993, "step": 7093 }, { "epoch": 0.45, "grad_norm": 1.5734126088198948, "learning_rate": 5.980407223547243e-06, "loss": 0.67, "step": 7094 }, { "epoch": 0.45, "grad_norm": 1.7237416740747675, "learning_rate": 5.9793907689239675e-06, "loss": 0.717, "step": 7095 }, { "epoch": 0.45, "grad_norm": 1.937666460186794, "learning_rate": 5.978374272208534e-06, "loss": 0.7894, "step": 7096 }, { "epoch": 0.45, "grad_norm": 1.6550920998503365, "learning_rate": 5.97735773344463e-06, "loss": 0.7933, "step": 7097 }, { "epoch": 0.45, "grad_norm": 2.0208591964286, "learning_rate": 5.976341152675943e-06, "loss": 0.8492, "step": 7098 }, { "epoch": 0.45, "grad_norm": 1.5458146048510626, "learning_rate": 5.975324529946166e-06, "loss": 0.7499, "step": 7099 }, { "epoch": 0.45, "grad_norm": 1.1185390597392662, "learning_rate": 5.9743078652989905e-06, "loss": 0.5963, "step": 7100 }, { "epoch": 0.45, "grad_norm": 1.869689519197889, "learning_rate": 5.973291158778109e-06, "loss": 0.7187, "step": 7101 }, { "epoch": 0.45, "grad_norm": 1.6161002285630441, "learning_rate": 5.97227441042722e-06, "loss": 0.8322, "step": 7102 }, { "epoch": 0.45, "grad_norm": 1.6654044139074604, "learning_rate": 5.97125762029002e-06, "loss": 0.6371, "step": 7103 }, { "epoch": 0.45, "grad_norm": 1.7283177124810865, "learning_rate": 5.970240788410209e-06, "loss": 0.8503, "step": 7104 }, { "epoch": 0.45, "grad_norm": 1.6966419543336955, "learning_rate": 5.969223914831485e-06, "loss": 0.7639, "step": 7105 }, { "epoch": 0.45, "grad_norm": 1.6711262477926716, "learning_rate": 5.968206999597557e-06, "loss": 0.7185, "step": 7106 }, { "epoch": 0.45, "grad_norm": 1.041275547677823, "learning_rate": 5.967190042752123e-06, "loss": 0.6461, "step": 7107 }, { "epoch": 0.45, "grad_norm": 2.1500526305753125, "learning_rate": 5.966173044338895e-06, "loss": 0.7562, "step": 7108 }, { "epoch": 0.46, "grad_norm": 1.2705492015972202, "learning_rate": 5.965156004401581e-06, "loss": 0.6591, "step": 7109 }, { "epoch": 0.46, "grad_norm": 1.9981527992292143, "learning_rate": 5.964138922983889e-06, "loss": 0.7914, "step": 7110 }, { "epoch": 0.46, "grad_norm": 2.0146744918746773, "learning_rate": 5.9631218001295325e-06, "loss": 0.8348, "step": 7111 }, { "epoch": 0.46, "grad_norm": 1.610235393136685, "learning_rate": 5.962104635882225e-06, "loss": 0.7216, "step": 7112 }, { "epoch": 0.46, "grad_norm": 2.2507874714555607, "learning_rate": 5.961087430285681e-06, "loss": 0.8413, "step": 7113 }, { "epoch": 0.46, "grad_norm": 1.0975965927529006, "learning_rate": 5.9600701833836185e-06, "loss": 0.5875, "step": 7114 }, { "epoch": 0.46, "grad_norm": 2.3082486497286254, "learning_rate": 5.959052895219758e-06, "loss": 0.7702, "step": 7115 }, { "epoch": 0.46, "grad_norm": 1.6151696786456269, "learning_rate": 5.958035565837819e-06, "loss": 0.8506, "step": 7116 }, { "epoch": 0.46, "grad_norm": 4.724462496625771, "learning_rate": 5.957018195281523e-06, "loss": 0.7445, "step": 7117 }, { "epoch": 0.46, "grad_norm": 1.7778992952510373, "learning_rate": 5.956000783594598e-06, "loss": 0.7486, "step": 7118 }, { "epoch": 0.46, "grad_norm": 1.6661721063551989, "learning_rate": 5.954983330820767e-06, "loss": 0.6484, "step": 7119 }, { "epoch": 0.46, "grad_norm": 1.7865615474861505, "learning_rate": 5.95396583700376e-06, "loss": 0.7525, "step": 7120 }, { "epoch": 0.46, "grad_norm": 1.7734928020998797, "learning_rate": 5.9529483021873055e-06, "loss": 0.7272, "step": 7121 }, { "epoch": 0.46, "grad_norm": 1.7900827925318687, "learning_rate": 5.951930726415135e-06, "loss": 0.8375, "step": 7122 }, { "epoch": 0.46, "grad_norm": 1.5628706300518678, "learning_rate": 5.950913109730983e-06, "loss": 0.7408, "step": 7123 }, { "epoch": 0.46, "grad_norm": 1.490032034401537, "learning_rate": 5.949895452178582e-06, "loss": 1.0081, "step": 7124 }, { "epoch": 0.46, "grad_norm": 1.5519412407092408, "learning_rate": 5.948877753801673e-06, "loss": 0.6528, "step": 7125 }, { "epoch": 0.46, "grad_norm": 1.5743401863132758, "learning_rate": 5.947860014643989e-06, "loss": 0.7724, "step": 7126 }, { "epoch": 0.46, "grad_norm": 1.7108830006146742, "learning_rate": 5.946842234749275e-06, "loss": 0.7122, "step": 7127 }, { "epoch": 0.46, "grad_norm": 2.5846322402548543, "learning_rate": 5.945824414161272e-06, "loss": 0.7405, "step": 7128 }, { "epoch": 0.46, "grad_norm": 1.903900689639095, "learning_rate": 5.944806552923722e-06, "loss": 0.7539, "step": 7129 }, { "epoch": 0.46, "grad_norm": 1.7842565823104528, "learning_rate": 5.943788651080372e-06, "loss": 0.8372, "step": 7130 }, { "epoch": 0.46, "grad_norm": 1.5323589195666012, "learning_rate": 5.942770708674969e-06, "loss": 0.7239, "step": 7131 }, { "epoch": 0.46, "grad_norm": 1.6927202525163916, "learning_rate": 5.941752725751262e-06, "loss": 0.7134, "step": 7132 }, { "epoch": 0.46, "grad_norm": 1.8658467389544333, "learning_rate": 5.940734702353002e-06, "loss": 0.7868, "step": 7133 }, { "epoch": 0.46, "grad_norm": 1.5598267527848306, "learning_rate": 5.939716638523941e-06, "loss": 0.7031, "step": 7134 }, { "epoch": 0.46, "grad_norm": 1.2518565987761958, "learning_rate": 5.938698534307833e-06, "loss": 0.6952, "step": 7135 }, { "epoch": 0.46, "grad_norm": 1.1014740625193704, "learning_rate": 5.937680389748436e-06, "loss": 0.6635, "step": 7136 }, { "epoch": 0.46, "grad_norm": 1.9119818816910348, "learning_rate": 5.936662204889504e-06, "loss": 0.7133, "step": 7137 }, { "epoch": 0.46, "grad_norm": 1.7838317161164743, "learning_rate": 5.9356439797748e-06, "loss": 0.7842, "step": 7138 }, { "epoch": 0.46, "grad_norm": 1.7381229271692453, "learning_rate": 5.934625714448084e-06, "loss": 0.7904, "step": 7139 }, { "epoch": 0.46, "grad_norm": 1.7641283084996613, "learning_rate": 5.933607408953118e-06, "loss": 0.7986, "step": 7140 }, { "epoch": 0.46, "grad_norm": 1.8876044795556584, "learning_rate": 5.932589063333668e-06, "loss": 0.6995, "step": 7141 }, { "epoch": 0.46, "grad_norm": 1.8106201279175833, "learning_rate": 5.9315706776335005e-06, "loss": 0.8415, "step": 7142 }, { "epoch": 0.46, "grad_norm": 1.9593778072783583, "learning_rate": 5.9305522518963795e-06, "loss": 0.7306, "step": 7143 }, { "epoch": 0.46, "grad_norm": 1.5272991690130615, "learning_rate": 5.9295337861660795e-06, "loss": 0.8924, "step": 7144 }, { "epoch": 0.46, "grad_norm": 1.7173674608939127, "learning_rate": 5.928515280486372e-06, "loss": 0.7277, "step": 7145 }, { "epoch": 0.46, "grad_norm": 1.813310262059513, "learning_rate": 5.9274967349010286e-06, "loss": 0.7577, "step": 7146 }, { "epoch": 0.46, "grad_norm": 1.321483436115431, "learning_rate": 5.9264781494538235e-06, "loss": 0.6866, "step": 7147 }, { "epoch": 0.46, "grad_norm": 2.1414940744179733, "learning_rate": 5.925459524188535e-06, "loss": 0.8146, "step": 7148 }, { "epoch": 0.46, "grad_norm": 1.8995181943942572, "learning_rate": 5.924440859148941e-06, "loss": 0.709, "step": 7149 }, { "epoch": 0.46, "grad_norm": 2.061050874138158, "learning_rate": 5.923422154378821e-06, "loss": 0.7973, "step": 7150 }, { "epoch": 0.46, "grad_norm": 1.089885286565069, "learning_rate": 5.922403409921957e-06, "loss": 0.6559, "step": 7151 }, { "epoch": 0.46, "grad_norm": 2.464908959389956, "learning_rate": 5.921384625822133e-06, "loss": 0.8526, "step": 7152 }, { "epoch": 0.46, "grad_norm": 1.0507931509827433, "learning_rate": 5.9203658021231335e-06, "loss": 0.6405, "step": 7153 }, { "epoch": 0.46, "grad_norm": 1.6998451973207924, "learning_rate": 5.919346938868745e-06, "loss": 0.6641, "step": 7154 }, { "epoch": 0.46, "grad_norm": 1.1936568211197918, "learning_rate": 5.918328036102758e-06, "loss": 0.6787, "step": 7155 }, { "epoch": 0.46, "grad_norm": 1.8400411684686453, "learning_rate": 5.9173090938689626e-06, "loss": 0.6951, "step": 7156 }, { "epoch": 0.46, "grad_norm": 1.4222318519827057, "learning_rate": 5.916290112211149e-06, "loss": 0.645, "step": 7157 }, { "epoch": 0.46, "grad_norm": 1.8172638379812007, "learning_rate": 5.91527109117311e-06, "loss": 0.7458, "step": 7158 }, { "epoch": 0.46, "grad_norm": 1.5626500537429422, "learning_rate": 5.9142520307986455e-06, "loss": 0.782, "step": 7159 }, { "epoch": 0.46, "grad_norm": 2.0331904710200033, "learning_rate": 5.91323293113155e-06, "loss": 0.7244, "step": 7160 }, { "epoch": 0.46, "grad_norm": 1.7495180649367932, "learning_rate": 5.91221379221562e-06, "loss": 0.7432, "step": 7161 }, { "epoch": 0.46, "grad_norm": 1.7806497172900235, "learning_rate": 5.91119461409466e-06, "loss": 0.7965, "step": 7162 }, { "epoch": 0.46, "grad_norm": 2.8627923071492574, "learning_rate": 5.910175396812468e-06, "loss": 0.6637, "step": 7163 }, { "epoch": 0.46, "grad_norm": 1.5876301850417958, "learning_rate": 5.9091561404128505e-06, "loss": 0.7565, "step": 7164 }, { "epoch": 0.46, "grad_norm": 2.0378609478145577, "learning_rate": 5.908136844939612e-06, "loss": 0.8069, "step": 7165 }, { "epoch": 0.46, "grad_norm": 2.619856237801779, "learning_rate": 5.9071175104365616e-06, "loss": 0.6833, "step": 7166 }, { "epoch": 0.46, "grad_norm": 1.7951255126705616, "learning_rate": 5.906098136947506e-06, "loss": 0.765, "step": 7167 }, { "epoch": 0.46, "grad_norm": 1.217661104626467, "learning_rate": 5.905078724516258e-06, "loss": 0.5993, "step": 7168 }, { "epoch": 0.46, "grad_norm": 1.77844672517055, "learning_rate": 5.904059273186627e-06, "loss": 0.7215, "step": 7169 }, { "epoch": 0.46, "grad_norm": 2.0280904253220564, "learning_rate": 5.903039783002428e-06, "loss": 0.8721, "step": 7170 }, { "epoch": 0.46, "grad_norm": 1.4954788786655433, "learning_rate": 5.9020202540074755e-06, "loss": 0.7052, "step": 7171 }, { "epoch": 0.46, "grad_norm": 1.7759962919748276, "learning_rate": 5.901000686245588e-06, "loss": 0.7492, "step": 7172 }, { "epoch": 0.46, "grad_norm": 1.5928451701569166, "learning_rate": 5.899981079760586e-06, "loss": 0.7472, "step": 7173 }, { "epoch": 0.46, "grad_norm": 1.6122694341297412, "learning_rate": 5.898961434596289e-06, "loss": 0.7446, "step": 7174 }, { "epoch": 0.46, "grad_norm": 1.7760372681102594, "learning_rate": 5.897941750796517e-06, "loss": 0.7144, "step": 7175 }, { "epoch": 0.46, "grad_norm": 1.9666979146297885, "learning_rate": 5.896922028405095e-06, "loss": 0.8361, "step": 7176 }, { "epoch": 0.46, "grad_norm": 1.6908271051651385, "learning_rate": 5.895902267465851e-06, "loss": 0.7055, "step": 7177 }, { "epoch": 0.46, "grad_norm": 2.002104314973639, "learning_rate": 5.894882468022608e-06, "loss": 0.8157, "step": 7178 }, { "epoch": 0.46, "grad_norm": 1.7245776083854083, "learning_rate": 5.893862630119197e-06, "loss": 0.8078, "step": 7179 }, { "epoch": 0.46, "grad_norm": 1.6132020332285353, "learning_rate": 5.892842753799449e-06, "loss": 0.7009, "step": 7180 }, { "epoch": 0.46, "grad_norm": 1.5127358887393638, "learning_rate": 5.891822839107195e-06, "loss": 0.7441, "step": 7181 }, { "epoch": 0.46, "grad_norm": 0.9681535786015578, "learning_rate": 5.8908028860862695e-06, "loss": 0.5715, "step": 7182 }, { "epoch": 0.46, "grad_norm": 1.593003630228042, "learning_rate": 5.8897828947805094e-06, "loss": 0.7096, "step": 7183 }, { "epoch": 0.46, "grad_norm": 1.6578693058349345, "learning_rate": 5.8887628652337495e-06, "loss": 0.7054, "step": 7184 }, { "epoch": 0.46, "grad_norm": 0.9791538860412012, "learning_rate": 5.887742797489828e-06, "loss": 0.6432, "step": 7185 }, { "epoch": 0.46, "grad_norm": 1.1538067807278605, "learning_rate": 5.886722691592587e-06, "loss": 0.6908, "step": 7186 }, { "epoch": 0.46, "grad_norm": 1.637026062086787, "learning_rate": 5.8857025475858676e-06, "loss": 0.6937, "step": 7187 }, { "epoch": 0.46, "grad_norm": 1.628726866272429, "learning_rate": 5.8846823655135155e-06, "loss": 0.8469, "step": 7188 }, { "epoch": 0.46, "grad_norm": 1.5585953582266407, "learning_rate": 5.883662145419373e-06, "loss": 0.5924, "step": 7189 }, { "epoch": 0.46, "grad_norm": 0.9947406806411863, "learning_rate": 5.882641887347289e-06, "loss": 0.5114, "step": 7190 }, { "epoch": 0.46, "grad_norm": 1.1879462303782646, "learning_rate": 5.881621591341109e-06, "loss": 0.6112, "step": 7191 }, { "epoch": 0.46, "grad_norm": 1.0279185893748477, "learning_rate": 5.880601257444688e-06, "loss": 0.633, "step": 7192 }, { "epoch": 0.46, "grad_norm": 1.6803790796524645, "learning_rate": 5.879580885701874e-06, "loss": 0.758, "step": 7193 }, { "epoch": 0.46, "grad_norm": 1.4234975061031117, "learning_rate": 5.878560476156523e-06, "loss": 0.611, "step": 7194 }, { "epoch": 0.46, "grad_norm": 2.056978477010377, "learning_rate": 5.877540028852489e-06, "loss": 0.9311, "step": 7195 }, { "epoch": 0.46, "grad_norm": 1.1924137726018194, "learning_rate": 5.876519543833628e-06, "loss": 0.7174, "step": 7196 }, { "epoch": 0.46, "grad_norm": 1.6968102088628045, "learning_rate": 5.875499021143799e-06, "loss": 0.6913, "step": 7197 }, { "epoch": 0.46, "grad_norm": 1.9454608636728679, "learning_rate": 5.874478460826861e-06, "loss": 0.8975, "step": 7198 }, { "epoch": 0.46, "grad_norm": 1.1578970261513333, "learning_rate": 5.873457862926677e-06, "loss": 0.6015, "step": 7199 }, { "epoch": 0.46, "grad_norm": 1.9434119444952644, "learning_rate": 5.872437227487109e-06, "loss": 0.7784, "step": 7200 }, { "epoch": 0.46, "grad_norm": 1.1108360271819901, "learning_rate": 5.871416554552021e-06, "loss": 0.6015, "step": 7201 }, { "epoch": 0.46, "grad_norm": 1.639999920756059, "learning_rate": 5.870395844165282e-06, "loss": 0.854, "step": 7202 }, { "epoch": 0.46, "grad_norm": 1.6091495781298693, "learning_rate": 5.869375096370759e-06, "loss": 0.7124, "step": 7203 }, { "epoch": 0.46, "grad_norm": 1.7942458143662783, "learning_rate": 5.868354311212321e-06, "loss": 0.8301, "step": 7204 }, { "epoch": 0.46, "grad_norm": 1.6919524153455314, "learning_rate": 5.86733348873384e-06, "loss": 0.8058, "step": 7205 }, { "epoch": 0.46, "grad_norm": 1.5766397252932514, "learning_rate": 5.866312628979188e-06, "loss": 0.6341, "step": 7206 }, { "epoch": 0.46, "grad_norm": 1.7147615671602596, "learning_rate": 5.8652917319922374e-06, "loss": 0.7069, "step": 7207 }, { "epoch": 0.46, "grad_norm": 1.6312005150794129, "learning_rate": 5.864270797816868e-06, "loss": 0.703, "step": 7208 }, { "epoch": 0.46, "grad_norm": 1.7685543398066357, "learning_rate": 5.863249826496955e-06, "loss": 0.7415, "step": 7209 }, { "epoch": 0.46, "grad_norm": 5.196417377958676, "learning_rate": 5.862228818076378e-06, "loss": 0.8324, "step": 7210 }, { "epoch": 0.46, "grad_norm": 1.7184406208351548, "learning_rate": 5.8612077725990206e-06, "loss": 0.7684, "step": 7211 }, { "epoch": 0.46, "grad_norm": 1.7633618377806393, "learning_rate": 5.860186690108762e-06, "loss": 0.8638, "step": 7212 }, { "epoch": 0.46, "grad_norm": 1.752060318837531, "learning_rate": 5.859165570649485e-06, "loss": 0.6321, "step": 7213 }, { "epoch": 0.46, "grad_norm": 1.0927701216538912, "learning_rate": 5.858144414265079e-06, "loss": 0.6651, "step": 7214 }, { "epoch": 0.46, "grad_norm": 1.603485952089353, "learning_rate": 5.857123220999429e-06, "loss": 0.7038, "step": 7215 }, { "epoch": 0.46, "grad_norm": 2.178475293578287, "learning_rate": 5.856101990896424e-06, "loss": 0.7968, "step": 7216 }, { "epoch": 0.46, "grad_norm": 1.7736506965688406, "learning_rate": 5.855080723999954e-06, "loss": 0.8289, "step": 7217 }, { "epoch": 0.46, "grad_norm": 1.845214559273462, "learning_rate": 5.85405942035391e-06, "loss": 0.6744, "step": 7218 }, { "epoch": 0.46, "grad_norm": 1.801231794790083, "learning_rate": 5.853038080002189e-06, "loss": 0.6853, "step": 7219 }, { "epoch": 0.46, "grad_norm": 1.6100185203233404, "learning_rate": 5.852016702988683e-06, "loss": 0.7519, "step": 7220 }, { "epoch": 0.46, "grad_norm": 1.759130706405992, "learning_rate": 5.85099528935729e-06, "loss": 0.6841, "step": 7221 }, { "epoch": 0.46, "grad_norm": 1.8578504320235418, "learning_rate": 5.849973839151906e-06, "loss": 0.7667, "step": 7222 }, { "epoch": 0.46, "grad_norm": 1.7288715254443385, "learning_rate": 5.848952352416434e-06, "loss": 0.8033, "step": 7223 }, { "epoch": 0.46, "grad_norm": 1.0070442009059, "learning_rate": 5.847930829194773e-06, "loss": 0.6181, "step": 7224 }, { "epoch": 0.46, "grad_norm": 1.7399384788197678, "learning_rate": 5.8469092695308274e-06, "loss": 0.7846, "step": 7225 }, { "epoch": 0.46, "grad_norm": 1.8302568644711155, "learning_rate": 5.845887673468501e-06, "loss": 0.8528, "step": 7226 }, { "epoch": 0.46, "grad_norm": 1.6378221118389453, "learning_rate": 5.844866041051699e-06, "loss": 0.6214, "step": 7227 }, { "epoch": 0.46, "grad_norm": 1.6956192986969636, "learning_rate": 5.84384437232433e-06, "loss": 0.7605, "step": 7228 }, { "epoch": 0.46, "grad_norm": 1.7713469057011957, "learning_rate": 5.8428226673303026e-06, "loss": 0.6219, "step": 7229 }, { "epoch": 0.46, "grad_norm": 2.7098736837130413, "learning_rate": 5.8418009261135286e-06, "loss": 0.7394, "step": 7230 }, { "epoch": 0.46, "grad_norm": 1.6293063537811476, "learning_rate": 5.84077914871792e-06, "loss": 0.8217, "step": 7231 }, { "epoch": 0.46, "grad_norm": 1.4661844311554186, "learning_rate": 5.83975733518739e-06, "loss": 0.6963, "step": 7232 }, { "epoch": 0.46, "grad_norm": 1.5981667464745422, "learning_rate": 5.838735485565855e-06, "loss": 0.975, "step": 7233 }, { "epoch": 0.46, "grad_norm": 1.6585431118777147, "learning_rate": 5.83771359989723e-06, "loss": 0.798, "step": 7234 }, { "epoch": 0.46, "grad_norm": 1.6720284146883249, "learning_rate": 5.8366916782254345e-06, "loss": 0.7024, "step": 7235 }, { "epoch": 0.46, "grad_norm": 1.5981005157365333, "learning_rate": 5.83566972059439e-06, "loss": 0.7967, "step": 7236 }, { "epoch": 0.46, "grad_norm": 1.1189675303378692, "learning_rate": 5.834647727048016e-06, "loss": 0.5155, "step": 7237 }, { "epoch": 0.46, "grad_norm": 1.462890553684252, "learning_rate": 5.833625697630237e-06, "loss": 0.6629, "step": 7238 }, { "epoch": 0.46, "grad_norm": 1.949171029857703, "learning_rate": 5.832603632384978e-06, "loss": 0.872, "step": 7239 }, { "epoch": 0.46, "grad_norm": 1.7078499422918216, "learning_rate": 5.831581531356164e-06, "loss": 0.6672, "step": 7240 }, { "epoch": 0.46, "grad_norm": 1.6329092987781166, "learning_rate": 5.8305593945877236e-06, "loss": 0.7044, "step": 7241 }, { "epoch": 0.46, "grad_norm": 1.5644531800822372, "learning_rate": 5.829537222123585e-06, "loss": 0.7797, "step": 7242 }, { "epoch": 0.46, "grad_norm": 1.6067410496260441, "learning_rate": 5.828515014007678e-06, "loss": 0.7353, "step": 7243 }, { "epoch": 0.46, "grad_norm": 1.1706394786643979, "learning_rate": 5.827492770283939e-06, "loss": 0.6885, "step": 7244 }, { "epoch": 0.46, "grad_norm": 1.8793470210517784, "learning_rate": 5.826470490996299e-06, "loss": 0.8337, "step": 7245 }, { "epoch": 0.46, "grad_norm": 1.2668100300063883, "learning_rate": 5.825448176188693e-06, "loss": 0.7155, "step": 7246 }, { "epoch": 0.46, "grad_norm": 1.7674935737048383, "learning_rate": 5.82442582590506e-06, "loss": 0.8455, "step": 7247 }, { "epoch": 0.46, "grad_norm": 2.1109881979681555, "learning_rate": 5.823403440189337e-06, "loss": 0.8397, "step": 7248 }, { "epoch": 0.46, "grad_norm": 1.8191839872298277, "learning_rate": 5.822381019085466e-06, "loss": 0.7821, "step": 7249 }, { "epoch": 0.46, "grad_norm": 1.560266036473381, "learning_rate": 5.821358562637384e-06, "loss": 0.7029, "step": 7250 }, { "epoch": 0.46, "grad_norm": 1.5079734041613329, "learning_rate": 5.820336070889038e-06, "loss": 0.7825, "step": 7251 }, { "epoch": 0.46, "grad_norm": 1.5625331597191339, "learning_rate": 5.819313543884372e-06, "loss": 0.7596, "step": 7252 }, { "epoch": 0.46, "grad_norm": 1.7038809710420428, "learning_rate": 5.8182909816673316e-06, "loss": 0.657, "step": 7253 }, { "epoch": 0.46, "grad_norm": 1.7758642036375891, "learning_rate": 5.817268384281864e-06, "loss": 0.7962, "step": 7254 }, { "epoch": 0.46, "grad_norm": 1.5996328235026787, "learning_rate": 5.816245751771917e-06, "loss": 0.6742, "step": 7255 }, { "epoch": 0.46, "grad_norm": 1.1310655497283717, "learning_rate": 5.815223084181444e-06, "loss": 0.7591, "step": 7256 }, { "epoch": 0.46, "grad_norm": 1.5355553861599063, "learning_rate": 5.814200381554397e-06, "loss": 0.5934, "step": 7257 }, { "epoch": 0.46, "grad_norm": 1.9678084510085339, "learning_rate": 5.813177643934726e-06, "loss": 0.7601, "step": 7258 }, { "epoch": 0.46, "grad_norm": 1.729726605868565, "learning_rate": 5.81215487136639e-06, "loss": 0.877, "step": 7259 }, { "epoch": 0.46, "grad_norm": 1.9422672931479275, "learning_rate": 5.8111320638933446e-06, "loss": 0.73, "step": 7260 }, { "epoch": 0.46, "grad_norm": 1.673833469371961, "learning_rate": 5.810109221559548e-06, "loss": 0.6584, "step": 7261 }, { "epoch": 0.46, "grad_norm": 1.7229966884780243, "learning_rate": 5.809086344408958e-06, "loss": 0.8396, "step": 7262 }, { "epoch": 0.46, "grad_norm": 1.6732199257985985, "learning_rate": 5.808063432485538e-06, "loss": 0.8481, "step": 7263 }, { "epoch": 0.46, "grad_norm": 1.6567086574027545, "learning_rate": 5.807040485833248e-06, "loss": 0.7461, "step": 7264 }, { "epoch": 0.47, "grad_norm": 2.122343873603796, "learning_rate": 5.806017504496055e-06, "loss": 0.7921, "step": 7265 }, { "epoch": 0.47, "grad_norm": 1.9698131173760063, "learning_rate": 5.804994488517922e-06, "loss": 0.7486, "step": 7266 }, { "epoch": 0.47, "grad_norm": 1.8979939440015878, "learning_rate": 5.803971437942819e-06, "loss": 0.7236, "step": 7267 }, { "epoch": 0.47, "grad_norm": 1.6382835213633058, "learning_rate": 5.8029483528147136e-06, "loss": 0.817, "step": 7268 }, { "epoch": 0.47, "grad_norm": 1.8497534236507414, "learning_rate": 5.801925233177574e-06, "loss": 0.7496, "step": 7269 }, { "epoch": 0.47, "grad_norm": 1.7316160677762855, "learning_rate": 5.8009020790753735e-06, "loss": 0.6521, "step": 7270 }, { "epoch": 0.47, "grad_norm": 1.0832618305738901, "learning_rate": 5.799878890552083e-06, "loss": 0.7001, "step": 7271 }, { "epoch": 0.47, "grad_norm": 1.4878247606320112, "learning_rate": 5.798855667651681e-06, "loss": 0.6379, "step": 7272 }, { "epoch": 0.47, "grad_norm": 1.678071382545607, "learning_rate": 5.79783241041814e-06, "loss": 0.8577, "step": 7273 }, { "epoch": 0.47, "grad_norm": 1.7781734709943866, "learning_rate": 5.796809118895437e-06, "loss": 0.7519, "step": 7274 }, { "epoch": 0.47, "grad_norm": 1.1194359365844138, "learning_rate": 5.795785793127554e-06, "loss": 0.7292, "step": 7275 }, { "epoch": 0.47, "grad_norm": 1.4632041741357082, "learning_rate": 5.794762433158469e-06, "loss": 0.717, "step": 7276 }, { "epoch": 0.47, "grad_norm": 1.54195897712926, "learning_rate": 5.793739039032166e-06, "loss": 0.6573, "step": 7277 }, { "epoch": 0.47, "grad_norm": 1.5802656595251297, "learning_rate": 5.7927156107926264e-06, "loss": 0.6885, "step": 7278 }, { "epoch": 0.47, "grad_norm": 1.7444306991698844, "learning_rate": 5.791692148483834e-06, "loss": 0.7851, "step": 7279 }, { "epoch": 0.47, "grad_norm": 1.5367869990084555, "learning_rate": 5.790668652149778e-06, "loss": 0.7225, "step": 7280 }, { "epoch": 0.47, "grad_norm": 1.66702548056181, "learning_rate": 5.789645121834445e-06, "loss": 0.7465, "step": 7281 }, { "epoch": 0.47, "grad_norm": 1.9309575249215747, "learning_rate": 5.788621557581824e-06, "loss": 0.6463, "step": 7282 }, { "epoch": 0.47, "grad_norm": 1.8544313983411886, "learning_rate": 5.7875979594359045e-06, "loss": 0.8045, "step": 7283 }, { "epoch": 0.47, "grad_norm": 2.134457154924678, "learning_rate": 5.78657432744068e-06, "loss": 0.8083, "step": 7284 }, { "epoch": 0.47, "grad_norm": 1.4592295820957575, "learning_rate": 5.785550661640145e-06, "loss": 0.7565, "step": 7285 }, { "epoch": 0.47, "grad_norm": 1.5668740951576077, "learning_rate": 5.784526962078292e-06, "loss": 0.739, "step": 7286 }, { "epoch": 0.47, "grad_norm": 1.5766091231772033, "learning_rate": 5.783503228799119e-06, "loss": 0.8061, "step": 7287 }, { "epoch": 0.47, "grad_norm": 1.6781193614166316, "learning_rate": 5.782479461846624e-06, "loss": 0.7957, "step": 7288 }, { "epoch": 0.47, "grad_norm": 1.5903220965590883, "learning_rate": 5.781455661264805e-06, "loss": 0.7506, "step": 7289 }, { "epoch": 0.47, "grad_norm": 1.4699025234334537, "learning_rate": 5.7804318270976655e-06, "loss": 0.8061, "step": 7290 }, { "epoch": 0.47, "grad_norm": 1.5198047668221348, "learning_rate": 5.779407959389205e-06, "loss": 0.6957, "step": 7291 }, { "epoch": 0.47, "grad_norm": 1.5241673371821984, "learning_rate": 5.778384058183426e-06, "loss": 0.6993, "step": 7292 }, { "epoch": 0.47, "grad_norm": 1.5779042383989235, "learning_rate": 5.777360123524338e-06, "loss": 0.7114, "step": 7293 }, { "epoch": 0.47, "grad_norm": 1.5481851083449687, "learning_rate": 5.776336155455945e-06, "loss": 0.8013, "step": 7294 }, { "epoch": 0.47, "grad_norm": 1.9774304056441812, "learning_rate": 5.775312154022256e-06, "loss": 0.8108, "step": 7295 }, { "epoch": 0.47, "grad_norm": 2.1188131902094423, "learning_rate": 5.774288119267279e-06, "loss": 0.7954, "step": 7296 }, { "epoch": 0.47, "grad_norm": 1.812534799720651, "learning_rate": 5.773264051235026e-06, "loss": 0.6052, "step": 7297 }, { "epoch": 0.47, "grad_norm": 1.4811738675864796, "learning_rate": 5.772239949969509e-06, "loss": 0.7631, "step": 7298 }, { "epoch": 0.47, "grad_norm": 1.2447133540480213, "learning_rate": 5.771215815514741e-06, "loss": 0.5984, "step": 7299 }, { "epoch": 0.47, "grad_norm": 1.7061604337986194, "learning_rate": 5.770191647914738e-06, "loss": 0.7833, "step": 7300 }, { "epoch": 0.47, "grad_norm": 1.6729334198087003, "learning_rate": 5.7691674472135175e-06, "loss": 0.842, "step": 7301 }, { "epoch": 0.47, "grad_norm": 1.5041938908565249, "learning_rate": 5.768143213455094e-06, "loss": 0.7862, "step": 7302 }, { "epoch": 0.47, "grad_norm": 1.754235457307817, "learning_rate": 5.767118946683491e-06, "loss": 0.7726, "step": 7303 }, { "epoch": 0.47, "grad_norm": 1.6466056364592492, "learning_rate": 5.766094646942728e-06, "loss": 0.8644, "step": 7304 }, { "epoch": 0.47, "grad_norm": 1.7638287475405134, "learning_rate": 5.765070314276826e-06, "loss": 0.8839, "step": 7305 }, { "epoch": 0.47, "grad_norm": 1.879184746907512, "learning_rate": 5.764045948729809e-06, "loss": 0.6455, "step": 7306 }, { "epoch": 0.47, "grad_norm": 1.6412514806310043, "learning_rate": 5.763021550345703e-06, "loss": 0.756, "step": 7307 }, { "epoch": 0.47, "grad_norm": 1.9273090696285062, "learning_rate": 5.761997119168535e-06, "loss": 0.7961, "step": 7308 }, { "epoch": 0.47, "grad_norm": 1.663738978599078, "learning_rate": 5.760972655242332e-06, "loss": 0.7879, "step": 7309 }, { "epoch": 0.47, "grad_norm": 1.5563993405480225, "learning_rate": 5.7599481586111225e-06, "loss": 0.6678, "step": 7310 }, { "epoch": 0.47, "grad_norm": 1.6924737445581328, "learning_rate": 5.758923629318938e-06, "loss": 0.7585, "step": 7311 }, { "epoch": 0.47, "grad_norm": 2.1309114379385754, "learning_rate": 5.757899067409811e-06, "loss": 0.7915, "step": 7312 }, { "epoch": 0.47, "grad_norm": 1.7237447082977457, "learning_rate": 5.756874472927775e-06, "loss": 0.5911, "step": 7313 }, { "epoch": 0.47, "grad_norm": 1.6521812686501811, "learning_rate": 5.7558498459168635e-06, "loss": 0.7246, "step": 7314 }, { "epoch": 0.47, "grad_norm": 1.8651048977804097, "learning_rate": 5.7548251864211135e-06, "loss": 0.7655, "step": 7315 }, { "epoch": 0.47, "grad_norm": 1.9040927336184115, "learning_rate": 5.753800494484565e-06, "loss": 0.639, "step": 7316 }, { "epoch": 0.47, "grad_norm": 3.864466171474196, "learning_rate": 5.752775770151254e-06, "loss": 0.7056, "step": 7317 }, { "epoch": 0.47, "grad_norm": 1.1553596623489095, "learning_rate": 5.751751013465223e-06, "loss": 0.6668, "step": 7318 }, { "epoch": 0.47, "grad_norm": 1.5229925598372165, "learning_rate": 5.750726224470513e-06, "loss": 0.7787, "step": 7319 }, { "epoch": 0.47, "grad_norm": 1.5057003338346329, "learning_rate": 5.749701403211166e-06, "loss": 0.6404, "step": 7320 }, { "epoch": 0.47, "grad_norm": 1.1627141765616493, "learning_rate": 5.748676549731229e-06, "loss": 0.641, "step": 7321 }, { "epoch": 0.47, "grad_norm": 2.172956552997636, "learning_rate": 5.747651664074746e-06, "loss": 0.7423, "step": 7322 }, { "epoch": 0.47, "grad_norm": 1.4229092121232165, "learning_rate": 5.746626746285767e-06, "loss": 0.6446, "step": 7323 }, { "epoch": 0.47, "grad_norm": 1.5490977770553214, "learning_rate": 5.745601796408339e-06, "loss": 0.9012, "step": 7324 }, { "epoch": 0.47, "grad_norm": 1.6936527191766542, "learning_rate": 5.744576814486512e-06, "loss": 0.8453, "step": 7325 }, { "epoch": 0.47, "grad_norm": 1.2123511019729427, "learning_rate": 5.74355180056434e-06, "loss": 0.7787, "step": 7326 }, { "epoch": 0.47, "grad_norm": 1.6608389197613735, "learning_rate": 5.742526754685872e-06, "loss": 0.7689, "step": 7327 }, { "epoch": 0.47, "grad_norm": 1.4949775874713294, "learning_rate": 5.741501676895164e-06, "loss": 0.6003, "step": 7328 }, { "epoch": 0.47, "grad_norm": 1.696812003285958, "learning_rate": 5.740476567236273e-06, "loss": 0.8111, "step": 7329 }, { "epoch": 0.47, "grad_norm": 1.026999113880503, "learning_rate": 5.739451425753254e-06, "loss": 0.5999, "step": 7330 }, { "epoch": 0.47, "grad_norm": 1.8755944025001148, "learning_rate": 5.738426252490168e-06, "loss": 0.6871, "step": 7331 }, { "epoch": 0.47, "grad_norm": 1.5206546875596674, "learning_rate": 5.737401047491074e-06, "loss": 0.7551, "step": 7332 }, { "epoch": 0.47, "grad_norm": 1.6650422629394885, "learning_rate": 5.73637581080003e-06, "loss": 0.7709, "step": 7333 }, { "epoch": 0.47, "grad_norm": 1.6239936638574, "learning_rate": 5.735350542461103e-06, "loss": 0.7606, "step": 7334 }, { "epoch": 0.47, "grad_norm": 1.0135124113540683, "learning_rate": 5.734325242518355e-06, "loss": 0.6467, "step": 7335 }, { "epoch": 0.47, "grad_norm": 1.976566956935353, "learning_rate": 5.733299911015848e-06, "loss": 0.8428, "step": 7336 }, { "epoch": 0.47, "grad_norm": 1.4760716558792135, "learning_rate": 5.732274547997655e-06, "loss": 0.7394, "step": 7337 }, { "epoch": 0.47, "grad_norm": 1.6875506446380444, "learning_rate": 5.73124915350784e-06, "loss": 0.7584, "step": 7338 }, { "epoch": 0.47, "grad_norm": 1.4048781479447388, "learning_rate": 5.730223727590472e-06, "loss": 0.6665, "step": 7339 }, { "epoch": 0.47, "grad_norm": 1.4359612863805415, "learning_rate": 5.7291982702896234e-06, "loss": 0.6833, "step": 7340 }, { "epoch": 0.47, "grad_norm": 1.2464346356811924, "learning_rate": 5.728172781649365e-06, "loss": 0.7945, "step": 7341 }, { "epoch": 0.47, "grad_norm": 2.0040887990305136, "learning_rate": 5.72714726171377e-06, "loss": 0.8279, "step": 7342 }, { "epoch": 0.47, "grad_norm": 1.757271724701797, "learning_rate": 5.726121710526914e-06, "loss": 0.7252, "step": 7343 }, { "epoch": 0.47, "grad_norm": 1.9322374610464466, "learning_rate": 5.725096128132873e-06, "loss": 0.7894, "step": 7344 }, { "epoch": 0.47, "grad_norm": 1.5759831470218777, "learning_rate": 5.7240705145757244e-06, "loss": 0.7518, "step": 7345 }, { "epoch": 0.47, "grad_norm": 1.4610246967080858, "learning_rate": 5.7230448698995466e-06, "loss": 0.7295, "step": 7346 }, { "epoch": 0.47, "grad_norm": 1.486083826365748, "learning_rate": 5.722019194148419e-06, "loss": 0.6915, "step": 7347 }, { "epoch": 0.47, "grad_norm": 1.6405202190562145, "learning_rate": 5.7209934873664245e-06, "loss": 0.7683, "step": 7348 }, { "epoch": 0.47, "grad_norm": 1.7644630406905855, "learning_rate": 5.719967749597644e-06, "loss": 0.922, "step": 7349 }, { "epoch": 0.47, "grad_norm": 1.7291289077252512, "learning_rate": 5.718941980886162e-06, "loss": 0.8175, "step": 7350 }, { "epoch": 0.47, "grad_norm": 1.8376576915414806, "learning_rate": 5.717916181276065e-06, "loss": 0.6927, "step": 7351 }, { "epoch": 0.47, "grad_norm": 1.6547407706930075, "learning_rate": 5.716890350811439e-06, "loss": 0.6442, "step": 7352 }, { "epoch": 0.47, "grad_norm": 1.6188609674663932, "learning_rate": 5.715864489536373e-06, "loss": 0.8526, "step": 7353 }, { "epoch": 0.47, "grad_norm": 1.6476610529999933, "learning_rate": 5.714838597494955e-06, "loss": 0.699, "step": 7354 }, { "epoch": 0.47, "grad_norm": 1.0440217638749987, "learning_rate": 5.713812674731276e-06, "loss": 0.5891, "step": 7355 }, { "epoch": 0.47, "grad_norm": 1.8504192041033733, "learning_rate": 5.712786721289428e-06, "loss": 0.75, "step": 7356 }, { "epoch": 0.47, "grad_norm": 1.461530763791528, "learning_rate": 5.711760737213504e-06, "loss": 0.7651, "step": 7357 }, { "epoch": 0.47, "grad_norm": 1.9362687150756535, "learning_rate": 5.7107347225476e-06, "loss": 0.8349, "step": 7358 }, { "epoch": 0.47, "grad_norm": 1.5893450994520113, "learning_rate": 5.70970867733581e-06, "loss": 0.8269, "step": 7359 }, { "epoch": 0.47, "grad_norm": 1.6613654369438857, "learning_rate": 5.708682601622233e-06, "loss": 0.7276, "step": 7360 }, { "epoch": 0.47, "grad_norm": 1.6372060399032047, "learning_rate": 5.707656495450969e-06, "loss": 0.7004, "step": 7361 }, { "epoch": 0.47, "grad_norm": 1.7158881558974919, "learning_rate": 5.706630358866113e-06, "loss": 0.7174, "step": 7362 }, { "epoch": 0.47, "grad_norm": 1.157864727188121, "learning_rate": 5.70560419191177e-06, "loss": 0.7437, "step": 7363 }, { "epoch": 0.47, "grad_norm": 1.5617086456282547, "learning_rate": 5.7045779946320416e-06, "loss": 0.5848, "step": 7364 }, { "epoch": 0.47, "grad_norm": 1.8497820260247682, "learning_rate": 5.7035517670710304e-06, "loss": 0.7284, "step": 7365 }, { "epoch": 0.47, "grad_norm": 1.1621779192429142, "learning_rate": 5.702525509272845e-06, "loss": 0.6327, "step": 7366 }, { "epoch": 0.47, "grad_norm": 1.670901577125208, "learning_rate": 5.701499221281587e-06, "loss": 0.8402, "step": 7367 }, { "epoch": 0.47, "grad_norm": 1.569264063973293, "learning_rate": 5.700472903141368e-06, "loss": 0.7069, "step": 7368 }, { "epoch": 0.47, "grad_norm": 1.3114324769437127, "learning_rate": 5.699446554896294e-06, "loss": 0.715, "step": 7369 }, { "epoch": 0.47, "grad_norm": 1.6659178047103849, "learning_rate": 5.698420176590479e-06, "loss": 0.6592, "step": 7370 }, { "epoch": 0.47, "grad_norm": 1.6439602283597132, "learning_rate": 5.697393768268031e-06, "loss": 0.7417, "step": 7371 }, { "epoch": 0.47, "grad_norm": 1.6820567464968692, "learning_rate": 5.696367329973064e-06, "loss": 0.7626, "step": 7372 }, { "epoch": 0.47, "grad_norm": 2.009874706836018, "learning_rate": 5.695340861749693e-06, "loss": 0.7081, "step": 7373 }, { "epoch": 0.47, "grad_norm": 1.901643776500633, "learning_rate": 5.694314363642033e-06, "loss": 0.7037, "step": 7374 }, { "epoch": 0.47, "grad_norm": 1.0526127437416386, "learning_rate": 5.6932878356942005e-06, "loss": 0.5298, "step": 7375 }, { "epoch": 0.47, "grad_norm": 1.0011040409145802, "learning_rate": 5.692261277950313e-06, "loss": 0.6114, "step": 7376 }, { "epoch": 0.47, "grad_norm": 1.1840696039121223, "learning_rate": 5.691234690454491e-06, "loss": 0.7214, "step": 7377 }, { "epoch": 0.47, "grad_norm": 1.1642611139627645, "learning_rate": 5.690208073250855e-06, "loss": 0.6603, "step": 7378 }, { "epoch": 0.47, "grad_norm": 1.992590695525721, "learning_rate": 5.6891814263835245e-06, "loss": 0.7584, "step": 7379 }, { "epoch": 0.47, "grad_norm": 1.5700126665036978, "learning_rate": 5.688154749896626e-06, "loss": 0.7195, "step": 7380 }, { "epoch": 0.47, "grad_norm": 1.2790995227263209, "learning_rate": 5.687128043834282e-06, "loss": 0.6754, "step": 7381 }, { "epoch": 0.47, "grad_norm": 1.4771759279209447, "learning_rate": 5.686101308240619e-06, "loss": 0.6678, "step": 7382 }, { "epoch": 0.47, "grad_norm": 1.886512351050851, "learning_rate": 5.685074543159763e-06, "loss": 0.8272, "step": 7383 }, { "epoch": 0.47, "grad_norm": 1.6534361400571802, "learning_rate": 5.684047748635844e-06, "loss": 0.7209, "step": 7384 }, { "epoch": 0.47, "grad_norm": 1.9306692073280782, "learning_rate": 5.683020924712987e-06, "loss": 0.7971, "step": 7385 }, { "epoch": 0.47, "grad_norm": 2.265277472274094, "learning_rate": 5.681994071435327e-06, "loss": 0.7965, "step": 7386 }, { "epoch": 0.47, "grad_norm": 1.850646646579589, "learning_rate": 5.680967188846996e-06, "loss": 0.7232, "step": 7387 }, { "epoch": 0.47, "grad_norm": 1.7329737816470658, "learning_rate": 5.679940276992127e-06, "loss": 0.795, "step": 7388 }, { "epoch": 0.47, "grad_norm": 1.717298999298238, "learning_rate": 5.678913335914852e-06, "loss": 0.8307, "step": 7389 }, { "epoch": 0.47, "grad_norm": 1.4912456588082605, "learning_rate": 5.67788636565931e-06, "loss": 0.6721, "step": 7390 }, { "epoch": 0.47, "grad_norm": 1.5260626077468462, "learning_rate": 5.676859366269635e-06, "loss": 0.6648, "step": 7391 }, { "epoch": 0.47, "grad_norm": 6.706232621206631, "learning_rate": 5.675832337789967e-06, "loss": 0.847, "step": 7392 }, { "epoch": 0.47, "grad_norm": 2.1082881357790426, "learning_rate": 5.6748052802644445e-06, "loss": 0.6612, "step": 7393 }, { "epoch": 0.47, "grad_norm": 1.697539034775054, "learning_rate": 5.67377819373721e-06, "loss": 0.6598, "step": 7394 }, { "epoch": 0.47, "grad_norm": 1.6611612971430292, "learning_rate": 5.672751078252403e-06, "loss": 0.7715, "step": 7395 }, { "epoch": 0.47, "grad_norm": 1.639333835662625, "learning_rate": 5.671723933854171e-06, "loss": 0.6922, "step": 7396 }, { "epoch": 0.47, "grad_norm": 1.5702744995771627, "learning_rate": 5.670696760586654e-06, "loss": 0.7054, "step": 7397 }, { "epoch": 0.47, "grad_norm": 1.6510637354326625, "learning_rate": 5.669669558494e-06, "loss": 0.8347, "step": 7398 }, { "epoch": 0.47, "grad_norm": 1.607894754213148, "learning_rate": 5.668642327620356e-06, "loss": 0.7216, "step": 7399 }, { "epoch": 0.47, "grad_norm": 1.801431124145852, "learning_rate": 5.6676150680098685e-06, "loss": 0.7253, "step": 7400 }, { "epoch": 0.47, "grad_norm": 1.5942250538751168, "learning_rate": 5.66658777970669e-06, "loss": 0.7526, "step": 7401 }, { "epoch": 0.47, "grad_norm": 2.071920090407781, "learning_rate": 5.665560462754968e-06, "loss": 0.7403, "step": 7402 }, { "epoch": 0.47, "grad_norm": 1.5132822612472359, "learning_rate": 5.664533117198856e-06, "loss": 0.7459, "step": 7403 }, { "epoch": 0.47, "grad_norm": 1.9370497884675015, "learning_rate": 5.663505743082507e-06, "loss": 0.5773, "step": 7404 }, { "epoch": 0.47, "grad_norm": 1.2128552207937247, "learning_rate": 5.662478340450076e-06, "loss": 0.7009, "step": 7405 }, { "epoch": 0.47, "grad_norm": 2.8186069175731876, "learning_rate": 5.661450909345718e-06, "loss": 0.6606, "step": 7406 }, { "epoch": 0.47, "grad_norm": 1.4411053219608474, "learning_rate": 5.660423449813588e-06, "loss": 0.6017, "step": 7407 }, { "epoch": 0.47, "grad_norm": 1.5915494876136205, "learning_rate": 5.659395961897848e-06, "loss": 0.7095, "step": 7408 }, { "epoch": 0.47, "grad_norm": 1.6319681914713553, "learning_rate": 5.658368445642655e-06, "loss": 0.7084, "step": 7409 }, { "epoch": 0.47, "grad_norm": 1.8809535630191423, "learning_rate": 5.657340901092169e-06, "loss": 0.7212, "step": 7410 }, { "epoch": 0.47, "grad_norm": 2.118439004355154, "learning_rate": 5.656313328290552e-06, "loss": 1.0453, "step": 7411 }, { "epoch": 0.47, "grad_norm": 1.6313697799846418, "learning_rate": 5.655285727281968e-06, "loss": 0.7881, "step": 7412 }, { "epoch": 0.47, "grad_norm": 1.4882217445936992, "learning_rate": 5.65425809811058e-06, "loss": 0.68, "step": 7413 }, { "epoch": 0.47, "grad_norm": 1.6018801197746153, "learning_rate": 5.653230440820554e-06, "loss": 0.6994, "step": 7414 }, { "epoch": 0.47, "grad_norm": 1.1269978585162854, "learning_rate": 5.652202755456055e-06, "loss": 0.6692, "step": 7415 }, { "epoch": 0.47, "grad_norm": 1.5923537231344718, "learning_rate": 5.651175042061254e-06, "loss": 0.6228, "step": 7416 }, { "epoch": 0.47, "grad_norm": 1.5070611287413684, "learning_rate": 5.650147300680318e-06, "loss": 0.6566, "step": 7417 }, { "epoch": 0.47, "grad_norm": 1.1866519113782696, "learning_rate": 5.649119531357415e-06, "loss": 0.682, "step": 7418 }, { "epoch": 0.47, "grad_norm": 1.5854950478532348, "learning_rate": 5.6480917341367205e-06, "loss": 0.7844, "step": 7419 }, { "epoch": 0.47, "grad_norm": 0.9211922885429141, "learning_rate": 5.647063909062404e-06, "loss": 0.6677, "step": 7420 }, { "epoch": 0.47, "grad_norm": 1.592117361654128, "learning_rate": 5.646036056178641e-06, "loss": 0.6702, "step": 7421 }, { "epoch": 0.48, "grad_norm": 1.0680826510272903, "learning_rate": 5.645008175529605e-06, "loss": 0.7063, "step": 7422 }, { "epoch": 0.48, "grad_norm": 1.597175338331696, "learning_rate": 5.643980267159474e-06, "loss": 0.6496, "step": 7423 }, { "epoch": 0.48, "grad_norm": 1.8028021040085154, "learning_rate": 5.642952331112423e-06, "loss": 0.7491, "step": 7424 }, { "epoch": 0.48, "grad_norm": 1.453411953393637, "learning_rate": 5.641924367432634e-06, "loss": 0.7661, "step": 7425 }, { "epoch": 0.48, "grad_norm": 2.0201692102034157, "learning_rate": 5.640896376164283e-06, "loss": 0.6987, "step": 7426 }, { "epoch": 0.48, "grad_norm": 1.8276481886440388, "learning_rate": 5.639868357351554e-06, "loss": 0.7948, "step": 7427 }, { "epoch": 0.48, "grad_norm": 1.5791600783457753, "learning_rate": 5.638840311038625e-06, "loss": 0.7463, "step": 7428 }, { "epoch": 0.48, "grad_norm": 1.5477264792509295, "learning_rate": 5.637812237269683e-06, "loss": 0.7263, "step": 7429 }, { "epoch": 0.48, "grad_norm": 1.6145966958297209, "learning_rate": 5.6367841360889105e-06, "loss": 0.7869, "step": 7430 }, { "epoch": 0.48, "grad_norm": 1.911208776512783, "learning_rate": 5.635756007540495e-06, "loss": 0.8229, "step": 7431 }, { "epoch": 0.48, "grad_norm": 1.209127319048807, "learning_rate": 5.634727851668622e-06, "loss": 0.6543, "step": 7432 }, { "epoch": 0.48, "grad_norm": 1.7076281132177715, "learning_rate": 5.6336996685174795e-06, "loss": 0.7672, "step": 7433 }, { "epoch": 0.48, "grad_norm": 1.5576564864903373, "learning_rate": 5.632671458131258e-06, "loss": 0.6741, "step": 7434 }, { "epoch": 0.48, "grad_norm": 1.7841253119833906, "learning_rate": 5.631643220554146e-06, "loss": 0.7807, "step": 7435 }, { "epoch": 0.48, "grad_norm": 1.109146017199209, "learning_rate": 5.630614955830334e-06, "loss": 0.61, "step": 7436 }, { "epoch": 0.48, "grad_norm": 1.3461375473928607, "learning_rate": 5.629586664004018e-06, "loss": 0.4998, "step": 7437 }, { "epoch": 0.48, "grad_norm": 1.6377889587173957, "learning_rate": 5.628558345119389e-06, "loss": 0.7444, "step": 7438 }, { "epoch": 0.48, "grad_norm": 2.145194928506426, "learning_rate": 5.6275299992206444e-06, "loss": 0.801, "step": 7439 }, { "epoch": 0.48, "grad_norm": 1.802651130327298, "learning_rate": 5.626501626351979e-06, "loss": 0.9023, "step": 7440 }, { "epoch": 0.48, "grad_norm": 2.074100400245031, "learning_rate": 5.625473226557588e-06, "loss": 0.6439, "step": 7441 }, { "epoch": 0.48, "grad_norm": 1.6592213344414846, "learning_rate": 5.624444799881674e-06, "loss": 0.6909, "step": 7442 }, { "epoch": 0.48, "grad_norm": 1.6404408455529882, "learning_rate": 5.623416346368434e-06, "loss": 0.6727, "step": 7443 }, { "epoch": 0.48, "grad_norm": 1.5959975207062724, "learning_rate": 5.62238786606207e-06, "loss": 0.7531, "step": 7444 }, { "epoch": 0.48, "grad_norm": 1.8066450989046139, "learning_rate": 5.621359359006784e-06, "loss": 0.7322, "step": 7445 }, { "epoch": 0.48, "grad_norm": 1.7432242587035964, "learning_rate": 5.620330825246777e-06, "loss": 0.7356, "step": 7446 }, { "epoch": 0.48, "grad_norm": 1.1308201300496945, "learning_rate": 5.6193022648262555e-06, "loss": 0.6781, "step": 7447 }, { "epoch": 0.48, "grad_norm": 1.672376616575124, "learning_rate": 5.618273677789424e-06, "loss": 0.8202, "step": 7448 }, { "epoch": 0.48, "grad_norm": 1.1810223236327388, "learning_rate": 5.617245064180488e-06, "loss": 0.6608, "step": 7449 }, { "epoch": 0.48, "grad_norm": 1.1184061111033952, "learning_rate": 5.616216424043657e-06, "loss": 0.5457, "step": 7450 }, { "epoch": 0.48, "grad_norm": 1.5859338914729586, "learning_rate": 5.615187757423137e-06, "loss": 0.8483, "step": 7451 }, { "epoch": 0.48, "grad_norm": 1.7838188083945494, "learning_rate": 5.614159064363142e-06, "loss": 0.8696, "step": 7452 }, { "epoch": 0.48, "grad_norm": 1.6180358056529316, "learning_rate": 5.613130344907881e-06, "loss": 0.7856, "step": 7453 }, { "epoch": 0.48, "grad_norm": 1.5222204991926753, "learning_rate": 5.612101599101566e-06, "loss": 0.6756, "step": 7454 }, { "epoch": 0.48, "grad_norm": 2.360115639143661, "learning_rate": 5.611072826988409e-06, "loss": 0.7172, "step": 7455 }, { "epoch": 0.48, "grad_norm": 1.99210535120698, "learning_rate": 5.610044028612628e-06, "loss": 0.8011, "step": 7456 }, { "epoch": 0.48, "grad_norm": 1.5908029424554377, "learning_rate": 5.609015204018435e-06, "loss": 0.7233, "step": 7457 }, { "epoch": 0.48, "grad_norm": 1.0379183242657917, "learning_rate": 5.607986353250049e-06, "loss": 0.6699, "step": 7458 }, { "epoch": 0.48, "grad_norm": 1.0534906287645953, "learning_rate": 5.6069574763516866e-06, "loss": 0.5545, "step": 7459 }, { "epoch": 0.48, "grad_norm": 1.6753740303199014, "learning_rate": 5.605928573367568e-06, "loss": 0.7947, "step": 7460 }, { "epoch": 0.48, "grad_norm": 2.0897085869867076, "learning_rate": 5.604899644341911e-06, "loss": 0.6308, "step": 7461 }, { "epoch": 0.48, "grad_norm": 1.0509922425329141, "learning_rate": 5.60387068931894e-06, "loss": 0.665, "step": 7462 }, { "epoch": 0.48, "grad_norm": 1.5536344280590626, "learning_rate": 5.602841708342876e-06, "loss": 0.7961, "step": 7463 }, { "epoch": 0.48, "grad_norm": 1.4823459065100908, "learning_rate": 5.6018127014579404e-06, "loss": 0.7427, "step": 7464 }, { "epoch": 0.48, "grad_norm": 1.649405856212933, "learning_rate": 5.60078366870836e-06, "loss": 0.6437, "step": 7465 }, { "epoch": 0.48, "grad_norm": 1.8667951928887134, "learning_rate": 5.599754610138361e-06, "loss": 0.8704, "step": 7466 }, { "epoch": 0.48, "grad_norm": 1.1943429948114193, "learning_rate": 5.598725525792168e-06, "loss": 0.7129, "step": 7467 }, { "epoch": 0.48, "grad_norm": 1.662529619386252, "learning_rate": 5.59769641571401e-06, "loss": 0.7018, "step": 7468 }, { "epoch": 0.48, "grad_norm": 1.6344026097440723, "learning_rate": 5.596667279948117e-06, "loss": 0.6724, "step": 7469 }, { "epoch": 0.48, "grad_norm": 1.592047838541969, "learning_rate": 5.595638118538718e-06, "loss": 0.6867, "step": 7470 }, { "epoch": 0.48, "grad_norm": 1.8940534805508447, "learning_rate": 5.594608931530044e-06, "loss": 0.7911, "step": 7471 }, { "epoch": 0.48, "grad_norm": 2.296969904687042, "learning_rate": 5.593579718966325e-06, "loss": 0.713, "step": 7472 }, { "epoch": 0.48, "grad_norm": 1.6367577083057379, "learning_rate": 5.592550480891801e-06, "loss": 0.6697, "step": 7473 }, { "epoch": 0.48, "grad_norm": 1.3501116015136463, "learning_rate": 5.5915212173507e-06, "loss": 0.6336, "step": 7474 }, { "epoch": 0.48, "grad_norm": 1.6830201268746436, "learning_rate": 5.590491928387261e-06, "loss": 0.7097, "step": 7475 }, { "epoch": 0.48, "grad_norm": 1.7020646205062262, "learning_rate": 5.58946261404572e-06, "loss": 0.7367, "step": 7476 }, { "epoch": 0.48, "grad_norm": 1.5278579631161515, "learning_rate": 5.588433274370314e-06, "loss": 0.8287, "step": 7477 }, { "epoch": 0.48, "grad_norm": 1.0976502146979836, "learning_rate": 5.587403909405281e-06, "loss": 0.5519, "step": 7478 }, { "epoch": 0.48, "grad_norm": 1.4914768591527034, "learning_rate": 5.586374519194863e-06, "loss": 0.8246, "step": 7479 }, { "epoch": 0.48, "grad_norm": 1.542198121323656, "learning_rate": 5.585345103783302e-06, "loss": 0.7812, "step": 7480 }, { "epoch": 0.48, "grad_norm": 1.1515109160745483, "learning_rate": 5.584315663214836e-06, "loss": 0.6504, "step": 7481 }, { "epoch": 0.48, "grad_norm": 2.2186272427628726, "learning_rate": 5.583286197533713e-06, "loss": 0.7585, "step": 7482 }, { "epoch": 0.48, "grad_norm": 1.8415447065070487, "learning_rate": 5.582256706784174e-06, "loss": 0.8294, "step": 7483 }, { "epoch": 0.48, "grad_norm": 1.9126688467817359, "learning_rate": 5.581227191010465e-06, "loss": 0.7807, "step": 7484 }, { "epoch": 0.48, "grad_norm": 1.5970793827620957, "learning_rate": 5.580197650256832e-06, "loss": 0.7573, "step": 7485 }, { "epoch": 0.48, "grad_norm": 1.7319029798326233, "learning_rate": 5.5791680845675245e-06, "loss": 0.6738, "step": 7486 }, { "epoch": 0.48, "grad_norm": 1.5939859699535257, "learning_rate": 5.578138493986788e-06, "loss": 0.6916, "step": 7487 }, { "epoch": 0.48, "grad_norm": 1.916341659359769, "learning_rate": 5.577108878558875e-06, "loss": 0.8233, "step": 7488 }, { "epoch": 0.48, "grad_norm": 1.6574728903853586, "learning_rate": 5.576079238328035e-06, "loss": 0.7222, "step": 7489 }, { "epoch": 0.48, "grad_norm": 1.668730066501389, "learning_rate": 5.57504957333852e-06, "loss": 0.8144, "step": 7490 }, { "epoch": 0.48, "grad_norm": 1.7365045319394035, "learning_rate": 5.574019883634582e-06, "loss": 0.7129, "step": 7491 }, { "epoch": 0.48, "grad_norm": 1.6845911826564262, "learning_rate": 5.572990169260477e-06, "loss": 0.6717, "step": 7492 }, { "epoch": 0.48, "grad_norm": 1.6487429784126637, "learning_rate": 5.571960430260457e-06, "loss": 0.6282, "step": 7493 }, { "epoch": 0.48, "grad_norm": 1.8881156132494905, "learning_rate": 5.570930666678781e-06, "loss": 0.8152, "step": 7494 }, { "epoch": 0.48, "grad_norm": 1.1253584376238115, "learning_rate": 5.569900878559704e-06, "loss": 0.5564, "step": 7495 }, { "epoch": 0.48, "grad_norm": 1.614844958277035, "learning_rate": 5.568871065947483e-06, "loss": 0.8008, "step": 7496 }, { "epoch": 0.48, "grad_norm": 1.6194319156895869, "learning_rate": 5.567841228886381e-06, "loss": 0.6839, "step": 7497 }, { "epoch": 0.48, "grad_norm": 1.3454951932989698, "learning_rate": 5.566811367420656e-06, "loss": 0.6781, "step": 7498 }, { "epoch": 0.48, "grad_norm": 2.0573816336036246, "learning_rate": 5.56578148159457e-06, "loss": 0.6867, "step": 7499 }, { "epoch": 0.48, "grad_norm": 2.302095043894979, "learning_rate": 5.564751571452383e-06, "loss": 0.6965, "step": 7500 }, { "epoch": 0.48, "grad_norm": 1.5685280802241564, "learning_rate": 5.5637216370383615e-06, "loss": 0.7075, "step": 7501 }, { "epoch": 0.48, "grad_norm": 1.6678778528002336, "learning_rate": 5.562691678396768e-06, "loss": 0.7289, "step": 7502 }, { "epoch": 0.48, "grad_norm": 1.6937870549015577, "learning_rate": 5.561661695571869e-06, "loss": 0.7368, "step": 7503 }, { "epoch": 0.48, "grad_norm": 1.5433399255346234, "learning_rate": 5.56063168860793e-06, "loss": 0.6265, "step": 7504 }, { "epoch": 0.48, "grad_norm": 1.5102806591336881, "learning_rate": 5.559601657549219e-06, "loss": 0.7242, "step": 7505 }, { "epoch": 0.48, "grad_norm": 1.5673544974993618, "learning_rate": 5.558571602440006e-06, "loss": 0.7289, "step": 7506 }, { "epoch": 0.48, "grad_norm": 1.7985172125314222, "learning_rate": 5.557541523324558e-06, "loss": 0.8171, "step": 7507 }, { "epoch": 0.48, "grad_norm": 1.5517476484400103, "learning_rate": 5.556511420247146e-06, "loss": 0.7524, "step": 7508 }, { "epoch": 0.48, "grad_norm": 1.562252359655425, "learning_rate": 5.555481293252044e-06, "loss": 0.7818, "step": 7509 }, { "epoch": 0.48, "grad_norm": 1.9385238324890322, "learning_rate": 5.554451142383524e-06, "loss": 0.8783, "step": 7510 }, { "epoch": 0.48, "grad_norm": 1.7763166372975734, "learning_rate": 5.553420967685857e-06, "loss": 0.708, "step": 7511 }, { "epoch": 0.48, "grad_norm": 1.8039379709478305, "learning_rate": 5.552390769203322e-06, "loss": 0.7889, "step": 7512 }, { "epoch": 0.48, "grad_norm": 1.6803541251118848, "learning_rate": 5.551360546980191e-06, "loss": 0.731, "step": 7513 }, { "epoch": 0.48, "grad_norm": 1.519955292919611, "learning_rate": 5.550330301060741e-06, "loss": 0.6481, "step": 7514 }, { "epoch": 0.48, "grad_norm": 0.9791283427300413, "learning_rate": 5.549300031489252e-06, "loss": 0.6439, "step": 7515 }, { "epoch": 0.48, "grad_norm": 1.6095941061709647, "learning_rate": 5.548269738310002e-06, "loss": 0.7078, "step": 7516 }, { "epoch": 0.48, "grad_norm": 1.1367108074954246, "learning_rate": 5.547239421567272e-06, "loss": 0.6504, "step": 7517 }, { "epoch": 0.48, "grad_norm": 1.9786202598821008, "learning_rate": 5.546209081305341e-06, "loss": 0.6734, "step": 7518 }, { "epoch": 0.48, "grad_norm": 1.649889736213418, "learning_rate": 5.545178717568491e-06, "loss": 0.7696, "step": 7519 }, { "epoch": 0.48, "grad_norm": 1.6133351743699795, "learning_rate": 5.5441483304010055e-06, "loss": 0.7529, "step": 7520 }, { "epoch": 0.48, "grad_norm": 1.5653449523276948, "learning_rate": 5.5431179198471674e-06, "loss": 0.8236, "step": 7521 }, { "epoch": 0.48, "grad_norm": 1.8746619625450542, "learning_rate": 5.542087485951263e-06, "loss": 0.7554, "step": 7522 }, { "epoch": 0.48, "grad_norm": 1.8580947148149543, "learning_rate": 5.5410570287575775e-06, "loss": 0.8078, "step": 7523 }, { "epoch": 0.48, "grad_norm": 0.9013592474072027, "learning_rate": 5.540026548310397e-06, "loss": 0.6019, "step": 7524 }, { "epoch": 0.48, "grad_norm": 1.8785971901997818, "learning_rate": 5.538996044654011e-06, "loss": 0.5902, "step": 7525 }, { "epoch": 0.48, "grad_norm": 1.510571947595413, "learning_rate": 5.537965517832708e-06, "loss": 0.6338, "step": 7526 }, { "epoch": 0.48, "grad_norm": 1.7516345288572335, "learning_rate": 5.536934967890778e-06, "loss": 0.8307, "step": 7527 }, { "epoch": 0.48, "grad_norm": 1.644912064320586, "learning_rate": 5.53590439487251e-06, "loss": 0.7801, "step": 7528 }, { "epoch": 0.48, "grad_norm": 1.611415999280644, "learning_rate": 5.5348737988221964e-06, "loss": 0.7135, "step": 7529 }, { "epoch": 0.48, "grad_norm": 1.8484279075987804, "learning_rate": 5.533843179784133e-06, "loss": 0.6098, "step": 7530 }, { "epoch": 0.48, "grad_norm": 1.9360029662493679, "learning_rate": 5.532812537802611e-06, "loss": 0.7952, "step": 7531 }, { "epoch": 0.48, "grad_norm": 1.8740859543026391, "learning_rate": 5.5317818729219255e-06, "loss": 0.6942, "step": 7532 }, { "epoch": 0.48, "grad_norm": 1.1426208194853829, "learning_rate": 5.530751185186372e-06, "loss": 0.7358, "step": 7533 }, { "epoch": 0.48, "grad_norm": 1.593236350055704, "learning_rate": 5.529720474640248e-06, "loss": 0.6721, "step": 7534 }, { "epoch": 0.48, "grad_norm": 1.6152154901168887, "learning_rate": 5.528689741327851e-06, "loss": 0.7134, "step": 7535 }, { "epoch": 0.48, "grad_norm": 1.886248891792799, "learning_rate": 5.527658985293479e-06, "loss": 0.7464, "step": 7536 }, { "epoch": 0.48, "grad_norm": 1.141303434935055, "learning_rate": 5.526628206581434e-06, "loss": 0.6307, "step": 7537 }, { "epoch": 0.48, "grad_norm": 1.048946892400728, "learning_rate": 5.5255974052360155e-06, "loss": 0.7087, "step": 7538 }, { "epoch": 0.48, "grad_norm": 2.2095887280245528, "learning_rate": 5.5245665813015245e-06, "loss": 0.7587, "step": 7539 }, { "epoch": 0.48, "grad_norm": 1.5481435541446504, "learning_rate": 5.523535734822264e-06, "loss": 0.8015, "step": 7540 }, { "epoch": 0.48, "grad_norm": 1.6852220513397473, "learning_rate": 5.522504865842537e-06, "loss": 0.7664, "step": 7541 }, { "epoch": 0.48, "grad_norm": 1.6997720739545186, "learning_rate": 5.521473974406649e-06, "loss": 0.7254, "step": 7542 }, { "epoch": 0.48, "grad_norm": 1.690978112798288, "learning_rate": 5.520443060558907e-06, "loss": 0.7558, "step": 7543 }, { "epoch": 0.48, "grad_norm": 1.8835419687709256, "learning_rate": 5.519412124343614e-06, "loss": 0.7412, "step": 7544 }, { "epoch": 0.48, "grad_norm": 1.6970443110960844, "learning_rate": 5.51838116580508e-06, "loss": 0.7263, "step": 7545 }, { "epoch": 0.48, "grad_norm": 1.7529102337728422, "learning_rate": 5.517350184987613e-06, "loss": 0.7594, "step": 7546 }, { "epoch": 0.48, "grad_norm": 1.6677512610560397, "learning_rate": 5.5163191819355225e-06, "loss": 0.6824, "step": 7547 }, { "epoch": 0.48, "grad_norm": 1.76475437804292, "learning_rate": 5.515288156693117e-06, "loss": 0.7672, "step": 7548 }, { "epoch": 0.48, "grad_norm": 1.6547340118315426, "learning_rate": 5.514257109304712e-06, "loss": 0.7101, "step": 7549 }, { "epoch": 0.48, "grad_norm": 1.4016006862972559, "learning_rate": 5.513226039814613e-06, "loss": 0.647, "step": 7550 }, { "epoch": 0.48, "grad_norm": 3.3749328652154027, "learning_rate": 5.51219494826714e-06, "loss": 0.8109, "step": 7551 }, { "epoch": 0.48, "grad_norm": 1.3744368058664242, "learning_rate": 5.511163834706603e-06, "loss": 0.5822, "step": 7552 }, { "epoch": 0.48, "grad_norm": 1.8812604117244152, "learning_rate": 5.51013269917732e-06, "loss": 0.7623, "step": 7553 }, { "epoch": 0.48, "grad_norm": 1.5721525892017019, "learning_rate": 5.509101541723605e-06, "loss": 0.6621, "step": 7554 }, { "epoch": 0.48, "grad_norm": 1.5921922887414524, "learning_rate": 5.5080703623897754e-06, "loss": 0.7167, "step": 7555 }, { "epoch": 0.48, "grad_norm": 1.6949649140264902, "learning_rate": 5.50703916122015e-06, "loss": 0.8813, "step": 7556 }, { "epoch": 0.48, "grad_norm": 2.048209451341224, "learning_rate": 5.506007938259045e-06, "loss": 0.6683, "step": 7557 }, { "epoch": 0.48, "grad_norm": 2.026157733239297, "learning_rate": 5.504976693550783e-06, "loss": 0.7474, "step": 7558 }, { "epoch": 0.48, "grad_norm": 1.70179096087602, "learning_rate": 5.5039454271396856e-06, "loss": 0.7813, "step": 7559 }, { "epoch": 0.48, "grad_norm": 1.1808523085978517, "learning_rate": 5.50291413907007e-06, "loss": 0.6393, "step": 7560 }, { "epoch": 0.48, "grad_norm": 1.2332750058074256, "learning_rate": 5.501882829386262e-06, "loss": 0.6443, "step": 7561 }, { "epoch": 0.48, "grad_norm": 1.5510775153284186, "learning_rate": 5.500851498132585e-06, "loss": 0.7992, "step": 7562 }, { "epoch": 0.48, "grad_norm": 1.2039485135872028, "learning_rate": 5.499820145353364e-06, "loss": 0.6602, "step": 7563 }, { "epoch": 0.48, "grad_norm": 1.718366588415428, "learning_rate": 5.4987887710929235e-06, "loss": 0.6724, "step": 7564 }, { "epoch": 0.48, "grad_norm": 1.1937253051610035, "learning_rate": 5.497757375395588e-06, "loss": 0.7251, "step": 7565 }, { "epoch": 0.48, "grad_norm": 1.7216411155938705, "learning_rate": 5.496725958305687e-06, "loss": 0.6474, "step": 7566 }, { "epoch": 0.48, "grad_norm": 1.6358590592417628, "learning_rate": 5.49569451986755e-06, "loss": 0.7691, "step": 7567 }, { "epoch": 0.48, "grad_norm": 2.1530409289387817, "learning_rate": 5.494663060125501e-06, "loss": 0.8479, "step": 7568 }, { "epoch": 0.48, "grad_norm": 1.1791477446736374, "learning_rate": 5.493631579123876e-06, "loss": 0.7785, "step": 7569 }, { "epoch": 0.48, "grad_norm": 2.1700722847577274, "learning_rate": 5.492600076907e-06, "loss": 0.7701, "step": 7570 }, { "epoch": 0.48, "grad_norm": 1.683588402239428, "learning_rate": 5.49156855351921e-06, "loss": 0.725, "step": 7571 }, { "epoch": 0.48, "grad_norm": 2.0605649619556763, "learning_rate": 5.490537009004835e-06, "loss": 0.6115, "step": 7572 }, { "epoch": 0.48, "grad_norm": 1.1328925835909045, "learning_rate": 5.4895054434082115e-06, "loss": 0.6077, "step": 7573 }, { "epoch": 0.48, "grad_norm": 1.6752608636964021, "learning_rate": 5.4884738567736715e-06, "loss": 0.8248, "step": 7574 }, { "epoch": 0.48, "grad_norm": 1.667630551443734, "learning_rate": 5.487442249145554e-06, "loss": 0.7977, "step": 7575 }, { "epoch": 0.48, "grad_norm": 1.5046802539478688, "learning_rate": 5.48641062056819e-06, "loss": 0.7123, "step": 7576 }, { "epoch": 0.48, "grad_norm": 1.5537038188841936, "learning_rate": 5.485378971085921e-06, "loss": 0.7364, "step": 7577 }, { "epoch": 0.49, "grad_norm": 1.1904366967996682, "learning_rate": 5.484347300743083e-06, "loss": 0.6421, "step": 7578 }, { "epoch": 0.49, "grad_norm": 1.4353369714806175, "learning_rate": 5.4833156095840155e-06, "loss": 0.7067, "step": 7579 }, { "epoch": 0.49, "grad_norm": 2.3087528381748657, "learning_rate": 5.48228389765306e-06, "loss": 0.7528, "step": 7580 }, { "epoch": 0.49, "grad_norm": 1.4924515228096062, "learning_rate": 5.481252164994555e-06, "loss": 0.7173, "step": 7581 }, { "epoch": 0.49, "grad_norm": 1.5520463977664654, "learning_rate": 5.480220411652845e-06, "loss": 0.725, "step": 7582 }, { "epoch": 0.49, "grad_norm": 1.2545296704751632, "learning_rate": 5.479188637672269e-06, "loss": 0.6195, "step": 7583 }, { "epoch": 0.49, "grad_norm": 1.6650452781369993, "learning_rate": 5.478156843097173e-06, "loss": 0.6494, "step": 7584 }, { "epoch": 0.49, "grad_norm": 1.655026564016454, "learning_rate": 5.4771250279719e-06, "loss": 0.7428, "step": 7585 }, { "epoch": 0.49, "grad_norm": 1.4123815858183835, "learning_rate": 5.476093192340796e-06, "loss": 0.6159, "step": 7586 }, { "epoch": 0.49, "grad_norm": 1.7417807238521212, "learning_rate": 5.475061336248208e-06, "loss": 0.8242, "step": 7587 }, { "epoch": 0.49, "grad_norm": 1.5891313125924826, "learning_rate": 5.4740294597384804e-06, "loss": 0.7911, "step": 7588 }, { "epoch": 0.49, "grad_norm": 1.1055346214278654, "learning_rate": 5.4729975628559626e-06, "loss": 0.5883, "step": 7589 }, { "epoch": 0.49, "grad_norm": 1.8532891974907668, "learning_rate": 5.471965645645005e-06, "loss": 0.6982, "step": 7590 }, { "epoch": 0.49, "grad_norm": 1.8892622004964654, "learning_rate": 5.470933708149955e-06, "loss": 0.6979, "step": 7591 }, { "epoch": 0.49, "grad_norm": 1.54395651424937, "learning_rate": 5.4699017504151644e-06, "loss": 0.7504, "step": 7592 }, { "epoch": 0.49, "grad_norm": 1.2666781373543428, "learning_rate": 5.468869772484982e-06, "loss": 0.6351, "step": 7593 }, { "epoch": 0.49, "grad_norm": 1.600695007467307, "learning_rate": 5.467837774403763e-06, "loss": 0.7232, "step": 7594 }, { "epoch": 0.49, "grad_norm": 1.6214522749459348, "learning_rate": 5.4668057562158604e-06, "loss": 0.7731, "step": 7595 }, { "epoch": 0.49, "grad_norm": 2.0533599610752504, "learning_rate": 5.465773717965628e-06, "loss": 0.7935, "step": 7596 }, { "epoch": 0.49, "grad_norm": 1.8750831088468638, "learning_rate": 5.464741659697419e-06, "loss": 0.8118, "step": 7597 }, { "epoch": 0.49, "grad_norm": 1.7080890364303853, "learning_rate": 5.463709581455588e-06, "loss": 0.7676, "step": 7598 }, { "epoch": 0.49, "grad_norm": 1.695523158268677, "learning_rate": 5.462677483284496e-06, "loss": 0.7206, "step": 7599 }, { "epoch": 0.49, "grad_norm": 1.5050630688585345, "learning_rate": 5.461645365228496e-06, "loss": 0.7448, "step": 7600 }, { "epoch": 0.49, "grad_norm": 1.5654870904745128, "learning_rate": 5.46061322733195e-06, "loss": 0.6098, "step": 7601 }, { "epoch": 0.49, "grad_norm": 1.1200390701693004, "learning_rate": 5.459581069639215e-06, "loss": 0.615, "step": 7602 }, { "epoch": 0.49, "grad_norm": 1.6160156015059017, "learning_rate": 5.458548892194652e-06, "loss": 0.6875, "step": 7603 }, { "epoch": 0.49, "grad_norm": 1.8539386886956988, "learning_rate": 5.457516695042621e-06, "loss": 0.7057, "step": 7604 }, { "epoch": 0.49, "grad_norm": 1.4661480695706524, "learning_rate": 5.4564844782274826e-06, "loss": 0.7749, "step": 7605 }, { "epoch": 0.49, "grad_norm": 1.6128413371761487, "learning_rate": 5.455452241793602e-06, "loss": 0.7658, "step": 7606 }, { "epoch": 0.49, "grad_norm": 1.6872520515031895, "learning_rate": 5.45441998578534e-06, "loss": 0.7743, "step": 7607 }, { "epoch": 0.49, "grad_norm": 1.744945065529596, "learning_rate": 5.4533877102470616e-06, "loss": 0.6455, "step": 7608 }, { "epoch": 0.49, "grad_norm": 1.176961714821248, "learning_rate": 5.452355415223132e-06, "loss": 0.587, "step": 7609 }, { "epoch": 0.49, "grad_norm": 1.7810318846696958, "learning_rate": 5.4513231007579185e-06, "loss": 0.8426, "step": 7610 }, { "epoch": 0.49, "grad_norm": 0.9860872720459614, "learning_rate": 5.450290766895786e-06, "loss": 0.5487, "step": 7611 }, { "epoch": 0.49, "grad_norm": 1.7029720740066778, "learning_rate": 5.449258413681102e-06, "loss": 0.7042, "step": 7612 }, { "epoch": 0.49, "grad_norm": 1.2800286520159119, "learning_rate": 5.4482260411582365e-06, "loss": 0.6711, "step": 7613 }, { "epoch": 0.49, "grad_norm": 1.7457328167160788, "learning_rate": 5.447193649371556e-06, "loss": 0.7776, "step": 7614 }, { "epoch": 0.49, "grad_norm": 1.6763487580562326, "learning_rate": 5.446161238365433e-06, "loss": 0.7486, "step": 7615 }, { "epoch": 0.49, "grad_norm": 2.386363415882087, "learning_rate": 5.4451288081842365e-06, "loss": 0.7344, "step": 7616 }, { "epoch": 0.49, "grad_norm": 2.016059571326882, "learning_rate": 5.44409635887234e-06, "loss": 0.9304, "step": 7617 }, { "epoch": 0.49, "grad_norm": 1.533151454257492, "learning_rate": 5.443063890474116e-06, "loss": 0.807, "step": 7618 }, { "epoch": 0.49, "grad_norm": 1.649490177517847, "learning_rate": 5.4420314030339375e-06, "loss": 0.6555, "step": 7619 }, { "epoch": 0.49, "grad_norm": 1.119529790911275, "learning_rate": 5.440998896596177e-06, "loss": 0.6695, "step": 7620 }, { "epoch": 0.49, "grad_norm": 1.7098512598448743, "learning_rate": 5.439966371205209e-06, "loss": 0.6836, "step": 7621 }, { "epoch": 0.49, "grad_norm": 3.0204556095909436, "learning_rate": 5.438933826905412e-06, "loss": 0.645, "step": 7622 }, { "epoch": 0.49, "grad_norm": 1.5959110529049594, "learning_rate": 5.437901263741163e-06, "loss": 0.7275, "step": 7623 }, { "epoch": 0.49, "grad_norm": 1.2461369016346693, "learning_rate": 5.436868681756837e-06, "loss": 0.763, "step": 7624 }, { "epoch": 0.49, "grad_norm": 1.8220107497681295, "learning_rate": 5.435836080996812e-06, "loss": 0.8061, "step": 7625 }, { "epoch": 0.49, "grad_norm": 1.316490104356071, "learning_rate": 5.434803461505469e-06, "loss": 0.7051, "step": 7626 }, { "epoch": 0.49, "grad_norm": 1.6256583305508734, "learning_rate": 5.433770823327187e-06, "loss": 0.6989, "step": 7627 }, { "epoch": 0.49, "grad_norm": 1.8409452468750307, "learning_rate": 5.432738166506346e-06, "loss": 0.9098, "step": 7628 }, { "epoch": 0.49, "grad_norm": 1.6667652702231852, "learning_rate": 5.431705491087327e-06, "loss": 0.7806, "step": 7629 }, { "epoch": 0.49, "grad_norm": 1.1092675211486467, "learning_rate": 5.4306727971145145e-06, "loss": 0.6434, "step": 7630 }, { "epoch": 0.49, "grad_norm": 1.9345476497471934, "learning_rate": 5.42964008463229e-06, "loss": 0.7375, "step": 7631 }, { "epoch": 0.49, "grad_norm": 1.0749994354207368, "learning_rate": 5.428607353685038e-06, "loss": 0.7016, "step": 7632 }, { "epoch": 0.49, "grad_norm": 1.8733931056138322, "learning_rate": 5.427574604317142e-06, "loss": 0.7382, "step": 7633 }, { "epoch": 0.49, "grad_norm": 1.2196292133414206, "learning_rate": 5.426541836572988e-06, "loss": 0.5849, "step": 7634 }, { "epoch": 0.49, "grad_norm": 1.5946313454333465, "learning_rate": 5.4255090504969606e-06, "loss": 0.8775, "step": 7635 }, { "epoch": 0.49, "grad_norm": 1.250739320466975, "learning_rate": 5.42447624613345e-06, "loss": 0.6174, "step": 7636 }, { "epoch": 0.49, "grad_norm": 1.6091468670597828, "learning_rate": 5.423443423526842e-06, "loss": 0.7215, "step": 7637 }, { "epoch": 0.49, "grad_norm": 1.6724492971102456, "learning_rate": 5.422410582721526e-06, "loss": 0.8248, "step": 7638 }, { "epoch": 0.49, "grad_norm": 1.6862571777245883, "learning_rate": 5.42137772376189e-06, "loss": 0.7298, "step": 7639 }, { "epoch": 0.49, "grad_norm": 1.4659589358607352, "learning_rate": 5.420344846692326e-06, "loss": 0.6168, "step": 7640 }, { "epoch": 0.49, "grad_norm": 2.54997186863958, "learning_rate": 5.419311951557222e-06, "loss": 0.7695, "step": 7641 }, { "epoch": 0.49, "grad_norm": 1.7193514346497576, "learning_rate": 5.418279038400973e-06, "loss": 0.7755, "step": 7642 }, { "epoch": 0.49, "grad_norm": 1.1832775459722453, "learning_rate": 5.417246107267968e-06, "loss": 0.6577, "step": 7643 }, { "epoch": 0.49, "grad_norm": 1.6071464704683096, "learning_rate": 5.416213158202602e-06, "loss": 0.738, "step": 7644 }, { "epoch": 0.49, "grad_norm": 1.4694552685689957, "learning_rate": 5.415180191249271e-06, "loss": 0.6687, "step": 7645 }, { "epoch": 0.49, "grad_norm": 1.7457557356892093, "learning_rate": 5.414147206452367e-06, "loss": 0.8085, "step": 7646 }, { "epoch": 0.49, "grad_norm": 1.4842369784172045, "learning_rate": 5.413114203856287e-06, "loss": 0.8773, "step": 7647 }, { "epoch": 0.49, "grad_norm": 4.862656144527682, "learning_rate": 5.4120811835054265e-06, "loss": 0.8258, "step": 7648 }, { "epoch": 0.49, "grad_norm": 1.806656132009553, "learning_rate": 5.411048145444182e-06, "loss": 0.8243, "step": 7649 }, { "epoch": 0.49, "grad_norm": 1.6470295094657978, "learning_rate": 5.4100150897169514e-06, "loss": 0.7017, "step": 7650 }, { "epoch": 0.49, "grad_norm": 1.4075776997570055, "learning_rate": 5.408982016368134e-06, "loss": 0.6698, "step": 7651 }, { "epoch": 0.49, "grad_norm": 1.4808818392145162, "learning_rate": 5.40794892544213e-06, "loss": 0.6829, "step": 7652 }, { "epoch": 0.49, "grad_norm": 2.0042485749212378, "learning_rate": 5.406915816983337e-06, "loss": 0.7949, "step": 7653 }, { "epoch": 0.49, "grad_norm": 1.6242588359377055, "learning_rate": 5.405882691036158e-06, "loss": 0.75, "step": 7654 }, { "epoch": 0.49, "grad_norm": 1.4112845003596755, "learning_rate": 5.404849547644993e-06, "loss": 0.7719, "step": 7655 }, { "epoch": 0.49, "grad_norm": 1.4369098869703671, "learning_rate": 5.403816386854247e-06, "loss": 0.6562, "step": 7656 }, { "epoch": 0.49, "grad_norm": 1.4592659265294996, "learning_rate": 5.40278320870832e-06, "loss": 0.7422, "step": 7657 }, { "epoch": 0.49, "grad_norm": 1.6086718380908238, "learning_rate": 5.401750013251617e-06, "loss": 0.6973, "step": 7658 }, { "epoch": 0.49, "grad_norm": 1.8179611929477395, "learning_rate": 5.400716800528542e-06, "loss": 0.7578, "step": 7659 }, { "epoch": 0.49, "grad_norm": 1.9131660888270106, "learning_rate": 5.399683570583503e-06, "loss": 0.7847, "step": 7660 }, { "epoch": 0.49, "grad_norm": 1.5366897441116254, "learning_rate": 5.398650323460904e-06, "loss": 0.7876, "step": 7661 }, { "epoch": 0.49, "grad_norm": 1.037162028923531, "learning_rate": 5.3976170592051505e-06, "loss": 0.6807, "step": 7662 }, { "epoch": 0.49, "grad_norm": 1.6609584270288549, "learning_rate": 5.39658377786065e-06, "loss": 0.6916, "step": 7663 }, { "epoch": 0.49, "grad_norm": 1.879511571565194, "learning_rate": 5.3955504794718135e-06, "loss": 0.9263, "step": 7664 }, { "epoch": 0.49, "grad_norm": 2.1147854513852207, "learning_rate": 5.394517164083047e-06, "loss": 0.7332, "step": 7665 }, { "epoch": 0.49, "grad_norm": 1.8903669383767274, "learning_rate": 5.393483831738764e-06, "loss": 0.8359, "step": 7666 }, { "epoch": 0.49, "grad_norm": 1.5715803202747922, "learning_rate": 5.392450482483372e-06, "loss": 0.6339, "step": 7667 }, { "epoch": 0.49, "grad_norm": 1.6200194771616865, "learning_rate": 5.391417116361284e-06, "loss": 0.7667, "step": 7668 }, { "epoch": 0.49, "grad_norm": 1.8621013380987534, "learning_rate": 5.39038373341691e-06, "loss": 0.6701, "step": 7669 }, { "epoch": 0.49, "grad_norm": 1.6760869413172796, "learning_rate": 5.3893503336946625e-06, "loss": 0.818, "step": 7670 }, { "epoch": 0.49, "grad_norm": 1.832363501240108, "learning_rate": 5.388316917238956e-06, "loss": 0.8777, "step": 7671 }, { "epoch": 0.49, "grad_norm": 1.5784441612175315, "learning_rate": 5.3872834840942035e-06, "loss": 0.7148, "step": 7672 }, { "epoch": 0.49, "grad_norm": 1.8827614914902382, "learning_rate": 5.386250034304823e-06, "loss": 0.8301, "step": 7673 }, { "epoch": 0.49, "grad_norm": 1.0064885485937871, "learning_rate": 5.385216567915226e-06, "loss": 0.5146, "step": 7674 }, { "epoch": 0.49, "grad_norm": 1.636713086073729, "learning_rate": 5.384183084969832e-06, "loss": 0.7641, "step": 7675 }, { "epoch": 0.49, "grad_norm": 2.28859694351705, "learning_rate": 5.383149585513055e-06, "loss": 0.8297, "step": 7676 }, { "epoch": 0.49, "grad_norm": 1.6925335371430483, "learning_rate": 5.382116069589315e-06, "loss": 0.7583, "step": 7677 }, { "epoch": 0.49, "grad_norm": 1.6379702124282585, "learning_rate": 5.381082537243028e-06, "loss": 0.7428, "step": 7678 }, { "epoch": 0.49, "grad_norm": 1.8751319528317483, "learning_rate": 5.380048988518613e-06, "loss": 0.7088, "step": 7679 }, { "epoch": 0.49, "grad_norm": 1.5817884059173197, "learning_rate": 5.379015423460494e-06, "loss": 0.7615, "step": 7680 }, { "epoch": 0.49, "grad_norm": 1.6481398196736186, "learning_rate": 5.3779818421130865e-06, "loss": 0.7898, "step": 7681 }, { "epoch": 0.49, "grad_norm": 2.7411222804801123, "learning_rate": 5.3769482445208145e-06, "loss": 0.6975, "step": 7682 }, { "epoch": 0.49, "grad_norm": 1.7830519414320316, "learning_rate": 5.3759146307281e-06, "loss": 0.684, "step": 7683 }, { "epoch": 0.49, "grad_norm": 1.8251365748765198, "learning_rate": 5.374881000779364e-06, "loss": 0.8979, "step": 7684 }, { "epoch": 0.49, "grad_norm": 1.712668660650155, "learning_rate": 5.37384735471903e-06, "loss": 0.7187, "step": 7685 }, { "epoch": 0.49, "grad_norm": 1.5483954048054438, "learning_rate": 5.372813692591521e-06, "loss": 0.7787, "step": 7686 }, { "epoch": 0.49, "grad_norm": 1.7003620941114712, "learning_rate": 5.371780014441265e-06, "loss": 0.761, "step": 7687 }, { "epoch": 0.49, "grad_norm": 1.877292893844533, "learning_rate": 5.370746320312684e-06, "loss": 0.7361, "step": 7688 }, { "epoch": 0.49, "grad_norm": 1.8802812138153673, "learning_rate": 5.369712610250206e-06, "loss": 0.7186, "step": 7689 }, { "epoch": 0.49, "grad_norm": 1.9733054377722075, "learning_rate": 5.368678884298255e-06, "loss": 0.7114, "step": 7690 }, { "epoch": 0.49, "grad_norm": 1.803827193359925, "learning_rate": 5.367645142501262e-06, "loss": 0.7406, "step": 7691 }, { "epoch": 0.49, "grad_norm": 1.872281599626367, "learning_rate": 5.366611384903653e-06, "loss": 0.6986, "step": 7692 }, { "epoch": 0.49, "grad_norm": 1.6850383473158619, "learning_rate": 5.365577611549856e-06, "loss": 0.7819, "step": 7693 }, { "epoch": 0.49, "grad_norm": 1.5022978186753404, "learning_rate": 5.364543822484303e-06, "loss": 0.6848, "step": 7694 }, { "epoch": 0.49, "grad_norm": 1.1776199353529209, "learning_rate": 5.363510017751422e-06, "loss": 0.6296, "step": 7695 }, { "epoch": 0.49, "grad_norm": 1.5161548068674282, "learning_rate": 5.3624761973956456e-06, "loss": 0.6593, "step": 7696 }, { "epoch": 0.49, "grad_norm": 1.8044042934833828, "learning_rate": 5.3614423614614016e-06, "loss": 0.8321, "step": 7697 }, { "epoch": 0.49, "grad_norm": 2.1108529579612427, "learning_rate": 5.360408509993126e-06, "loss": 0.7865, "step": 7698 }, { "epoch": 0.49, "grad_norm": 1.4934997614311485, "learning_rate": 5.359374643035248e-06, "loss": 0.8045, "step": 7699 }, { "epoch": 0.49, "grad_norm": 1.8873288224426392, "learning_rate": 5.358340760632205e-06, "loss": 0.6861, "step": 7700 }, { "epoch": 0.49, "grad_norm": 1.6119186343845944, "learning_rate": 5.357306862828427e-06, "loss": 0.8188, "step": 7701 }, { "epoch": 0.49, "grad_norm": 1.5249107059417712, "learning_rate": 5.356272949668353e-06, "loss": 0.7135, "step": 7702 }, { "epoch": 0.49, "grad_norm": 1.8639545505442787, "learning_rate": 5.355239021196416e-06, "loss": 0.7107, "step": 7703 }, { "epoch": 0.49, "grad_norm": 1.6296306810613455, "learning_rate": 5.3542050774570505e-06, "loss": 0.777, "step": 7704 }, { "epoch": 0.49, "grad_norm": 1.3608223477535075, "learning_rate": 5.353171118494698e-06, "loss": 0.6436, "step": 7705 }, { "epoch": 0.49, "grad_norm": 1.0605033155370056, "learning_rate": 5.3521371443537904e-06, "loss": 0.6254, "step": 7706 }, { "epoch": 0.49, "grad_norm": 1.8959949855474139, "learning_rate": 5.351103155078768e-06, "loss": 0.6306, "step": 7707 }, { "epoch": 0.49, "grad_norm": 1.6915768370016357, "learning_rate": 5.350069150714072e-06, "loss": 0.8402, "step": 7708 }, { "epoch": 0.49, "grad_norm": 1.0983130875642513, "learning_rate": 5.349035131304138e-06, "loss": 0.6898, "step": 7709 }, { "epoch": 0.49, "grad_norm": 1.7537532796083544, "learning_rate": 5.348001096893408e-06, "loss": 0.6916, "step": 7710 }, { "epoch": 0.49, "grad_norm": 1.6324147311409711, "learning_rate": 5.346967047526321e-06, "loss": 0.6742, "step": 7711 }, { "epoch": 0.49, "grad_norm": 1.1625902006913498, "learning_rate": 5.345932983247322e-06, "loss": 0.6745, "step": 7712 }, { "epoch": 0.49, "grad_norm": 1.3826425314927615, "learning_rate": 5.344898904100848e-06, "loss": 0.5935, "step": 7713 }, { "epoch": 0.49, "grad_norm": 1.8121976844673038, "learning_rate": 5.343864810131344e-06, "loss": 0.7389, "step": 7714 }, { "epoch": 0.49, "grad_norm": 1.5110704956297574, "learning_rate": 5.342830701383254e-06, "loss": 0.6788, "step": 7715 }, { "epoch": 0.49, "grad_norm": 1.8625484374331926, "learning_rate": 5.34179657790102e-06, "loss": 0.9799, "step": 7716 }, { "epoch": 0.49, "grad_norm": 1.4784671888544687, "learning_rate": 5.340762439729088e-06, "loss": 0.6966, "step": 7717 }, { "epoch": 0.49, "grad_norm": 1.6467558768688957, "learning_rate": 5.3397282869119015e-06, "loss": 0.6606, "step": 7718 }, { "epoch": 0.49, "grad_norm": 2.0755596064576793, "learning_rate": 5.338694119493908e-06, "loss": 0.7279, "step": 7719 }, { "epoch": 0.49, "grad_norm": 1.9926553616460565, "learning_rate": 5.337659937519553e-06, "loss": 0.6607, "step": 7720 }, { "epoch": 0.49, "grad_norm": 1.5340188683570977, "learning_rate": 5.336625741033283e-06, "loss": 0.7431, "step": 7721 }, { "epoch": 0.49, "grad_norm": 1.6714885729256934, "learning_rate": 5.335591530079545e-06, "loss": 0.7588, "step": 7722 }, { "epoch": 0.49, "grad_norm": 1.5009747347545404, "learning_rate": 5.33455730470279e-06, "loss": 0.7042, "step": 7723 }, { "epoch": 0.49, "grad_norm": 1.0576193013075348, "learning_rate": 5.333523064947464e-06, "loss": 0.7455, "step": 7724 }, { "epoch": 0.49, "grad_norm": 1.6749036092148044, "learning_rate": 5.332488810858017e-06, "loss": 0.7817, "step": 7725 }, { "epoch": 0.49, "grad_norm": 1.6834290733767932, "learning_rate": 5.331454542478901e-06, "loss": 0.8092, "step": 7726 }, { "epoch": 0.49, "grad_norm": 1.0961504537362623, "learning_rate": 5.330420259854564e-06, "loss": 0.6495, "step": 7727 }, { "epoch": 0.49, "grad_norm": 1.830494460174103, "learning_rate": 5.32938596302946e-06, "loss": 0.8451, "step": 7728 }, { "epoch": 0.49, "grad_norm": 2.337844693847147, "learning_rate": 5.328351652048037e-06, "loss": 0.7509, "step": 7729 }, { "epoch": 0.49, "grad_norm": 1.6904398710212973, "learning_rate": 5.327317326954752e-06, "loss": 0.7729, "step": 7730 }, { "epoch": 0.49, "grad_norm": 1.010823688277325, "learning_rate": 5.326282987794056e-06, "loss": 0.6647, "step": 7731 }, { "epoch": 0.49, "grad_norm": 1.731126241254047, "learning_rate": 5.325248634610403e-06, "loss": 0.7489, "step": 7732 }, { "epoch": 0.49, "grad_norm": 1.7450355930997246, "learning_rate": 5.3242142674482456e-06, "loss": 0.7598, "step": 7733 }, { "epoch": 0.5, "grad_norm": 1.6999793286577096, "learning_rate": 5.3231798863520415e-06, "loss": 0.7553, "step": 7734 }, { "epoch": 0.5, "grad_norm": 1.2677422343177231, "learning_rate": 5.322145491366244e-06, "loss": 0.6991, "step": 7735 }, { "epoch": 0.5, "grad_norm": 1.7317542276141336, "learning_rate": 5.32111108253531e-06, "loss": 0.757, "step": 7736 }, { "epoch": 0.5, "grad_norm": 1.1619623431510373, "learning_rate": 5.320076659903698e-06, "loss": 0.6521, "step": 7737 }, { "epoch": 0.5, "grad_norm": 1.8044938572456761, "learning_rate": 5.319042223515862e-06, "loss": 0.7521, "step": 7738 }, { "epoch": 0.5, "grad_norm": 1.390282237860246, "learning_rate": 5.318007773416264e-06, "loss": 0.6722, "step": 7739 }, { "epoch": 0.5, "grad_norm": 3.917033742435735, "learning_rate": 5.316973309649358e-06, "loss": 0.7917, "step": 7740 }, { "epoch": 0.5, "grad_norm": 1.7150648821422143, "learning_rate": 5.315938832259606e-06, "loss": 0.7926, "step": 7741 }, { "epoch": 0.5, "grad_norm": 1.7894632335895733, "learning_rate": 5.314904341291468e-06, "loss": 0.8797, "step": 7742 }, { "epoch": 0.5, "grad_norm": 1.665315808314767, "learning_rate": 5.3138698367894e-06, "loss": 0.7367, "step": 7743 }, { "epoch": 0.5, "grad_norm": 1.7857455086398206, "learning_rate": 5.312835318797868e-06, "loss": 1.1058, "step": 7744 }, { "epoch": 0.5, "grad_norm": 1.5165576267573513, "learning_rate": 5.311800787361332e-06, "loss": 0.8658, "step": 7745 }, { "epoch": 0.5, "grad_norm": 1.53853096601698, "learning_rate": 5.310766242524253e-06, "loss": 0.8858, "step": 7746 }, { "epoch": 0.5, "grad_norm": 1.8119135139472775, "learning_rate": 5.309731684331093e-06, "loss": 0.7524, "step": 7747 }, { "epoch": 0.5, "grad_norm": 1.764772262340367, "learning_rate": 5.308697112826316e-06, "loss": 0.8511, "step": 7748 }, { "epoch": 0.5, "grad_norm": 1.324362776717411, "learning_rate": 5.307662528054387e-06, "loss": 0.7173, "step": 7749 }, { "epoch": 0.5, "grad_norm": 1.6285114862250454, "learning_rate": 5.306627930059768e-06, "loss": 0.7653, "step": 7750 }, { "epoch": 0.5, "grad_norm": 1.8170639744756156, "learning_rate": 5.305593318886925e-06, "loss": 0.7369, "step": 7751 }, { "epoch": 0.5, "grad_norm": 1.204383125900346, "learning_rate": 5.304558694580324e-06, "loss": 0.6721, "step": 7752 }, { "epoch": 0.5, "grad_norm": 1.573908806077152, "learning_rate": 5.303524057184429e-06, "loss": 0.8385, "step": 7753 }, { "epoch": 0.5, "grad_norm": 1.7034364328261247, "learning_rate": 5.30248940674371e-06, "loss": 0.707, "step": 7754 }, { "epoch": 0.5, "grad_norm": 2.0122493400135744, "learning_rate": 5.301454743302629e-06, "loss": 0.809, "step": 7755 }, { "epoch": 0.5, "grad_norm": 1.6252113876392817, "learning_rate": 5.3004200669056585e-06, "loss": 0.8198, "step": 7756 }, { "epoch": 0.5, "grad_norm": 1.5075361409564587, "learning_rate": 5.299385377597265e-06, "loss": 0.7036, "step": 7757 }, { "epoch": 0.5, "grad_norm": 1.6229338685836572, "learning_rate": 5.298350675421915e-06, "loss": 0.7161, "step": 7758 }, { "epoch": 0.5, "grad_norm": 1.6270096119470867, "learning_rate": 5.297315960424081e-06, "loss": 0.7579, "step": 7759 }, { "epoch": 0.5, "grad_norm": 4.386462806177799, "learning_rate": 5.296281232648233e-06, "loss": 0.7979, "step": 7760 }, { "epoch": 0.5, "grad_norm": 1.589033309278996, "learning_rate": 5.295246492138839e-06, "loss": 0.7691, "step": 7761 }, { "epoch": 0.5, "grad_norm": 1.9632064779066296, "learning_rate": 5.294211738940373e-06, "loss": 0.8705, "step": 7762 }, { "epoch": 0.5, "grad_norm": 2.017028393444431, "learning_rate": 5.293176973097303e-06, "loss": 0.9838, "step": 7763 }, { "epoch": 0.5, "grad_norm": 1.7551490901580717, "learning_rate": 5.292142194654102e-06, "loss": 0.8036, "step": 7764 }, { "epoch": 0.5, "grad_norm": 1.8528774610568608, "learning_rate": 5.2911074036552426e-06, "loss": 0.7308, "step": 7765 }, { "epoch": 0.5, "grad_norm": 1.4922391071890653, "learning_rate": 5.290072600145201e-06, "loss": 0.786, "step": 7766 }, { "epoch": 0.5, "grad_norm": 1.5082535810520337, "learning_rate": 5.289037784168448e-06, "loss": 0.7219, "step": 7767 }, { "epoch": 0.5, "grad_norm": 1.7757248081517987, "learning_rate": 5.288002955769458e-06, "loss": 0.7976, "step": 7768 }, { "epoch": 0.5, "grad_norm": 1.4708168793476386, "learning_rate": 5.286968114992706e-06, "loss": 0.8488, "step": 7769 }, { "epoch": 0.5, "grad_norm": 1.6412467024100996, "learning_rate": 5.285933261882668e-06, "loss": 0.8151, "step": 7770 }, { "epoch": 0.5, "grad_norm": 1.5562614639265857, "learning_rate": 5.28489839648382e-06, "loss": 0.6589, "step": 7771 }, { "epoch": 0.5, "grad_norm": 2.102509456024645, "learning_rate": 5.2838635188406365e-06, "loss": 0.7087, "step": 7772 }, { "epoch": 0.5, "grad_norm": 1.4842104403174812, "learning_rate": 5.282828628997595e-06, "loss": 0.5923, "step": 7773 }, { "epoch": 0.5, "grad_norm": 1.5290711420248122, "learning_rate": 5.281793726999174e-06, "loss": 0.7223, "step": 7774 }, { "epoch": 0.5, "grad_norm": 1.4891712476036558, "learning_rate": 5.28075881288985e-06, "loss": 0.7208, "step": 7775 }, { "epoch": 0.5, "grad_norm": 1.701146508414893, "learning_rate": 5.279723886714103e-06, "loss": 0.8132, "step": 7776 }, { "epoch": 0.5, "grad_norm": 1.6720034794799035, "learning_rate": 5.2786889485164115e-06, "loss": 0.7812, "step": 7777 }, { "epoch": 0.5, "grad_norm": 1.7048228099111054, "learning_rate": 5.277653998341255e-06, "loss": 0.683, "step": 7778 }, { "epoch": 0.5, "grad_norm": 1.5908519717921494, "learning_rate": 5.276619036233111e-06, "loss": 0.801, "step": 7779 }, { "epoch": 0.5, "grad_norm": 1.633863483635564, "learning_rate": 5.275584062236463e-06, "loss": 0.7129, "step": 7780 }, { "epoch": 0.5, "grad_norm": 1.0869810532735098, "learning_rate": 5.274549076395792e-06, "loss": 0.7178, "step": 7781 }, { "epoch": 0.5, "grad_norm": 1.5774406479514842, "learning_rate": 5.273514078755577e-06, "loss": 0.7582, "step": 7782 }, { "epoch": 0.5, "grad_norm": 1.56302245045954, "learning_rate": 5.2724790693603025e-06, "loss": 0.77, "step": 7783 }, { "epoch": 0.5, "grad_norm": 1.7675355933798618, "learning_rate": 5.27144404825445e-06, "loss": 0.67, "step": 7784 }, { "epoch": 0.5, "grad_norm": 1.7653806921998407, "learning_rate": 5.270409015482504e-06, "loss": 0.8278, "step": 7785 }, { "epoch": 0.5, "grad_norm": 2.360531331365953, "learning_rate": 5.269373971088943e-06, "loss": 0.7089, "step": 7786 }, { "epoch": 0.5, "grad_norm": 1.0623602945793003, "learning_rate": 5.268338915118258e-06, "loss": 0.6252, "step": 7787 }, { "epoch": 0.5, "grad_norm": 1.4597142979787883, "learning_rate": 5.267303847614931e-06, "loss": 0.7234, "step": 7788 }, { "epoch": 0.5, "grad_norm": 1.5412419897485572, "learning_rate": 5.266268768623445e-06, "loss": 0.5702, "step": 7789 }, { "epoch": 0.5, "grad_norm": 1.8996382123995648, "learning_rate": 5.2652336781882865e-06, "loss": 0.6778, "step": 7790 }, { "epoch": 0.5, "grad_norm": 2.047479986999353, "learning_rate": 5.264198576353942e-06, "loss": 0.7747, "step": 7791 }, { "epoch": 0.5, "grad_norm": 1.0707397859104266, "learning_rate": 5.263163463164898e-06, "loss": 0.608, "step": 7792 }, { "epoch": 0.5, "grad_norm": 1.708856552024562, "learning_rate": 5.262128338665641e-06, "loss": 0.6935, "step": 7793 }, { "epoch": 0.5, "grad_norm": 1.790512546940693, "learning_rate": 5.261093202900659e-06, "loss": 0.6797, "step": 7794 }, { "epoch": 0.5, "grad_norm": 1.8175186664674037, "learning_rate": 5.26005805591444e-06, "loss": 0.7406, "step": 7795 }, { "epoch": 0.5, "grad_norm": 1.6070834249021484, "learning_rate": 5.259022897751473e-06, "loss": 0.6942, "step": 7796 }, { "epoch": 0.5, "grad_norm": 1.6984949682269683, "learning_rate": 5.257987728456244e-06, "loss": 0.7054, "step": 7797 }, { "epoch": 0.5, "grad_norm": 1.60389902264975, "learning_rate": 5.256952548073246e-06, "loss": 0.69, "step": 7798 }, { "epoch": 0.5, "grad_norm": 1.7016141658536195, "learning_rate": 5.255917356646968e-06, "loss": 0.7766, "step": 7799 }, { "epoch": 0.5, "grad_norm": 1.7186088533644206, "learning_rate": 5.254882154221898e-06, "loss": 0.7974, "step": 7800 }, { "epoch": 0.5, "grad_norm": 1.4492296967132408, "learning_rate": 5.2538469408425284e-06, "loss": 0.6371, "step": 7801 }, { "epoch": 0.5, "grad_norm": 1.3954722669817368, "learning_rate": 5.252811716553352e-06, "loss": 0.691, "step": 7802 }, { "epoch": 0.5, "grad_norm": 1.6240570773000458, "learning_rate": 5.25177648139886e-06, "loss": 0.7009, "step": 7803 }, { "epoch": 0.5, "grad_norm": 1.670409430942733, "learning_rate": 5.250741235423543e-06, "loss": 0.7567, "step": 7804 }, { "epoch": 0.5, "grad_norm": 1.7666238930532814, "learning_rate": 5.2497059786718955e-06, "loss": 0.9201, "step": 7805 }, { "epoch": 0.5, "grad_norm": 2.162109038021698, "learning_rate": 5.2486707111884085e-06, "loss": 0.8461, "step": 7806 }, { "epoch": 0.5, "grad_norm": 1.6234275300432177, "learning_rate": 5.247635433017576e-06, "loss": 0.7373, "step": 7807 }, { "epoch": 0.5, "grad_norm": 1.8587782568156483, "learning_rate": 5.246600144203895e-06, "loss": 0.7709, "step": 7808 }, { "epoch": 0.5, "grad_norm": 2.1270934655154092, "learning_rate": 5.245564844791857e-06, "loss": 0.7158, "step": 7809 }, { "epoch": 0.5, "grad_norm": 1.6643178444889055, "learning_rate": 5.244529534825958e-06, "loss": 0.7182, "step": 7810 }, { "epoch": 0.5, "grad_norm": 1.861653611412943, "learning_rate": 5.243494214350693e-06, "loss": 0.6449, "step": 7811 }, { "epoch": 0.5, "grad_norm": 1.71979711514936, "learning_rate": 5.242458883410558e-06, "loss": 0.727, "step": 7812 }, { "epoch": 0.5, "grad_norm": 1.9634160859180556, "learning_rate": 5.2414235420500516e-06, "loss": 0.7189, "step": 7813 }, { "epoch": 0.5, "grad_norm": 1.1315727637241204, "learning_rate": 5.240388190313668e-06, "loss": 0.7046, "step": 7814 }, { "epoch": 0.5, "grad_norm": 1.7200017131773349, "learning_rate": 5.2393528282459036e-06, "loss": 0.8794, "step": 7815 }, { "epoch": 0.5, "grad_norm": 1.6302859821284872, "learning_rate": 5.238317455891259e-06, "loss": 0.6915, "step": 7816 }, { "epoch": 0.5, "grad_norm": 1.8576477755047873, "learning_rate": 5.237282073294231e-06, "loss": 0.7572, "step": 7817 }, { "epoch": 0.5, "grad_norm": 1.6940660832447705, "learning_rate": 5.236246680499317e-06, "loss": 0.7663, "step": 7818 }, { "epoch": 0.5, "grad_norm": 1.728263990980916, "learning_rate": 5.235211277551019e-06, "loss": 0.7266, "step": 7819 }, { "epoch": 0.5, "grad_norm": 2.4145371026976536, "learning_rate": 5.234175864493832e-06, "loss": 0.7342, "step": 7820 }, { "epoch": 0.5, "grad_norm": 1.643652549963216, "learning_rate": 5.2331404413722595e-06, "loss": 0.6947, "step": 7821 }, { "epoch": 0.5, "grad_norm": 1.6988384276840305, "learning_rate": 5.2321050082308e-06, "loss": 0.7219, "step": 7822 }, { "epoch": 0.5, "grad_norm": 1.652513922479932, "learning_rate": 5.231069565113957e-06, "loss": 0.7611, "step": 7823 }, { "epoch": 0.5, "grad_norm": 1.0370584023446334, "learning_rate": 5.230034112066228e-06, "loss": 0.7327, "step": 7824 }, { "epoch": 0.5, "grad_norm": 1.7583514454649734, "learning_rate": 5.2289986491321176e-06, "loss": 0.7674, "step": 7825 }, { "epoch": 0.5, "grad_norm": 1.6534623610553145, "learning_rate": 5.227963176356126e-06, "loss": 0.7498, "step": 7826 }, { "epoch": 0.5, "grad_norm": 1.02914639574709, "learning_rate": 5.226927693782755e-06, "loss": 0.6047, "step": 7827 }, { "epoch": 0.5, "grad_norm": 1.8018152507526084, "learning_rate": 5.225892201456509e-06, "loss": 0.6328, "step": 7828 }, { "epoch": 0.5, "grad_norm": 1.46813641933121, "learning_rate": 5.22485669942189e-06, "loss": 0.8041, "step": 7829 }, { "epoch": 0.5, "grad_norm": 1.5168373451279193, "learning_rate": 5.223821187723403e-06, "loss": 0.7021, "step": 7830 }, { "epoch": 0.5, "grad_norm": 1.034251097720305, "learning_rate": 5.222785666405553e-06, "loss": 0.6399, "step": 7831 }, { "epoch": 0.5, "grad_norm": 1.680090724006413, "learning_rate": 5.221750135512843e-06, "loss": 0.8045, "step": 7832 }, { "epoch": 0.5, "grad_norm": 1.678088081829814, "learning_rate": 5.220714595089777e-06, "loss": 0.6766, "step": 7833 }, { "epoch": 0.5, "grad_norm": 1.5034284259861073, "learning_rate": 5.219679045180863e-06, "loss": 0.6603, "step": 7834 }, { "epoch": 0.5, "grad_norm": 1.7662271340130968, "learning_rate": 5.218643485830604e-06, "loss": 0.7497, "step": 7835 }, { "epoch": 0.5, "grad_norm": 1.5877708238374726, "learning_rate": 5.217607917083508e-06, "loss": 0.741, "step": 7836 }, { "epoch": 0.5, "grad_norm": 1.8835166812846904, "learning_rate": 5.216572338984081e-06, "loss": 0.8326, "step": 7837 }, { "epoch": 0.5, "grad_norm": 1.55024632985505, "learning_rate": 5.215536751576829e-06, "loss": 0.7512, "step": 7838 }, { "epoch": 0.5, "grad_norm": 1.5393033502592484, "learning_rate": 5.21450115490626e-06, "loss": 0.6988, "step": 7839 }, { "epoch": 0.5, "grad_norm": 1.7506620236877133, "learning_rate": 5.213465549016885e-06, "loss": 0.7808, "step": 7840 }, { "epoch": 0.5, "grad_norm": 1.710235358345941, "learning_rate": 5.212429933953207e-06, "loss": 0.7733, "step": 7841 }, { "epoch": 0.5, "grad_norm": 1.9573171759400445, "learning_rate": 5.211394309759737e-06, "loss": 0.7171, "step": 7842 }, { "epoch": 0.5, "grad_norm": 1.7202089843366857, "learning_rate": 5.210358676480983e-06, "loss": 0.7746, "step": 7843 }, { "epoch": 0.5, "grad_norm": 1.6399459329228183, "learning_rate": 5.209323034161458e-06, "loss": 0.6135, "step": 7844 }, { "epoch": 0.5, "grad_norm": 1.7329957012763941, "learning_rate": 5.208287382845666e-06, "loss": 0.6559, "step": 7845 }, { "epoch": 0.5, "grad_norm": 1.5963901113925334, "learning_rate": 5.207251722578121e-06, "loss": 0.7638, "step": 7846 }, { "epoch": 0.5, "grad_norm": 1.6996461025947782, "learning_rate": 5.206216053403333e-06, "loss": 0.7643, "step": 7847 }, { "epoch": 0.5, "grad_norm": 1.931769081761515, "learning_rate": 5.20518037536581e-06, "loss": 0.8008, "step": 7848 }, { "epoch": 0.5, "grad_norm": 1.147185850967644, "learning_rate": 5.204144688510069e-06, "loss": 0.6402, "step": 7849 }, { "epoch": 0.5, "grad_norm": 1.8707666417690894, "learning_rate": 5.203108992880616e-06, "loss": 0.8017, "step": 7850 }, { "epoch": 0.5, "grad_norm": 1.8326739415982964, "learning_rate": 5.202073288521965e-06, "loss": 0.9435, "step": 7851 }, { "epoch": 0.5, "grad_norm": 2.2605821570490745, "learning_rate": 5.20103757547863e-06, "loss": 0.7071, "step": 7852 }, { "epoch": 0.5, "grad_norm": 1.622687043706336, "learning_rate": 5.2000018537951226e-06, "loss": 0.7357, "step": 7853 }, { "epoch": 0.5, "grad_norm": 2.167365287716609, "learning_rate": 5.198966123515955e-06, "loss": 0.7987, "step": 7854 }, { "epoch": 0.5, "grad_norm": 1.4941843738933647, "learning_rate": 5.197930384685642e-06, "loss": 0.7562, "step": 7855 }, { "epoch": 0.5, "grad_norm": 1.7523497020999368, "learning_rate": 5.1968946373486964e-06, "loss": 0.8393, "step": 7856 }, { "epoch": 0.5, "grad_norm": 1.551233616389935, "learning_rate": 5.195858881549633e-06, "loss": 0.7113, "step": 7857 }, { "epoch": 0.5, "grad_norm": 1.460293379278522, "learning_rate": 5.194823117332966e-06, "loss": 0.6234, "step": 7858 }, { "epoch": 0.5, "grad_norm": 1.2899208824352506, "learning_rate": 5.19378734474321e-06, "loss": 0.6009, "step": 7859 }, { "epoch": 0.5, "grad_norm": 1.6413092800836284, "learning_rate": 5.192751563824884e-06, "loss": 0.7166, "step": 7860 }, { "epoch": 0.5, "grad_norm": 1.7617122840970376, "learning_rate": 5.1917157746225e-06, "loss": 0.6977, "step": 7861 }, { "epoch": 0.5, "grad_norm": 1.7180488209189577, "learning_rate": 5.190679977180574e-06, "loss": 0.7973, "step": 7862 }, { "epoch": 0.5, "grad_norm": 1.6003624994890062, "learning_rate": 5.189644171543624e-06, "loss": 0.7137, "step": 7863 }, { "epoch": 0.5, "grad_norm": 1.5551243235772327, "learning_rate": 5.188608357756164e-06, "loss": 0.6884, "step": 7864 }, { "epoch": 0.5, "grad_norm": 1.5691991201564763, "learning_rate": 5.187572535862715e-06, "loss": 0.727, "step": 7865 }, { "epoch": 0.5, "grad_norm": 1.7203081828097269, "learning_rate": 5.186536705907792e-06, "loss": 0.6358, "step": 7866 }, { "epoch": 0.5, "grad_norm": 1.5517311031246561, "learning_rate": 5.185500867935913e-06, "loss": 0.7109, "step": 7867 }, { "epoch": 0.5, "grad_norm": 1.7688983876955624, "learning_rate": 5.184465021991597e-06, "loss": 0.7127, "step": 7868 }, { "epoch": 0.5, "grad_norm": 1.6964678526495949, "learning_rate": 5.183429168119362e-06, "loss": 0.8087, "step": 7869 }, { "epoch": 0.5, "grad_norm": 1.5434657278144719, "learning_rate": 5.182393306363728e-06, "loss": 0.8058, "step": 7870 }, { "epoch": 0.5, "grad_norm": 1.0335807635068899, "learning_rate": 5.181357436769211e-06, "loss": 0.5689, "step": 7871 }, { "epoch": 0.5, "grad_norm": 1.7527386339352609, "learning_rate": 5.180321559380332e-06, "loss": 0.6773, "step": 7872 }, { "epoch": 0.5, "grad_norm": 1.6556793810393313, "learning_rate": 5.1792856742416145e-06, "loss": 0.7106, "step": 7873 }, { "epoch": 0.5, "grad_norm": 1.6512066741222682, "learning_rate": 5.178249781397573e-06, "loss": 0.7217, "step": 7874 }, { "epoch": 0.5, "grad_norm": 1.673735196168127, "learning_rate": 5.17721388089273e-06, "loss": 0.7352, "step": 7875 }, { "epoch": 0.5, "grad_norm": 1.6207275987691823, "learning_rate": 5.176177972771608e-06, "loss": 0.7917, "step": 7876 }, { "epoch": 0.5, "grad_norm": 1.9084105615630547, "learning_rate": 5.175142057078727e-06, "loss": 0.7987, "step": 7877 }, { "epoch": 0.5, "grad_norm": 2.795343143372523, "learning_rate": 5.174106133858607e-06, "loss": 0.7858, "step": 7878 }, { "epoch": 0.5, "grad_norm": 1.5659082606432295, "learning_rate": 5.173070203155772e-06, "loss": 0.8101, "step": 7879 }, { "epoch": 0.5, "grad_norm": 1.4968331291638897, "learning_rate": 5.172034265014743e-06, "loss": 0.6416, "step": 7880 }, { "epoch": 0.5, "grad_norm": 1.4769695995946195, "learning_rate": 5.170998319480044e-06, "loss": 0.6794, "step": 7881 }, { "epoch": 0.5, "grad_norm": 1.5486869339060443, "learning_rate": 5.1699623665961965e-06, "loss": 0.642, "step": 7882 }, { "epoch": 0.5, "grad_norm": 1.087262571553298, "learning_rate": 5.168926406407723e-06, "loss": 0.5949, "step": 7883 }, { "epoch": 0.5, "grad_norm": 1.6290382908813155, "learning_rate": 5.1678904389591474e-06, "loss": 0.8062, "step": 7884 }, { "epoch": 0.5, "grad_norm": 1.1085583088745106, "learning_rate": 5.166854464294993e-06, "loss": 0.7039, "step": 7885 }, { "epoch": 0.5, "grad_norm": 1.9040841033513236, "learning_rate": 5.165818482459784e-06, "loss": 0.7246, "step": 7886 }, { "epoch": 0.5, "grad_norm": 2.13143239072022, "learning_rate": 5.164782493498046e-06, "loss": 0.7787, "step": 7887 }, { "epoch": 0.5, "grad_norm": 1.5898816091898986, "learning_rate": 5.163746497454304e-06, "loss": 0.7105, "step": 7888 }, { "epoch": 0.5, "grad_norm": 1.6721300373166292, "learning_rate": 5.16271049437308e-06, "loss": 0.6807, "step": 7889 }, { "epoch": 0.51, "grad_norm": 1.7604839565679664, "learning_rate": 5.161674484298902e-06, "loss": 0.7371, "step": 7890 }, { "epoch": 0.51, "grad_norm": 1.6377315586725316, "learning_rate": 5.160638467276293e-06, "loss": 0.7732, "step": 7891 }, { "epoch": 0.51, "grad_norm": 1.6877615605205074, "learning_rate": 5.159602443349781e-06, "loss": 0.6881, "step": 7892 }, { "epoch": 0.51, "grad_norm": 1.1640857973916494, "learning_rate": 5.15856641256389e-06, "loss": 0.6461, "step": 7893 }, { "epoch": 0.51, "grad_norm": 1.5393840451775094, "learning_rate": 5.157530374963149e-06, "loss": 0.7101, "step": 7894 }, { "epoch": 0.51, "grad_norm": 1.5594777918012706, "learning_rate": 5.156494330592084e-06, "loss": 0.7083, "step": 7895 }, { "epoch": 0.51, "grad_norm": 1.9133237230459896, "learning_rate": 5.15545827949522e-06, "loss": 0.737, "step": 7896 }, { "epoch": 0.51, "grad_norm": 1.5859299987634736, "learning_rate": 5.154422221717087e-06, "loss": 0.8331, "step": 7897 }, { "epoch": 0.51, "grad_norm": 1.6667633287457704, "learning_rate": 5.153386157302211e-06, "loss": 0.7981, "step": 7898 }, { "epoch": 0.51, "grad_norm": 1.1736309586626226, "learning_rate": 5.152350086295121e-06, "loss": 0.587, "step": 7899 }, { "epoch": 0.51, "grad_norm": 1.6000278837403228, "learning_rate": 5.151314008740343e-06, "loss": 0.9951, "step": 7900 }, { "epoch": 0.51, "grad_norm": 1.731004341064083, "learning_rate": 5.150277924682408e-06, "loss": 0.679, "step": 7901 }, { "epoch": 0.51, "grad_norm": 1.554961080920612, "learning_rate": 5.149241834165844e-06, "loss": 0.8238, "step": 7902 }, { "epoch": 0.51, "grad_norm": 1.5154673927232056, "learning_rate": 5.148205737235178e-06, "loss": 0.9188, "step": 7903 }, { "epoch": 0.51, "grad_norm": 1.6716659263936233, "learning_rate": 5.147169633934942e-06, "loss": 0.6801, "step": 7904 }, { "epoch": 0.51, "grad_norm": 1.659411245600587, "learning_rate": 5.146133524309664e-06, "loss": 0.7901, "step": 7905 }, { "epoch": 0.51, "grad_norm": 1.8711686839731516, "learning_rate": 5.145097408403874e-06, "loss": 0.6832, "step": 7906 }, { "epoch": 0.51, "grad_norm": 1.6188729618083335, "learning_rate": 5.144061286262102e-06, "loss": 0.8171, "step": 7907 }, { "epoch": 0.51, "grad_norm": 1.6347459640632278, "learning_rate": 5.1430251579288794e-06, "loss": 0.7108, "step": 7908 }, { "epoch": 0.51, "grad_norm": 1.7789862701423234, "learning_rate": 5.141989023448736e-06, "loss": 0.8508, "step": 7909 }, { "epoch": 0.51, "grad_norm": 1.0811668215417412, "learning_rate": 5.1409528828662025e-06, "loss": 0.5098, "step": 7910 }, { "epoch": 0.51, "grad_norm": 1.7286469944969125, "learning_rate": 5.139916736225811e-06, "loss": 0.7508, "step": 7911 }, { "epoch": 0.51, "grad_norm": 1.9290050283478146, "learning_rate": 5.13888058357209e-06, "loss": 0.7545, "step": 7912 }, { "epoch": 0.51, "grad_norm": 1.8007909883318725, "learning_rate": 5.137844424949576e-06, "loss": 0.7642, "step": 7913 }, { "epoch": 0.51, "grad_norm": 1.7418662894941734, "learning_rate": 5.136808260402797e-06, "loss": 0.7089, "step": 7914 }, { "epoch": 0.51, "grad_norm": 2.248910053218125, "learning_rate": 5.135772089976285e-06, "loss": 0.8899, "step": 7915 }, { "epoch": 0.51, "grad_norm": 2.085675538355029, "learning_rate": 5.134735913714577e-06, "loss": 0.9157, "step": 7916 }, { "epoch": 0.51, "grad_norm": 1.880387410790192, "learning_rate": 5.133699731662201e-06, "loss": 0.7387, "step": 7917 }, { "epoch": 0.51, "grad_norm": 1.6181761367874228, "learning_rate": 5.132663543863692e-06, "loss": 0.9072, "step": 7918 }, { "epoch": 0.51, "grad_norm": 1.8883269373371359, "learning_rate": 5.131627350363582e-06, "loss": 0.6929, "step": 7919 }, { "epoch": 0.51, "grad_norm": 1.073039163930529, "learning_rate": 5.130591151206405e-06, "loss": 0.6437, "step": 7920 }, { "epoch": 0.51, "grad_norm": 1.682903501661339, "learning_rate": 5.1295549464366944e-06, "loss": 0.7773, "step": 7921 }, { "epoch": 0.51, "grad_norm": 3.6918369961243553, "learning_rate": 5.128518736098984e-06, "loss": 0.6222, "step": 7922 }, { "epoch": 0.51, "grad_norm": 1.7339196017223684, "learning_rate": 5.1274825202378085e-06, "loss": 0.8306, "step": 7923 }, { "epoch": 0.51, "grad_norm": 1.6863211096314625, "learning_rate": 5.126446298897704e-06, "loss": 0.7197, "step": 7924 }, { "epoch": 0.51, "grad_norm": 1.9393845348354328, "learning_rate": 5.125410072123203e-06, "loss": 0.8592, "step": 7925 }, { "epoch": 0.51, "grad_norm": 1.5650586769314905, "learning_rate": 5.12437383995884e-06, "loss": 0.6303, "step": 7926 }, { "epoch": 0.51, "grad_norm": 1.1745017188194815, "learning_rate": 5.12333760244915e-06, "loss": 0.6946, "step": 7927 }, { "epoch": 0.51, "grad_norm": 3.6136557281570743, "learning_rate": 5.122301359638669e-06, "loss": 0.8206, "step": 7928 }, { "epoch": 0.51, "grad_norm": 1.5064131471051292, "learning_rate": 5.121265111571933e-06, "loss": 0.6392, "step": 7929 }, { "epoch": 0.51, "grad_norm": 2.1932517733618084, "learning_rate": 5.120228858293477e-06, "loss": 0.7335, "step": 7930 }, { "epoch": 0.51, "grad_norm": 1.5932772526066141, "learning_rate": 5.119192599847838e-06, "loss": 0.8641, "step": 7931 }, { "epoch": 0.51, "grad_norm": 1.0274971595339553, "learning_rate": 5.118156336279552e-06, "loss": 0.6753, "step": 7932 }, { "epoch": 0.51, "grad_norm": 1.9054041459245845, "learning_rate": 5.1171200676331535e-06, "loss": 0.8024, "step": 7933 }, { "epoch": 0.51, "grad_norm": 1.749407515287478, "learning_rate": 5.116083793953181e-06, "loss": 0.6688, "step": 7934 }, { "epoch": 0.51, "grad_norm": 1.6165773893367117, "learning_rate": 5.115047515284173e-06, "loss": 0.7409, "step": 7935 }, { "epoch": 0.51, "grad_norm": 1.914349846297394, "learning_rate": 5.1140112316706615e-06, "loss": 0.7444, "step": 7936 }, { "epoch": 0.51, "grad_norm": 1.731440115307398, "learning_rate": 5.112974943157188e-06, "loss": 0.8624, "step": 7937 }, { "epoch": 0.51, "grad_norm": 1.7060091133570132, "learning_rate": 5.1119386497882896e-06, "loss": 0.817, "step": 7938 }, { "epoch": 0.51, "grad_norm": 3.1477501661569764, "learning_rate": 5.110902351608504e-06, "loss": 0.8001, "step": 7939 }, { "epoch": 0.51, "grad_norm": 1.731865073885683, "learning_rate": 5.109866048662365e-06, "loss": 0.8469, "step": 7940 }, { "epoch": 0.51, "grad_norm": 1.8740711584568455, "learning_rate": 5.108829740994417e-06, "loss": 0.7486, "step": 7941 }, { "epoch": 0.51, "grad_norm": 1.53271178942273, "learning_rate": 5.107793428649194e-06, "loss": 0.8158, "step": 7942 }, { "epoch": 0.51, "grad_norm": 1.916191493085112, "learning_rate": 5.106757111671235e-06, "loss": 0.7367, "step": 7943 }, { "epoch": 0.51, "grad_norm": 1.6436421312807616, "learning_rate": 5.105720790105082e-06, "loss": 0.6791, "step": 7944 }, { "epoch": 0.51, "grad_norm": 1.6886984914257237, "learning_rate": 5.104684463995271e-06, "loss": 0.749, "step": 7945 }, { "epoch": 0.51, "grad_norm": 1.1542337100089781, "learning_rate": 5.103648133386342e-06, "loss": 0.555, "step": 7946 }, { "epoch": 0.51, "grad_norm": 1.9034534418354951, "learning_rate": 5.102611798322833e-06, "loss": 0.7107, "step": 7947 }, { "epoch": 0.51, "grad_norm": 1.0749881664961056, "learning_rate": 5.101575458849285e-06, "loss": 0.6795, "step": 7948 }, { "epoch": 0.51, "grad_norm": 1.5958678077245618, "learning_rate": 5.100539115010237e-06, "loss": 0.709, "step": 7949 }, { "epoch": 0.51, "grad_norm": 1.7710445140293756, "learning_rate": 5.09950276685023e-06, "loss": 0.7747, "step": 7950 }, { "epoch": 0.51, "grad_norm": 1.2464637797118276, "learning_rate": 5.098466414413801e-06, "loss": 0.6881, "step": 7951 }, { "epoch": 0.51, "grad_norm": 1.2194438759281376, "learning_rate": 5.097430057745494e-06, "loss": 0.6307, "step": 7952 }, { "epoch": 0.51, "grad_norm": 1.898273586448872, "learning_rate": 5.096393696889848e-06, "loss": 0.7653, "step": 7953 }, { "epoch": 0.51, "grad_norm": 1.5525520304159497, "learning_rate": 5.095357331891403e-06, "loss": 0.7259, "step": 7954 }, { "epoch": 0.51, "grad_norm": 1.6585959072887002, "learning_rate": 5.094320962794701e-06, "loss": 0.751, "step": 7955 }, { "epoch": 0.51, "grad_norm": 1.5123084258281971, "learning_rate": 5.093284589644282e-06, "loss": 0.7231, "step": 7956 }, { "epoch": 0.51, "grad_norm": 1.6955253851608572, "learning_rate": 5.092248212484686e-06, "loss": 0.79, "step": 7957 }, { "epoch": 0.51, "grad_norm": 2.0597151316746345, "learning_rate": 5.0912118313604564e-06, "loss": 0.7914, "step": 7958 }, { "epoch": 0.51, "grad_norm": 1.7536109822239114, "learning_rate": 5.090175446316133e-06, "loss": 0.6648, "step": 7959 }, { "epoch": 0.51, "grad_norm": 1.65988291307152, "learning_rate": 5.0891390573962605e-06, "loss": 0.7331, "step": 7960 }, { "epoch": 0.51, "grad_norm": 1.7225057196301026, "learning_rate": 5.088102664645378e-06, "loss": 0.7493, "step": 7961 }, { "epoch": 0.51, "grad_norm": 1.1525925081261776, "learning_rate": 5.087066268108027e-06, "loss": 0.6094, "step": 7962 }, { "epoch": 0.51, "grad_norm": 1.543376131755739, "learning_rate": 5.086029867828752e-06, "loss": 0.6405, "step": 7963 }, { "epoch": 0.51, "grad_norm": 1.4774450863778892, "learning_rate": 5.084993463852094e-06, "loss": 0.6766, "step": 7964 }, { "epoch": 0.51, "grad_norm": 1.7151163133683056, "learning_rate": 5.083957056222595e-06, "loss": 0.8207, "step": 7965 }, { "epoch": 0.51, "grad_norm": 1.5622727845477882, "learning_rate": 5.082920644984798e-06, "loss": 0.6895, "step": 7966 }, { "epoch": 0.51, "grad_norm": 1.686366488574995, "learning_rate": 5.081884230183247e-06, "loss": 0.7414, "step": 7967 }, { "epoch": 0.51, "grad_norm": 1.775871159221051, "learning_rate": 5.080847811862482e-06, "loss": 0.8146, "step": 7968 }, { "epoch": 0.51, "grad_norm": 1.2005006636052513, "learning_rate": 5.079811390067049e-06, "loss": 0.6777, "step": 7969 }, { "epoch": 0.51, "grad_norm": 1.6803580838288223, "learning_rate": 5.078774964841491e-06, "loss": 0.6792, "step": 7970 }, { "epoch": 0.51, "grad_norm": 1.4733065414795503, "learning_rate": 5.07773853623035e-06, "loss": 0.7127, "step": 7971 }, { "epoch": 0.51, "grad_norm": 1.4979276863947384, "learning_rate": 5.076702104278168e-06, "loss": 0.7557, "step": 7972 }, { "epoch": 0.51, "grad_norm": 1.75702704587362, "learning_rate": 5.075665669029494e-06, "loss": 0.7537, "step": 7973 }, { "epoch": 0.51, "grad_norm": 1.8511037507900352, "learning_rate": 5.074629230528867e-06, "loss": 0.6359, "step": 7974 }, { "epoch": 0.51, "grad_norm": 1.5331008273940745, "learning_rate": 5.073592788820832e-06, "loss": 0.7402, "step": 7975 }, { "epoch": 0.51, "grad_norm": 1.5418051199054748, "learning_rate": 5.072556343949935e-06, "loss": 0.6905, "step": 7976 }, { "epoch": 0.51, "grad_norm": 1.8650298963389316, "learning_rate": 5.071519895960717e-06, "loss": 0.7856, "step": 7977 }, { "epoch": 0.51, "grad_norm": 1.814859119074085, "learning_rate": 5.070483444897725e-06, "loss": 0.7518, "step": 7978 }, { "epoch": 0.51, "grad_norm": 1.2361809476142946, "learning_rate": 5.069446990805501e-06, "loss": 0.7063, "step": 7979 }, { "epoch": 0.51, "grad_norm": 1.5405255175608576, "learning_rate": 5.068410533728594e-06, "loss": 0.7597, "step": 7980 }, { "epoch": 0.51, "grad_norm": 1.0367070853872697, "learning_rate": 5.067374073711545e-06, "loss": 0.5928, "step": 7981 }, { "epoch": 0.51, "grad_norm": 1.9532875869248874, "learning_rate": 5.0663376107989e-06, "loss": 0.7573, "step": 7982 }, { "epoch": 0.51, "grad_norm": 1.1125373606192865, "learning_rate": 5.065301145035204e-06, "loss": 0.6656, "step": 7983 }, { "epoch": 0.51, "grad_norm": 1.629730780133399, "learning_rate": 5.064264676465002e-06, "loss": 0.7318, "step": 7984 }, { "epoch": 0.51, "grad_norm": 1.577105140220952, "learning_rate": 5.063228205132838e-06, "loss": 0.7257, "step": 7985 }, { "epoch": 0.51, "grad_norm": 1.7132928722004201, "learning_rate": 5.06219173108326e-06, "loss": 0.7089, "step": 7986 }, { "epoch": 0.51, "grad_norm": 1.3990311592291327, "learning_rate": 5.061155254360811e-06, "loss": 0.6962, "step": 7987 }, { "epoch": 0.51, "grad_norm": 1.7061191390195218, "learning_rate": 5.060118775010037e-06, "loss": 0.7525, "step": 7988 }, { "epoch": 0.51, "grad_norm": 1.5702271018252991, "learning_rate": 5.059082293075487e-06, "loss": 0.7107, "step": 7989 }, { "epoch": 0.51, "grad_norm": 1.6877324700193745, "learning_rate": 5.058045808601704e-06, "loss": 0.5936, "step": 7990 }, { "epoch": 0.51, "grad_norm": 1.5991343102338362, "learning_rate": 5.057009321633233e-06, "loss": 0.7329, "step": 7991 }, { "epoch": 0.51, "grad_norm": 2.079751388396315, "learning_rate": 5.0559728322146204e-06, "loss": 0.633, "step": 7992 }, { "epoch": 0.51, "grad_norm": 1.7389829830784942, "learning_rate": 5.054936340390414e-06, "loss": 0.7304, "step": 7993 }, { "epoch": 0.51, "grad_norm": 1.5742634989959376, "learning_rate": 5.0538998462051595e-06, "loss": 0.8975, "step": 7994 }, { "epoch": 0.51, "grad_norm": 1.9313761245221064, "learning_rate": 5.052863349703402e-06, "loss": 0.7035, "step": 7995 }, { "epoch": 0.51, "grad_norm": 1.8402760235037585, "learning_rate": 5.051826850929688e-06, "loss": 0.84, "step": 7996 }, { "epoch": 0.51, "grad_norm": 1.4955012898180162, "learning_rate": 5.050790349928566e-06, "loss": 0.7708, "step": 7997 }, { "epoch": 0.51, "grad_norm": 1.3411268137755843, "learning_rate": 5.049753846744582e-06, "loss": 0.6477, "step": 7998 }, { "epoch": 0.51, "grad_norm": 1.4935543146088226, "learning_rate": 5.048717341422282e-06, "loss": 0.6281, "step": 7999 }, { "epoch": 0.51, "grad_norm": 1.65664586386976, "learning_rate": 5.047680834006212e-06, "loss": 0.6972, "step": 8000 }, { "epoch": 0.51, "grad_norm": 1.2448753128128285, "learning_rate": 5.0466443245409215e-06, "loss": 0.7038, "step": 8001 }, { "epoch": 0.51, "grad_norm": 1.668997960573412, "learning_rate": 5.045607813070955e-06, "loss": 0.7405, "step": 8002 }, { "epoch": 0.51, "grad_norm": 1.785173258310218, "learning_rate": 5.04457129964086e-06, "loss": 0.8332, "step": 8003 }, { "epoch": 0.51, "grad_norm": 1.5995393657293524, "learning_rate": 5.043534784295185e-06, "loss": 0.7195, "step": 8004 }, { "epoch": 0.51, "grad_norm": 1.6838219244846269, "learning_rate": 5.042498267078476e-06, "loss": 0.6861, "step": 8005 }, { "epoch": 0.51, "grad_norm": 1.5718703214926855, "learning_rate": 5.041461748035281e-06, "loss": 0.6101, "step": 8006 }, { "epoch": 0.51, "grad_norm": 1.7552645324431877, "learning_rate": 5.040425227210148e-06, "loss": 0.6601, "step": 8007 }, { "epoch": 0.51, "grad_norm": 2.382793939498459, "learning_rate": 5.039388704647622e-06, "loss": 0.6275, "step": 8008 }, { "epoch": 0.51, "grad_norm": 1.8798766154597688, "learning_rate": 5.038352180392254e-06, "loss": 0.6555, "step": 8009 }, { "epoch": 0.51, "grad_norm": 1.4818082322489523, "learning_rate": 5.037315654488589e-06, "loss": 0.6497, "step": 8010 }, { "epoch": 0.51, "grad_norm": 1.8183699800105497, "learning_rate": 5.036279126981176e-06, "loss": 0.8021, "step": 8011 }, { "epoch": 0.51, "grad_norm": 1.005138201956924, "learning_rate": 5.0352425979145626e-06, "loss": 0.6584, "step": 8012 }, { "epoch": 0.51, "grad_norm": 1.3851370676488732, "learning_rate": 5.034206067333296e-06, "loss": 0.6689, "step": 8013 }, { "epoch": 0.51, "grad_norm": 1.825490042303585, "learning_rate": 5.0331695352819236e-06, "loss": 0.7043, "step": 8014 }, { "epoch": 0.51, "grad_norm": 1.9870193582694697, "learning_rate": 5.032133001804994e-06, "loss": 0.8033, "step": 8015 }, { "epoch": 0.51, "grad_norm": 1.5968230865095732, "learning_rate": 5.031096466947056e-06, "loss": 0.6919, "step": 8016 }, { "epoch": 0.51, "grad_norm": 1.5722521347764318, "learning_rate": 5.030059930752659e-06, "loss": 0.7231, "step": 8017 }, { "epoch": 0.51, "grad_norm": 1.284933207172838, "learning_rate": 5.029023393266348e-06, "loss": 0.5996, "step": 8018 }, { "epoch": 0.51, "grad_norm": 1.7165104098152826, "learning_rate": 5.027986854532673e-06, "loss": 0.8576, "step": 8019 }, { "epoch": 0.51, "grad_norm": 1.825876466192973, "learning_rate": 5.026950314596181e-06, "loss": 0.7147, "step": 8020 }, { "epoch": 0.51, "grad_norm": 1.1851499334067352, "learning_rate": 5.025913773501421e-06, "loss": 0.6762, "step": 8021 }, { "epoch": 0.51, "grad_norm": 1.6451403700105984, "learning_rate": 5.024877231292941e-06, "loss": 0.6645, "step": 8022 }, { "epoch": 0.51, "grad_norm": 1.6740139687163014, "learning_rate": 5.023840688015291e-06, "loss": 0.7794, "step": 8023 }, { "epoch": 0.51, "grad_norm": 1.5201156002506253, "learning_rate": 5.022804143713018e-06, "loss": 0.6916, "step": 8024 }, { "epoch": 0.51, "grad_norm": 1.729806937047602, "learning_rate": 5.021767598430672e-06, "loss": 0.8118, "step": 8025 }, { "epoch": 0.51, "grad_norm": 2.1604773155553993, "learning_rate": 5.0207310522128e-06, "loss": 0.826, "step": 8026 }, { "epoch": 0.51, "grad_norm": 1.7109786725675589, "learning_rate": 5.019694505103949e-06, "loss": 0.8326, "step": 8027 }, { "epoch": 0.51, "grad_norm": 2.1773665989595488, "learning_rate": 5.018657957148672e-06, "loss": 0.8377, "step": 8028 }, { "epoch": 0.51, "grad_norm": 1.01115006854108, "learning_rate": 5.017621408391513e-06, "loss": 0.6667, "step": 8029 }, { "epoch": 0.51, "grad_norm": 1.2057695501378503, "learning_rate": 5.016584858877024e-06, "loss": 0.6816, "step": 8030 }, { "epoch": 0.51, "grad_norm": 1.6517298160429827, "learning_rate": 5.015548308649752e-06, "loss": 0.7794, "step": 8031 }, { "epoch": 0.51, "grad_norm": 1.518534705250981, "learning_rate": 5.014511757754248e-06, "loss": 0.639, "step": 8032 }, { "epoch": 0.51, "grad_norm": 1.822830634245611, "learning_rate": 5.013475206235056e-06, "loss": 0.7556, "step": 8033 }, { "epoch": 0.51, "grad_norm": 1.8619603187740918, "learning_rate": 5.012438654136731e-06, "loss": 0.8164, "step": 8034 }, { "epoch": 0.51, "grad_norm": 1.933763209808166, "learning_rate": 5.0114021015038185e-06, "loss": 0.7632, "step": 8035 }, { "epoch": 0.51, "grad_norm": 1.5896071837501695, "learning_rate": 5.010365548380865e-06, "loss": 0.5608, "step": 8036 }, { "epoch": 0.51, "grad_norm": 1.7508524150951421, "learning_rate": 5.0093289948124244e-06, "loss": 0.6688, "step": 8037 }, { "epoch": 0.51, "grad_norm": 1.8377125792208668, "learning_rate": 5.008292440843043e-06, "loss": 0.7383, "step": 8038 }, { "epoch": 0.51, "grad_norm": 1.7788027693396238, "learning_rate": 5.00725588651727e-06, "loss": 0.7774, "step": 8039 }, { "epoch": 0.51, "grad_norm": 1.8651449320360116, "learning_rate": 5.006219331879655e-06, "loss": 0.6995, "step": 8040 }, { "epoch": 0.51, "grad_norm": 1.1419439712710195, "learning_rate": 5.005182776974745e-06, "loss": 0.667, "step": 8041 }, { "epoch": 0.51, "grad_norm": 1.0440352008283789, "learning_rate": 5.00414622184709e-06, "loss": 0.6033, "step": 8042 }, { "epoch": 0.51, "grad_norm": 1.0722408819563125, "learning_rate": 5.0031096665412385e-06, "loss": 0.7067, "step": 8043 }, { "epoch": 0.51, "grad_norm": 1.5393210958790124, "learning_rate": 5.002073111101741e-06, "loss": 0.6055, "step": 8044 }, { "epoch": 0.51, "grad_norm": 2.6400741921969533, "learning_rate": 5.0010365555731455e-06, "loss": 0.8871, "step": 8045 }, { "epoch": 0.51, "grad_norm": 1.7366187731836324, "learning_rate": 5e-06, "loss": 0.7294, "step": 8046 }, { "epoch": 0.52, "grad_norm": 1.6588050772200746, "learning_rate": 4.998963444426855e-06, "loss": 0.8573, "step": 8047 }, { "epoch": 0.52, "grad_norm": 1.9866568179726771, "learning_rate": 4.997926888898262e-06, "loss": 0.7674, "step": 8048 }, { "epoch": 0.52, "grad_norm": 1.1335821320327086, "learning_rate": 4.996890333458762e-06, "loss": 0.6651, "step": 8049 }, { "epoch": 0.52, "grad_norm": 1.6550349268927262, "learning_rate": 4.995853778152912e-06, "loss": 0.7009, "step": 8050 }, { "epoch": 0.52, "grad_norm": 1.618818400796287, "learning_rate": 4.994817223025256e-06, "loss": 0.8076, "step": 8051 }, { "epoch": 0.52, "grad_norm": 1.7119452123257592, "learning_rate": 4.993780668120347e-06, "loss": 0.737, "step": 8052 }, { "epoch": 0.52, "grad_norm": 1.227803803810223, "learning_rate": 4.992744113482731e-06, "loss": 0.6269, "step": 8053 }, { "epoch": 0.52, "grad_norm": 1.6046340838725828, "learning_rate": 4.9917075591569594e-06, "loss": 0.7725, "step": 8054 }, { "epoch": 0.52, "grad_norm": 1.5208657351931922, "learning_rate": 4.990671005187576e-06, "loss": 0.8561, "step": 8055 }, { "epoch": 0.52, "grad_norm": 1.665588495360519, "learning_rate": 4.989634451619135e-06, "loss": 0.6748, "step": 8056 }, { "epoch": 0.52, "grad_norm": 1.5116673280565929, "learning_rate": 4.988597898496183e-06, "loss": 0.5516, "step": 8057 }, { "epoch": 0.52, "grad_norm": 1.478618732799167, "learning_rate": 4.987561345863269e-06, "loss": 0.7469, "step": 8058 }, { "epoch": 0.52, "grad_norm": 1.9010279493152726, "learning_rate": 4.9865247937649445e-06, "loss": 0.7154, "step": 8059 }, { "epoch": 0.52, "grad_norm": 1.6758816405484311, "learning_rate": 4.985488242245753e-06, "loss": 0.7184, "step": 8060 }, { "epoch": 0.52, "grad_norm": 1.4550547306285908, "learning_rate": 4.98445169135025e-06, "loss": 0.7586, "step": 8061 }, { "epoch": 0.52, "grad_norm": 1.7564312180343071, "learning_rate": 4.9834151411229775e-06, "loss": 0.8428, "step": 8062 }, { "epoch": 0.52, "grad_norm": 1.717083017604519, "learning_rate": 4.982378591608489e-06, "loss": 0.696, "step": 8063 }, { "epoch": 0.52, "grad_norm": 1.317689757828615, "learning_rate": 4.98134204285133e-06, "loss": 0.6332, "step": 8064 }, { "epoch": 0.52, "grad_norm": 1.5728050957177295, "learning_rate": 4.980305494896052e-06, "loss": 0.7461, "step": 8065 }, { "epoch": 0.52, "grad_norm": 2.634074697335975, "learning_rate": 4.979268947787203e-06, "loss": 0.6775, "step": 8066 }, { "epoch": 0.52, "grad_norm": 1.1999271830047233, "learning_rate": 4.978232401569329e-06, "loss": 0.6608, "step": 8067 }, { "epoch": 0.52, "grad_norm": 2.099052345207926, "learning_rate": 4.977195856286983e-06, "loss": 0.6429, "step": 8068 }, { "epoch": 0.52, "grad_norm": 1.5728032477687528, "learning_rate": 4.97615931198471e-06, "loss": 0.7258, "step": 8069 }, { "epoch": 0.52, "grad_norm": 1.7469560980787395, "learning_rate": 4.975122768707061e-06, "loss": 0.7528, "step": 8070 }, { "epoch": 0.52, "grad_norm": 1.699218409069742, "learning_rate": 4.974086226498581e-06, "loss": 0.757, "step": 8071 }, { "epoch": 0.52, "grad_norm": 1.5421519937827028, "learning_rate": 4.9730496854038215e-06, "loss": 0.6697, "step": 8072 }, { "epoch": 0.52, "grad_norm": 1.3561370352512818, "learning_rate": 4.972013145467329e-06, "loss": 0.7271, "step": 8073 }, { "epoch": 0.52, "grad_norm": 1.7278857237132208, "learning_rate": 4.970976606733653e-06, "loss": 0.7724, "step": 8074 }, { "epoch": 0.52, "grad_norm": 1.7129427479316137, "learning_rate": 4.969940069247343e-06, "loss": 0.7448, "step": 8075 }, { "epoch": 0.52, "grad_norm": 1.4536437143836103, "learning_rate": 4.968903533052944e-06, "loss": 0.7195, "step": 8076 }, { "epoch": 0.52, "grad_norm": 1.5722831641247972, "learning_rate": 4.9678669981950076e-06, "loss": 0.7734, "step": 8077 }, { "epoch": 0.52, "grad_norm": 1.155668261262075, "learning_rate": 4.966830464718078e-06, "loss": 0.6706, "step": 8078 }, { "epoch": 0.52, "grad_norm": 1.525640060339967, "learning_rate": 4.965793932666707e-06, "loss": 0.6856, "step": 8079 }, { "epoch": 0.52, "grad_norm": 1.0856359879864643, "learning_rate": 4.964757402085439e-06, "loss": 0.6532, "step": 8080 }, { "epoch": 0.52, "grad_norm": 1.657828441209794, "learning_rate": 4.963720873018826e-06, "loss": 0.8805, "step": 8081 }, { "epoch": 0.52, "grad_norm": 1.5448942449200131, "learning_rate": 4.9626843455114125e-06, "loss": 0.7779, "step": 8082 }, { "epoch": 0.52, "grad_norm": 1.641490447652328, "learning_rate": 4.961647819607749e-06, "loss": 0.7897, "step": 8083 }, { "epoch": 0.52, "grad_norm": 2.249629253265416, "learning_rate": 4.96061129535238e-06, "loss": 0.8251, "step": 8084 }, { "epoch": 0.52, "grad_norm": 0.9244787485526501, "learning_rate": 4.959574772789853e-06, "loss": 0.6639, "step": 8085 }, { "epoch": 0.52, "grad_norm": 1.6896922092861653, "learning_rate": 4.95853825196472e-06, "loss": 0.7512, "step": 8086 }, { "epoch": 0.52, "grad_norm": 2.2766772810634324, "learning_rate": 4.957501732921524e-06, "loss": 0.8167, "step": 8087 }, { "epoch": 0.52, "grad_norm": 1.5312593429801764, "learning_rate": 4.9564652157048166e-06, "loss": 0.7733, "step": 8088 }, { "epoch": 0.52, "grad_norm": 1.5741152471570947, "learning_rate": 4.955428700359141e-06, "loss": 0.7821, "step": 8089 }, { "epoch": 0.52, "grad_norm": 1.1449180375660672, "learning_rate": 4.9543921869290475e-06, "loss": 0.7099, "step": 8090 }, { "epoch": 0.52, "grad_norm": 3.4857593632638415, "learning_rate": 4.953355675459081e-06, "loss": 0.7173, "step": 8091 }, { "epoch": 0.52, "grad_norm": 1.815180266535671, "learning_rate": 4.952319165993789e-06, "loss": 0.7345, "step": 8092 }, { "epoch": 0.52, "grad_norm": 1.4580210512174434, "learning_rate": 4.951282658577719e-06, "loss": 0.7019, "step": 8093 }, { "epoch": 0.52, "grad_norm": 1.7230959091990747, "learning_rate": 4.950246153255418e-06, "loss": 0.8004, "step": 8094 }, { "epoch": 0.52, "grad_norm": 2.2729208590660908, "learning_rate": 4.9492096500714346e-06, "loss": 0.7583, "step": 8095 }, { "epoch": 0.52, "grad_norm": 1.4615343368451015, "learning_rate": 4.9481731490703124e-06, "loss": 0.6913, "step": 8096 }, { "epoch": 0.52, "grad_norm": 1.5590057380726716, "learning_rate": 4.947136650296601e-06, "loss": 0.9383, "step": 8097 }, { "epoch": 0.52, "grad_norm": 2.4169383726919684, "learning_rate": 4.946100153794843e-06, "loss": 0.7954, "step": 8098 }, { "epoch": 0.52, "grad_norm": 1.7752618701409983, "learning_rate": 4.945063659609588e-06, "loss": 0.7087, "step": 8099 }, { "epoch": 0.52, "grad_norm": 2.24000869029237, "learning_rate": 4.94402716778538e-06, "loss": 0.7422, "step": 8100 }, { "epoch": 0.52, "grad_norm": 1.6611046783691952, "learning_rate": 4.94299067836677e-06, "loss": 0.7675, "step": 8101 }, { "epoch": 0.52, "grad_norm": 1.739781403562594, "learning_rate": 4.9419541913982984e-06, "loss": 0.6668, "step": 8102 }, { "epoch": 0.52, "grad_norm": 1.6974851770507153, "learning_rate": 4.940917706924513e-06, "loss": 0.7243, "step": 8103 }, { "epoch": 0.52, "grad_norm": 1.4778280728266422, "learning_rate": 4.939881224989963e-06, "loss": 0.6763, "step": 8104 }, { "epoch": 0.52, "grad_norm": 1.1984499545462335, "learning_rate": 4.93884474563919e-06, "loss": 0.5892, "step": 8105 }, { "epoch": 0.52, "grad_norm": 1.8618951136788893, "learning_rate": 4.9378082689167415e-06, "loss": 0.7403, "step": 8106 }, { "epoch": 0.52, "grad_norm": 1.6798089831026894, "learning_rate": 4.936771794867163e-06, "loss": 0.7546, "step": 8107 }, { "epoch": 0.52, "grad_norm": 1.7317881818945065, "learning_rate": 4.935735323535e-06, "loss": 0.7343, "step": 8108 }, { "epoch": 0.52, "grad_norm": 1.9250735868900803, "learning_rate": 4.9346988549647974e-06, "loss": 0.7427, "step": 8109 }, { "epoch": 0.52, "grad_norm": 2.048763417755172, "learning_rate": 4.933662389201102e-06, "loss": 0.724, "step": 8110 }, { "epoch": 0.52, "grad_norm": 1.6028329099518648, "learning_rate": 4.9326259262884565e-06, "loss": 0.6575, "step": 8111 }, { "epoch": 0.52, "grad_norm": 1.581177559013578, "learning_rate": 4.931589466271406e-06, "loss": 0.643, "step": 8112 }, { "epoch": 0.52, "grad_norm": 1.6035670938668014, "learning_rate": 4.9305530091945e-06, "loss": 0.6131, "step": 8113 }, { "epoch": 0.52, "grad_norm": 2.1284655167650275, "learning_rate": 4.929516555102277e-06, "loss": 0.7421, "step": 8114 }, { "epoch": 0.52, "grad_norm": 1.6685902268514465, "learning_rate": 4.928480104039285e-06, "loss": 0.7069, "step": 8115 }, { "epoch": 0.52, "grad_norm": 1.1670449431035335, "learning_rate": 4.927443656050067e-06, "loss": 0.559, "step": 8116 }, { "epoch": 0.52, "grad_norm": 1.5673126763383793, "learning_rate": 4.92640721117917e-06, "loss": 0.573, "step": 8117 }, { "epoch": 0.52, "grad_norm": 1.4730758942624713, "learning_rate": 4.925370769471135e-06, "loss": 0.7889, "step": 8118 }, { "epoch": 0.52, "grad_norm": 1.6170217467697083, "learning_rate": 4.924334330970509e-06, "loss": 0.6389, "step": 8119 }, { "epoch": 0.52, "grad_norm": 1.6319179321696975, "learning_rate": 4.923297895721833e-06, "loss": 0.6853, "step": 8120 }, { "epoch": 0.52, "grad_norm": 1.047550226488885, "learning_rate": 4.922261463769652e-06, "loss": 0.7554, "step": 8121 }, { "epoch": 0.52, "grad_norm": 1.7257093618033434, "learning_rate": 4.92122503515851e-06, "loss": 0.735, "step": 8122 }, { "epoch": 0.52, "grad_norm": 2.16616414273938, "learning_rate": 4.920188609932951e-06, "loss": 0.804, "step": 8123 }, { "epoch": 0.52, "grad_norm": 1.5499426317996619, "learning_rate": 4.919152188137519e-06, "loss": 0.6617, "step": 8124 }, { "epoch": 0.52, "grad_norm": 1.6075863479202586, "learning_rate": 4.918115769816754e-06, "loss": 0.7228, "step": 8125 }, { "epoch": 0.52, "grad_norm": 1.4767287640944216, "learning_rate": 4.917079355015204e-06, "loss": 0.7569, "step": 8126 }, { "epoch": 0.52, "grad_norm": 1.5348137389964633, "learning_rate": 4.9160429437774065e-06, "loss": 0.6215, "step": 8127 }, { "epoch": 0.52, "grad_norm": 1.5965623611776976, "learning_rate": 4.915006536147908e-06, "loss": 0.7915, "step": 8128 }, { "epoch": 0.52, "grad_norm": 1.5657489802506195, "learning_rate": 4.913970132171248e-06, "loss": 0.7566, "step": 8129 }, { "epoch": 0.52, "grad_norm": 0.8774698303238422, "learning_rate": 4.912933731891972e-06, "loss": 0.5231, "step": 8130 }, { "epoch": 0.52, "grad_norm": 2.167197284299824, "learning_rate": 4.911897335354624e-06, "loss": 0.8498, "step": 8131 }, { "epoch": 0.52, "grad_norm": 1.83947079725101, "learning_rate": 4.91086094260374e-06, "loss": 0.7568, "step": 8132 }, { "epoch": 0.52, "grad_norm": 1.8025001477866094, "learning_rate": 4.909824553683868e-06, "loss": 0.7778, "step": 8133 }, { "epoch": 0.52, "grad_norm": 1.0426712515614847, "learning_rate": 4.908788168639545e-06, "loss": 0.5759, "step": 8134 }, { "epoch": 0.52, "grad_norm": 1.2081931141046611, "learning_rate": 4.907751787515316e-06, "loss": 0.5988, "step": 8135 }, { "epoch": 0.52, "grad_norm": 1.874399560183488, "learning_rate": 4.90671541035572e-06, "loss": 0.7133, "step": 8136 }, { "epoch": 0.52, "grad_norm": 1.5199048403111042, "learning_rate": 4.905679037205302e-06, "loss": 0.7477, "step": 8137 }, { "epoch": 0.52, "grad_norm": 1.3093575907805055, "learning_rate": 4.904642668108599e-06, "loss": 0.6986, "step": 8138 }, { "epoch": 0.52, "grad_norm": 1.7529036444490316, "learning_rate": 4.903606303110153e-06, "loss": 0.6899, "step": 8139 }, { "epoch": 0.52, "grad_norm": 1.5887122914726202, "learning_rate": 4.902569942254508e-06, "loss": 0.6473, "step": 8140 }, { "epoch": 0.52, "grad_norm": 1.630366799631107, "learning_rate": 4.9015335855862e-06, "loss": 0.6592, "step": 8141 }, { "epoch": 0.52, "grad_norm": 1.560186821975749, "learning_rate": 4.900497233149773e-06, "loss": 0.6484, "step": 8142 }, { "epoch": 0.52, "grad_norm": 1.709763396217331, "learning_rate": 4.899460884989764e-06, "loss": 0.7721, "step": 8143 }, { "epoch": 0.52, "grad_norm": 1.6212388522393415, "learning_rate": 4.898424541150717e-06, "loss": 0.7259, "step": 8144 }, { "epoch": 0.52, "grad_norm": 1.6912939769813669, "learning_rate": 4.897388201677169e-06, "loss": 0.7916, "step": 8145 }, { "epoch": 0.52, "grad_norm": 1.596029370603013, "learning_rate": 4.896351866613661e-06, "loss": 0.7052, "step": 8146 }, { "epoch": 0.52, "grad_norm": 1.637552139805498, "learning_rate": 4.895315536004731e-06, "loss": 0.6788, "step": 8147 }, { "epoch": 0.52, "grad_norm": 1.6931235788903747, "learning_rate": 4.8942792098949184e-06, "loss": 0.704, "step": 8148 }, { "epoch": 0.52, "grad_norm": 1.688895005935724, "learning_rate": 4.893242888328765e-06, "loss": 0.7284, "step": 8149 }, { "epoch": 0.52, "grad_norm": 1.7992297400076902, "learning_rate": 4.892206571350808e-06, "loss": 0.7388, "step": 8150 }, { "epoch": 0.52, "grad_norm": 1.7711574531344392, "learning_rate": 4.891170259005585e-06, "loss": 0.7589, "step": 8151 }, { "epoch": 0.52, "grad_norm": 1.1808239347921827, "learning_rate": 4.890133951337636e-06, "loss": 0.705, "step": 8152 }, { "epoch": 0.52, "grad_norm": 1.332567726736354, "learning_rate": 4.8890976483915e-06, "loss": 0.7146, "step": 8153 }, { "epoch": 0.52, "grad_norm": 1.2622969267912447, "learning_rate": 4.888061350211712e-06, "loss": 0.5845, "step": 8154 }, { "epoch": 0.52, "grad_norm": 1.7606777754606897, "learning_rate": 4.887025056842815e-06, "loss": 0.9228, "step": 8155 }, { "epoch": 0.52, "grad_norm": 1.1701284011184843, "learning_rate": 4.88598876832934e-06, "loss": 0.7297, "step": 8156 }, { "epoch": 0.52, "grad_norm": 1.7173205236802132, "learning_rate": 4.88495248471583e-06, "loss": 0.7484, "step": 8157 }, { "epoch": 0.52, "grad_norm": 1.5755965740476394, "learning_rate": 4.883916206046819e-06, "loss": 0.7515, "step": 8158 }, { "epoch": 0.52, "grad_norm": 1.643307339925276, "learning_rate": 4.8828799323668465e-06, "loss": 0.7362, "step": 8159 }, { "epoch": 0.52, "grad_norm": 1.5643779001847289, "learning_rate": 4.88184366372045e-06, "loss": 0.7196, "step": 8160 }, { "epoch": 0.52, "grad_norm": 1.5810737919134092, "learning_rate": 4.880807400152162e-06, "loss": 0.6492, "step": 8161 }, { "epoch": 0.52, "grad_norm": 1.1338116885238718, "learning_rate": 4.879771141706524e-06, "loss": 0.7288, "step": 8162 }, { "epoch": 0.52, "grad_norm": 1.7603766963698972, "learning_rate": 4.878734888428068e-06, "loss": 0.7641, "step": 8163 }, { "epoch": 0.52, "grad_norm": 1.7621612733488317, "learning_rate": 4.8776986403613324e-06, "loss": 0.8673, "step": 8164 }, { "epoch": 0.52, "grad_norm": 1.202918864281606, "learning_rate": 4.876662397550851e-06, "loss": 0.6314, "step": 8165 }, { "epoch": 0.52, "grad_norm": 1.8443454627383018, "learning_rate": 4.875626160041163e-06, "loss": 0.7341, "step": 8166 }, { "epoch": 0.52, "grad_norm": 1.5423282051915985, "learning_rate": 4.874589927876799e-06, "loss": 0.6651, "step": 8167 }, { "epoch": 0.52, "grad_norm": 1.6677485945278903, "learning_rate": 4.873553701102297e-06, "loss": 0.7659, "step": 8168 }, { "epoch": 0.52, "grad_norm": 2.0016570674939818, "learning_rate": 4.872517479762192e-06, "loss": 0.8526, "step": 8169 }, { "epoch": 0.52, "grad_norm": 1.560942696672117, "learning_rate": 4.871481263901017e-06, "loss": 0.6683, "step": 8170 }, { "epoch": 0.52, "grad_norm": 1.640981503968028, "learning_rate": 4.870445053563307e-06, "loss": 0.8479, "step": 8171 }, { "epoch": 0.52, "grad_norm": 1.8593776812323697, "learning_rate": 4.8694088487935964e-06, "loss": 0.9047, "step": 8172 }, { "epoch": 0.52, "grad_norm": 1.7587874111466388, "learning_rate": 4.868372649636421e-06, "loss": 0.734, "step": 8173 }, { "epoch": 0.52, "grad_norm": 1.9849400644448472, "learning_rate": 4.86733645613631e-06, "loss": 0.8605, "step": 8174 }, { "epoch": 0.52, "grad_norm": 2.0040941157324528, "learning_rate": 4.866300268337802e-06, "loss": 0.6286, "step": 8175 }, { "epoch": 0.52, "grad_norm": 1.622621534232704, "learning_rate": 4.865264086285425e-06, "loss": 0.7662, "step": 8176 }, { "epoch": 0.52, "grad_norm": 1.1012403251788154, "learning_rate": 4.864227910023714e-06, "loss": 0.7236, "step": 8177 }, { "epoch": 0.52, "grad_norm": 1.5476468804951558, "learning_rate": 4.863191739597205e-06, "loss": 0.6158, "step": 8178 }, { "epoch": 0.52, "grad_norm": 1.9437994494254327, "learning_rate": 4.862155575050426e-06, "loss": 0.6702, "step": 8179 }, { "epoch": 0.52, "grad_norm": 1.997015273894004, "learning_rate": 4.861119416427911e-06, "loss": 0.7913, "step": 8180 }, { "epoch": 0.52, "grad_norm": 1.781324842064451, "learning_rate": 4.86008326377419e-06, "loss": 0.8846, "step": 8181 }, { "epoch": 0.52, "grad_norm": 1.7749029029341026, "learning_rate": 4.859047117133799e-06, "loss": 0.8027, "step": 8182 }, { "epoch": 0.52, "grad_norm": 1.8488449655226538, "learning_rate": 4.858010976551265e-06, "loss": 0.6174, "step": 8183 }, { "epoch": 0.52, "grad_norm": 1.526444050543589, "learning_rate": 4.856974842071122e-06, "loss": 0.7802, "step": 8184 }, { "epoch": 0.52, "grad_norm": 1.766473009074391, "learning_rate": 4.855938713737899e-06, "loss": 0.7164, "step": 8185 }, { "epoch": 0.52, "grad_norm": 1.8648124451791555, "learning_rate": 4.854902591596127e-06, "loss": 0.7376, "step": 8186 }, { "epoch": 0.52, "grad_norm": 1.479611852159645, "learning_rate": 4.853866475690338e-06, "loss": 0.7365, "step": 8187 }, { "epoch": 0.52, "grad_norm": 1.4748418372227887, "learning_rate": 4.852830366065059e-06, "loss": 0.7228, "step": 8188 }, { "epoch": 0.52, "grad_norm": 1.321923465108347, "learning_rate": 4.851794262764824e-06, "loss": 0.6996, "step": 8189 }, { "epoch": 0.52, "grad_norm": 1.5796491924079672, "learning_rate": 4.850758165834157e-06, "loss": 0.8051, "step": 8190 }, { "epoch": 0.52, "grad_norm": 1.7564467702965796, "learning_rate": 4.849722075317594e-06, "loss": 0.8453, "step": 8191 }, { "epoch": 0.52, "grad_norm": 1.6212759079719878, "learning_rate": 4.8486859912596575e-06, "loss": 0.7274, "step": 8192 }, { "epoch": 0.52, "grad_norm": 1.8056770656764327, "learning_rate": 4.847649913704881e-06, "loss": 0.7536, "step": 8193 }, { "epoch": 0.52, "grad_norm": 1.7021043544087702, "learning_rate": 4.8466138426977895e-06, "loss": 0.7625, "step": 8194 }, { "epoch": 0.52, "grad_norm": 1.682362221600307, "learning_rate": 4.845577778282913e-06, "loss": 0.6503, "step": 8195 }, { "epoch": 0.52, "grad_norm": 1.8354022837122501, "learning_rate": 4.8445417205047805e-06, "loss": 0.7122, "step": 8196 }, { "epoch": 0.52, "grad_norm": 1.789010655006367, "learning_rate": 4.843505669407917e-06, "loss": 0.7645, "step": 8197 }, { "epoch": 0.52, "grad_norm": 1.1241355207697363, "learning_rate": 4.842469625036853e-06, "loss": 0.7606, "step": 8198 }, { "epoch": 0.52, "grad_norm": 1.4650208073948394, "learning_rate": 4.841433587436111e-06, "loss": 0.6336, "step": 8199 }, { "epoch": 0.52, "grad_norm": 1.5005052000686259, "learning_rate": 4.840397556650222e-06, "loss": 0.6847, "step": 8200 }, { "epoch": 0.52, "grad_norm": 1.5685511535755592, "learning_rate": 4.839361532723708e-06, "loss": 0.6858, "step": 8201 }, { "epoch": 0.52, "grad_norm": 1.6504765006016828, "learning_rate": 4.838325515701101e-06, "loss": 0.6909, "step": 8202 }, { "epoch": 0.53, "grad_norm": 1.5280169873606386, "learning_rate": 4.837289505626921e-06, "loss": 0.6634, "step": 8203 }, { "epoch": 0.53, "grad_norm": 1.8803412949693545, "learning_rate": 4.836253502545697e-06, "loss": 0.773, "step": 8204 }, { "epoch": 0.53, "grad_norm": 1.746868130979747, "learning_rate": 4.835217506501955e-06, "loss": 0.7928, "step": 8205 }, { "epoch": 0.53, "grad_norm": 1.5409752225882412, "learning_rate": 4.834181517540216e-06, "loss": 0.7258, "step": 8206 }, { "epoch": 0.53, "grad_norm": 1.8109476817512207, "learning_rate": 4.8331455357050084e-06, "loss": 0.8598, "step": 8207 }, { "epoch": 0.53, "grad_norm": 1.163277024155696, "learning_rate": 4.832109561040853e-06, "loss": 0.6113, "step": 8208 }, { "epoch": 0.53, "grad_norm": 1.5171830999025424, "learning_rate": 4.831073593592279e-06, "loss": 0.7533, "step": 8209 }, { "epoch": 0.53, "grad_norm": 1.6221421156976574, "learning_rate": 4.830037633403805e-06, "loss": 0.8541, "step": 8210 }, { "epoch": 0.53, "grad_norm": 1.6021767934701636, "learning_rate": 4.829001680519958e-06, "loss": 0.7774, "step": 8211 }, { "epoch": 0.53, "grad_norm": 3.15046965630477, "learning_rate": 4.8279657349852575e-06, "loss": 0.7219, "step": 8212 }, { "epoch": 0.53, "grad_norm": 1.495868264479994, "learning_rate": 4.826929796844228e-06, "loss": 0.6893, "step": 8213 }, { "epoch": 0.53, "grad_norm": 1.0191391134526886, "learning_rate": 4.825893866141394e-06, "loss": 0.5995, "step": 8214 }, { "epoch": 0.53, "grad_norm": 1.5581733216502602, "learning_rate": 4.824857942921274e-06, "loss": 0.704, "step": 8215 }, { "epoch": 0.53, "grad_norm": 1.7858696026197207, "learning_rate": 4.823822027228394e-06, "loss": 0.6153, "step": 8216 }, { "epoch": 0.53, "grad_norm": 1.8408626087506212, "learning_rate": 4.822786119107271e-06, "loss": 0.7058, "step": 8217 }, { "epoch": 0.53, "grad_norm": 1.6724916684948903, "learning_rate": 4.82175021860243e-06, "loss": 0.7148, "step": 8218 }, { "epoch": 0.53, "grad_norm": 1.6717870350146522, "learning_rate": 4.820714325758388e-06, "loss": 0.7269, "step": 8219 }, { "epoch": 0.53, "grad_norm": 2.382217328865986, "learning_rate": 4.8196784406196685e-06, "loss": 0.8152, "step": 8220 }, { "epoch": 0.53, "grad_norm": 1.529844554553465, "learning_rate": 4.818642563230791e-06, "loss": 0.7755, "step": 8221 }, { "epoch": 0.53, "grad_norm": 1.741259271937695, "learning_rate": 4.817606693636273e-06, "loss": 0.8088, "step": 8222 }, { "epoch": 0.53, "grad_norm": 1.6601023656315472, "learning_rate": 4.8165708318806385e-06, "loss": 0.8208, "step": 8223 }, { "epoch": 0.53, "grad_norm": 1.549134059385893, "learning_rate": 4.815534978008403e-06, "loss": 0.6612, "step": 8224 }, { "epoch": 0.53, "grad_norm": 1.6187921974925596, "learning_rate": 4.814499132064088e-06, "loss": 0.7165, "step": 8225 }, { "epoch": 0.53, "grad_norm": 1.7151255955532245, "learning_rate": 4.813463294092209e-06, "loss": 0.8073, "step": 8226 }, { "epoch": 0.53, "grad_norm": 1.9056944076298974, "learning_rate": 4.8124274641372875e-06, "loss": 0.8701, "step": 8227 }, { "epoch": 0.53, "grad_norm": 1.600416599531099, "learning_rate": 4.811391642243837e-06, "loss": 0.7372, "step": 8228 }, { "epoch": 0.53, "grad_norm": 1.660878636104992, "learning_rate": 4.810355828456379e-06, "loss": 0.7148, "step": 8229 }, { "epoch": 0.53, "grad_norm": 1.6043793436349656, "learning_rate": 4.809320022819427e-06, "loss": 0.6925, "step": 8230 }, { "epoch": 0.53, "grad_norm": 1.6821365583867736, "learning_rate": 4.808284225377503e-06, "loss": 0.6939, "step": 8231 }, { "epoch": 0.53, "grad_norm": 1.7047040700911147, "learning_rate": 4.8072484361751176e-06, "loss": 0.7422, "step": 8232 }, { "epoch": 0.53, "grad_norm": 1.6094340572110937, "learning_rate": 4.8062126552567896e-06, "loss": 0.7196, "step": 8233 }, { "epoch": 0.53, "grad_norm": 1.3421091219929198, "learning_rate": 4.805176882667036e-06, "loss": 0.6348, "step": 8234 }, { "epoch": 0.53, "grad_norm": 1.054974162693153, "learning_rate": 4.804141118450369e-06, "loss": 0.6914, "step": 8235 }, { "epoch": 0.53, "grad_norm": 1.6893728547269107, "learning_rate": 4.803105362651306e-06, "loss": 0.7602, "step": 8236 }, { "epoch": 0.53, "grad_norm": 1.9424105229286006, "learning_rate": 4.80206961531436e-06, "loss": 0.837, "step": 8237 }, { "epoch": 0.53, "grad_norm": 1.6644942066651918, "learning_rate": 4.801033876484048e-06, "loss": 0.7251, "step": 8238 }, { "epoch": 0.53, "grad_norm": 1.5645455425143533, "learning_rate": 4.799998146204879e-06, "loss": 0.6817, "step": 8239 }, { "epoch": 0.53, "grad_norm": 1.8398353428458583, "learning_rate": 4.798962424521373e-06, "loss": 0.7163, "step": 8240 }, { "epoch": 0.53, "grad_norm": 1.6593166172958904, "learning_rate": 4.797926711478037e-06, "loss": 0.6539, "step": 8241 }, { "epoch": 0.53, "grad_norm": 1.0704665511302012, "learning_rate": 4.796891007119385e-06, "loss": 0.6787, "step": 8242 }, { "epoch": 0.53, "grad_norm": 1.597747807796704, "learning_rate": 4.7958553114899335e-06, "loss": 0.7699, "step": 8243 }, { "epoch": 0.53, "grad_norm": 1.6425516510970393, "learning_rate": 4.794819624634189e-06, "loss": 0.8922, "step": 8244 }, { "epoch": 0.53, "grad_norm": 4.480969134255241, "learning_rate": 4.793783946596669e-06, "loss": 0.8258, "step": 8245 }, { "epoch": 0.53, "grad_norm": 1.0845064147226522, "learning_rate": 4.79274827742188e-06, "loss": 0.6858, "step": 8246 }, { "epoch": 0.53, "grad_norm": 1.7201662846228603, "learning_rate": 4.791712617154336e-06, "loss": 0.7934, "step": 8247 }, { "epoch": 0.53, "grad_norm": 1.3944057848543, "learning_rate": 4.790676965838545e-06, "loss": 0.6258, "step": 8248 }, { "epoch": 0.53, "grad_norm": 1.709518025892682, "learning_rate": 4.789641323519018e-06, "loss": 0.737, "step": 8249 }, { "epoch": 0.53, "grad_norm": 1.8219876643810873, "learning_rate": 4.788605690240264e-06, "loss": 0.6373, "step": 8250 }, { "epoch": 0.53, "grad_norm": 1.4587686989247919, "learning_rate": 4.787570066046794e-06, "loss": 0.6951, "step": 8251 }, { "epoch": 0.53, "grad_norm": 1.7298546215197343, "learning_rate": 4.7865344509831165e-06, "loss": 0.7706, "step": 8252 }, { "epoch": 0.53, "grad_norm": 1.71804808723525, "learning_rate": 4.785498845093739e-06, "loss": 0.7046, "step": 8253 }, { "epoch": 0.53, "grad_norm": 2.0087818828852018, "learning_rate": 4.784463248423172e-06, "loss": 0.7513, "step": 8254 }, { "epoch": 0.53, "grad_norm": 2.1902815116096024, "learning_rate": 4.78342766101592e-06, "loss": 0.6924, "step": 8255 }, { "epoch": 0.53, "grad_norm": 1.3500286638865067, "learning_rate": 4.782392082916493e-06, "loss": 0.7468, "step": 8256 }, { "epoch": 0.53, "grad_norm": 2.033990510708674, "learning_rate": 4.7813565141693965e-06, "loss": 0.8106, "step": 8257 }, { "epoch": 0.53, "grad_norm": 1.8348595782910706, "learning_rate": 4.7803209548191395e-06, "loss": 0.7518, "step": 8258 }, { "epoch": 0.53, "grad_norm": 1.7806769173248476, "learning_rate": 4.7792854049102234e-06, "loss": 0.7676, "step": 8259 }, { "epoch": 0.53, "grad_norm": 1.8843400336457408, "learning_rate": 4.778249864487157e-06, "loss": 0.6385, "step": 8260 }, { "epoch": 0.53, "grad_norm": 1.1369416914267942, "learning_rate": 4.7772143335944486e-06, "loss": 0.6472, "step": 8261 }, { "epoch": 0.53, "grad_norm": 2.19183448211016, "learning_rate": 4.776178812276596e-06, "loss": 0.7456, "step": 8262 }, { "epoch": 0.53, "grad_norm": 1.2832302828732578, "learning_rate": 4.775143300578112e-06, "loss": 0.6767, "step": 8263 }, { "epoch": 0.53, "grad_norm": 1.5485936533160698, "learning_rate": 4.774107798543492e-06, "loss": 0.7835, "step": 8264 }, { "epoch": 0.53, "grad_norm": 1.5753157858581206, "learning_rate": 4.773072306217247e-06, "loss": 0.8516, "step": 8265 }, { "epoch": 0.53, "grad_norm": 1.2295055886289115, "learning_rate": 4.772036823643876e-06, "loss": 0.6992, "step": 8266 }, { "epoch": 0.53, "grad_norm": 1.7126302131991953, "learning_rate": 4.771001350867886e-06, "loss": 0.8323, "step": 8267 }, { "epoch": 0.53, "grad_norm": 1.9343502368643062, "learning_rate": 4.769965887933774e-06, "loss": 0.8852, "step": 8268 }, { "epoch": 0.53, "grad_norm": 1.9604272386113388, "learning_rate": 4.768930434886044e-06, "loss": 0.8571, "step": 8269 }, { "epoch": 0.53, "grad_norm": 1.7852991004271528, "learning_rate": 4.7678949917692015e-06, "loss": 0.7426, "step": 8270 }, { "epoch": 0.53, "grad_norm": 1.7672768819427085, "learning_rate": 4.766859558627741e-06, "loss": 0.7779, "step": 8271 }, { "epoch": 0.53, "grad_norm": 1.6796746037179437, "learning_rate": 4.765824135506169e-06, "loss": 0.6392, "step": 8272 }, { "epoch": 0.53, "grad_norm": 1.6500731981443062, "learning_rate": 4.7647887224489834e-06, "loss": 0.7603, "step": 8273 }, { "epoch": 0.53, "grad_norm": 1.4936001532631373, "learning_rate": 4.763753319500685e-06, "loss": 0.7007, "step": 8274 }, { "epoch": 0.53, "grad_norm": 1.7061847543589237, "learning_rate": 4.762717926705771e-06, "loss": 0.6738, "step": 8275 }, { "epoch": 0.53, "grad_norm": 1.96046833055575, "learning_rate": 4.761682544108744e-06, "loss": 0.7966, "step": 8276 }, { "epoch": 0.53, "grad_norm": 1.830906664921177, "learning_rate": 4.760647171754098e-06, "loss": 0.7695, "step": 8277 }, { "epoch": 0.53, "grad_norm": 1.650815288666478, "learning_rate": 4.759611809686334e-06, "loss": 0.6748, "step": 8278 }, { "epoch": 0.53, "grad_norm": 1.8176753001559125, "learning_rate": 4.75857645794995e-06, "loss": 0.8424, "step": 8279 }, { "epoch": 0.53, "grad_norm": 1.743322161972767, "learning_rate": 4.757541116589441e-06, "loss": 0.6533, "step": 8280 }, { "epoch": 0.53, "grad_norm": 1.6856970880662518, "learning_rate": 4.756505785649309e-06, "loss": 0.7563, "step": 8281 }, { "epoch": 0.53, "grad_norm": 1.7491892148121173, "learning_rate": 4.755470465174044e-06, "loss": 0.7424, "step": 8282 }, { "epoch": 0.53, "grad_norm": 1.8311352830288843, "learning_rate": 4.754435155208146e-06, "loss": 0.7432, "step": 8283 }, { "epoch": 0.53, "grad_norm": 1.1198172019052446, "learning_rate": 4.753399855796107e-06, "loss": 0.5747, "step": 8284 }, { "epoch": 0.53, "grad_norm": 1.786794021491413, "learning_rate": 4.752364566982425e-06, "loss": 0.8051, "step": 8285 }, { "epoch": 0.53, "grad_norm": 1.7964271162587444, "learning_rate": 4.751329288811593e-06, "loss": 0.7461, "step": 8286 }, { "epoch": 0.53, "grad_norm": 1.5836880141876064, "learning_rate": 4.750294021328105e-06, "loss": 0.8114, "step": 8287 }, { "epoch": 0.53, "grad_norm": 1.847611792373097, "learning_rate": 4.749258764576458e-06, "loss": 0.8543, "step": 8288 }, { "epoch": 0.53, "grad_norm": 1.5487594770926083, "learning_rate": 4.748223518601141e-06, "loss": 0.6321, "step": 8289 }, { "epoch": 0.53, "grad_norm": 1.708973369711639, "learning_rate": 4.747188283446649e-06, "loss": 0.6972, "step": 8290 }, { "epoch": 0.53, "grad_norm": 1.9227585737675872, "learning_rate": 4.7461530591574715e-06, "loss": 0.7966, "step": 8291 }, { "epoch": 0.53, "grad_norm": 2.5868966758056104, "learning_rate": 4.745117845778103e-06, "loss": 0.7325, "step": 8292 }, { "epoch": 0.53, "grad_norm": 1.6806398375848577, "learning_rate": 4.744082643353034e-06, "loss": 0.6622, "step": 8293 }, { "epoch": 0.53, "grad_norm": 1.8223823760948015, "learning_rate": 4.743047451926756e-06, "loss": 0.7653, "step": 8294 }, { "epoch": 0.53, "grad_norm": 1.5373605192074211, "learning_rate": 4.742012271543757e-06, "loss": 0.7372, "step": 8295 }, { "epoch": 0.53, "grad_norm": 1.7695726758157464, "learning_rate": 4.740977102248528e-06, "loss": 0.7138, "step": 8296 }, { "epoch": 0.53, "grad_norm": 1.099803021328492, "learning_rate": 4.739941944085561e-06, "loss": 0.8228, "step": 8297 }, { "epoch": 0.53, "grad_norm": 1.7072892937501438, "learning_rate": 4.738906797099342e-06, "loss": 0.7328, "step": 8298 }, { "epoch": 0.53, "grad_norm": 1.0790765246517378, "learning_rate": 4.73787166133436e-06, "loss": 0.6172, "step": 8299 }, { "epoch": 0.53, "grad_norm": 1.7034568779143897, "learning_rate": 4.736836536835104e-06, "loss": 0.718, "step": 8300 }, { "epoch": 0.53, "grad_norm": 1.097689574792881, "learning_rate": 4.735801423646059e-06, "loss": 0.6909, "step": 8301 }, { "epoch": 0.53, "grad_norm": 1.7401315291860875, "learning_rate": 4.734766321811714e-06, "loss": 0.6953, "step": 8302 }, { "epoch": 0.53, "grad_norm": 1.5652592969134211, "learning_rate": 4.733731231376557e-06, "loss": 0.7137, "step": 8303 }, { "epoch": 0.53, "grad_norm": 2.628797246613405, "learning_rate": 4.732696152385071e-06, "loss": 0.7942, "step": 8304 }, { "epoch": 0.53, "grad_norm": 1.8146493945543696, "learning_rate": 4.731661084881744e-06, "loss": 0.7077, "step": 8305 }, { "epoch": 0.53, "grad_norm": 2.0280496537688055, "learning_rate": 4.730626028911058e-06, "loss": 0.7608, "step": 8306 }, { "epoch": 0.53, "grad_norm": 1.6687960709614145, "learning_rate": 4.729590984517498e-06, "loss": 0.7767, "step": 8307 }, { "epoch": 0.53, "grad_norm": 1.9952656238164295, "learning_rate": 4.7285559517455515e-06, "loss": 0.6743, "step": 8308 }, { "epoch": 0.53, "grad_norm": 1.8448227410942077, "learning_rate": 4.727520930639698e-06, "loss": 0.7558, "step": 8309 }, { "epoch": 0.53, "grad_norm": 1.7523410838478042, "learning_rate": 4.7264859212444245e-06, "loss": 0.8618, "step": 8310 }, { "epoch": 0.53, "grad_norm": 2.015557433599426, "learning_rate": 4.7254509236042105e-06, "loss": 0.7972, "step": 8311 }, { "epoch": 0.53, "grad_norm": 1.6289461929375062, "learning_rate": 4.72441593776354e-06, "loss": 0.7787, "step": 8312 }, { "epoch": 0.53, "grad_norm": 1.6988262620150072, "learning_rate": 4.723380963766891e-06, "loss": 0.7462, "step": 8313 }, { "epoch": 0.53, "grad_norm": 1.7574728966804838, "learning_rate": 4.7223460016587485e-06, "loss": 0.852, "step": 8314 }, { "epoch": 0.53, "grad_norm": 1.6504532057724057, "learning_rate": 4.72131105148359e-06, "loss": 0.6941, "step": 8315 }, { "epoch": 0.53, "grad_norm": 1.7615149719686485, "learning_rate": 4.720276113285897e-06, "loss": 0.7655, "step": 8316 }, { "epoch": 0.53, "grad_norm": 1.48846801278594, "learning_rate": 4.719241187110152e-06, "loss": 0.6914, "step": 8317 }, { "epoch": 0.53, "grad_norm": 1.6974361563857803, "learning_rate": 4.718206273000827e-06, "loss": 0.6728, "step": 8318 }, { "epoch": 0.53, "grad_norm": 1.2348862767513056, "learning_rate": 4.717171371002407e-06, "loss": 0.6989, "step": 8319 }, { "epoch": 0.53, "grad_norm": 1.1640652345161144, "learning_rate": 4.716136481159366e-06, "loss": 0.7518, "step": 8320 }, { "epoch": 0.53, "grad_norm": 2.0113377628607054, "learning_rate": 4.715101603516183e-06, "loss": 0.6547, "step": 8321 }, { "epoch": 0.53, "grad_norm": 1.6089456350714977, "learning_rate": 4.7140667381173324e-06, "loss": 0.6551, "step": 8322 }, { "epoch": 0.53, "grad_norm": 1.6446554552945567, "learning_rate": 4.713031885007296e-06, "loss": 0.7861, "step": 8323 }, { "epoch": 0.53, "grad_norm": 1.763120592731069, "learning_rate": 4.7119970442305435e-06, "loss": 0.6476, "step": 8324 }, { "epoch": 0.53, "grad_norm": 1.6639551711569298, "learning_rate": 4.710962215831553e-06, "loss": 0.6651, "step": 8325 }, { "epoch": 0.53, "grad_norm": 1.5541978896769708, "learning_rate": 4.709927399854801e-06, "loss": 0.7397, "step": 8326 }, { "epoch": 0.53, "grad_norm": 1.6707756978393458, "learning_rate": 4.708892596344758e-06, "loss": 0.7736, "step": 8327 }, { "epoch": 0.53, "grad_norm": 1.7376729038333503, "learning_rate": 4.7078578053459e-06, "loss": 0.8305, "step": 8328 }, { "epoch": 0.53, "grad_norm": 2.0756945315863393, "learning_rate": 4.706823026902699e-06, "loss": 0.7436, "step": 8329 }, { "epoch": 0.53, "grad_norm": 1.5215365507464866, "learning_rate": 4.705788261059631e-06, "loss": 0.7142, "step": 8330 }, { "epoch": 0.53, "grad_norm": 1.6295903454914222, "learning_rate": 4.7047535078611626e-06, "loss": 0.6711, "step": 8331 }, { "epoch": 0.53, "grad_norm": 1.9435760789558565, "learning_rate": 4.70371876735177e-06, "loss": 0.7374, "step": 8332 }, { "epoch": 0.53, "grad_norm": 1.591044949356324, "learning_rate": 4.70268403957592e-06, "loss": 0.6648, "step": 8333 }, { "epoch": 0.53, "grad_norm": 2.0819659932548795, "learning_rate": 4.701649324578085e-06, "loss": 0.7846, "step": 8334 }, { "epoch": 0.53, "grad_norm": 1.6348523559434591, "learning_rate": 4.700614622402737e-06, "loss": 0.7789, "step": 8335 }, { "epoch": 0.53, "grad_norm": 1.761112767886392, "learning_rate": 4.699579933094343e-06, "loss": 0.832, "step": 8336 }, { "epoch": 0.53, "grad_norm": 1.2017934602480334, "learning_rate": 4.698545256697373e-06, "loss": 0.6805, "step": 8337 }, { "epoch": 0.53, "grad_norm": 1.8184224198043255, "learning_rate": 4.697510593256292e-06, "loss": 0.6331, "step": 8338 }, { "epoch": 0.53, "grad_norm": 1.7271227075582454, "learning_rate": 4.696475942815573e-06, "loss": 0.7517, "step": 8339 }, { "epoch": 0.53, "grad_norm": 1.6337208006397197, "learning_rate": 4.695441305419678e-06, "loss": 0.6851, "step": 8340 }, { "epoch": 0.53, "grad_norm": 1.785297563506015, "learning_rate": 4.6944066811130775e-06, "loss": 0.7875, "step": 8341 }, { "epoch": 0.53, "grad_norm": 1.608642194025001, "learning_rate": 4.693372069940234e-06, "loss": 0.9342, "step": 8342 }, { "epoch": 0.53, "grad_norm": 1.7309344990205597, "learning_rate": 4.692337471945614e-06, "loss": 0.6869, "step": 8343 }, { "epoch": 0.53, "grad_norm": 1.3216126348842485, "learning_rate": 4.691302887173685e-06, "loss": 0.6858, "step": 8344 }, { "epoch": 0.53, "grad_norm": 1.5692807739422885, "learning_rate": 4.690268315668908e-06, "loss": 0.7449, "step": 8345 }, { "epoch": 0.53, "grad_norm": 1.6987066809329943, "learning_rate": 4.68923375747575e-06, "loss": 0.7936, "step": 8346 }, { "epoch": 0.53, "grad_norm": 1.5908792088042556, "learning_rate": 4.688199212638669e-06, "loss": 0.6367, "step": 8347 }, { "epoch": 0.53, "grad_norm": 1.9925744931852025, "learning_rate": 4.687164681202134e-06, "loss": 0.7237, "step": 8348 }, { "epoch": 0.53, "grad_norm": 1.759068332041832, "learning_rate": 4.6861301632106005e-06, "loss": 0.759, "step": 8349 }, { "epoch": 0.53, "grad_norm": 1.6787325424816515, "learning_rate": 4.685095658708535e-06, "loss": 0.9883, "step": 8350 }, { "epoch": 0.53, "grad_norm": 1.8267613276307535, "learning_rate": 4.684061167740395e-06, "loss": 0.7866, "step": 8351 }, { "epoch": 0.53, "grad_norm": 1.6967485277787946, "learning_rate": 4.683026690350642e-06, "loss": 0.8668, "step": 8352 }, { "epoch": 0.53, "grad_norm": 1.6308131554884273, "learning_rate": 4.6819922265837385e-06, "loss": 0.729, "step": 8353 }, { "epoch": 0.53, "grad_norm": 1.2629687095156157, "learning_rate": 4.680957776484138e-06, "loss": 0.6271, "step": 8354 }, { "epoch": 0.53, "grad_norm": 1.6171871742305461, "learning_rate": 4.679923340096304e-06, "loss": 0.7549, "step": 8355 }, { "epoch": 0.53, "grad_norm": 1.8501029877115769, "learning_rate": 4.6788889174646905e-06, "loss": 0.839, "step": 8356 }, { "epoch": 0.53, "grad_norm": 1.599435379356856, "learning_rate": 4.677854508633757e-06, "loss": 0.6429, "step": 8357 }, { "epoch": 0.53, "grad_norm": 2.038515590105271, "learning_rate": 4.676820113647959e-06, "loss": 0.686, "step": 8358 }, { "epoch": 0.54, "grad_norm": 1.1335032178445184, "learning_rate": 4.675785732551756e-06, "loss": 0.5898, "step": 8359 }, { "epoch": 0.54, "grad_norm": 1.8205091604402914, "learning_rate": 4.674751365389599e-06, "loss": 0.702, "step": 8360 }, { "epoch": 0.54, "grad_norm": 1.8376496518278618, "learning_rate": 4.673717012205945e-06, "loss": 0.7277, "step": 8361 }, { "epoch": 0.54, "grad_norm": 1.8937836150845069, "learning_rate": 4.67268267304525e-06, "loss": 0.7407, "step": 8362 }, { "epoch": 0.54, "grad_norm": 1.5422856389418353, "learning_rate": 4.671648347951963e-06, "loss": 0.7637, "step": 8363 }, { "epoch": 0.54, "grad_norm": 1.4684091941880453, "learning_rate": 4.6706140369705425e-06, "loss": 0.7421, "step": 8364 }, { "epoch": 0.54, "grad_norm": 1.7966953484250714, "learning_rate": 4.669579740145436e-06, "loss": 0.7497, "step": 8365 }, { "epoch": 0.54, "grad_norm": 1.6211451975807247, "learning_rate": 4.668545457521101e-06, "loss": 0.6403, "step": 8366 }, { "epoch": 0.54, "grad_norm": 1.5509920216986226, "learning_rate": 4.6675111891419835e-06, "loss": 0.8156, "step": 8367 }, { "epoch": 0.54, "grad_norm": 1.745311905818544, "learning_rate": 4.666476935052539e-06, "loss": 0.6955, "step": 8368 }, { "epoch": 0.54, "grad_norm": 1.5312193414172455, "learning_rate": 4.665442695297212e-06, "loss": 0.646, "step": 8369 }, { "epoch": 0.54, "grad_norm": 1.7439008783982217, "learning_rate": 4.664408469920455e-06, "loss": 0.7948, "step": 8370 }, { "epoch": 0.54, "grad_norm": 1.6379997530141948, "learning_rate": 4.663374258966718e-06, "loss": 0.6425, "step": 8371 }, { "epoch": 0.54, "grad_norm": 1.8656713307697073, "learning_rate": 4.662340062480449e-06, "loss": 0.889, "step": 8372 }, { "epoch": 0.54, "grad_norm": 1.9048621166427329, "learning_rate": 4.661305880506094e-06, "loss": 0.7329, "step": 8373 }, { "epoch": 0.54, "grad_norm": 1.8410068452325725, "learning_rate": 4.6602717130880985e-06, "loss": 0.6734, "step": 8374 }, { "epoch": 0.54, "grad_norm": 1.2312030294788576, "learning_rate": 4.659237560270914e-06, "loss": 0.7037, "step": 8375 }, { "epoch": 0.54, "grad_norm": 1.6827654548422433, "learning_rate": 4.6582034220989805e-06, "loss": 0.7559, "step": 8376 }, { "epoch": 0.54, "grad_norm": 1.6982661305990128, "learning_rate": 4.657169298616748e-06, "loss": 0.8362, "step": 8377 }, { "epoch": 0.54, "grad_norm": 2.07981450436537, "learning_rate": 4.6561351898686575e-06, "loss": 0.953, "step": 8378 }, { "epoch": 0.54, "grad_norm": 1.8199397851133594, "learning_rate": 4.655101095899154e-06, "loss": 0.7707, "step": 8379 }, { "epoch": 0.54, "grad_norm": 1.6286942335102745, "learning_rate": 4.65406701675268e-06, "loss": 0.7609, "step": 8380 }, { "epoch": 0.54, "grad_norm": 1.506982071401645, "learning_rate": 4.653032952473678e-06, "loss": 0.718, "step": 8381 }, { "epoch": 0.54, "grad_norm": 1.0375077838226021, "learning_rate": 4.651998903106594e-06, "loss": 0.6415, "step": 8382 }, { "epoch": 0.54, "grad_norm": 1.6660123963013194, "learning_rate": 4.650964868695863e-06, "loss": 0.7258, "step": 8383 }, { "epoch": 0.54, "grad_norm": 1.4537543018469523, "learning_rate": 4.649930849285931e-06, "loss": 0.756, "step": 8384 }, { "epoch": 0.54, "grad_norm": 1.6252391253880987, "learning_rate": 4.648896844921233e-06, "loss": 0.6574, "step": 8385 }, { "epoch": 0.54, "grad_norm": 1.5119238138429318, "learning_rate": 4.647862855646211e-06, "loss": 0.8173, "step": 8386 }, { "epoch": 0.54, "grad_norm": 2.0915873610009994, "learning_rate": 4.6468288815053045e-06, "loss": 0.8109, "step": 8387 }, { "epoch": 0.54, "grad_norm": 1.8981958635428249, "learning_rate": 4.645794922542951e-06, "loss": 0.8243, "step": 8388 }, { "epoch": 0.54, "grad_norm": 0.9938063119917558, "learning_rate": 4.644760978803587e-06, "loss": 0.5842, "step": 8389 }, { "epoch": 0.54, "grad_norm": 1.8217037970870313, "learning_rate": 4.643727050331648e-06, "loss": 0.9415, "step": 8390 }, { "epoch": 0.54, "grad_norm": 1.7790678979912145, "learning_rate": 4.642693137171575e-06, "loss": 0.7438, "step": 8391 }, { "epoch": 0.54, "grad_norm": 1.6972639711824862, "learning_rate": 4.641659239367797e-06, "loss": 0.7872, "step": 8392 }, { "epoch": 0.54, "grad_norm": 1.6771787693788507, "learning_rate": 4.640625356964753e-06, "loss": 0.742, "step": 8393 }, { "epoch": 0.54, "grad_norm": 1.6713591172550317, "learning_rate": 4.639591490006875e-06, "loss": 0.6439, "step": 8394 }, { "epoch": 0.54, "grad_norm": 1.6608290563961707, "learning_rate": 4.638557638538601e-06, "loss": 0.7141, "step": 8395 }, { "epoch": 0.54, "grad_norm": 1.939342070961807, "learning_rate": 4.637523802604357e-06, "loss": 0.7221, "step": 8396 }, { "epoch": 0.54, "grad_norm": 1.6768445962729734, "learning_rate": 4.63648998224858e-06, "loss": 0.7278, "step": 8397 }, { "epoch": 0.54, "grad_norm": 1.7082222799054925, "learning_rate": 4.635456177515698e-06, "loss": 0.8293, "step": 8398 }, { "epoch": 0.54, "grad_norm": 1.6300505937975511, "learning_rate": 4.634422388450144e-06, "loss": 0.7121, "step": 8399 }, { "epoch": 0.54, "grad_norm": 1.5522711332516257, "learning_rate": 4.633388615096348e-06, "loss": 0.7174, "step": 8400 }, { "epoch": 0.54, "grad_norm": 1.6035784968766222, "learning_rate": 4.632354857498738e-06, "loss": 0.7432, "step": 8401 }, { "epoch": 0.54, "grad_norm": 1.668674177095379, "learning_rate": 4.631321115701746e-06, "loss": 0.7899, "step": 8402 }, { "epoch": 0.54, "grad_norm": 1.8445569990541955, "learning_rate": 4.6302873897497955e-06, "loss": 0.7962, "step": 8403 }, { "epoch": 0.54, "grad_norm": 1.3968973817476016, "learning_rate": 4.629253679687318e-06, "loss": 0.6433, "step": 8404 }, { "epoch": 0.54, "grad_norm": 1.7857912602047858, "learning_rate": 4.628219985558737e-06, "loss": 0.8665, "step": 8405 }, { "epoch": 0.54, "grad_norm": 1.692336955232839, "learning_rate": 4.627186307408481e-06, "loss": 0.7188, "step": 8406 }, { "epoch": 0.54, "grad_norm": 2.090430270413178, "learning_rate": 4.626152645280972e-06, "loss": 0.736, "step": 8407 }, { "epoch": 0.54, "grad_norm": 1.7846338835561537, "learning_rate": 4.625118999220637e-06, "loss": 0.6581, "step": 8408 }, { "epoch": 0.54, "grad_norm": 1.6916292857109325, "learning_rate": 4.624085369271902e-06, "loss": 0.614, "step": 8409 }, { "epoch": 0.54, "grad_norm": 3.2706577543842292, "learning_rate": 4.6230517554791854e-06, "loss": 0.6918, "step": 8410 }, { "epoch": 0.54, "grad_norm": 1.8680134369099586, "learning_rate": 4.622018157886915e-06, "loss": 0.8878, "step": 8411 }, { "epoch": 0.54, "grad_norm": 1.7187019504067154, "learning_rate": 4.620984576539507e-06, "loss": 0.845, "step": 8412 }, { "epoch": 0.54, "grad_norm": 1.8182209861452892, "learning_rate": 4.6199510114813875e-06, "loss": 0.8716, "step": 8413 }, { "epoch": 0.54, "grad_norm": 1.5675548620326551, "learning_rate": 4.618917462756975e-06, "loss": 0.6845, "step": 8414 }, { "epoch": 0.54, "grad_norm": 1.8738557978964856, "learning_rate": 4.617883930410688e-06, "loss": 0.7243, "step": 8415 }, { "epoch": 0.54, "grad_norm": 1.9364728070969426, "learning_rate": 4.616850414486946e-06, "loss": 0.7312, "step": 8416 }, { "epoch": 0.54, "grad_norm": 1.8496727146133995, "learning_rate": 4.615816915030169e-06, "loss": 0.6854, "step": 8417 }, { "epoch": 0.54, "grad_norm": 1.9653927725094928, "learning_rate": 4.614783432084775e-06, "loss": 1.0289, "step": 8418 }, { "epoch": 0.54, "grad_norm": 1.576885811412508, "learning_rate": 4.613749965695178e-06, "loss": 0.6847, "step": 8419 }, { "epoch": 0.54, "grad_norm": 1.9534652214507635, "learning_rate": 4.612716515905798e-06, "loss": 0.804, "step": 8420 }, { "epoch": 0.54, "grad_norm": 1.1274089234899178, "learning_rate": 4.611683082761046e-06, "loss": 0.7107, "step": 8421 }, { "epoch": 0.54, "grad_norm": 2.198261345559149, "learning_rate": 4.61064966630534e-06, "loss": 0.7379, "step": 8422 }, { "epoch": 0.54, "grad_norm": 1.9955873593746583, "learning_rate": 4.609616266583093e-06, "loss": 0.694, "step": 8423 }, { "epoch": 0.54, "grad_norm": 1.8894663585476785, "learning_rate": 4.608582883638719e-06, "loss": 0.8148, "step": 8424 }, { "epoch": 0.54, "grad_norm": 2.283159380461939, "learning_rate": 4.607549517516629e-06, "loss": 0.6588, "step": 8425 }, { "epoch": 0.54, "grad_norm": 1.5987785377707882, "learning_rate": 4.606516168261236e-06, "loss": 0.8019, "step": 8426 }, { "epoch": 0.54, "grad_norm": 1.8031787529311458, "learning_rate": 4.605482835916954e-06, "loss": 0.7029, "step": 8427 }, { "epoch": 0.54, "grad_norm": 2.660222311386964, "learning_rate": 4.604449520528188e-06, "loss": 0.7796, "step": 8428 }, { "epoch": 0.54, "grad_norm": 1.5640963533635865, "learning_rate": 4.603416222139352e-06, "loss": 0.7325, "step": 8429 }, { "epoch": 0.54, "grad_norm": 1.2102452592673982, "learning_rate": 4.602382940794852e-06, "loss": 0.7465, "step": 8430 }, { "epoch": 0.54, "grad_norm": 1.1691587420993548, "learning_rate": 4.6013496765391e-06, "loss": 0.5851, "step": 8431 }, { "epoch": 0.54, "grad_norm": 1.7887746461168206, "learning_rate": 4.600316429416499e-06, "loss": 0.7081, "step": 8432 }, { "epoch": 0.54, "grad_norm": 1.723092411020618, "learning_rate": 4.59928319947146e-06, "loss": 0.7406, "step": 8433 }, { "epoch": 0.54, "grad_norm": 1.851190696940295, "learning_rate": 4.598249986748384e-06, "loss": 0.6216, "step": 8434 }, { "epoch": 0.54, "grad_norm": 1.7191866068502162, "learning_rate": 4.597216791291681e-06, "loss": 0.7637, "step": 8435 }, { "epoch": 0.54, "grad_norm": 1.8226040223872082, "learning_rate": 4.596183613145754e-06, "loss": 0.8018, "step": 8436 }, { "epoch": 0.54, "grad_norm": 2.374764259919539, "learning_rate": 4.595150452355006e-06, "loss": 0.749, "step": 8437 }, { "epoch": 0.54, "grad_norm": 1.8038020078493735, "learning_rate": 4.594117308963843e-06, "loss": 0.7392, "step": 8438 }, { "epoch": 0.54, "grad_norm": 1.467844430433112, "learning_rate": 4.593084183016664e-06, "loss": 0.7548, "step": 8439 }, { "epoch": 0.54, "grad_norm": 2.2531048066450152, "learning_rate": 4.592051074557873e-06, "loss": 0.7131, "step": 8440 }, { "epoch": 0.54, "grad_norm": 2.281099024621853, "learning_rate": 4.5910179836318665e-06, "loss": 0.7103, "step": 8441 }, { "epoch": 0.54, "grad_norm": 2.338844586218298, "learning_rate": 4.589984910283051e-06, "loss": 0.6455, "step": 8442 }, { "epoch": 0.54, "grad_norm": 1.716824723027951, "learning_rate": 4.58895185455582e-06, "loss": 0.8675, "step": 8443 }, { "epoch": 0.54, "grad_norm": 1.689735306656026, "learning_rate": 4.587918816494574e-06, "loss": 0.6908, "step": 8444 }, { "epoch": 0.54, "grad_norm": 1.0325239008853724, "learning_rate": 4.586885796143715e-06, "loss": 0.6126, "step": 8445 }, { "epoch": 0.54, "grad_norm": 1.3272397890908256, "learning_rate": 4.585852793547633e-06, "loss": 0.67, "step": 8446 }, { "epoch": 0.54, "grad_norm": 1.1172737200455785, "learning_rate": 4.58481980875073e-06, "loss": 0.6381, "step": 8447 }, { "epoch": 0.54, "grad_norm": 1.5872018899003981, "learning_rate": 4.583786841797398e-06, "loss": 0.8271, "step": 8448 }, { "epoch": 0.54, "grad_norm": 1.5972767346694547, "learning_rate": 4.582753892732033e-06, "loss": 0.6208, "step": 8449 }, { "epoch": 0.54, "grad_norm": 1.5236867498359745, "learning_rate": 4.581720961599028e-06, "loss": 0.5137, "step": 8450 }, { "epoch": 0.54, "grad_norm": 1.804303630634004, "learning_rate": 4.58068804844278e-06, "loss": 0.664, "step": 8451 }, { "epoch": 0.54, "grad_norm": 1.513262717964304, "learning_rate": 4.579655153307676e-06, "loss": 0.8926, "step": 8452 }, { "epoch": 0.54, "grad_norm": 1.7222622184976892, "learning_rate": 4.578622276238112e-06, "loss": 0.6968, "step": 8453 }, { "epoch": 0.54, "grad_norm": 1.6796722452013615, "learning_rate": 4.577589417278476e-06, "loss": 0.7649, "step": 8454 }, { "epoch": 0.54, "grad_norm": 1.6133209724132758, "learning_rate": 4.576556576473158e-06, "loss": 0.747, "step": 8455 }, { "epoch": 0.54, "grad_norm": 1.541074324756214, "learning_rate": 4.575523753866552e-06, "loss": 0.7768, "step": 8456 }, { "epoch": 0.54, "grad_norm": 1.8605246952106984, "learning_rate": 4.57449094950304e-06, "loss": 0.7334, "step": 8457 }, { "epoch": 0.54, "grad_norm": 1.9186018510802259, "learning_rate": 4.573458163427014e-06, "loss": 0.8308, "step": 8458 }, { "epoch": 0.54, "grad_norm": 1.8130555446384624, "learning_rate": 4.572425395682859e-06, "loss": 0.7575, "step": 8459 }, { "epoch": 0.54, "grad_norm": 1.522606264890869, "learning_rate": 4.571392646314965e-06, "loss": 0.6516, "step": 8460 }, { "epoch": 0.54, "grad_norm": 1.5887072436195184, "learning_rate": 4.570359915367711e-06, "loss": 0.8062, "step": 8461 }, { "epoch": 0.54, "grad_norm": 1.787639890229291, "learning_rate": 4.569327202885488e-06, "loss": 0.7556, "step": 8462 }, { "epoch": 0.54, "grad_norm": 2.5139801675581723, "learning_rate": 4.568294508912674e-06, "loss": 0.9034, "step": 8463 }, { "epoch": 0.54, "grad_norm": 3.499018667158755, "learning_rate": 4.567261833493655e-06, "loss": 0.728, "step": 8464 }, { "epoch": 0.54, "grad_norm": 1.6243408741555523, "learning_rate": 4.566229176672815e-06, "loss": 0.7029, "step": 8465 }, { "epoch": 0.54, "grad_norm": 1.7293308352850265, "learning_rate": 4.565196538494532e-06, "loss": 0.758, "step": 8466 }, { "epoch": 0.54, "grad_norm": 1.482679526522639, "learning_rate": 4.5641639190031894e-06, "loss": 0.7315, "step": 8467 }, { "epoch": 0.54, "grad_norm": 1.5327840106157506, "learning_rate": 4.563131318243164e-06, "loss": 0.8017, "step": 8468 }, { "epoch": 0.54, "grad_norm": 1.5016407911483765, "learning_rate": 4.56209873625884e-06, "loss": 0.636, "step": 8469 }, { "epoch": 0.54, "grad_norm": 1.5798379422164368, "learning_rate": 4.5610661730945884e-06, "loss": 0.7041, "step": 8470 }, { "epoch": 0.54, "grad_norm": 1.779380758810965, "learning_rate": 4.560033628794792e-06, "loss": 0.7608, "step": 8471 }, { "epoch": 0.54, "grad_norm": 1.8204415089293784, "learning_rate": 4.559001103403825e-06, "loss": 0.756, "step": 8472 }, { "epoch": 0.54, "grad_norm": 1.5955095998530575, "learning_rate": 4.557968596966063e-06, "loss": 0.8006, "step": 8473 }, { "epoch": 0.54, "grad_norm": 1.7095229279330524, "learning_rate": 4.5569361095258854e-06, "loss": 0.64, "step": 8474 }, { "epoch": 0.54, "grad_norm": 1.1742098303982922, "learning_rate": 4.5559036411276596e-06, "loss": 0.6584, "step": 8475 }, { "epoch": 0.54, "grad_norm": 1.6034956798543498, "learning_rate": 4.554871191815764e-06, "loss": 0.7053, "step": 8476 }, { "epoch": 0.54, "grad_norm": 1.5656206502340242, "learning_rate": 4.553838761634569e-06, "loss": 0.7399, "step": 8477 }, { "epoch": 0.54, "grad_norm": 1.6436494266729884, "learning_rate": 4.552806350628446e-06, "loss": 0.8594, "step": 8478 }, { "epoch": 0.54, "grad_norm": 1.6684555451623262, "learning_rate": 4.551773958841765e-06, "loss": 0.8442, "step": 8479 }, { "epoch": 0.54, "grad_norm": 1.0294444388969473, "learning_rate": 4.5507415863189005e-06, "loss": 0.6598, "step": 8480 }, { "epoch": 0.54, "grad_norm": 1.9260565426653673, "learning_rate": 4.549709233104216e-06, "loss": 0.7114, "step": 8481 }, { "epoch": 0.54, "grad_norm": 1.4149389781132409, "learning_rate": 4.548676899242082e-06, "loss": 0.6527, "step": 8482 }, { "epoch": 0.54, "grad_norm": 1.1042930449179744, "learning_rate": 4.54764458477687e-06, "loss": 0.5635, "step": 8483 }, { "epoch": 0.54, "grad_norm": 1.9811029970009273, "learning_rate": 4.546612289752939e-06, "loss": 0.845, "step": 8484 }, { "epoch": 0.54, "grad_norm": 1.6687546794906898, "learning_rate": 4.5455800142146626e-06, "loss": 0.7601, "step": 8485 }, { "epoch": 0.54, "grad_norm": 1.6222198376840764, "learning_rate": 4.5445477582064e-06, "loss": 0.7804, "step": 8486 }, { "epoch": 0.54, "grad_norm": 1.6352042262775894, "learning_rate": 4.54351552177252e-06, "loss": 0.6384, "step": 8487 }, { "epoch": 0.54, "grad_norm": 1.622907944671874, "learning_rate": 4.542483304957381e-06, "loss": 0.6722, "step": 8488 }, { "epoch": 0.54, "grad_norm": 1.5614853070013852, "learning_rate": 4.541451107805351e-06, "loss": 0.6925, "step": 8489 }, { "epoch": 0.54, "grad_norm": 1.617830598934953, "learning_rate": 4.540418930360786e-06, "loss": 0.7792, "step": 8490 }, { "epoch": 0.54, "grad_norm": 1.8128647032893033, "learning_rate": 4.53938677266805e-06, "loss": 0.9471, "step": 8491 }, { "epoch": 0.54, "grad_norm": 1.8414602266666218, "learning_rate": 4.5383546347715056e-06, "loss": 0.6788, "step": 8492 }, { "epoch": 0.54, "grad_norm": 1.1153028920831891, "learning_rate": 4.537322516715505e-06, "loss": 0.5757, "step": 8493 }, { "epoch": 0.54, "grad_norm": 1.9643430565692734, "learning_rate": 4.5362904185444125e-06, "loss": 0.9049, "step": 8494 }, { "epoch": 0.54, "grad_norm": 1.5132244910522858, "learning_rate": 4.535258340302583e-06, "loss": 0.6925, "step": 8495 }, { "epoch": 0.54, "grad_norm": 1.5249304763636364, "learning_rate": 4.534226282034375e-06, "loss": 0.8706, "step": 8496 }, { "epoch": 0.54, "grad_norm": 1.6610845768591476, "learning_rate": 4.53319424378414e-06, "loss": 0.7764, "step": 8497 }, { "epoch": 0.54, "grad_norm": 1.7266800558499462, "learning_rate": 4.532162225596239e-06, "loss": 0.7997, "step": 8498 }, { "epoch": 0.54, "grad_norm": 1.612423681445152, "learning_rate": 4.531130227515019e-06, "loss": 0.5472, "step": 8499 }, { "epoch": 0.54, "grad_norm": 1.869862646491967, "learning_rate": 4.530098249584838e-06, "loss": 1.0287, "step": 8500 }, { "epoch": 0.54, "grad_norm": 1.7091421387819055, "learning_rate": 4.529066291850047e-06, "loss": 0.7459, "step": 8501 }, { "epoch": 0.54, "grad_norm": 1.7504463580432457, "learning_rate": 4.528034354354996e-06, "loss": 0.8949, "step": 8502 }, { "epoch": 0.54, "grad_norm": 1.7251947547972035, "learning_rate": 4.527002437144039e-06, "loss": 0.8541, "step": 8503 }, { "epoch": 0.54, "grad_norm": 1.4286839735960406, "learning_rate": 4.525970540261521e-06, "loss": 0.6825, "step": 8504 }, { "epoch": 0.54, "grad_norm": 1.9027229063030595, "learning_rate": 4.524938663751796e-06, "loss": 0.9282, "step": 8505 }, { "epoch": 0.54, "grad_norm": 1.704329509882329, "learning_rate": 4.523906807659206e-06, "loss": 0.8277, "step": 8506 }, { "epoch": 0.54, "grad_norm": 1.7077662765853254, "learning_rate": 4.522874972028103e-06, "loss": 0.7122, "step": 8507 }, { "epoch": 0.54, "grad_norm": 1.6507026303989285, "learning_rate": 4.521843156902829e-06, "loss": 0.7808, "step": 8508 }, { "epoch": 0.54, "grad_norm": 2.035332559789901, "learning_rate": 4.520811362327732e-06, "loss": 0.6993, "step": 8509 }, { "epoch": 0.54, "grad_norm": 0.9448937397462157, "learning_rate": 4.519779588347158e-06, "loss": 0.5825, "step": 8510 }, { "epoch": 0.54, "grad_norm": 1.7566946038967723, "learning_rate": 4.518747835005445e-06, "loss": 0.787, "step": 8511 }, { "epoch": 0.54, "grad_norm": 1.261514637908114, "learning_rate": 4.5177161023469414e-06, "loss": 0.7289, "step": 8512 }, { "epoch": 0.54, "grad_norm": 1.7143194275066616, "learning_rate": 4.516684390415985e-06, "loss": 1.0525, "step": 8513 }, { "epoch": 0.54, "grad_norm": 1.434225732712974, "learning_rate": 4.515652699256918e-06, "loss": 0.6782, "step": 8514 }, { "epoch": 0.55, "grad_norm": 1.5188204318882133, "learning_rate": 4.51462102891408e-06, "loss": 0.7973, "step": 8515 }, { "epoch": 0.55, "grad_norm": 1.7463108187505365, "learning_rate": 4.513589379431812e-06, "loss": 0.7957, "step": 8516 }, { "epoch": 0.55, "grad_norm": 1.95329288808101, "learning_rate": 4.512557750854448e-06, "loss": 0.6139, "step": 8517 }, { "epoch": 0.55, "grad_norm": 1.831070730502663, "learning_rate": 4.511526143226328e-06, "loss": 0.7531, "step": 8518 }, { "epoch": 0.55, "grad_norm": 1.6941002828953207, "learning_rate": 4.51049455659179e-06, "loss": 0.7497, "step": 8519 }, { "epoch": 0.55, "grad_norm": 2.0185567337202865, "learning_rate": 4.509462990995165e-06, "loss": 0.7588, "step": 8520 }, { "epoch": 0.55, "grad_norm": 1.4358893762814047, "learning_rate": 4.508431446480792e-06, "loss": 0.5969, "step": 8521 }, { "epoch": 0.55, "grad_norm": 1.5715083650448873, "learning_rate": 4.507399923093e-06, "loss": 0.7186, "step": 8522 }, { "epoch": 0.55, "grad_norm": 1.541979436503046, "learning_rate": 4.506368420876127e-06, "loss": 0.6798, "step": 8523 }, { "epoch": 0.55, "grad_norm": 1.8279510713779303, "learning_rate": 4.5053369398745e-06, "loss": 0.7163, "step": 8524 }, { "epoch": 0.55, "grad_norm": 1.7441399521144578, "learning_rate": 4.504305480132454e-06, "loss": 0.7412, "step": 8525 }, { "epoch": 0.55, "grad_norm": 1.6112262519538867, "learning_rate": 4.5032740416943145e-06, "loss": 0.7403, "step": 8526 }, { "epoch": 0.55, "grad_norm": 1.560651477332098, "learning_rate": 4.502242624604413e-06, "loss": 0.7818, "step": 8527 }, { "epoch": 0.55, "grad_norm": 1.3487863785790957, "learning_rate": 4.501211228907078e-06, "loss": 0.6571, "step": 8528 }, { "epoch": 0.55, "grad_norm": 1.6107073462679369, "learning_rate": 4.5001798546466365e-06, "loss": 0.7906, "step": 8529 }, { "epoch": 0.55, "grad_norm": 2.0968021821422225, "learning_rate": 4.4991485018674154e-06, "loss": 0.6974, "step": 8530 }, { "epoch": 0.55, "grad_norm": 1.4183592373769358, "learning_rate": 4.498117170613738e-06, "loss": 0.6815, "step": 8531 }, { "epoch": 0.55, "grad_norm": 1.9548841451883316, "learning_rate": 4.497085860929932e-06, "loss": 0.7068, "step": 8532 }, { "epoch": 0.55, "grad_norm": 1.776559544113692, "learning_rate": 4.496054572860317e-06, "loss": 0.7453, "step": 8533 }, { "epoch": 0.55, "grad_norm": 1.6809562945178522, "learning_rate": 4.495023306449219e-06, "loss": 0.7696, "step": 8534 }, { "epoch": 0.55, "grad_norm": 2.2279575322242176, "learning_rate": 4.493992061740956e-06, "loss": 0.6747, "step": 8535 }, { "epoch": 0.55, "grad_norm": 1.6431238890256497, "learning_rate": 4.492960838779853e-06, "loss": 0.7858, "step": 8536 }, { "epoch": 0.55, "grad_norm": 1.8173367730662011, "learning_rate": 4.491929637610225e-06, "loss": 0.8568, "step": 8537 }, { "epoch": 0.55, "grad_norm": 2.443776830101685, "learning_rate": 4.490898458276395e-06, "loss": 0.6998, "step": 8538 }, { "epoch": 0.55, "grad_norm": 1.7288109133672618, "learning_rate": 4.489867300822681e-06, "loss": 0.7091, "step": 8539 }, { "epoch": 0.55, "grad_norm": 1.1497163867863878, "learning_rate": 4.488836165293397e-06, "loss": 0.691, "step": 8540 }, { "epoch": 0.55, "grad_norm": 1.5134968707335927, "learning_rate": 4.4878050517328625e-06, "loss": 0.6308, "step": 8541 }, { "epoch": 0.55, "grad_norm": 1.4826262845751719, "learning_rate": 4.486773960185388e-06, "loss": 0.6165, "step": 8542 }, { "epoch": 0.55, "grad_norm": 1.8314460665254815, "learning_rate": 4.485742890695292e-06, "loss": 0.6742, "step": 8543 }, { "epoch": 0.55, "grad_norm": 0.9902046320834, "learning_rate": 4.484711843306884e-06, "loss": 0.672, "step": 8544 }, { "epoch": 0.55, "grad_norm": 0.9029829863058688, "learning_rate": 4.483680818064481e-06, "loss": 0.5756, "step": 8545 }, { "epoch": 0.55, "grad_norm": 1.625087888901004, "learning_rate": 4.482649815012389e-06, "loss": 0.7769, "step": 8546 }, { "epoch": 0.55, "grad_norm": 1.5518776112701054, "learning_rate": 4.481618834194921e-06, "loss": 0.6462, "step": 8547 }, { "epoch": 0.55, "grad_norm": 1.979789075231454, "learning_rate": 4.480587875656388e-06, "loss": 0.7884, "step": 8548 }, { "epoch": 0.55, "grad_norm": 1.7526921685209742, "learning_rate": 4.479556939441095e-06, "loss": 0.7079, "step": 8549 }, { "epoch": 0.55, "grad_norm": 1.9738776592246723, "learning_rate": 4.478526025593352e-06, "loss": 0.7786, "step": 8550 }, { "epoch": 0.55, "grad_norm": 0.931881910792116, "learning_rate": 4.477495134157464e-06, "loss": 0.6169, "step": 8551 }, { "epoch": 0.55, "grad_norm": 1.8068188493692898, "learning_rate": 4.4764642651777385e-06, "loss": 0.7879, "step": 8552 }, { "epoch": 0.55, "grad_norm": 1.2544796421324194, "learning_rate": 4.475433418698477e-06, "loss": 0.6956, "step": 8553 }, { "epoch": 0.55, "grad_norm": 1.6658848428405817, "learning_rate": 4.474402594763987e-06, "loss": 0.7321, "step": 8554 }, { "epoch": 0.55, "grad_norm": 1.760643868125485, "learning_rate": 4.473371793418567e-06, "loss": 0.6759, "step": 8555 }, { "epoch": 0.55, "grad_norm": 1.4929155655845077, "learning_rate": 4.4723410147065215e-06, "loss": 0.7255, "step": 8556 }, { "epoch": 0.55, "grad_norm": 1.0704944045919753, "learning_rate": 4.471310258672151e-06, "loss": 0.6613, "step": 8557 }, { "epoch": 0.55, "grad_norm": 1.8181487482145808, "learning_rate": 4.470279525359753e-06, "loss": 0.7233, "step": 8558 }, { "epoch": 0.55, "grad_norm": 1.6505787178596325, "learning_rate": 4.469248814813631e-06, "loss": 0.7318, "step": 8559 }, { "epoch": 0.55, "grad_norm": 1.570128223115276, "learning_rate": 4.468218127078076e-06, "loss": 0.8271, "step": 8560 }, { "epoch": 0.55, "grad_norm": 1.7825259549227488, "learning_rate": 4.467187462197392e-06, "loss": 0.8184, "step": 8561 }, { "epoch": 0.55, "grad_norm": 2.9478544414870993, "learning_rate": 4.466156820215868e-06, "loss": 0.8197, "step": 8562 }, { "epoch": 0.55, "grad_norm": 1.4771346880253216, "learning_rate": 4.465126201177804e-06, "loss": 0.8192, "step": 8563 }, { "epoch": 0.55, "grad_norm": 1.6186131309810667, "learning_rate": 4.464095605127491e-06, "loss": 0.6956, "step": 8564 }, { "epoch": 0.55, "grad_norm": 1.5608506853931552, "learning_rate": 4.463065032109224e-06, "loss": 0.6802, "step": 8565 }, { "epoch": 0.55, "grad_norm": 2.0958003839272012, "learning_rate": 4.462034482167293e-06, "loss": 0.7629, "step": 8566 }, { "epoch": 0.55, "grad_norm": 1.8607540389665658, "learning_rate": 4.461003955345989e-06, "loss": 0.6657, "step": 8567 }, { "epoch": 0.55, "grad_norm": 2.8669618111165964, "learning_rate": 4.459973451689604e-06, "loss": 0.697, "step": 8568 }, { "epoch": 0.55, "grad_norm": 1.343652057395169, "learning_rate": 4.458942971242423e-06, "loss": 0.6513, "step": 8569 }, { "epoch": 0.55, "grad_norm": 1.9174254751017088, "learning_rate": 4.457912514048739e-06, "loss": 0.7818, "step": 8570 }, { "epoch": 0.55, "grad_norm": 2.143211067441938, "learning_rate": 4.456882080152834e-06, "loss": 0.8802, "step": 8571 }, { "epoch": 0.55, "grad_norm": 1.8293247486452815, "learning_rate": 4.455851669598997e-06, "loss": 0.6615, "step": 8572 }, { "epoch": 0.55, "grad_norm": 1.8491895659560251, "learning_rate": 4.454821282431511e-06, "loss": 0.7838, "step": 8573 }, { "epoch": 0.55, "grad_norm": 1.7751427681015401, "learning_rate": 4.45379091869466e-06, "loss": 0.8778, "step": 8574 }, { "epoch": 0.55, "grad_norm": 1.7237065964150782, "learning_rate": 4.4527605784327295e-06, "loss": 0.6772, "step": 8575 }, { "epoch": 0.55, "grad_norm": 1.6444384164402406, "learning_rate": 4.451730261689998e-06, "loss": 0.6671, "step": 8576 }, { "epoch": 0.55, "grad_norm": 1.468081050570097, "learning_rate": 4.450699968510749e-06, "loss": 0.5709, "step": 8577 }, { "epoch": 0.55, "grad_norm": 1.959158791238692, "learning_rate": 4.44966969893926e-06, "loss": 0.7904, "step": 8578 }, { "epoch": 0.55, "grad_norm": 1.6006413634677399, "learning_rate": 4.448639453019812e-06, "loss": 0.7416, "step": 8579 }, { "epoch": 0.55, "grad_norm": 1.9878040055841677, "learning_rate": 4.4476092307966805e-06, "loss": 0.7728, "step": 8580 }, { "epoch": 0.55, "grad_norm": 1.5394564111622444, "learning_rate": 4.446579032314145e-06, "loss": 0.7621, "step": 8581 }, { "epoch": 0.55, "grad_norm": 1.7412330377076324, "learning_rate": 4.445548857616478e-06, "loss": 0.7956, "step": 8582 }, { "epoch": 0.55, "grad_norm": 1.0239063432843483, "learning_rate": 4.444518706747956e-06, "loss": 0.6154, "step": 8583 }, { "epoch": 0.55, "grad_norm": 1.5432347147957404, "learning_rate": 4.443488579752855e-06, "loss": 0.7517, "step": 8584 }, { "epoch": 0.55, "grad_norm": 1.7341033783170403, "learning_rate": 4.442458476675443e-06, "loss": 0.7897, "step": 8585 }, { "epoch": 0.55, "grad_norm": 2.099814265532343, "learning_rate": 4.441428397559996e-06, "loss": 0.7321, "step": 8586 }, { "epoch": 0.55, "grad_norm": 1.6047248369587421, "learning_rate": 4.440398342450782e-06, "loss": 0.7335, "step": 8587 }, { "epoch": 0.55, "grad_norm": 1.9887302359272443, "learning_rate": 4.439368311392071e-06, "loss": 0.8145, "step": 8588 }, { "epoch": 0.55, "grad_norm": 1.7889930416277828, "learning_rate": 4.438338304428132e-06, "loss": 0.6191, "step": 8589 }, { "epoch": 0.55, "grad_norm": 1.8008356843021598, "learning_rate": 4.437308321603234e-06, "loss": 0.8147, "step": 8590 }, { "epoch": 0.55, "grad_norm": 1.046732144294012, "learning_rate": 4.43627836296164e-06, "loss": 0.6042, "step": 8591 }, { "epoch": 0.55, "grad_norm": 1.831858982026848, "learning_rate": 4.435248428547618e-06, "loss": 0.7588, "step": 8592 }, { "epoch": 0.55, "grad_norm": 2.02427437133765, "learning_rate": 4.434218518405432e-06, "loss": 0.7078, "step": 8593 }, { "epoch": 0.55, "grad_norm": 1.0956021511080691, "learning_rate": 4.433188632579344e-06, "loss": 0.6414, "step": 8594 }, { "epoch": 0.55, "grad_norm": 1.1106038973893182, "learning_rate": 4.43215877111362e-06, "loss": 0.6741, "step": 8595 }, { "epoch": 0.55, "grad_norm": 1.555619429269576, "learning_rate": 4.431128934052517e-06, "loss": 0.8176, "step": 8596 }, { "epoch": 0.55, "grad_norm": 1.6853791967078597, "learning_rate": 4.4300991214402986e-06, "loss": 0.6239, "step": 8597 }, { "epoch": 0.55, "grad_norm": 2.438202033619927, "learning_rate": 4.429069333321221e-06, "loss": 0.6337, "step": 8598 }, { "epoch": 0.55, "grad_norm": 1.5946891311832285, "learning_rate": 4.428039569739544e-06, "loss": 0.797, "step": 8599 }, { "epoch": 0.55, "grad_norm": 3.1726823046921635, "learning_rate": 4.427009830739524e-06, "loss": 0.6144, "step": 8600 }, { "epoch": 0.55, "grad_norm": 1.5346127219013073, "learning_rate": 4.425980116365419e-06, "loss": 0.5706, "step": 8601 }, { "epoch": 0.55, "grad_norm": 1.7880519802485904, "learning_rate": 4.4249504266614814e-06, "loss": 0.6726, "step": 8602 }, { "epoch": 0.55, "grad_norm": 1.9716514052831629, "learning_rate": 4.423920761671965e-06, "loss": 0.8038, "step": 8603 }, { "epoch": 0.55, "grad_norm": 1.76450048675049, "learning_rate": 4.422891121441126e-06, "loss": 0.7839, "step": 8604 }, { "epoch": 0.55, "grad_norm": 1.950206996947014, "learning_rate": 4.421861506013213e-06, "loss": 0.7702, "step": 8605 }, { "epoch": 0.55, "grad_norm": 1.7162778902655662, "learning_rate": 4.420831915432477e-06, "loss": 0.6919, "step": 8606 }, { "epoch": 0.55, "grad_norm": 1.6125970654272843, "learning_rate": 4.419802349743169e-06, "loss": 0.6906, "step": 8607 }, { "epoch": 0.55, "grad_norm": 1.5875426537344655, "learning_rate": 4.418772808989537e-06, "loss": 0.7105, "step": 8608 }, { "epoch": 0.55, "grad_norm": 1.5379390310271837, "learning_rate": 4.417743293215827e-06, "loss": 0.6928, "step": 8609 }, { "epoch": 0.55, "grad_norm": 1.7396886478426545, "learning_rate": 4.416713802466289e-06, "loss": 0.7242, "step": 8610 }, { "epoch": 0.55, "grad_norm": 1.4295959225589208, "learning_rate": 4.415684336785165e-06, "loss": 0.8274, "step": 8611 }, { "epoch": 0.55, "grad_norm": 1.9836643720939076, "learning_rate": 4.414654896216699e-06, "loss": 0.726, "step": 8612 }, { "epoch": 0.55, "grad_norm": 1.5438310044684156, "learning_rate": 4.413625480805138e-06, "loss": 0.7074, "step": 8613 }, { "epoch": 0.55, "grad_norm": 1.9253919265087391, "learning_rate": 4.4125960905947195e-06, "loss": 0.6904, "step": 8614 }, { "epoch": 0.55, "grad_norm": 1.7828934447665092, "learning_rate": 4.411566725629688e-06, "loss": 0.6868, "step": 8615 }, { "epoch": 0.55, "grad_norm": 1.826111755031206, "learning_rate": 4.410537385954282e-06, "loss": 0.7461, "step": 8616 }, { "epoch": 0.55, "grad_norm": 2.844034967761191, "learning_rate": 4.409508071612741e-06, "loss": 0.6218, "step": 8617 }, { "epoch": 0.55, "grad_norm": 1.5816341058224965, "learning_rate": 4.408478782649301e-06, "loss": 0.6751, "step": 8618 }, { "epoch": 0.55, "grad_norm": 1.2056782963536092, "learning_rate": 4.407449519108203e-06, "loss": 0.6277, "step": 8619 }, { "epoch": 0.55, "grad_norm": 1.6036961498195215, "learning_rate": 4.4064202810336755e-06, "loss": 0.7496, "step": 8620 }, { "epoch": 0.55, "grad_norm": 1.757753004413614, "learning_rate": 4.405391068469958e-06, "loss": 0.7924, "step": 8621 }, { "epoch": 0.55, "grad_norm": 1.6832384167859695, "learning_rate": 4.404361881461285e-06, "loss": 0.6725, "step": 8622 }, { "epoch": 0.55, "grad_norm": 1.6876853345003158, "learning_rate": 4.403332720051884e-06, "loss": 0.8385, "step": 8623 }, { "epoch": 0.55, "grad_norm": 1.60683637450965, "learning_rate": 4.402303584285991e-06, "loss": 0.7137, "step": 8624 }, { "epoch": 0.55, "grad_norm": 1.4948515557650714, "learning_rate": 4.401274474207833e-06, "loss": 0.7257, "step": 8625 }, { "epoch": 0.55, "grad_norm": 1.7843558588124913, "learning_rate": 4.400245389861642e-06, "loss": 0.7, "step": 8626 }, { "epoch": 0.55, "grad_norm": 1.711914799954194, "learning_rate": 4.3992163312916404e-06, "loss": 0.745, "step": 8627 }, { "epoch": 0.55, "grad_norm": 1.4538953651534445, "learning_rate": 4.398187298542061e-06, "loss": 0.7373, "step": 8628 }, { "epoch": 0.55, "grad_norm": 3.6642641824922126, "learning_rate": 4.397158291657125e-06, "loss": 0.6758, "step": 8629 }, { "epoch": 0.55, "grad_norm": 1.5283712119676496, "learning_rate": 4.39612931068106e-06, "loss": 0.7748, "step": 8630 }, { "epoch": 0.55, "grad_norm": 1.6205707295108986, "learning_rate": 4.39510035565809e-06, "loss": 0.6834, "step": 8631 }, { "epoch": 0.55, "grad_norm": 1.619530696314027, "learning_rate": 4.394071426632433e-06, "loss": 0.8267, "step": 8632 }, { "epoch": 0.55, "grad_norm": 1.1589813517507936, "learning_rate": 4.393042523648315e-06, "loss": 0.7021, "step": 8633 }, { "epoch": 0.55, "grad_norm": 2.0134915693108972, "learning_rate": 4.392013646749952e-06, "loss": 0.6543, "step": 8634 }, { "epoch": 0.55, "grad_norm": 1.7171351933507841, "learning_rate": 4.390984795981566e-06, "loss": 0.744, "step": 8635 }, { "epoch": 0.55, "grad_norm": 1.7525549850167785, "learning_rate": 4.389955971387373e-06, "loss": 0.699, "step": 8636 }, { "epoch": 0.55, "grad_norm": 2.147883796232233, "learning_rate": 4.388927173011592e-06, "loss": 0.707, "step": 8637 }, { "epoch": 0.55, "grad_norm": 4.053018601115359, "learning_rate": 4.387898400898436e-06, "loss": 0.8174, "step": 8638 }, { "epoch": 0.55, "grad_norm": 1.9045240112190684, "learning_rate": 4.38686965509212e-06, "loss": 0.8024, "step": 8639 }, { "epoch": 0.55, "grad_norm": 1.2281164309903063, "learning_rate": 4.385840935636859e-06, "loss": 0.7026, "step": 8640 }, { "epoch": 0.55, "grad_norm": 3.7078375279749793, "learning_rate": 4.384812242576863e-06, "loss": 0.674, "step": 8641 }, { "epoch": 0.55, "grad_norm": 1.8276089383466962, "learning_rate": 4.383783575956345e-06, "loss": 0.7189, "step": 8642 }, { "epoch": 0.55, "grad_norm": 1.699843630826308, "learning_rate": 4.382754935819514e-06, "loss": 0.8142, "step": 8643 }, { "epoch": 0.55, "grad_norm": 1.0232565525475086, "learning_rate": 4.381726322210579e-06, "loss": 0.6741, "step": 8644 }, { "epoch": 0.55, "grad_norm": 1.700903074439366, "learning_rate": 4.380697735173745e-06, "loss": 0.7351, "step": 8645 }, { "epoch": 0.55, "grad_norm": 1.1287198196399921, "learning_rate": 4.379669174753226e-06, "loss": 0.7894, "step": 8646 }, { "epoch": 0.55, "grad_norm": 1.6356392130928536, "learning_rate": 4.378640640993218e-06, "loss": 0.8795, "step": 8647 }, { "epoch": 0.55, "grad_norm": 1.4983189916163686, "learning_rate": 4.37761213393793e-06, "loss": 0.7481, "step": 8648 }, { "epoch": 0.55, "grad_norm": 1.838377732435305, "learning_rate": 4.376583653631567e-06, "loss": 0.6509, "step": 8649 }, { "epoch": 0.55, "grad_norm": 1.9849764767096834, "learning_rate": 4.3755552001183265e-06, "loss": 0.8455, "step": 8650 }, { "epoch": 0.55, "grad_norm": 1.6136549386100245, "learning_rate": 4.374526773442413e-06, "loss": 0.8501, "step": 8651 }, { "epoch": 0.55, "grad_norm": 1.628314522479557, "learning_rate": 4.373498373648022e-06, "loss": 0.6175, "step": 8652 }, { "epoch": 0.55, "grad_norm": 1.7278961648706541, "learning_rate": 4.372470000779357e-06, "loss": 0.6146, "step": 8653 }, { "epoch": 0.55, "grad_norm": 1.0530946509953307, "learning_rate": 4.371441654880612e-06, "loss": 0.5978, "step": 8654 }, { "epoch": 0.55, "grad_norm": 1.5879584313106685, "learning_rate": 4.370413335995985e-06, "loss": 0.617, "step": 8655 }, { "epoch": 0.55, "grad_norm": 1.6834974804561762, "learning_rate": 4.369385044169667e-06, "loss": 0.6839, "step": 8656 }, { "epoch": 0.55, "grad_norm": 1.630183219671313, "learning_rate": 4.368356779445856e-06, "loss": 0.6255, "step": 8657 }, { "epoch": 0.55, "grad_norm": 1.5815608770110938, "learning_rate": 4.367328541868744e-06, "loss": 0.8206, "step": 8658 }, { "epoch": 0.55, "grad_norm": 2.147054020647294, "learning_rate": 4.36630033148252e-06, "loss": 0.8238, "step": 8659 }, { "epoch": 0.55, "grad_norm": 1.8659580003263867, "learning_rate": 4.36527214833138e-06, "loss": 0.7287, "step": 8660 }, { "epoch": 0.55, "grad_norm": 1.791601382819844, "learning_rate": 4.364243992459506e-06, "loss": 0.7369, "step": 8661 }, { "epoch": 0.55, "grad_norm": 1.9046525131523249, "learning_rate": 4.363215863911091e-06, "loss": 0.8542, "step": 8662 }, { "epoch": 0.55, "grad_norm": 1.6113187683375227, "learning_rate": 4.362187762730319e-06, "loss": 0.6206, "step": 8663 }, { "epoch": 0.55, "grad_norm": 1.769072123925916, "learning_rate": 4.3611596889613775e-06, "loss": 0.7824, "step": 8664 }, { "epoch": 0.55, "grad_norm": 1.6166075772119983, "learning_rate": 4.360131642648449e-06, "loss": 0.772, "step": 8665 }, { "epoch": 0.55, "grad_norm": 1.7457741417081112, "learning_rate": 4.359103623835718e-06, "loss": 0.8891, "step": 8666 }, { "epoch": 0.55, "grad_norm": 1.7097341356703728, "learning_rate": 4.358075632567368e-06, "loss": 0.7955, "step": 8667 }, { "epoch": 0.55, "grad_norm": 1.5014956544438545, "learning_rate": 4.357047668887577e-06, "loss": 0.709, "step": 8668 }, { "epoch": 0.55, "grad_norm": 1.0440207525140495, "learning_rate": 4.356019732840528e-06, "loss": 0.5858, "step": 8669 }, { "epoch": 0.55, "grad_norm": 2.6369217463411765, "learning_rate": 4.354991824470396e-06, "loss": 0.8227, "step": 8670 }, { "epoch": 0.55, "grad_norm": 1.6386743206361525, "learning_rate": 4.3539639438213606e-06, "loss": 0.6919, "step": 8671 }, { "epoch": 0.56, "grad_norm": 1.843707618891529, "learning_rate": 4.3529360909375966e-06, "loss": 0.6427, "step": 8672 }, { "epoch": 0.56, "grad_norm": 1.544291318057311, "learning_rate": 4.351908265863282e-06, "loss": 0.7281, "step": 8673 }, { "epoch": 0.56, "grad_norm": 2.0139466601175537, "learning_rate": 4.350880468642586e-06, "loss": 0.6576, "step": 8674 }, { "epoch": 0.56, "grad_norm": 1.620419748577929, "learning_rate": 4.349852699319686e-06, "loss": 0.7793, "step": 8675 }, { "epoch": 0.56, "grad_norm": 1.759398470260094, "learning_rate": 4.3488249579387475e-06, "loss": 0.803, "step": 8676 }, { "epoch": 0.56, "grad_norm": 2.037710045627056, "learning_rate": 4.347797244543945e-06, "loss": 0.6662, "step": 8677 }, { "epoch": 0.56, "grad_norm": 1.7690067066169386, "learning_rate": 4.346769559179447e-06, "loss": 0.68, "step": 8678 }, { "epoch": 0.56, "grad_norm": 5.3527761176761235, "learning_rate": 4.34574190188942e-06, "loss": 0.7882, "step": 8679 }, { "epoch": 0.56, "grad_norm": 1.5706231952546865, "learning_rate": 4.344714272718033e-06, "loss": 0.6375, "step": 8680 }, { "epoch": 0.56, "grad_norm": 1.6692640285930194, "learning_rate": 4.343686671709449e-06, "loss": 0.8452, "step": 8681 }, { "epoch": 0.56, "grad_norm": 1.794656597787977, "learning_rate": 4.342659098907833e-06, "loss": 0.6827, "step": 8682 }, { "epoch": 0.56, "grad_norm": 1.6845295520442334, "learning_rate": 4.341631554357347e-06, "loss": 0.8467, "step": 8683 }, { "epoch": 0.56, "grad_norm": 1.7304900243679768, "learning_rate": 4.340604038102154e-06, "loss": 0.7056, "step": 8684 }, { "epoch": 0.56, "grad_norm": 1.6568515474513568, "learning_rate": 4.339576550186413e-06, "loss": 0.6321, "step": 8685 }, { "epoch": 0.56, "grad_norm": 1.0443442842391741, "learning_rate": 4.338549090654284e-06, "loss": 0.706, "step": 8686 }, { "epoch": 0.56, "grad_norm": 1.9823799393350725, "learning_rate": 4.3375216595499254e-06, "loss": 0.8076, "step": 8687 }, { "epoch": 0.56, "grad_norm": 1.4595527796506935, "learning_rate": 4.336494256917494e-06, "loss": 0.6778, "step": 8688 }, { "epoch": 0.56, "grad_norm": 1.8517665299664932, "learning_rate": 4.335466882801146e-06, "loss": 0.8146, "step": 8689 }, { "epoch": 0.56, "grad_norm": 1.8287909886937015, "learning_rate": 4.334439537245033e-06, "loss": 0.725, "step": 8690 }, { "epoch": 0.56, "grad_norm": 1.4527440038953003, "learning_rate": 4.333412220293313e-06, "loss": 0.6153, "step": 8691 }, { "epoch": 0.56, "grad_norm": 1.6278459444448656, "learning_rate": 4.332384931990133e-06, "loss": 0.8328, "step": 8692 }, { "epoch": 0.56, "grad_norm": 1.7657458414794627, "learning_rate": 4.3313576723796464e-06, "loss": 0.7693, "step": 8693 }, { "epoch": 0.56, "grad_norm": 1.5333927686609634, "learning_rate": 4.330330441506001e-06, "loss": 0.7054, "step": 8694 }, { "epoch": 0.56, "grad_norm": 1.6367539520684824, "learning_rate": 4.329303239413346e-06, "loss": 0.6792, "step": 8695 }, { "epoch": 0.56, "grad_norm": 1.784217660339352, "learning_rate": 4.328276066145831e-06, "loss": 0.7631, "step": 8696 }, { "epoch": 0.56, "grad_norm": 7.310980621822048, "learning_rate": 4.327248921747597e-06, "loss": 0.7573, "step": 8697 }, { "epoch": 0.56, "grad_norm": 1.7364201178649146, "learning_rate": 4.326221806262793e-06, "loss": 0.7031, "step": 8698 }, { "epoch": 0.56, "grad_norm": 1.5817829079134502, "learning_rate": 4.325194719735557e-06, "loss": 0.6104, "step": 8699 }, { "epoch": 0.56, "grad_norm": 1.7396429202204555, "learning_rate": 4.324167662210035e-06, "loss": 0.7591, "step": 8700 }, { "epoch": 0.56, "grad_norm": 1.1139621629791938, "learning_rate": 4.3231406337303665e-06, "loss": 0.4938, "step": 8701 }, { "epoch": 0.56, "grad_norm": 1.6555206958719424, "learning_rate": 4.322113634340693e-06, "loss": 0.7994, "step": 8702 }, { "epoch": 0.56, "grad_norm": 1.0700721172872913, "learning_rate": 4.321086664085149e-06, "loss": 0.773, "step": 8703 }, { "epoch": 0.56, "grad_norm": 1.0818329625940806, "learning_rate": 4.320059723007874e-06, "loss": 0.6211, "step": 8704 }, { "epoch": 0.56, "grad_norm": 1.5819761862182182, "learning_rate": 4.319032811153005e-06, "loss": 0.6935, "step": 8705 }, { "epoch": 0.56, "grad_norm": 1.8607820565088984, "learning_rate": 4.318005928564672e-06, "loss": 0.7848, "step": 8706 }, { "epoch": 0.56, "grad_norm": 1.5867125471601407, "learning_rate": 4.316979075287014e-06, "loss": 0.7948, "step": 8707 }, { "epoch": 0.56, "grad_norm": 1.3417826974134206, "learning_rate": 4.315952251364158e-06, "loss": 0.6931, "step": 8708 }, { "epoch": 0.56, "grad_norm": 1.4737619190985607, "learning_rate": 4.314925456840239e-06, "loss": 0.6692, "step": 8709 }, { "epoch": 0.56, "grad_norm": 1.7199027037680714, "learning_rate": 4.313898691759382e-06, "loss": 0.8121, "step": 8710 }, { "epoch": 0.56, "grad_norm": 2.0128022672533303, "learning_rate": 4.3128719561657205e-06, "loss": 0.7392, "step": 8711 }, { "epoch": 0.56, "grad_norm": 1.5886060908433512, "learning_rate": 4.311845250103376e-06, "loss": 0.6323, "step": 8712 }, { "epoch": 0.56, "grad_norm": 1.4938180182024683, "learning_rate": 4.310818573616476e-06, "loss": 0.6649, "step": 8713 }, { "epoch": 0.56, "grad_norm": 1.8930072279943106, "learning_rate": 4.309791926749147e-06, "loss": 0.8041, "step": 8714 }, { "epoch": 0.56, "grad_norm": 1.1749233680928832, "learning_rate": 4.30876530954551e-06, "loss": 0.6816, "step": 8715 }, { "epoch": 0.56, "grad_norm": 1.63235476363546, "learning_rate": 4.3077387220496886e-06, "loss": 0.8369, "step": 8716 }, { "epoch": 0.56, "grad_norm": 1.5553737422074387, "learning_rate": 4.3067121643058e-06, "loss": 0.7557, "step": 8717 }, { "epoch": 0.56, "grad_norm": 1.0288214283005694, "learning_rate": 4.30568563635797e-06, "loss": 0.7136, "step": 8718 }, { "epoch": 0.56, "grad_norm": 1.7303357463067879, "learning_rate": 4.304659138250309e-06, "loss": 0.7909, "step": 8719 }, { "epoch": 0.56, "grad_norm": 1.9148608021559272, "learning_rate": 4.303632670026937e-06, "loss": 0.6839, "step": 8720 }, { "epoch": 0.56, "grad_norm": 1.7733219600122, "learning_rate": 4.302606231731971e-06, "loss": 0.7545, "step": 8721 }, { "epoch": 0.56, "grad_norm": 1.6391726951167696, "learning_rate": 4.301579823409523e-06, "loss": 0.8555, "step": 8722 }, { "epoch": 0.56, "grad_norm": 1.782492715277076, "learning_rate": 4.300553445103707e-06, "loss": 0.7285, "step": 8723 }, { "epoch": 0.56, "grad_norm": 1.6690606136730861, "learning_rate": 4.299527096858633e-06, "loss": 0.8133, "step": 8724 }, { "epoch": 0.56, "grad_norm": 2.383179248838448, "learning_rate": 4.298500778718415e-06, "loss": 0.7482, "step": 8725 }, { "epoch": 0.56, "grad_norm": 2.1963010911578755, "learning_rate": 4.297474490727157e-06, "loss": 0.7251, "step": 8726 }, { "epoch": 0.56, "grad_norm": 1.7386042801568973, "learning_rate": 4.296448232928971e-06, "loss": 0.7709, "step": 8727 }, { "epoch": 0.56, "grad_norm": 1.6749880105326065, "learning_rate": 4.295422005367961e-06, "loss": 0.6774, "step": 8728 }, { "epoch": 0.56, "grad_norm": 1.122390717153694, "learning_rate": 4.294395808088232e-06, "loss": 0.7263, "step": 8729 }, { "epoch": 0.56, "grad_norm": 1.682476287789308, "learning_rate": 4.2933696411338885e-06, "loss": 0.6691, "step": 8730 }, { "epoch": 0.56, "grad_norm": 1.8824259855102174, "learning_rate": 4.292343504549032e-06, "loss": 0.7948, "step": 8731 }, { "epoch": 0.56, "grad_norm": 1.7368838206690658, "learning_rate": 4.291317398377768e-06, "loss": 0.6963, "step": 8732 }, { "epoch": 0.56, "grad_norm": 1.7011001804655566, "learning_rate": 4.29029132266419e-06, "loss": 0.795, "step": 8733 }, { "epoch": 0.56, "grad_norm": 1.6994229784811228, "learning_rate": 4.289265277452403e-06, "loss": 0.6347, "step": 8734 }, { "epoch": 0.56, "grad_norm": 1.6893280778338184, "learning_rate": 4.288239262786497e-06, "loss": 0.6888, "step": 8735 }, { "epoch": 0.56, "grad_norm": 1.8448097177181513, "learning_rate": 4.287213278710574e-06, "loss": 0.7375, "step": 8736 }, { "epoch": 0.56, "grad_norm": 2.603365472918483, "learning_rate": 4.286187325268726e-06, "loss": 0.8269, "step": 8737 }, { "epoch": 0.56, "grad_norm": 1.528833549850697, "learning_rate": 4.285161402505047e-06, "loss": 0.835, "step": 8738 }, { "epoch": 0.56, "grad_norm": 1.8135209484698716, "learning_rate": 4.284135510463628e-06, "loss": 0.795, "step": 8739 }, { "epoch": 0.56, "grad_norm": 1.5188112729991097, "learning_rate": 4.283109649188561e-06, "loss": 0.7135, "step": 8740 }, { "epoch": 0.56, "grad_norm": 1.1449306663820953, "learning_rate": 4.282083818723937e-06, "loss": 0.6444, "step": 8741 }, { "epoch": 0.56, "grad_norm": 2.4552072713856594, "learning_rate": 4.2810580191138385e-06, "loss": 0.6743, "step": 8742 }, { "epoch": 0.56, "grad_norm": 1.8437700669132502, "learning_rate": 4.280032250402358e-06, "loss": 0.7363, "step": 8743 }, { "epoch": 0.56, "grad_norm": 1.8342838352636617, "learning_rate": 4.279006512633576e-06, "loss": 0.63, "step": 8744 }, { "epoch": 0.56, "grad_norm": 2.545961428115466, "learning_rate": 4.2779808058515825e-06, "loss": 0.8401, "step": 8745 }, { "epoch": 0.56, "grad_norm": 1.6208847249284564, "learning_rate": 4.276955130100455e-06, "loss": 0.708, "step": 8746 }, { "epoch": 0.56, "grad_norm": 1.6475420763164283, "learning_rate": 4.275929485424278e-06, "loss": 0.7805, "step": 8747 }, { "epoch": 0.56, "grad_norm": 1.6458526352288387, "learning_rate": 4.274903871867128e-06, "loss": 0.7664, "step": 8748 }, { "epoch": 0.56, "grad_norm": 1.614701026802736, "learning_rate": 4.2738782894730876e-06, "loss": 0.6602, "step": 8749 }, { "epoch": 0.56, "grad_norm": 1.784117833265564, "learning_rate": 4.272852738286231e-06, "loss": 0.7303, "step": 8750 }, { "epoch": 0.56, "grad_norm": 1.5356578510911718, "learning_rate": 4.271827218350636e-06, "loss": 0.6671, "step": 8751 }, { "epoch": 0.56, "grad_norm": 1.7702766504413106, "learning_rate": 4.270801729710379e-06, "loss": 0.8123, "step": 8752 }, { "epoch": 0.56, "grad_norm": 1.83264179043616, "learning_rate": 4.269776272409529e-06, "loss": 0.8109, "step": 8753 }, { "epoch": 0.56, "grad_norm": 1.9300859580153498, "learning_rate": 4.268750846492163e-06, "loss": 0.7628, "step": 8754 }, { "epoch": 0.56, "grad_norm": 2.3744757600302075, "learning_rate": 4.2677254520023465e-06, "loss": 0.6621, "step": 8755 }, { "epoch": 0.56, "grad_norm": 2.915372365967836, "learning_rate": 4.266700088984153e-06, "loss": 1.0703, "step": 8756 }, { "epoch": 0.56, "grad_norm": 1.1019515313266919, "learning_rate": 4.265674757481647e-06, "loss": 0.6555, "step": 8757 }, { "epoch": 0.56, "grad_norm": 1.5351474474485924, "learning_rate": 4.2646494575389e-06, "loss": 0.7778, "step": 8758 }, { "epoch": 0.56, "grad_norm": 1.4622700170121088, "learning_rate": 4.263624189199971e-06, "loss": 0.5953, "step": 8759 }, { "epoch": 0.56, "grad_norm": 1.6330340880660474, "learning_rate": 4.262598952508927e-06, "loss": 0.7359, "step": 8760 }, { "epoch": 0.56, "grad_norm": 2.3738536444128773, "learning_rate": 4.261573747509833e-06, "loss": 0.7452, "step": 8761 }, { "epoch": 0.56, "grad_norm": 1.5402756402711955, "learning_rate": 4.260548574246746e-06, "loss": 0.7874, "step": 8762 }, { "epoch": 0.56, "grad_norm": 1.5354832887981882, "learning_rate": 4.259523432763728e-06, "loss": 0.6337, "step": 8763 }, { "epoch": 0.56, "grad_norm": 1.4078083494312725, "learning_rate": 4.258498323104837e-06, "loss": 0.7051, "step": 8764 }, { "epoch": 0.56, "grad_norm": 2.44076857305043, "learning_rate": 4.25747324531413e-06, "loss": 0.9131, "step": 8765 }, { "epoch": 0.56, "grad_norm": 1.8380530299473126, "learning_rate": 4.256448199435662e-06, "loss": 0.6791, "step": 8766 }, { "epoch": 0.56, "grad_norm": 2.174121162048791, "learning_rate": 4.25542318551349e-06, "loss": 0.7599, "step": 8767 }, { "epoch": 0.56, "grad_norm": 1.8309528429801876, "learning_rate": 4.2543982035916625e-06, "loss": 0.7427, "step": 8768 }, { "epoch": 0.56, "grad_norm": 1.749309175523821, "learning_rate": 4.2533732537142335e-06, "loss": 0.839, "step": 8769 }, { "epoch": 0.56, "grad_norm": 1.0522660205681758, "learning_rate": 4.252348335925255e-06, "loss": 0.557, "step": 8770 }, { "epoch": 0.56, "grad_norm": 1.4985573178934384, "learning_rate": 4.2513234502687725e-06, "loss": 0.6157, "step": 8771 }, { "epoch": 0.56, "grad_norm": 1.8979220475597613, "learning_rate": 4.250298596788835e-06, "loss": 0.7763, "step": 8772 }, { "epoch": 0.56, "grad_norm": 1.867186422359606, "learning_rate": 4.249273775529489e-06, "loss": 0.8158, "step": 8773 }, { "epoch": 0.56, "grad_norm": 1.660794986039141, "learning_rate": 4.24824898653478e-06, "loss": 0.7492, "step": 8774 }, { "epoch": 0.56, "grad_norm": 1.6967593827340353, "learning_rate": 4.247224229848747e-06, "loss": 0.6917, "step": 8775 }, { "epoch": 0.56, "grad_norm": 1.5266371136077663, "learning_rate": 4.246199505515438e-06, "loss": 0.6566, "step": 8776 }, { "epoch": 0.56, "grad_norm": 1.7252862410820833, "learning_rate": 4.245174813578887e-06, "loss": 0.5825, "step": 8777 }, { "epoch": 0.56, "grad_norm": 1.5939961883437586, "learning_rate": 4.244150154083137e-06, "loss": 0.7639, "step": 8778 }, { "epoch": 0.56, "grad_norm": 1.6660995867110395, "learning_rate": 4.243125527072227e-06, "loss": 0.7175, "step": 8779 }, { "epoch": 0.56, "grad_norm": 1.515449840110544, "learning_rate": 4.24210093259019e-06, "loss": 0.672, "step": 8780 }, { "epoch": 0.56, "grad_norm": 1.649801040473455, "learning_rate": 4.241076370681064e-06, "loss": 0.7683, "step": 8781 }, { "epoch": 0.56, "grad_norm": 1.4382978393181813, "learning_rate": 4.240051841388878e-06, "loss": 0.8026, "step": 8782 }, { "epoch": 0.56, "grad_norm": 1.8297765085844893, "learning_rate": 4.239027344757671e-06, "loss": 0.8362, "step": 8783 }, { "epoch": 0.56, "grad_norm": 1.7734803413146505, "learning_rate": 4.238002880831466e-06, "loss": 0.6522, "step": 8784 }, { "epoch": 0.56, "grad_norm": 1.6639475740016103, "learning_rate": 4.2369784496542986e-06, "loss": 0.6874, "step": 8785 }, { "epoch": 0.56, "grad_norm": 1.9852579888381112, "learning_rate": 4.235954051270192e-06, "loss": 0.7829, "step": 8786 }, { "epoch": 0.56, "grad_norm": 1.8094243787193935, "learning_rate": 4.234929685723175e-06, "loss": 0.7136, "step": 8787 }, { "epoch": 0.56, "grad_norm": 1.7763319386741825, "learning_rate": 4.2339053530572735e-06, "loss": 0.8482, "step": 8788 }, { "epoch": 0.56, "grad_norm": 0.9905214807604376, "learning_rate": 4.2328810533165095e-06, "loss": 0.6649, "step": 8789 }, { "epoch": 0.56, "grad_norm": 1.6670736476506542, "learning_rate": 4.231856786544907e-06, "loss": 0.7634, "step": 8790 }, { "epoch": 0.56, "grad_norm": 1.7250820350937324, "learning_rate": 4.230832552786485e-06, "loss": 0.7427, "step": 8791 }, { "epoch": 0.56, "grad_norm": 1.7313670791236244, "learning_rate": 4.2298083520852636e-06, "loss": 0.7403, "step": 8792 }, { "epoch": 0.56, "grad_norm": 1.657055736715687, "learning_rate": 4.2287841844852595e-06, "loss": 0.7338, "step": 8793 }, { "epoch": 0.56, "grad_norm": 2.2136390134758743, "learning_rate": 4.227760050030494e-06, "loss": 0.735, "step": 8794 }, { "epoch": 0.56, "grad_norm": 1.7217614427431338, "learning_rate": 4.226735948764976e-06, "loss": 0.7274, "step": 8795 }, { "epoch": 0.56, "grad_norm": 1.2520465776143694, "learning_rate": 4.2257118807327216e-06, "loss": 0.6608, "step": 8796 }, { "epoch": 0.56, "grad_norm": 1.8676495478339037, "learning_rate": 4.2246878459777465e-06, "loss": 0.7099, "step": 8797 }, { "epoch": 0.56, "grad_norm": 1.563697443519513, "learning_rate": 4.223663844544056e-06, "loss": 0.7487, "step": 8798 }, { "epoch": 0.56, "grad_norm": 1.751954449382749, "learning_rate": 4.222639876475663e-06, "loss": 0.7853, "step": 8799 }, { "epoch": 0.56, "grad_norm": 1.8777359655460375, "learning_rate": 4.221615941816575e-06, "loss": 0.779, "step": 8800 }, { "epoch": 0.56, "grad_norm": 1.4458221069939936, "learning_rate": 4.220592040610798e-06, "loss": 0.6445, "step": 8801 }, { "epoch": 0.56, "grad_norm": 1.1729440734322332, "learning_rate": 4.219568172902336e-06, "loss": 0.6992, "step": 8802 }, { "epoch": 0.56, "grad_norm": 1.246506704701639, "learning_rate": 4.218544338735197e-06, "loss": 0.6669, "step": 8803 }, { "epoch": 0.56, "grad_norm": 1.4782310663292544, "learning_rate": 4.217520538153378e-06, "loss": 0.6545, "step": 8804 }, { "epoch": 0.56, "grad_norm": 1.5406241191055794, "learning_rate": 4.216496771200881e-06, "loss": 0.7376, "step": 8805 }, { "epoch": 0.56, "grad_norm": 1.7892396869313556, "learning_rate": 4.21547303792171e-06, "loss": 0.7242, "step": 8806 }, { "epoch": 0.56, "grad_norm": 1.636633353996578, "learning_rate": 4.214449338359856e-06, "loss": 0.7043, "step": 8807 }, { "epoch": 0.56, "grad_norm": 1.6302552209992252, "learning_rate": 4.2134256725593206e-06, "loss": 0.755, "step": 8808 }, { "epoch": 0.56, "grad_norm": 1.6900960857853862, "learning_rate": 4.2124020405640955e-06, "loss": 0.7356, "step": 8809 }, { "epoch": 0.56, "grad_norm": 1.8271345079780623, "learning_rate": 4.211378442418178e-06, "loss": 0.7407, "step": 8810 }, { "epoch": 0.56, "grad_norm": 1.4290791945246961, "learning_rate": 4.2103548781655555e-06, "loss": 0.6944, "step": 8811 }, { "epoch": 0.56, "grad_norm": 2.1407103704989794, "learning_rate": 4.209331347850224e-06, "loss": 0.7666, "step": 8812 }, { "epoch": 0.56, "grad_norm": 1.5356374819299325, "learning_rate": 4.2083078515161664e-06, "loss": 0.7015, "step": 8813 }, { "epoch": 0.56, "grad_norm": 1.2991689823344184, "learning_rate": 4.207284389207375e-06, "loss": 0.5975, "step": 8814 }, { "epoch": 0.56, "grad_norm": 1.6555168088742596, "learning_rate": 4.206260960967836e-06, "loss": 0.6504, "step": 8815 }, { "epoch": 0.56, "grad_norm": 1.8404409373673967, "learning_rate": 4.205237566841531e-06, "loss": 0.8166, "step": 8816 }, { "epoch": 0.56, "grad_norm": 1.649134281655058, "learning_rate": 4.204214206872448e-06, "loss": 0.7965, "step": 8817 }, { "epoch": 0.56, "grad_norm": 1.5733888768020485, "learning_rate": 4.203190881104564e-06, "loss": 0.6746, "step": 8818 }, { "epoch": 0.56, "grad_norm": 1.7434404954024794, "learning_rate": 4.202167589581863e-06, "loss": 0.7363, "step": 8819 }, { "epoch": 0.56, "grad_norm": 1.760427478154674, "learning_rate": 4.201144332348321e-06, "loss": 0.6983, "step": 8820 }, { "epoch": 0.56, "grad_norm": 2.1531451728091433, "learning_rate": 4.200121109447919e-06, "loss": 0.8394, "step": 8821 }, { "epoch": 0.56, "grad_norm": 1.8243854462440947, "learning_rate": 4.199097920924628e-06, "loss": 0.773, "step": 8822 }, { "epoch": 0.56, "grad_norm": 1.5561972174277083, "learning_rate": 4.198074766822429e-06, "loss": 0.6973, "step": 8823 }, { "epoch": 0.56, "grad_norm": 1.6012544338153876, "learning_rate": 4.197051647185288e-06, "loss": 0.741, "step": 8824 }, { "epoch": 0.56, "grad_norm": 0.9140802101146281, "learning_rate": 4.196028562057181e-06, "loss": 0.6801, "step": 8825 }, { "epoch": 0.56, "grad_norm": 1.6187860131867005, "learning_rate": 4.1950055114820785e-06, "loss": 0.7139, "step": 8826 }, { "epoch": 0.56, "grad_norm": 1.8487369448611362, "learning_rate": 4.193982495503946e-06, "loss": 0.7343, "step": 8827 }, { "epoch": 0.57, "grad_norm": 1.692869466882005, "learning_rate": 4.1929595141667535e-06, "loss": 0.8411, "step": 8828 }, { "epoch": 0.57, "grad_norm": 1.6881528821514011, "learning_rate": 4.191936567514464e-06, "loss": 0.7789, "step": 8829 }, { "epoch": 0.57, "grad_norm": 1.1034892720269223, "learning_rate": 4.190913655591044e-06, "loss": 0.5935, "step": 8830 }, { "epoch": 0.57, "grad_norm": 1.5089834510956444, "learning_rate": 4.189890778440454e-06, "loss": 0.713, "step": 8831 }, { "epoch": 0.57, "grad_norm": 1.870989810538701, "learning_rate": 4.188867936106658e-06, "loss": 0.762, "step": 8832 }, { "epoch": 0.57, "grad_norm": 1.5045630799914067, "learning_rate": 4.187845128633611e-06, "loss": 0.6162, "step": 8833 }, { "epoch": 0.57, "grad_norm": 1.5263324567947614, "learning_rate": 4.1868223560652746e-06, "loss": 0.7461, "step": 8834 }, { "epoch": 0.57, "grad_norm": 1.6245637667211594, "learning_rate": 4.185799618445605e-06, "loss": 0.7531, "step": 8835 }, { "epoch": 0.57, "grad_norm": 1.7623497046802443, "learning_rate": 4.184776915818557e-06, "loss": 0.8036, "step": 8836 }, { "epoch": 0.57, "grad_norm": 1.6719374000068632, "learning_rate": 4.1837542482280845e-06, "loss": 0.8183, "step": 8837 }, { "epoch": 0.57, "grad_norm": 1.065681043956496, "learning_rate": 4.182731615718138e-06, "loss": 0.5949, "step": 8838 }, { "epoch": 0.57, "grad_norm": 1.614629452710703, "learning_rate": 4.181709018332672e-06, "loss": 0.8045, "step": 8839 }, { "epoch": 0.57, "grad_norm": 1.677043647506664, "learning_rate": 4.18068645611563e-06, "loss": 0.6422, "step": 8840 }, { "epoch": 0.57, "grad_norm": 1.8444235803488072, "learning_rate": 4.179663929110964e-06, "loss": 0.7356, "step": 8841 }, { "epoch": 0.57, "grad_norm": 1.873669162998417, "learning_rate": 4.178641437362618e-06, "loss": 0.8059, "step": 8842 }, { "epoch": 0.57, "grad_norm": 1.6690959188498469, "learning_rate": 4.177618980914536e-06, "loss": 0.7407, "step": 8843 }, { "epoch": 0.57, "grad_norm": 1.0441647750509684, "learning_rate": 4.176596559810664e-06, "loss": 0.7826, "step": 8844 }, { "epoch": 0.57, "grad_norm": 1.9294075846796348, "learning_rate": 4.1755741740949405e-06, "loss": 0.7718, "step": 8845 }, { "epoch": 0.57, "grad_norm": 1.5584018273349654, "learning_rate": 4.174551823811308e-06, "loss": 0.6521, "step": 8846 }, { "epoch": 0.57, "grad_norm": 2.1406163276199566, "learning_rate": 4.173529509003702e-06, "loss": 0.6863, "step": 8847 }, { "epoch": 0.57, "grad_norm": 1.4986554895123754, "learning_rate": 4.172507229716063e-06, "loss": 0.785, "step": 8848 }, { "epoch": 0.57, "grad_norm": 1.8300871365635496, "learning_rate": 4.171484985992323e-06, "loss": 0.7392, "step": 8849 }, { "epoch": 0.57, "grad_norm": 1.7018510534196583, "learning_rate": 4.1704627778764175e-06, "loss": 0.8036, "step": 8850 }, { "epoch": 0.57, "grad_norm": 1.6318417664764042, "learning_rate": 4.169440605412278e-06, "loss": 0.6551, "step": 8851 }, { "epoch": 0.57, "grad_norm": 1.6815210381183303, "learning_rate": 4.168418468643836e-06, "loss": 0.7076, "step": 8852 }, { "epoch": 0.57, "grad_norm": 1.5803788360601567, "learning_rate": 4.167396367615023e-06, "loss": 0.7391, "step": 8853 }, { "epoch": 0.57, "grad_norm": 1.36343654260437, "learning_rate": 4.166374302369763e-06, "loss": 0.6091, "step": 8854 }, { "epoch": 0.57, "grad_norm": 1.6132984826220627, "learning_rate": 4.165352272951985e-06, "loss": 0.7193, "step": 8855 }, { "epoch": 0.57, "grad_norm": 1.7116687070717171, "learning_rate": 4.1643302794056105e-06, "loss": 0.6635, "step": 8856 }, { "epoch": 0.57, "grad_norm": 1.5295791102897436, "learning_rate": 4.163308321774566e-06, "loss": 0.7224, "step": 8857 }, { "epoch": 0.57, "grad_norm": 1.7786852391865333, "learning_rate": 4.162286400102771e-06, "loss": 0.6664, "step": 8858 }, { "epoch": 0.57, "grad_norm": 1.752401776333696, "learning_rate": 4.161264514434148e-06, "loss": 0.7566, "step": 8859 }, { "epoch": 0.57, "grad_norm": 1.6069722011912944, "learning_rate": 4.160242664812611e-06, "loss": 0.757, "step": 8860 }, { "epoch": 0.57, "grad_norm": 2.083671846594741, "learning_rate": 4.15922085128208e-06, "loss": 0.7362, "step": 8861 }, { "epoch": 0.57, "grad_norm": 1.531861831292611, "learning_rate": 4.158199073886473e-06, "loss": 0.6516, "step": 8862 }, { "epoch": 0.57, "grad_norm": 1.6800000197695693, "learning_rate": 4.157177332669698e-06, "loss": 0.791, "step": 8863 }, { "epoch": 0.57, "grad_norm": 1.7476886621365662, "learning_rate": 4.1561556276756725e-06, "loss": 0.7235, "step": 8864 }, { "epoch": 0.57, "grad_norm": 1.6543560515955562, "learning_rate": 4.155133958948302e-06, "loss": 0.649, "step": 8865 }, { "epoch": 0.57, "grad_norm": 1.450512789583996, "learning_rate": 4.154112326531502e-06, "loss": 0.7322, "step": 8866 }, { "epoch": 0.57, "grad_norm": 1.4961916153918775, "learning_rate": 4.153090730469174e-06, "loss": 0.7303, "step": 8867 }, { "epoch": 0.57, "grad_norm": 1.1570644417103713, "learning_rate": 4.1520691708052295e-06, "loss": 0.7484, "step": 8868 }, { "epoch": 0.57, "grad_norm": 1.5613408015859824, "learning_rate": 4.151047647583568e-06, "loss": 0.6811, "step": 8869 }, { "epoch": 0.57, "grad_norm": 1.5843590551480575, "learning_rate": 4.150026160848094e-06, "loss": 0.6853, "step": 8870 }, { "epoch": 0.57, "grad_norm": 1.0601381067558175, "learning_rate": 4.149004710642712e-06, "loss": 0.558, "step": 8871 }, { "epoch": 0.57, "grad_norm": 1.6532625385439859, "learning_rate": 4.147983297011318e-06, "loss": 0.8033, "step": 8872 }, { "epoch": 0.57, "grad_norm": 1.2572224079918142, "learning_rate": 4.146961919997813e-06, "loss": 0.589, "step": 8873 }, { "epoch": 0.57, "grad_norm": 2.6132566912706667, "learning_rate": 4.14594057964609e-06, "loss": 0.7478, "step": 8874 }, { "epoch": 0.57, "grad_norm": 1.5244685144095538, "learning_rate": 4.144919276000048e-06, "loss": 0.7426, "step": 8875 }, { "epoch": 0.57, "grad_norm": 1.296650355706745, "learning_rate": 4.143898009103578e-06, "loss": 0.6678, "step": 8876 }, { "epoch": 0.57, "grad_norm": 1.4860990556155949, "learning_rate": 4.142876779000573e-06, "loss": 0.6913, "step": 8877 }, { "epoch": 0.57, "grad_norm": 1.59818539700649, "learning_rate": 4.141855585734923e-06, "loss": 0.7236, "step": 8878 }, { "epoch": 0.57, "grad_norm": 1.5693497363696889, "learning_rate": 4.1408344293505154e-06, "loss": 0.7458, "step": 8879 }, { "epoch": 0.57, "grad_norm": 3.8635677754921396, "learning_rate": 4.13981330989124e-06, "loss": 0.6729, "step": 8880 }, { "epoch": 0.57, "grad_norm": 1.4496637188574115, "learning_rate": 4.13879222740098e-06, "loss": 0.6911, "step": 8881 }, { "epoch": 0.57, "grad_norm": 1.1879122800717576, "learning_rate": 4.1377711819236225e-06, "loss": 0.6585, "step": 8882 }, { "epoch": 0.57, "grad_norm": 1.7306850588113702, "learning_rate": 4.136750173503046e-06, "loss": 0.8075, "step": 8883 }, { "epoch": 0.57, "grad_norm": 1.6345225172567959, "learning_rate": 4.135729202183134e-06, "loss": 0.6384, "step": 8884 }, { "epoch": 0.57, "grad_norm": 1.595879356493694, "learning_rate": 4.134708268007764e-06, "loss": 0.8075, "step": 8885 }, { "epoch": 0.57, "grad_norm": 1.9240896996706884, "learning_rate": 4.133687371020815e-06, "loss": 0.7563, "step": 8886 }, { "epoch": 0.57, "grad_norm": 1.5824171371306326, "learning_rate": 4.132666511266162e-06, "loss": 0.6135, "step": 8887 }, { "epoch": 0.57, "grad_norm": 1.6962805378381929, "learning_rate": 4.131645688787679e-06, "loss": 0.7139, "step": 8888 }, { "epoch": 0.57, "grad_norm": 1.0399282210980147, "learning_rate": 4.130624903629242e-06, "loss": 0.6071, "step": 8889 }, { "epoch": 0.57, "grad_norm": 2.0198850371122794, "learning_rate": 4.129604155834718e-06, "loss": 0.7103, "step": 8890 }, { "epoch": 0.57, "grad_norm": 1.9302360739967848, "learning_rate": 4.12858344544798e-06, "loss": 0.8424, "step": 8891 }, { "epoch": 0.57, "grad_norm": 1.5690759944346064, "learning_rate": 4.127562772512893e-06, "loss": 0.7277, "step": 8892 }, { "epoch": 0.57, "grad_norm": 1.6334198849950274, "learning_rate": 4.126542137073325e-06, "loss": 0.7471, "step": 8893 }, { "epoch": 0.57, "grad_norm": 1.4465147965851333, "learning_rate": 4.12552153917314e-06, "loss": 0.7329, "step": 8894 }, { "epoch": 0.57, "grad_norm": 1.471217403368843, "learning_rate": 4.124500978856204e-06, "loss": 0.7511, "step": 8895 }, { "epoch": 0.57, "grad_norm": 1.795550757223962, "learning_rate": 4.123480456166374e-06, "loss": 0.836, "step": 8896 }, { "epoch": 0.57, "grad_norm": 1.8589111195647356, "learning_rate": 4.122459971147514e-06, "loss": 0.7148, "step": 8897 }, { "epoch": 0.57, "grad_norm": 1.8532722590602764, "learning_rate": 4.121439523843478e-06, "loss": 0.7584, "step": 8898 }, { "epoch": 0.57, "grad_norm": 1.5263163435262992, "learning_rate": 4.120419114298127e-06, "loss": 0.7604, "step": 8899 }, { "epoch": 0.57, "grad_norm": 1.4349550118208083, "learning_rate": 4.119398742555314e-06, "loss": 0.8186, "step": 8900 }, { "epoch": 0.57, "grad_norm": 1.8094349211491692, "learning_rate": 4.118378408658891e-06, "loss": 0.6911, "step": 8901 }, { "epoch": 0.57, "grad_norm": 1.7542480448894837, "learning_rate": 4.117358112652714e-06, "loss": 0.7792, "step": 8902 }, { "epoch": 0.57, "grad_norm": 1.8136932201083826, "learning_rate": 4.1163378545806286e-06, "loss": 0.8644, "step": 8903 }, { "epoch": 0.57, "grad_norm": 1.6803653191946992, "learning_rate": 4.115317634486488e-06, "loss": 0.6746, "step": 8904 }, { "epoch": 0.57, "grad_norm": 1.6056798926426352, "learning_rate": 4.114297452414133e-06, "loss": 0.6938, "step": 8905 }, { "epoch": 0.57, "grad_norm": 1.650872543982009, "learning_rate": 4.113277308407415e-06, "loss": 0.7607, "step": 8906 }, { "epoch": 0.57, "grad_norm": 1.2499355394816958, "learning_rate": 4.112257202510173e-06, "loss": 0.5963, "step": 8907 }, { "epoch": 0.57, "grad_norm": 1.8116943092820192, "learning_rate": 4.111237134766251e-06, "loss": 0.6832, "step": 8908 }, { "epoch": 0.57, "grad_norm": 1.706198808348455, "learning_rate": 4.110217105219492e-06, "loss": 0.8759, "step": 8909 }, { "epoch": 0.57, "grad_norm": 1.4598508152957377, "learning_rate": 4.10919711391373e-06, "loss": 0.5871, "step": 8910 }, { "epoch": 0.57, "grad_norm": 1.9213134838611712, "learning_rate": 4.108177160892807e-06, "loss": 0.7758, "step": 8911 }, { "epoch": 0.57, "grad_norm": 1.6050437544716356, "learning_rate": 4.107157246200552e-06, "loss": 0.79, "step": 8912 }, { "epoch": 0.57, "grad_norm": 1.72585385210603, "learning_rate": 4.106137369880804e-06, "loss": 0.6252, "step": 8913 }, { "epoch": 0.57, "grad_norm": 1.5185524087520306, "learning_rate": 4.105117531977393e-06, "loss": 0.7149, "step": 8914 }, { "epoch": 0.57, "grad_norm": 1.4029153747504894, "learning_rate": 4.104097732534153e-06, "loss": 0.7588, "step": 8915 }, { "epoch": 0.57, "grad_norm": 1.9026070134151511, "learning_rate": 4.103077971594906e-06, "loss": 0.6263, "step": 8916 }, { "epoch": 0.57, "grad_norm": 1.0835809524233782, "learning_rate": 4.102058249203483e-06, "loss": 0.7145, "step": 8917 }, { "epoch": 0.57, "grad_norm": 1.6264277114419246, "learning_rate": 4.101038565403713e-06, "loss": 0.8734, "step": 8918 }, { "epoch": 0.57, "grad_norm": 2.0509942879783947, "learning_rate": 4.1000189202394144e-06, "loss": 0.7323, "step": 8919 }, { "epoch": 0.57, "grad_norm": 1.7695878573063304, "learning_rate": 4.098999313754413e-06, "loss": 0.8837, "step": 8920 }, { "epoch": 0.57, "grad_norm": 1.8463179027791314, "learning_rate": 4.097979745992526e-06, "loss": 0.7498, "step": 8921 }, { "epoch": 0.57, "grad_norm": 1.5461906850012188, "learning_rate": 4.096960216997575e-06, "loss": 0.6807, "step": 8922 }, { "epoch": 0.57, "grad_norm": 1.825556339224541, "learning_rate": 4.095940726813375e-06, "loss": 0.6967, "step": 8923 }, { "epoch": 0.57, "grad_norm": 1.705956340399083, "learning_rate": 4.094921275483745e-06, "loss": 0.8261, "step": 8924 }, { "epoch": 0.57, "grad_norm": 1.6532268251902558, "learning_rate": 4.093901863052495e-06, "loss": 0.6986, "step": 8925 }, { "epoch": 0.57, "grad_norm": 1.9119837347051882, "learning_rate": 4.092882489563439e-06, "loss": 0.744, "step": 8926 }, { "epoch": 0.57, "grad_norm": 1.75192768130218, "learning_rate": 4.091863155060389e-06, "loss": 0.6826, "step": 8927 }, { "epoch": 0.57, "grad_norm": 1.4887564470660422, "learning_rate": 4.090843859587151e-06, "loss": 0.6962, "step": 8928 }, { "epoch": 0.57, "grad_norm": 2.1789071931014092, "learning_rate": 4.0898246031875346e-06, "loss": 0.7671, "step": 8929 }, { "epoch": 0.57, "grad_norm": 1.8496777961268454, "learning_rate": 4.088805385905342e-06, "loss": 0.7482, "step": 8930 }, { "epoch": 0.57, "grad_norm": 1.6706876657193555, "learning_rate": 4.087786207784383e-06, "loss": 0.6728, "step": 8931 }, { "epoch": 0.57, "grad_norm": 1.9443050298125943, "learning_rate": 4.086767068868453e-06, "loss": 1.0645, "step": 8932 }, { "epoch": 0.57, "grad_norm": 1.6938188628308213, "learning_rate": 4.085747969201357e-06, "loss": 0.7958, "step": 8933 }, { "epoch": 0.57, "grad_norm": 2.6161961824994933, "learning_rate": 4.084728908826891e-06, "loss": 0.7432, "step": 8934 }, { "epoch": 0.57, "grad_norm": 1.7810655828246764, "learning_rate": 4.083709887788852e-06, "loss": 0.6386, "step": 8935 }, { "epoch": 0.57, "grad_norm": 1.6819831576365885, "learning_rate": 4.082690906131039e-06, "loss": 0.7972, "step": 8936 }, { "epoch": 0.57, "grad_norm": 1.6998008911845273, "learning_rate": 4.081671963897241e-06, "loss": 0.7138, "step": 8937 }, { "epoch": 0.57, "grad_norm": 1.701571353344424, "learning_rate": 4.080653061131256e-06, "loss": 0.783, "step": 8938 }, { "epoch": 0.57, "grad_norm": 2.0476874292198017, "learning_rate": 4.079634197876867e-06, "loss": 0.7149, "step": 8939 }, { "epoch": 0.57, "grad_norm": 1.4738481089302926, "learning_rate": 4.07861537417787e-06, "loss": 0.7488, "step": 8940 }, { "epoch": 0.57, "grad_norm": 1.6681883638481194, "learning_rate": 4.077596590078044e-06, "loss": 0.7729, "step": 8941 }, { "epoch": 0.57, "grad_norm": 2.372489525439508, "learning_rate": 4.076577845621181e-06, "loss": 0.8507, "step": 8942 }, { "epoch": 0.57, "grad_norm": 2.0466394861852013, "learning_rate": 4.075559140851061e-06, "loss": 0.6551, "step": 8943 }, { "epoch": 0.57, "grad_norm": 1.552324752458273, "learning_rate": 4.0745404758114644e-06, "loss": 0.7716, "step": 8944 }, { "epoch": 0.57, "grad_norm": 1.8186483623973475, "learning_rate": 4.073521850546177e-06, "loss": 0.6476, "step": 8945 }, { "epoch": 0.57, "grad_norm": 1.6609379830393411, "learning_rate": 4.072503265098972e-06, "loss": 0.7638, "step": 8946 }, { "epoch": 0.57, "grad_norm": 1.0291762278523537, "learning_rate": 4.07148471951363e-06, "loss": 0.6421, "step": 8947 }, { "epoch": 0.57, "grad_norm": 3.3554187035379464, "learning_rate": 4.0704662138339204e-06, "loss": 0.8632, "step": 8948 }, { "epoch": 0.57, "grad_norm": 1.512540437738859, "learning_rate": 4.069447748103621e-06, "loss": 0.607, "step": 8949 }, { "epoch": 0.57, "grad_norm": 1.5385385986308147, "learning_rate": 4.068429322366502e-06, "loss": 0.6854, "step": 8950 }, { "epoch": 0.57, "grad_norm": 1.108468832969952, "learning_rate": 4.067410936666335e-06, "loss": 0.589, "step": 8951 }, { "epoch": 0.57, "grad_norm": 1.699284718211427, "learning_rate": 4.066392591046883e-06, "loss": 0.6736, "step": 8952 }, { "epoch": 0.57, "grad_norm": 1.7762334959489847, "learning_rate": 4.065374285551917e-06, "loss": 0.6811, "step": 8953 }, { "epoch": 0.57, "grad_norm": 1.6866668517670902, "learning_rate": 4.064356020225202e-06, "loss": 0.6925, "step": 8954 }, { "epoch": 0.57, "grad_norm": 1.7285015148387624, "learning_rate": 4.063337795110497e-06, "loss": 0.7781, "step": 8955 }, { "epoch": 0.57, "grad_norm": 1.7029496263134314, "learning_rate": 4.062319610251566e-06, "loss": 0.7032, "step": 8956 }, { "epoch": 0.57, "grad_norm": 1.6818018702776254, "learning_rate": 4.0613014656921675e-06, "loss": 0.8168, "step": 8957 }, { "epoch": 0.57, "grad_norm": 2.055403015666352, "learning_rate": 4.0602833614760605e-06, "loss": 0.6671, "step": 8958 }, { "epoch": 0.57, "grad_norm": 1.041214051245651, "learning_rate": 4.059265297646999e-06, "loss": 0.8002, "step": 8959 }, { "epoch": 0.57, "grad_norm": 1.4042882691817828, "learning_rate": 4.05824727424874e-06, "loss": 0.6707, "step": 8960 }, { "epoch": 0.57, "grad_norm": 1.7373088102640548, "learning_rate": 4.057229291325032e-06, "loss": 0.7866, "step": 8961 }, { "epoch": 0.57, "grad_norm": 1.6420374427960072, "learning_rate": 4.056211348919629e-06, "loss": 0.6868, "step": 8962 }, { "epoch": 0.57, "grad_norm": 1.5972273443353249, "learning_rate": 4.05519344707628e-06, "loss": 0.6891, "step": 8963 }, { "epoch": 0.57, "grad_norm": 1.8322685702906378, "learning_rate": 4.054175585838729e-06, "loss": 0.7963, "step": 8964 }, { "epoch": 0.57, "grad_norm": 1.658465281681418, "learning_rate": 4.0531577652507256e-06, "loss": 0.6979, "step": 8965 }, { "epoch": 0.57, "grad_norm": 1.5983090150115404, "learning_rate": 4.0521399853560116e-06, "loss": 0.8039, "step": 8966 }, { "epoch": 0.57, "grad_norm": 1.1944051525003863, "learning_rate": 4.05112224619833e-06, "loss": 0.72, "step": 8967 }, { "epoch": 0.57, "grad_norm": 1.1881690291180214, "learning_rate": 4.050104547821419e-06, "loss": 0.6391, "step": 8968 }, { "epoch": 0.57, "grad_norm": 1.110112468862252, "learning_rate": 4.049086890269021e-06, "loss": 0.595, "step": 8969 }, { "epoch": 0.57, "grad_norm": 1.7597195022252206, "learning_rate": 4.048069273584867e-06, "loss": 0.7802, "step": 8970 }, { "epoch": 0.57, "grad_norm": 1.5366159291138435, "learning_rate": 4.047051697812697e-06, "loss": 0.8671, "step": 8971 }, { "epoch": 0.57, "grad_norm": 1.9815217554136906, "learning_rate": 4.046034162996242e-06, "loss": 0.9072, "step": 8972 }, { "epoch": 0.57, "grad_norm": 1.6405621090375326, "learning_rate": 4.0450166691792335e-06, "loss": 0.7762, "step": 8973 }, { "epoch": 0.57, "grad_norm": 1.9755162019093142, "learning_rate": 4.043999216405405e-06, "loss": 0.6924, "step": 8974 }, { "epoch": 0.57, "grad_norm": 1.962381921519548, "learning_rate": 4.042981804718478e-06, "loss": 0.8023, "step": 8975 }, { "epoch": 0.57, "grad_norm": 1.83061419194865, "learning_rate": 4.041964434162184e-06, "loss": 0.7614, "step": 8976 }, { "epoch": 0.57, "grad_norm": 1.874802116461584, "learning_rate": 4.040947104780244e-06, "loss": 0.8108, "step": 8977 }, { "epoch": 0.57, "grad_norm": 1.7219506520254364, "learning_rate": 4.039929816616383e-06, "loss": 0.805, "step": 8978 }, { "epoch": 0.57, "grad_norm": 1.8316859654968354, "learning_rate": 4.03891256971432e-06, "loss": 0.6363, "step": 8979 }, { "epoch": 0.57, "grad_norm": 2.06186799704325, "learning_rate": 4.037895364117778e-06, "loss": 0.7081, "step": 8980 }, { "epoch": 0.57, "grad_norm": 1.5831028893568317, "learning_rate": 4.036878199870469e-06, "loss": 0.695, "step": 8981 }, { "epoch": 0.57, "grad_norm": 1.9807269296017864, "learning_rate": 4.035861077016111e-06, "loss": 0.7799, "step": 8982 }, { "epoch": 0.57, "grad_norm": 1.2844978153403515, "learning_rate": 4.034843995598421e-06, "loss": 0.8105, "step": 8983 }, { "epoch": 0.58, "grad_norm": 1.1748322199974184, "learning_rate": 4.033826955661106e-06, "loss": 0.6638, "step": 8984 }, { "epoch": 0.58, "grad_norm": 1.2327996948378728, "learning_rate": 4.032809957247878e-06, "loss": 0.632, "step": 8985 }, { "epoch": 0.58, "grad_norm": 2.0469236967281335, "learning_rate": 4.031793000402445e-06, "loss": 0.7503, "step": 8986 }, { "epoch": 0.58, "grad_norm": 1.5584501082276245, "learning_rate": 4.030776085168516e-06, "loss": 0.7852, "step": 8987 }, { "epoch": 0.58, "grad_norm": 1.7641636739037843, "learning_rate": 4.029759211589794e-06, "loss": 0.6584, "step": 8988 }, { "epoch": 0.58, "grad_norm": 1.166428507825299, "learning_rate": 4.028742379709982e-06, "loss": 0.7162, "step": 8989 }, { "epoch": 0.58, "grad_norm": 1.0738977179411038, "learning_rate": 4.0277255895727814e-06, "loss": 0.6557, "step": 8990 }, { "epoch": 0.58, "grad_norm": 1.1994318688059686, "learning_rate": 4.0267088412218906e-06, "loss": 0.6019, "step": 8991 }, { "epoch": 0.58, "grad_norm": 2.332506284654472, "learning_rate": 4.025692134701011e-06, "loss": 0.6793, "step": 8992 }, { "epoch": 0.58, "grad_norm": 1.5523829607626405, "learning_rate": 4.024675470053836e-06, "loss": 0.8342, "step": 8993 }, { "epoch": 0.58, "grad_norm": 1.5315405497842047, "learning_rate": 4.023658847324058e-06, "loss": 0.8051, "step": 8994 }, { "epoch": 0.58, "grad_norm": 1.5766042668887772, "learning_rate": 4.0226422665553724e-06, "loss": 0.6993, "step": 8995 }, { "epoch": 0.58, "grad_norm": 1.5787028082633194, "learning_rate": 4.02162572779147e-06, "loss": 0.7316, "step": 8996 }, { "epoch": 0.58, "grad_norm": 1.6328726476266193, "learning_rate": 4.020609231076035e-06, "loss": 0.6458, "step": 8997 }, { "epoch": 0.58, "grad_norm": 1.6001901232945666, "learning_rate": 4.019592776452759e-06, "loss": 0.7178, "step": 8998 }, { "epoch": 0.58, "grad_norm": 1.8207460419939763, "learning_rate": 4.018576363965324e-06, "loss": 0.6894, "step": 8999 }, { "epoch": 0.58, "grad_norm": 1.9862409955153753, "learning_rate": 4.017559993657416e-06, "loss": 0.7188, "step": 9000 }, { "epoch": 0.58, "grad_norm": 2.2171556145571705, "learning_rate": 4.0165436655727144e-06, "loss": 0.6354, "step": 9001 }, { "epoch": 0.58, "grad_norm": 1.6566058140031539, "learning_rate": 4.015527379754899e-06, "loss": 0.8184, "step": 9002 }, { "epoch": 0.58, "grad_norm": 1.737434056774397, "learning_rate": 4.01451113624765e-06, "loss": 0.7184, "step": 9003 }, { "epoch": 0.58, "grad_norm": 1.9262292794943965, "learning_rate": 4.01349493509464e-06, "loss": 0.7083, "step": 9004 }, { "epoch": 0.58, "grad_norm": 1.4873016740358387, "learning_rate": 4.012478776339547e-06, "loss": 0.662, "step": 9005 }, { "epoch": 0.58, "grad_norm": 1.6357666861919118, "learning_rate": 4.011462660026038e-06, "loss": 0.7485, "step": 9006 }, { "epoch": 0.58, "grad_norm": 3.213142695212908, "learning_rate": 4.010446586197788e-06, "loss": 0.8338, "step": 9007 }, { "epoch": 0.58, "grad_norm": 1.4965062874221577, "learning_rate": 4.009430554898464e-06, "loss": 0.8064, "step": 9008 }, { "epoch": 0.58, "grad_norm": 1.8275591202023675, "learning_rate": 4.008414566171733e-06, "loss": 0.7612, "step": 9009 }, { "epoch": 0.58, "grad_norm": 2.029863183530542, "learning_rate": 4.007398620061262e-06, "loss": 0.7684, "step": 9010 }, { "epoch": 0.58, "grad_norm": 1.747395460908884, "learning_rate": 4.006382716610711e-06, "loss": 0.8207, "step": 9011 }, { "epoch": 0.58, "grad_norm": 1.6011748054175885, "learning_rate": 4.0053668558637444e-06, "loss": 0.8355, "step": 9012 }, { "epoch": 0.58, "grad_norm": 1.848100964102045, "learning_rate": 4.00435103786402e-06, "loss": 0.6863, "step": 9013 }, { "epoch": 0.58, "grad_norm": 1.7202208615821681, "learning_rate": 4.003335262655195e-06, "loss": 0.7025, "step": 9014 }, { "epoch": 0.58, "grad_norm": 2.052606872214128, "learning_rate": 4.002319530280925e-06, "loss": 0.8353, "step": 9015 }, { "epoch": 0.58, "grad_norm": 2.21721047918199, "learning_rate": 4.001303840784868e-06, "loss": 0.6442, "step": 9016 }, { "epoch": 0.58, "grad_norm": 1.5006760639236645, "learning_rate": 4.000288194210671e-06, "loss": 0.6357, "step": 9017 }, { "epoch": 0.58, "grad_norm": 1.7251153736587372, "learning_rate": 3.999272590601988e-06, "loss": 0.7757, "step": 9018 }, { "epoch": 0.58, "grad_norm": 1.6894720364381626, "learning_rate": 3.998257030002466e-06, "loss": 0.7545, "step": 9019 }, { "epoch": 0.58, "grad_norm": 1.854224032665534, "learning_rate": 3.9972415124557514e-06, "loss": 0.6587, "step": 9020 }, { "epoch": 0.58, "grad_norm": 1.7530037874683415, "learning_rate": 3.996226038005491e-06, "loss": 0.801, "step": 9021 }, { "epoch": 0.58, "grad_norm": 1.1803234965774487, "learning_rate": 3.995210606695324e-06, "loss": 0.6978, "step": 9022 }, { "epoch": 0.58, "grad_norm": 1.5128121038258497, "learning_rate": 3.994195218568896e-06, "loss": 0.6455, "step": 9023 }, { "epoch": 0.58, "grad_norm": 1.4551395490034775, "learning_rate": 3.9931798736698424e-06, "loss": 0.6941, "step": 9024 }, { "epoch": 0.58, "grad_norm": 1.6953770236552226, "learning_rate": 3.992164572041805e-06, "loss": 0.619, "step": 9025 }, { "epoch": 0.58, "grad_norm": 1.1363864797029306, "learning_rate": 3.991149313728413e-06, "loss": 0.7349, "step": 9026 }, { "epoch": 0.58, "grad_norm": 1.8055600032063617, "learning_rate": 3.990134098773304e-06, "loss": 0.7246, "step": 9027 }, { "epoch": 0.58, "grad_norm": 1.6689687212165232, "learning_rate": 3.989118927220111e-06, "loss": 0.6621, "step": 9028 }, { "epoch": 0.58, "grad_norm": 1.6864736725532912, "learning_rate": 3.988103799112461e-06, "loss": 0.6752, "step": 9029 }, { "epoch": 0.58, "grad_norm": 1.1745196677837375, "learning_rate": 3.987088714493985e-06, "loss": 0.6435, "step": 9030 }, { "epoch": 0.58, "grad_norm": 1.9571654706165489, "learning_rate": 3.986073673408306e-06, "loss": 0.8315, "step": 9031 }, { "epoch": 0.58, "grad_norm": 1.6925074125239632, "learning_rate": 3.985058675899052e-06, "loss": 0.7847, "step": 9032 }, { "epoch": 0.58, "grad_norm": 1.712263934573409, "learning_rate": 3.984043722009842e-06, "loss": 0.7087, "step": 9033 }, { "epoch": 0.58, "grad_norm": 1.4663667323763505, "learning_rate": 3.983028811784298e-06, "loss": 0.6953, "step": 9034 }, { "epoch": 0.58, "grad_norm": 1.639657396408103, "learning_rate": 3.98201394526604e-06, "loss": 0.7739, "step": 9035 }, { "epoch": 0.58, "grad_norm": 2.07510706963125, "learning_rate": 3.980999122498681e-06, "loss": 0.8149, "step": 9036 }, { "epoch": 0.58, "grad_norm": 1.6170144971148568, "learning_rate": 3.97998434352584e-06, "loss": 0.8069, "step": 9037 }, { "epoch": 0.58, "grad_norm": 1.7737209219998853, "learning_rate": 3.978969608391128e-06, "loss": 0.734, "step": 9038 }, { "epoch": 0.58, "grad_norm": 1.6904436125698548, "learning_rate": 3.977954917138158e-06, "loss": 0.7266, "step": 9039 }, { "epoch": 0.58, "grad_norm": 1.826217253031413, "learning_rate": 3.976940269810537e-06, "loss": 0.685, "step": 9040 }, { "epoch": 0.58, "grad_norm": 1.1696829280777639, "learning_rate": 3.975925666451875e-06, "loss": 0.6913, "step": 9041 }, { "epoch": 0.58, "grad_norm": 2.0889598689270414, "learning_rate": 3.9749111071057745e-06, "loss": 0.7012, "step": 9042 }, { "epoch": 0.58, "grad_norm": 1.9445148741072533, "learning_rate": 3.973896591815841e-06, "loss": 0.7651, "step": 9043 }, { "epoch": 0.58, "grad_norm": 1.8172135771152287, "learning_rate": 3.972882120625675e-06, "loss": 0.8556, "step": 9044 }, { "epoch": 0.58, "grad_norm": 2.071728498609928, "learning_rate": 3.971867693578879e-06, "loss": 0.7873, "step": 9045 }, { "epoch": 0.58, "grad_norm": 1.4395597149760742, "learning_rate": 3.970853310719047e-06, "loss": 0.6297, "step": 9046 }, { "epoch": 0.58, "grad_norm": 1.5868700035266787, "learning_rate": 3.969838972089778e-06, "loss": 0.7051, "step": 9047 }, { "epoch": 0.58, "grad_norm": 1.6088788703292847, "learning_rate": 3.968824677734667e-06, "loss": 0.7193, "step": 9048 }, { "epoch": 0.58, "grad_norm": 1.670891704846005, "learning_rate": 3.967810427697301e-06, "loss": 0.6992, "step": 9049 }, { "epoch": 0.58, "grad_norm": 1.7014158958655505, "learning_rate": 3.9667962220212765e-06, "loss": 0.789, "step": 9050 }, { "epoch": 0.58, "grad_norm": 1.6415174098742653, "learning_rate": 3.965782060750178e-06, "loss": 0.7314, "step": 9051 }, { "epoch": 0.58, "grad_norm": 1.7127368958482774, "learning_rate": 3.964767943927596e-06, "loss": 0.6517, "step": 9052 }, { "epoch": 0.58, "grad_norm": 1.6022846543641145, "learning_rate": 3.96375387159711e-06, "loss": 0.6904, "step": 9053 }, { "epoch": 0.58, "grad_norm": 1.9722925756090472, "learning_rate": 3.962739843802307e-06, "loss": 0.725, "step": 9054 }, { "epoch": 0.58, "grad_norm": 1.6773509593146123, "learning_rate": 3.961725860586763e-06, "loss": 0.6659, "step": 9055 }, { "epoch": 0.58, "grad_norm": 1.0343615837387883, "learning_rate": 3.960711921994061e-06, "loss": 0.5623, "step": 9056 }, { "epoch": 0.58, "grad_norm": 1.7456263633572302, "learning_rate": 3.9596980280677775e-06, "loss": 0.7297, "step": 9057 }, { "epoch": 0.58, "grad_norm": 1.1452430390926542, "learning_rate": 3.958684178851486e-06, "loss": 0.7215, "step": 9058 }, { "epoch": 0.58, "grad_norm": 1.8213567470111092, "learning_rate": 3.957670374388762e-06, "loss": 0.7234, "step": 9059 }, { "epoch": 0.58, "grad_norm": 1.4346096132935156, "learning_rate": 3.956656614723173e-06, "loss": 0.6086, "step": 9060 }, { "epoch": 0.58, "grad_norm": 1.5737046926337854, "learning_rate": 3.955642899898293e-06, "loss": 0.7733, "step": 9061 }, { "epoch": 0.58, "grad_norm": 1.280361542497041, "learning_rate": 3.954629229957684e-06, "loss": 0.7172, "step": 9062 }, { "epoch": 0.58, "grad_norm": 1.6684201479381633, "learning_rate": 3.953615604944915e-06, "loss": 0.7444, "step": 9063 }, { "epoch": 0.58, "grad_norm": 1.8599701340719397, "learning_rate": 3.952602024903548e-06, "loss": 0.6975, "step": 9064 }, { "epoch": 0.58, "grad_norm": 1.654734813852914, "learning_rate": 3.9515884898771455e-06, "loss": 0.7811, "step": 9065 }, { "epoch": 0.58, "grad_norm": 1.6719334747001233, "learning_rate": 3.950574999909267e-06, "loss": 0.8176, "step": 9066 }, { "epoch": 0.58, "grad_norm": 1.509130904744816, "learning_rate": 3.949561555043469e-06, "loss": 0.6805, "step": 9067 }, { "epoch": 0.58, "grad_norm": 1.1132641536861037, "learning_rate": 3.94854815532331e-06, "loss": 0.6014, "step": 9068 }, { "epoch": 0.58, "grad_norm": 1.0589096106615985, "learning_rate": 3.94753480079234e-06, "loss": 0.6686, "step": 9069 }, { "epoch": 0.58, "grad_norm": 1.1988797324067033, "learning_rate": 3.9465214914941145e-06, "loss": 0.7112, "step": 9070 }, { "epoch": 0.58, "grad_norm": 1.612591601045913, "learning_rate": 3.94550822747218e-06, "loss": 0.6578, "step": 9071 }, { "epoch": 0.58, "grad_norm": 1.5851319660468841, "learning_rate": 3.9444950087700866e-06, "loss": 0.7625, "step": 9072 }, { "epoch": 0.58, "grad_norm": 2.484173839396031, "learning_rate": 3.943481835431379e-06, "loss": 0.8792, "step": 9073 }, { "epoch": 0.58, "grad_norm": 1.7970086879047014, "learning_rate": 3.942468707499603e-06, "loss": 0.7424, "step": 9074 }, { "epoch": 0.58, "grad_norm": 1.4652288180851414, "learning_rate": 3.9414556250183e-06, "loss": 0.7458, "step": 9075 }, { "epoch": 0.58, "grad_norm": 1.7199195168102024, "learning_rate": 3.940442588031009e-06, "loss": 0.8124, "step": 9076 }, { "epoch": 0.58, "grad_norm": 1.5077348933447248, "learning_rate": 3.939429596581271e-06, "loss": 0.7428, "step": 9077 }, { "epoch": 0.58, "grad_norm": 1.6803221117504512, "learning_rate": 3.9384166507126185e-06, "loss": 0.7266, "step": 9078 }, { "epoch": 0.58, "grad_norm": 1.6710396037242972, "learning_rate": 3.937403750468588e-06, "loss": 0.7456, "step": 9079 }, { "epoch": 0.58, "grad_norm": 1.57015159999995, "learning_rate": 3.936390895892711e-06, "loss": 0.7815, "step": 9080 }, { "epoch": 0.58, "grad_norm": 1.013553111499555, "learning_rate": 3.935378087028521e-06, "loss": 0.7543, "step": 9081 }, { "epoch": 0.58, "grad_norm": 1.5820277812409052, "learning_rate": 3.93436532391954e-06, "loss": 0.7002, "step": 9082 }, { "epoch": 0.58, "grad_norm": 1.0535184021258839, "learning_rate": 3.9333526066093e-06, "loss": 0.5803, "step": 9083 }, { "epoch": 0.58, "grad_norm": 1.6875570627810192, "learning_rate": 3.932339935141324e-06, "loss": 0.7582, "step": 9084 }, { "epoch": 0.58, "grad_norm": 0.993518506702433, "learning_rate": 3.931327309559133e-06, "loss": 0.6367, "step": 9085 }, { "epoch": 0.58, "grad_norm": 1.8794064992026587, "learning_rate": 3.93031472990625e-06, "loss": 0.7394, "step": 9086 }, { "epoch": 0.58, "grad_norm": 1.826883752895906, "learning_rate": 3.9293021962261906e-06, "loss": 0.8317, "step": 9087 }, { "epoch": 0.58, "grad_norm": 1.1335355687396962, "learning_rate": 3.928289708562475e-06, "loss": 0.7117, "step": 9088 }, { "epoch": 0.58, "grad_norm": 1.2292997327064399, "learning_rate": 3.927277266958614e-06, "loss": 0.6545, "step": 9089 }, { "epoch": 0.58, "grad_norm": 1.2284721420711122, "learning_rate": 3.926264871458124e-06, "loss": 0.612, "step": 9090 }, { "epoch": 0.58, "grad_norm": 1.5814548895657898, "learning_rate": 3.925252522104512e-06, "loss": 0.7732, "step": 9091 }, { "epoch": 0.58, "grad_norm": 1.2675990611423888, "learning_rate": 3.924240218941288e-06, "loss": 0.7686, "step": 9092 }, { "epoch": 0.58, "grad_norm": 1.5697758261608352, "learning_rate": 3.923227962011959e-06, "loss": 0.6534, "step": 9093 }, { "epoch": 0.58, "grad_norm": 1.5382287288065912, "learning_rate": 3.922215751360029e-06, "loss": 0.7041, "step": 9094 }, { "epoch": 0.58, "grad_norm": 1.437101870449229, "learning_rate": 3.9212035870290035e-06, "loss": 0.6091, "step": 9095 }, { "epoch": 0.58, "grad_norm": 1.1282465588261146, "learning_rate": 3.9201914690623785e-06, "loss": 0.7987, "step": 9096 }, { "epoch": 0.58, "grad_norm": 1.8798784654755383, "learning_rate": 3.919179397503659e-06, "loss": 0.6828, "step": 9097 }, { "epoch": 0.58, "grad_norm": 1.2541190233669819, "learning_rate": 3.918167372396333e-06, "loss": 0.664, "step": 9098 }, { "epoch": 0.58, "grad_norm": 1.1898480232878956, "learning_rate": 3.917155393783903e-06, "loss": 0.6705, "step": 9099 }, { "epoch": 0.58, "grad_norm": 1.7555833881418443, "learning_rate": 3.916143461709857e-06, "loss": 0.7581, "step": 9100 }, { "epoch": 0.58, "grad_norm": 0.9952563059942966, "learning_rate": 3.9151315762176885e-06, "loss": 0.585, "step": 9101 }, { "epoch": 0.58, "grad_norm": 1.6967446529200367, "learning_rate": 3.914119737350886e-06, "loss": 0.7869, "step": 9102 }, { "epoch": 0.58, "grad_norm": 3.0949837659783923, "learning_rate": 3.9131079451529345e-06, "loss": 0.7159, "step": 9103 }, { "epoch": 0.58, "grad_norm": 1.0428303857424346, "learning_rate": 3.912096199667321e-06, "loss": 0.7773, "step": 9104 }, { "epoch": 0.58, "grad_norm": 3.0292478052502707, "learning_rate": 3.9110845009375255e-06, "loss": 0.7728, "step": 9105 }, { "epoch": 0.58, "grad_norm": 1.9709959158904846, "learning_rate": 3.9100728490070305e-06, "loss": 0.6545, "step": 9106 }, { "epoch": 0.58, "grad_norm": 4.603720077910678, "learning_rate": 3.909061243919313e-06, "loss": 0.7516, "step": 9107 }, { "epoch": 0.58, "grad_norm": 1.7483160449998958, "learning_rate": 3.908049685717854e-06, "loss": 0.7818, "step": 9108 }, { "epoch": 0.58, "grad_norm": 1.319005219827267, "learning_rate": 3.907038174446122e-06, "loss": 0.58, "step": 9109 }, { "epoch": 0.58, "grad_norm": 1.5551785302101107, "learning_rate": 3.906026710147592e-06, "loss": 0.7621, "step": 9110 }, { "epoch": 0.58, "grad_norm": 1.5148581611445813, "learning_rate": 3.905015292865738e-06, "loss": 0.7764, "step": 9111 }, { "epoch": 0.58, "grad_norm": 1.1553800556441787, "learning_rate": 3.904003922644024e-06, "loss": 0.6723, "step": 9112 }, { "epoch": 0.58, "grad_norm": 1.1257412268638605, "learning_rate": 3.902992599525921e-06, "loss": 0.6458, "step": 9113 }, { "epoch": 0.58, "grad_norm": 1.5931832746079608, "learning_rate": 3.901981323554887e-06, "loss": 0.8004, "step": 9114 }, { "epoch": 0.58, "grad_norm": 1.8483184426556485, "learning_rate": 3.900970094774391e-06, "loss": 0.7716, "step": 9115 }, { "epoch": 0.58, "grad_norm": 1.7786286598092196, "learning_rate": 3.899958913227889e-06, "loss": 0.7667, "step": 9116 }, { "epoch": 0.58, "grad_norm": 1.572987301788226, "learning_rate": 3.898947778958845e-06, "loss": 0.7901, "step": 9117 }, { "epoch": 0.58, "grad_norm": 1.9685225276517453, "learning_rate": 3.897936692010708e-06, "loss": 0.8664, "step": 9118 }, { "epoch": 0.58, "grad_norm": 1.5072983998960316, "learning_rate": 3.8969256524269395e-06, "loss": 0.7097, "step": 9119 }, { "epoch": 0.58, "grad_norm": 1.9855425021272999, "learning_rate": 3.8959146602509865e-06, "loss": 0.6028, "step": 9120 }, { "epoch": 0.58, "grad_norm": 1.9380619305931388, "learning_rate": 3.894903715526301e-06, "loss": 0.8122, "step": 9121 }, { "epoch": 0.58, "grad_norm": 1.2407630631790938, "learning_rate": 3.893892818296333e-06, "loss": 0.6936, "step": 9122 }, { "epoch": 0.58, "grad_norm": 0.9856175979277141, "learning_rate": 3.892881968604525e-06, "loss": 0.6155, "step": 9123 }, { "epoch": 0.58, "grad_norm": 1.7620419823138214, "learning_rate": 3.891871166494327e-06, "loss": 0.7368, "step": 9124 }, { "epoch": 0.58, "grad_norm": 1.445996379294662, "learning_rate": 3.890860412009176e-06, "loss": 0.8956, "step": 9125 }, { "epoch": 0.58, "grad_norm": 1.6370094634513477, "learning_rate": 3.889849705192515e-06, "loss": 0.8574, "step": 9126 }, { "epoch": 0.58, "grad_norm": 1.649190578078779, "learning_rate": 3.888839046087779e-06, "loss": 0.7143, "step": 9127 }, { "epoch": 0.58, "grad_norm": 1.7499642611870638, "learning_rate": 3.887828434738408e-06, "loss": 0.7247, "step": 9128 }, { "epoch": 0.58, "grad_norm": 2.2146144973951736, "learning_rate": 3.886817871187832e-06, "loss": 0.7448, "step": 9129 }, { "epoch": 0.58, "grad_norm": 1.8883209512468069, "learning_rate": 3.885807355479485e-06, "loss": 0.9037, "step": 9130 }, { "epoch": 0.58, "grad_norm": 1.4858738035838375, "learning_rate": 3.8847968876567985e-06, "loss": 0.6642, "step": 9131 }, { "epoch": 0.58, "grad_norm": 1.60830074569578, "learning_rate": 3.883786467763196e-06, "loss": 0.7416, "step": 9132 }, { "epoch": 0.58, "grad_norm": 1.6697673229427445, "learning_rate": 3.882776095842108e-06, "loss": 0.7228, "step": 9133 }, { "epoch": 0.58, "grad_norm": 1.5417176538115174, "learning_rate": 3.881765771936954e-06, "loss": 0.6601, "step": 9134 }, { "epoch": 0.58, "grad_norm": 1.7848626779224153, "learning_rate": 3.880755496091158e-06, "loss": 0.6945, "step": 9135 }, { "epoch": 0.58, "grad_norm": 1.6451263078756122, "learning_rate": 3.879745268348139e-06, "loss": 0.7387, "step": 9136 }, { "epoch": 0.58, "grad_norm": 1.5503803506872158, "learning_rate": 3.878735088751315e-06, "loss": 0.6867, "step": 9137 }, { "epoch": 0.58, "grad_norm": 1.7975679216466454, "learning_rate": 3.877724957344099e-06, "loss": 0.7248, "step": 9138 }, { "epoch": 0.58, "grad_norm": 1.8109119043606996, "learning_rate": 3.876714874169906e-06, "loss": 0.6897, "step": 9139 }, { "epoch": 0.59, "grad_norm": 1.7665323806282454, "learning_rate": 3.87570483927215e-06, "loss": 0.7073, "step": 9140 }, { "epoch": 0.59, "grad_norm": 1.5891117875303244, "learning_rate": 3.874694852694236e-06, "loss": 0.649, "step": 9141 }, { "epoch": 0.59, "grad_norm": 1.7930471674841486, "learning_rate": 3.873684914479572e-06, "loss": 0.7157, "step": 9142 }, { "epoch": 0.59, "grad_norm": 1.8201044832178452, "learning_rate": 3.8726750246715635e-06, "loss": 0.711, "step": 9143 }, { "epoch": 0.59, "grad_norm": 1.5480909608018651, "learning_rate": 3.871665183313617e-06, "loss": 0.734, "step": 9144 }, { "epoch": 0.59, "grad_norm": 1.821972818283944, "learning_rate": 3.870655390449126e-06, "loss": 0.6853, "step": 9145 }, { "epoch": 0.59, "grad_norm": 1.2350276081919243, "learning_rate": 3.869645646121496e-06, "loss": 0.5817, "step": 9146 }, { "epoch": 0.59, "grad_norm": 1.7199125851063048, "learning_rate": 3.868635950374119e-06, "loss": 0.7799, "step": 9147 }, { "epoch": 0.59, "grad_norm": 1.5242773832575487, "learning_rate": 3.867626303250392e-06, "loss": 0.779, "step": 9148 }, { "epoch": 0.59, "grad_norm": 1.6636732999512265, "learning_rate": 3.866616704793706e-06, "loss": 0.5958, "step": 9149 }, { "epoch": 0.59, "grad_norm": 1.8730332768679758, "learning_rate": 3.865607155047455e-06, "loss": 0.9105, "step": 9150 }, { "epoch": 0.59, "grad_norm": 1.6950228595214725, "learning_rate": 3.864597654055024e-06, "loss": 0.7494, "step": 9151 }, { "epoch": 0.59, "grad_norm": 1.8030175045931787, "learning_rate": 3.863588201859798e-06, "loss": 0.6763, "step": 9152 }, { "epoch": 0.59, "grad_norm": 1.7585049799833186, "learning_rate": 3.862578798505166e-06, "loss": 0.6897, "step": 9153 }, { "epoch": 0.59, "grad_norm": 1.6807733449710067, "learning_rate": 3.861569444034505e-06, "loss": 0.7548, "step": 9154 }, { "epoch": 0.59, "grad_norm": 1.9749118194270627, "learning_rate": 3.860560138491199e-06, "loss": 0.6898, "step": 9155 }, { "epoch": 0.59, "grad_norm": 1.632778323529785, "learning_rate": 3.859550881918622e-06, "loss": 0.6057, "step": 9156 }, { "epoch": 0.59, "grad_norm": 1.645324464496816, "learning_rate": 3.858541674360151e-06, "loss": 0.7266, "step": 9157 }, { "epoch": 0.59, "grad_norm": 1.2493822992324108, "learning_rate": 3.857532515859163e-06, "loss": 0.7128, "step": 9158 }, { "epoch": 0.59, "grad_norm": 1.5860419550265408, "learning_rate": 3.856523406459025e-06, "loss": 0.7131, "step": 9159 }, { "epoch": 0.59, "grad_norm": 1.8864699758745604, "learning_rate": 3.85551434620311e-06, "loss": 0.6626, "step": 9160 }, { "epoch": 0.59, "grad_norm": 1.8140452334671426, "learning_rate": 3.854505335134781e-06, "loss": 0.77, "step": 9161 }, { "epoch": 0.59, "grad_norm": 1.5595552355664792, "learning_rate": 3.853496373297408e-06, "loss": 0.7528, "step": 9162 }, { "epoch": 0.59, "grad_norm": 1.7100143889344699, "learning_rate": 3.852487460734351e-06, "loss": 0.7922, "step": 9163 }, { "epoch": 0.59, "grad_norm": 1.2041184343822742, "learning_rate": 3.8514785974889714e-06, "loss": 0.5961, "step": 9164 }, { "epoch": 0.59, "grad_norm": 1.639139213066658, "learning_rate": 3.850469783604628e-06, "loss": 0.7902, "step": 9165 }, { "epoch": 0.59, "grad_norm": 1.599531775885963, "learning_rate": 3.849461019124678e-06, "loss": 0.67, "step": 9166 }, { "epoch": 0.59, "grad_norm": 3.5197996982102584, "learning_rate": 3.848452304092477e-06, "loss": 0.6755, "step": 9167 }, { "epoch": 0.59, "grad_norm": 1.6275708231395971, "learning_rate": 3.8474436385513735e-06, "loss": 0.6737, "step": 9168 }, { "epoch": 0.59, "grad_norm": 1.6405602447587373, "learning_rate": 3.846435022544724e-06, "loss": 0.7129, "step": 9169 }, { "epoch": 0.59, "grad_norm": 1.709969802114766, "learning_rate": 3.845426456115871e-06, "loss": 0.8138, "step": 9170 }, { "epoch": 0.59, "grad_norm": 1.7417847513560607, "learning_rate": 3.844417939308163e-06, "loss": 0.7869, "step": 9171 }, { "epoch": 0.59, "grad_norm": 1.8298866510885736, "learning_rate": 3.8434094721649435e-06, "loss": 0.8379, "step": 9172 }, { "epoch": 0.59, "grad_norm": 1.5500668680671956, "learning_rate": 3.842401054729557e-06, "loss": 0.7477, "step": 9173 }, { "epoch": 0.59, "grad_norm": 1.6734179610383912, "learning_rate": 3.841392687045338e-06, "loss": 0.7132, "step": 9174 }, { "epoch": 0.59, "grad_norm": 1.752610617290361, "learning_rate": 3.840384369155626e-06, "loss": 0.8016, "step": 9175 }, { "epoch": 0.59, "grad_norm": 1.7550525810573903, "learning_rate": 3.83937610110376e-06, "loss": 0.7922, "step": 9176 }, { "epoch": 0.59, "grad_norm": 1.578911264898317, "learning_rate": 3.838367882933068e-06, "loss": 0.7266, "step": 9177 }, { "epoch": 0.59, "grad_norm": 1.8221782457908855, "learning_rate": 3.837359714686885e-06, "loss": 0.7778, "step": 9178 }, { "epoch": 0.59, "grad_norm": 1.6045893849882091, "learning_rate": 3.836351596408537e-06, "loss": 0.7956, "step": 9179 }, { "epoch": 0.59, "grad_norm": 1.8416223709535269, "learning_rate": 3.835343528141355e-06, "loss": 0.6517, "step": 9180 }, { "epoch": 0.59, "grad_norm": 1.610807028433722, "learning_rate": 3.834335509928658e-06, "loss": 0.7373, "step": 9181 }, { "epoch": 0.59, "grad_norm": 1.902016569303283, "learning_rate": 3.833327541813774e-06, "loss": 0.7276, "step": 9182 }, { "epoch": 0.59, "grad_norm": 1.2414579391636222, "learning_rate": 3.832319623840018e-06, "loss": 0.683, "step": 9183 }, { "epoch": 0.59, "grad_norm": 1.630666598592641, "learning_rate": 3.831311756050712e-06, "loss": 0.7801, "step": 9184 }, { "epoch": 0.59, "grad_norm": 1.531572315707126, "learning_rate": 3.830303938489172e-06, "loss": 0.6865, "step": 9185 }, { "epoch": 0.59, "grad_norm": 1.75437460447722, "learning_rate": 3.829296171198709e-06, "loss": 0.7523, "step": 9186 }, { "epoch": 0.59, "grad_norm": 1.529149545626621, "learning_rate": 3.8282884542226385e-06, "loss": 0.8437, "step": 9187 }, { "epoch": 0.59, "grad_norm": 1.7671658461601019, "learning_rate": 3.827280787604266e-06, "loss": 0.7472, "step": 9188 }, { "epoch": 0.59, "grad_norm": 2.5450393245696747, "learning_rate": 3.826273171386904e-06, "loss": 0.695, "step": 9189 }, { "epoch": 0.59, "grad_norm": 1.552789151826226, "learning_rate": 3.825265605613852e-06, "loss": 0.687, "step": 9190 }, { "epoch": 0.59, "grad_norm": 1.5799546238686624, "learning_rate": 3.8242580903284186e-06, "loss": 0.6403, "step": 9191 }, { "epoch": 0.59, "grad_norm": 1.8022153619206656, "learning_rate": 3.823250625573899e-06, "loss": 0.7736, "step": 9192 }, { "epoch": 0.59, "grad_norm": 1.7653495646321447, "learning_rate": 3.822243211393596e-06, "loss": 0.7093, "step": 9193 }, { "epoch": 0.59, "grad_norm": 1.3149328498906399, "learning_rate": 3.821235847830804e-06, "loss": 0.7345, "step": 9194 }, { "epoch": 0.59, "grad_norm": 1.670349008695074, "learning_rate": 3.820228534928818e-06, "loss": 0.7908, "step": 9195 }, { "epoch": 0.59, "grad_norm": 2.785170122401532, "learning_rate": 3.819221272730933e-06, "loss": 0.7523, "step": 9196 }, { "epoch": 0.59, "grad_norm": 1.7618254847263513, "learning_rate": 3.818214061280435e-06, "loss": 0.7979, "step": 9197 }, { "epoch": 0.59, "grad_norm": 1.973302574803982, "learning_rate": 3.817206900620615e-06, "loss": 0.6771, "step": 9198 }, { "epoch": 0.59, "grad_norm": 2.1662837306614438, "learning_rate": 3.816199790794754e-06, "loss": 0.633, "step": 9199 }, { "epoch": 0.59, "grad_norm": 1.2229679048187294, "learning_rate": 3.815192731846141e-06, "loss": 0.7513, "step": 9200 }, { "epoch": 0.59, "grad_norm": 1.1893808705699174, "learning_rate": 3.8141857238180537e-06, "loss": 0.61, "step": 9201 }, { "epoch": 0.59, "grad_norm": 1.7387250629571729, "learning_rate": 3.8131787667537734e-06, "loss": 0.8297, "step": 9202 }, { "epoch": 0.59, "grad_norm": 2.3700727011795384, "learning_rate": 3.812171860696574e-06, "loss": 0.732, "step": 9203 }, { "epoch": 0.59, "grad_norm": 1.1689830814071367, "learning_rate": 3.811165005689732e-06, "loss": 0.7426, "step": 9204 }, { "epoch": 0.59, "grad_norm": 3.9825462923551265, "learning_rate": 3.810158201776523e-06, "loss": 0.614, "step": 9205 }, { "epoch": 0.59, "grad_norm": 1.7446964575888753, "learning_rate": 3.8091514490002108e-06, "loss": 0.6989, "step": 9206 }, { "epoch": 0.59, "grad_norm": 1.6692674380073624, "learning_rate": 3.808144747404069e-06, "loss": 0.7294, "step": 9207 }, { "epoch": 0.59, "grad_norm": 1.60325901561264, "learning_rate": 3.8071380970313597e-06, "loss": 0.6714, "step": 9208 }, { "epoch": 0.59, "grad_norm": 2.0611509319802734, "learning_rate": 3.806131497925351e-06, "loss": 0.8056, "step": 9209 }, { "epoch": 0.59, "grad_norm": 1.7509631304565767, "learning_rate": 3.8051249501293e-06, "loss": 0.8072, "step": 9210 }, { "epoch": 0.59, "grad_norm": 1.5458784019560199, "learning_rate": 3.8041184536864694e-06, "loss": 0.7064, "step": 9211 }, { "epoch": 0.59, "grad_norm": 1.8369772794267132, "learning_rate": 3.803112008640113e-06, "loss": 0.8046, "step": 9212 }, { "epoch": 0.59, "grad_norm": 2.072145088801198, "learning_rate": 3.802105615033488e-06, "loss": 0.782, "step": 9213 }, { "epoch": 0.59, "grad_norm": 1.8204495424498326, "learning_rate": 3.8010992729098466e-06, "loss": 0.7846, "step": 9214 }, { "epoch": 0.59, "grad_norm": 1.2338606018281726, "learning_rate": 3.8000929823124387e-06, "loss": 0.7623, "step": 9215 }, { "epoch": 0.59, "grad_norm": 1.5323597611948732, "learning_rate": 3.799086743284515e-06, "loss": 0.5844, "step": 9216 }, { "epoch": 0.59, "grad_norm": 1.6046536303199572, "learning_rate": 3.798080555869318e-06, "loss": 0.7052, "step": 9217 }, { "epoch": 0.59, "grad_norm": 1.6301392580623888, "learning_rate": 3.7970744201100944e-06, "loss": 0.7645, "step": 9218 }, { "epoch": 0.59, "grad_norm": 1.6254005747260476, "learning_rate": 3.796068336050083e-06, "loss": 0.6977, "step": 9219 }, { "epoch": 0.59, "grad_norm": 1.557014442014596, "learning_rate": 3.795062303732525e-06, "loss": 0.7432, "step": 9220 }, { "epoch": 0.59, "grad_norm": 1.5392138930272263, "learning_rate": 3.7940563232006573e-06, "loss": 0.7523, "step": 9221 }, { "epoch": 0.59, "grad_norm": 2.0661635010007373, "learning_rate": 3.7930503944977153e-06, "loss": 0.6131, "step": 9222 }, { "epoch": 0.59, "grad_norm": 0.970029897346641, "learning_rate": 3.792044517666931e-06, "loss": 0.536, "step": 9223 }, { "epoch": 0.59, "grad_norm": 1.6562826380384488, "learning_rate": 3.7910386927515346e-06, "loss": 0.7935, "step": 9224 }, { "epoch": 0.59, "grad_norm": 1.5483174011361138, "learning_rate": 3.790032919794757e-06, "loss": 0.807, "step": 9225 }, { "epoch": 0.59, "grad_norm": 1.60486164468413, "learning_rate": 3.7890271988398186e-06, "loss": 0.6456, "step": 9226 }, { "epoch": 0.59, "grad_norm": 1.5834446042552626, "learning_rate": 3.788021529929949e-06, "loss": 0.7163, "step": 9227 }, { "epoch": 0.59, "grad_norm": 1.0356915770409068, "learning_rate": 3.7870159131083668e-06, "loss": 0.6422, "step": 9228 }, { "epoch": 0.59, "grad_norm": 1.7687152238127553, "learning_rate": 3.786010348418292e-06, "loss": 0.6346, "step": 9229 }, { "epoch": 0.59, "grad_norm": 1.7721913578107968, "learning_rate": 3.78500483590294e-06, "loss": 0.6857, "step": 9230 }, { "epoch": 0.59, "grad_norm": 2.4858699510553657, "learning_rate": 3.783999375605527e-06, "loss": 0.7915, "step": 9231 }, { "epoch": 0.59, "grad_norm": 1.592569250110813, "learning_rate": 3.7829939675692683e-06, "loss": 0.6965, "step": 9232 }, { "epoch": 0.59, "grad_norm": 1.4768107647751672, "learning_rate": 3.7819886118373694e-06, "loss": 0.7799, "step": 9233 }, { "epoch": 0.59, "grad_norm": 1.6744489058883691, "learning_rate": 3.7809833084530427e-06, "loss": 0.7613, "step": 9234 }, { "epoch": 0.59, "grad_norm": 1.7275630699305125, "learning_rate": 3.77997805745949e-06, "loss": 0.8697, "step": 9235 }, { "epoch": 0.59, "grad_norm": 1.7162959957193948, "learning_rate": 3.7789728588999176e-06, "loss": 0.7424, "step": 9236 }, { "epoch": 0.59, "grad_norm": 1.678940507674137, "learning_rate": 3.7779677128175247e-06, "loss": 0.691, "step": 9237 }, { "epoch": 0.59, "grad_norm": 1.7758402808461247, "learning_rate": 3.7769626192555143e-06, "loss": 0.7259, "step": 9238 }, { "epoch": 0.59, "grad_norm": 1.809200707071283, "learning_rate": 3.775957578257078e-06, "loss": 0.6869, "step": 9239 }, { "epoch": 0.59, "grad_norm": 2.0056914815442983, "learning_rate": 3.774952589865413e-06, "loss": 0.7586, "step": 9240 }, { "epoch": 0.59, "grad_norm": 1.4913094719621585, "learning_rate": 3.7739476541237137e-06, "loss": 0.6498, "step": 9241 }, { "epoch": 0.59, "grad_norm": 2.680614085455496, "learning_rate": 3.7729427710751654e-06, "loss": 0.8023, "step": 9242 }, { "epoch": 0.59, "grad_norm": 2.06471771951964, "learning_rate": 3.7719379407629597e-06, "loss": 0.726, "step": 9243 }, { "epoch": 0.59, "grad_norm": 2.1422836623585573, "learning_rate": 3.77093316323028e-06, "loss": 0.7206, "step": 9244 }, { "epoch": 0.59, "grad_norm": 1.6653436872038798, "learning_rate": 3.769928438520312e-06, "loss": 0.676, "step": 9245 }, { "epoch": 0.59, "grad_norm": 1.6146244451393827, "learning_rate": 3.7689237666762335e-06, "loss": 0.875, "step": 9246 }, { "epoch": 0.59, "grad_norm": 1.8673162779474533, "learning_rate": 3.7679191477412263e-06, "loss": 0.6154, "step": 9247 }, { "epoch": 0.59, "grad_norm": 1.6071768573612835, "learning_rate": 3.7669145817584635e-06, "loss": 0.7075, "step": 9248 }, { "epoch": 0.59, "grad_norm": 4.368399547302106, "learning_rate": 3.7659100687711205e-06, "loss": 0.7971, "step": 9249 }, { "epoch": 0.59, "grad_norm": 1.087328924733123, "learning_rate": 3.7649056088223705e-06, "loss": 0.63, "step": 9250 }, { "epoch": 0.59, "grad_norm": 1.7584138174636657, "learning_rate": 3.7639012019553813e-06, "loss": 0.8131, "step": 9251 }, { "epoch": 0.59, "grad_norm": 0.9648482700695223, "learning_rate": 3.762896848213323e-06, "loss": 0.6333, "step": 9252 }, { "epoch": 0.59, "grad_norm": 1.6451330132555033, "learning_rate": 3.761892547639357e-06, "loss": 0.7118, "step": 9253 }, { "epoch": 0.59, "grad_norm": 1.6217579747393611, "learning_rate": 3.7608883002766496e-06, "loss": 0.6436, "step": 9254 }, { "epoch": 0.59, "grad_norm": 1.0498637756318498, "learning_rate": 3.7598841061683566e-06, "loss": 0.6128, "step": 9255 }, { "epoch": 0.59, "grad_norm": 1.6101175099020801, "learning_rate": 3.758879965357641e-06, "loss": 0.6761, "step": 9256 }, { "epoch": 0.59, "grad_norm": 1.6020369343590795, "learning_rate": 3.7578758778876546e-06, "loss": 0.8743, "step": 9257 }, { "epoch": 0.59, "grad_norm": 1.8215336877311878, "learning_rate": 3.7568718438015532e-06, "loss": 0.7947, "step": 9258 }, { "epoch": 0.59, "grad_norm": 1.165462028375457, "learning_rate": 3.75586786314249e-06, "loss": 0.6457, "step": 9259 }, { "epoch": 0.59, "grad_norm": 1.661418663874356, "learning_rate": 3.754863935953609e-06, "loss": 0.7943, "step": 9260 }, { "epoch": 0.59, "grad_norm": 1.5317796604470417, "learning_rate": 3.753860062278063e-06, "loss": 0.7095, "step": 9261 }, { "epoch": 0.59, "grad_norm": 1.8515023376118305, "learning_rate": 3.7528562421589898e-06, "loss": 0.7858, "step": 9262 }, { "epoch": 0.59, "grad_norm": 1.3582679915475468, "learning_rate": 3.7518524756395362e-06, "loss": 0.6169, "step": 9263 }, { "epoch": 0.59, "grad_norm": 1.6446700694463914, "learning_rate": 3.750848762762841e-06, "loss": 0.6349, "step": 9264 }, { "epoch": 0.59, "grad_norm": 1.7314486739131785, "learning_rate": 3.74984510357204e-06, "loss": 0.8098, "step": 9265 }, { "epoch": 0.59, "grad_norm": 1.7432263994344943, "learning_rate": 3.7488414981102693e-06, "loss": 0.7293, "step": 9266 }, { "epoch": 0.59, "grad_norm": 1.7810552172112133, "learning_rate": 3.747837946420664e-06, "loss": 0.6073, "step": 9267 }, { "epoch": 0.59, "grad_norm": 1.268506127037051, "learning_rate": 3.7468344485463505e-06, "loss": 0.8213, "step": 9268 }, { "epoch": 0.59, "grad_norm": 1.8431160551978243, "learning_rate": 3.7458310045304586e-06, "loss": 0.9042, "step": 9269 }, { "epoch": 0.59, "grad_norm": 1.6313968358647404, "learning_rate": 3.7448276144161174e-06, "loss": 0.7304, "step": 9270 }, { "epoch": 0.59, "grad_norm": 1.7447642395310992, "learning_rate": 3.7438242782464453e-06, "loss": 0.7139, "step": 9271 }, { "epoch": 0.59, "grad_norm": 2.252200900143928, "learning_rate": 3.742820996064568e-06, "loss": 0.7027, "step": 9272 }, { "epoch": 0.59, "grad_norm": 1.6123795751270926, "learning_rate": 3.7418177679136005e-06, "loss": 0.6975, "step": 9273 }, { "epoch": 0.59, "grad_norm": 2.328428314363577, "learning_rate": 3.740814593836664e-06, "loss": 0.6903, "step": 9274 }, { "epoch": 0.59, "grad_norm": 1.1873463951444738, "learning_rate": 3.7398114738768686e-06, "loss": 0.6243, "step": 9275 }, { "epoch": 0.59, "grad_norm": 1.2080628492483168, "learning_rate": 3.73880840807733e-06, "loss": 0.7932, "step": 9276 }, { "epoch": 0.59, "grad_norm": 1.9807829785347788, "learning_rate": 3.7378053964811534e-06, "loss": 0.7246, "step": 9277 }, { "epoch": 0.59, "grad_norm": 2.7796193646086276, "learning_rate": 3.736802439131449e-06, "loss": 0.7874, "step": 9278 }, { "epoch": 0.59, "grad_norm": 1.567445311529536, "learning_rate": 3.735799536071322e-06, "loss": 0.6851, "step": 9279 }, { "epoch": 0.59, "grad_norm": 1.7775184221116977, "learning_rate": 3.734796687343874e-06, "loss": 0.7402, "step": 9280 }, { "epoch": 0.59, "grad_norm": 2.007439584932973, "learning_rate": 3.733793892992208e-06, "loss": 0.7974, "step": 9281 }, { "epoch": 0.59, "grad_norm": 1.352297990357502, "learning_rate": 3.7327911530594173e-06, "loss": 0.725, "step": 9282 }, { "epoch": 0.59, "grad_norm": 1.6357872951027443, "learning_rate": 3.7317884675886025e-06, "loss": 0.7155, "step": 9283 }, { "epoch": 0.59, "grad_norm": 2.557930814087296, "learning_rate": 3.7307858366228523e-06, "loss": 0.7313, "step": 9284 }, { "epoch": 0.59, "grad_norm": 1.9776428302742612, "learning_rate": 3.7297832602052608e-06, "loss": 0.8151, "step": 9285 }, { "epoch": 0.59, "grad_norm": 2.0279210168264736, "learning_rate": 3.7287807383789143e-06, "loss": 0.7371, "step": 9286 }, { "epoch": 0.59, "grad_norm": 1.6048910254565445, "learning_rate": 3.727778271186901e-06, "loss": 0.6317, "step": 9287 }, { "epoch": 0.59, "grad_norm": 1.5224526098893716, "learning_rate": 3.7267758586723065e-06, "loss": 0.68, "step": 9288 }, { "epoch": 0.59, "grad_norm": 1.1194346740773662, "learning_rate": 3.7257735008782076e-06, "loss": 0.6301, "step": 9289 }, { "epoch": 0.59, "grad_norm": 1.7896739925987333, "learning_rate": 3.7247711978476885e-06, "loss": 0.8384, "step": 9290 }, { "epoch": 0.59, "grad_norm": 1.7941971393104195, "learning_rate": 3.7237689496238217e-06, "loss": 0.895, "step": 9291 }, { "epoch": 0.59, "grad_norm": 1.7714155328298216, "learning_rate": 3.722766756249684e-06, "loss": 0.6986, "step": 9292 }, { "epoch": 0.59, "grad_norm": 1.617286792035256, "learning_rate": 3.721764617768347e-06, "loss": 0.6456, "step": 9293 }, { "epoch": 0.59, "grad_norm": 1.910555168634729, "learning_rate": 3.7207625342228826e-06, "loss": 0.6921, "step": 9294 }, { "epoch": 0.59, "grad_norm": 1.5534928537331807, "learning_rate": 3.7197605056563545e-06, "loss": 0.7353, "step": 9295 }, { "epoch": 0.6, "grad_norm": 1.6583558310893904, "learning_rate": 3.7187585321118293e-06, "loss": 0.7573, "step": 9296 }, { "epoch": 0.6, "grad_norm": 1.1595321018585072, "learning_rate": 3.7177566136323726e-06, "loss": 0.5874, "step": 9297 }, { "epoch": 0.6, "grad_norm": 1.8796880211043971, "learning_rate": 3.71675475026104e-06, "loss": 0.6889, "step": 9298 }, { "epoch": 0.6, "grad_norm": 1.9551107767891147, "learning_rate": 3.7157529420408933e-06, "loss": 0.713, "step": 9299 }, { "epoch": 0.6, "grad_norm": 1.2916191251987927, "learning_rate": 3.7147511890149864e-06, "loss": 0.619, "step": 9300 }, { "epoch": 0.6, "grad_norm": 1.7277121479850424, "learning_rate": 3.7137494912263723e-06, "loss": 0.8157, "step": 9301 }, { "epoch": 0.6, "grad_norm": 1.823934947523906, "learning_rate": 3.7127478487181014e-06, "loss": 0.686, "step": 9302 }, { "epoch": 0.6, "grad_norm": 1.7340697603945854, "learning_rate": 3.7117462615332254e-06, "loss": 0.7512, "step": 9303 }, { "epoch": 0.6, "grad_norm": 2.1483019493033706, "learning_rate": 3.710744729714786e-06, "loss": 0.6973, "step": 9304 }, { "epoch": 0.6, "grad_norm": 1.6224107727325197, "learning_rate": 3.7097432533058288e-06, "loss": 0.8394, "step": 9305 }, { "epoch": 0.6, "grad_norm": 1.6775238985576055, "learning_rate": 3.708741832349397e-06, "loss": 0.6929, "step": 9306 }, { "epoch": 0.6, "grad_norm": 1.727292999173661, "learning_rate": 3.7077404668885263e-06, "loss": 0.7108, "step": 9307 }, { "epoch": 0.6, "grad_norm": 2.020697347708348, "learning_rate": 3.7067391569662564e-06, "loss": 0.7392, "step": 9308 }, { "epoch": 0.6, "grad_norm": 1.6543154967649243, "learning_rate": 3.7057379026256185e-06, "loss": 0.67, "step": 9309 }, { "epoch": 0.6, "grad_norm": 1.5780944147714182, "learning_rate": 3.7047367039096483e-06, "loss": 0.7238, "step": 9310 }, { "epoch": 0.6, "grad_norm": 1.7396505416528816, "learning_rate": 3.7037355608613703e-06, "loss": 0.7821, "step": 9311 }, { "epoch": 0.6, "grad_norm": 1.5679358263884742, "learning_rate": 3.7027344735238168e-06, "loss": 0.6817, "step": 9312 }, { "epoch": 0.6, "grad_norm": 1.6961882458769164, "learning_rate": 3.701733441940007e-06, "loss": 0.6415, "step": 9313 }, { "epoch": 0.6, "grad_norm": 1.6073111315994948, "learning_rate": 3.7007324661529663e-06, "loss": 0.7566, "step": 9314 }, { "epoch": 0.6, "grad_norm": 2.4407062951370286, "learning_rate": 3.699731546205715e-06, "loss": 0.6569, "step": 9315 }, { "epoch": 0.6, "grad_norm": 1.091683644817072, "learning_rate": 3.698730682141268e-06, "loss": 0.7332, "step": 9316 }, { "epoch": 0.6, "grad_norm": 1.8728842811724864, "learning_rate": 3.6977298740026448e-06, "loss": 0.7908, "step": 9317 }, { "epoch": 0.6, "grad_norm": 1.2004982424912927, "learning_rate": 3.6967291218328525e-06, "loss": 0.6736, "step": 9318 }, { "epoch": 0.6, "grad_norm": 1.643796017639489, "learning_rate": 3.695728425674906e-06, "loss": 0.8521, "step": 9319 }, { "epoch": 0.6, "grad_norm": 1.6254858401639936, "learning_rate": 3.69472778557181e-06, "loss": 0.7045, "step": 9320 }, { "epoch": 0.6, "grad_norm": 1.8773992765908727, "learning_rate": 3.693727201566571e-06, "loss": 0.7493, "step": 9321 }, { "epoch": 0.6, "grad_norm": 1.783429496887628, "learning_rate": 3.6927266737021915e-06, "loss": 0.7052, "step": 9322 }, { "epoch": 0.6, "grad_norm": 1.99639350366379, "learning_rate": 3.6917262020216727e-06, "loss": 0.7508, "step": 9323 }, { "epoch": 0.6, "grad_norm": 1.8764538932312915, "learning_rate": 3.6907257865680146e-06, "loss": 0.6499, "step": 9324 }, { "epoch": 0.6, "grad_norm": 2.0341870325220017, "learning_rate": 3.6897254273842087e-06, "loss": 0.7654, "step": 9325 }, { "epoch": 0.6, "grad_norm": 1.9145065522594102, "learning_rate": 3.688725124513253e-06, "loss": 0.8391, "step": 9326 }, { "epoch": 0.6, "grad_norm": 1.5793512797684397, "learning_rate": 3.6877248779981332e-06, "loss": 0.7999, "step": 9327 }, { "epoch": 0.6, "grad_norm": 1.6170803571000132, "learning_rate": 3.686724687881843e-06, "loss": 0.7619, "step": 9328 }, { "epoch": 0.6, "grad_norm": 2.158406140734704, "learning_rate": 3.685724554207365e-06, "loss": 0.7557, "step": 9329 }, { "epoch": 0.6, "grad_norm": 1.8516524448574154, "learning_rate": 3.684724477017685e-06, "loss": 0.7809, "step": 9330 }, { "epoch": 0.6, "grad_norm": 1.533848445282813, "learning_rate": 3.6837244563557815e-06, "loss": 0.6705, "step": 9331 }, { "epoch": 0.6, "grad_norm": 1.8718448938278391, "learning_rate": 3.6827244922646354e-06, "loss": 0.7296, "step": 9332 }, { "epoch": 0.6, "grad_norm": 1.927307822364383, "learning_rate": 3.6817245847872253e-06, "loss": 0.6641, "step": 9333 }, { "epoch": 0.6, "grad_norm": 1.0678834016944954, "learning_rate": 3.6807247339665192e-06, "loss": 0.5951, "step": 9334 }, { "epoch": 0.6, "grad_norm": 1.6009422320668734, "learning_rate": 3.679724939845494e-06, "loss": 0.636, "step": 9335 }, { "epoch": 0.6, "grad_norm": 1.5487768625649223, "learning_rate": 3.6787252024671143e-06, "loss": 0.7306, "step": 9336 }, { "epoch": 0.6, "grad_norm": 1.7197924902796824, "learning_rate": 3.677725521874352e-06, "loss": 0.7708, "step": 9337 }, { "epoch": 0.6, "grad_norm": 2.2572444811821475, "learning_rate": 3.6767258981101655e-06, "loss": 0.6848, "step": 9338 }, { "epoch": 0.6, "grad_norm": 1.8919271243574145, "learning_rate": 3.6757263312175216e-06, "loss": 0.669, "step": 9339 }, { "epoch": 0.6, "grad_norm": 1.5554537177174708, "learning_rate": 3.674726821239376e-06, "loss": 0.6751, "step": 9340 }, { "epoch": 0.6, "grad_norm": 1.7503546898301516, "learning_rate": 3.673727368218687e-06, "loss": 0.7045, "step": 9341 }, { "epoch": 0.6, "grad_norm": 1.9835490960069884, "learning_rate": 3.6727279721984093e-06, "loss": 0.7413, "step": 9342 }, { "epoch": 0.6, "grad_norm": 1.7759118520606028, "learning_rate": 3.671728633221493e-06, "loss": 0.8024, "step": 9343 }, { "epoch": 0.6, "grad_norm": 1.7450317619988693, "learning_rate": 3.6707293513308906e-06, "loss": 0.6885, "step": 9344 }, { "epoch": 0.6, "grad_norm": 1.8254545104147464, "learning_rate": 3.669730126569546e-06, "loss": 0.6688, "step": 9345 }, { "epoch": 0.6, "grad_norm": 2.1437554290278498, "learning_rate": 3.668730958980407e-06, "loss": 0.7828, "step": 9346 }, { "epoch": 0.6, "grad_norm": 1.1540257592811582, "learning_rate": 3.667731848606413e-06, "loss": 0.7282, "step": 9347 }, { "epoch": 0.6, "grad_norm": 1.5422781161171761, "learning_rate": 3.6667327954905054e-06, "loss": 0.6879, "step": 9348 }, { "epoch": 0.6, "grad_norm": 1.728166506767682, "learning_rate": 3.665733799675619e-06, "loss": 0.8536, "step": 9349 }, { "epoch": 0.6, "grad_norm": 1.5975260743119328, "learning_rate": 3.664734861204692e-06, "loss": 0.8129, "step": 9350 }, { "epoch": 0.6, "grad_norm": 1.2446687220697485, "learning_rate": 3.663735980120653e-06, "loss": 0.7539, "step": 9351 }, { "epoch": 0.6, "grad_norm": 1.7271607471385415, "learning_rate": 3.662737156466434e-06, "loss": 0.8216, "step": 9352 }, { "epoch": 0.6, "grad_norm": 1.0273260890142595, "learning_rate": 3.6617383902849645e-06, "loss": 0.6736, "step": 9353 }, { "epoch": 0.6, "grad_norm": 1.6984136921202864, "learning_rate": 3.6607396816191644e-06, "loss": 0.8166, "step": 9354 }, { "epoch": 0.6, "grad_norm": 1.7994196789939179, "learning_rate": 3.6597410305119605e-06, "loss": 0.8165, "step": 9355 }, { "epoch": 0.6, "grad_norm": 1.7747405365164632, "learning_rate": 3.6587424370062696e-06, "loss": 0.8236, "step": 9356 }, { "epoch": 0.6, "grad_norm": 1.6423582382365023, "learning_rate": 3.6577439011450112e-06, "loss": 0.7143, "step": 9357 }, { "epoch": 0.6, "grad_norm": 1.7405657856201793, "learning_rate": 3.6567454229710973e-06, "loss": 0.5907, "step": 9358 }, { "epoch": 0.6, "grad_norm": 1.6365416214101942, "learning_rate": 3.6557470025274453e-06, "loss": 0.8285, "step": 9359 }, { "epoch": 0.6, "grad_norm": 1.6865553350587008, "learning_rate": 3.654748639856961e-06, "loss": 0.6926, "step": 9360 }, { "epoch": 0.6, "grad_norm": 1.1839753365087704, "learning_rate": 3.6537503350025525e-06, "loss": 0.6625, "step": 9361 }, { "epoch": 0.6, "grad_norm": 1.5825126308291295, "learning_rate": 3.652752088007129e-06, "loss": 0.6832, "step": 9362 }, { "epoch": 0.6, "grad_norm": 1.7175040330748237, "learning_rate": 3.6517538989135866e-06, "loss": 0.6931, "step": 9363 }, { "epoch": 0.6, "grad_norm": 1.405670026400599, "learning_rate": 3.6507557677648297e-06, "loss": 0.6147, "step": 9364 }, { "epoch": 0.6, "grad_norm": 1.8290549005397119, "learning_rate": 3.649757694603754e-06, "loss": 0.7269, "step": 9365 }, { "epoch": 0.6, "grad_norm": 1.6976871375554592, "learning_rate": 3.6487596794732573e-06, "loss": 0.7225, "step": 9366 }, { "epoch": 0.6, "grad_norm": 1.937347892972373, "learning_rate": 3.647761722416229e-06, "loss": 0.6948, "step": 9367 }, { "epoch": 0.6, "grad_norm": 2.0204862533411316, "learning_rate": 3.646763823475561e-06, "loss": 0.7026, "step": 9368 }, { "epoch": 0.6, "grad_norm": 1.7622892188081827, "learning_rate": 3.645765982694139e-06, "loss": 0.7808, "step": 9369 }, { "epoch": 0.6, "grad_norm": 1.5857030677961277, "learning_rate": 3.6447682001148497e-06, "loss": 0.6991, "step": 9370 }, { "epoch": 0.6, "grad_norm": 2.176000734228688, "learning_rate": 3.643770475780576e-06, "loss": 0.6933, "step": 9371 }, { "epoch": 0.6, "grad_norm": 1.6481651517936777, "learning_rate": 3.6427728097341963e-06, "loss": 0.7605, "step": 9372 }, { "epoch": 0.6, "grad_norm": 1.7045719808816657, "learning_rate": 3.641775202018592e-06, "loss": 0.7891, "step": 9373 }, { "epoch": 0.6, "grad_norm": 1.5356876186975266, "learning_rate": 3.640777652676633e-06, "loss": 0.8156, "step": 9374 }, { "epoch": 0.6, "grad_norm": 2.031529427746816, "learning_rate": 3.6397801617511965e-06, "loss": 0.737, "step": 9375 }, { "epoch": 0.6, "grad_norm": 1.441248346851334, "learning_rate": 3.6387827292851487e-06, "loss": 0.6368, "step": 9376 }, { "epoch": 0.6, "grad_norm": 0.9681408942173639, "learning_rate": 3.6377853553213593e-06, "loss": 0.6362, "step": 9377 }, { "epoch": 0.6, "grad_norm": 1.841463018683575, "learning_rate": 3.6367880399026923e-06, "loss": 0.7136, "step": 9378 }, { "epoch": 0.6, "grad_norm": 1.5371548162379214, "learning_rate": 3.635790783072012e-06, "loss": 0.6453, "step": 9379 }, { "epoch": 0.6, "grad_norm": 1.7977016805514978, "learning_rate": 3.6347935848721766e-06, "loss": 0.8369, "step": 9380 }, { "epoch": 0.6, "grad_norm": 1.958876433912193, "learning_rate": 3.633796445346044e-06, "loss": 0.7182, "step": 9381 }, { "epoch": 0.6, "grad_norm": 1.637444179577643, "learning_rate": 3.6327993645364704e-06, "loss": 0.6805, "step": 9382 }, { "epoch": 0.6, "grad_norm": 1.1586610186182793, "learning_rate": 3.6318023424863057e-06, "loss": 0.614, "step": 9383 }, { "epoch": 0.6, "grad_norm": 1.546647436635857, "learning_rate": 3.6308053792384035e-06, "loss": 0.7201, "step": 9384 }, { "epoch": 0.6, "grad_norm": 1.2897714384000767, "learning_rate": 3.6298084748356077e-06, "loss": 0.6927, "step": 9385 }, { "epoch": 0.6, "grad_norm": 1.8169770378265306, "learning_rate": 3.628811629320764e-06, "loss": 0.681, "step": 9386 }, { "epoch": 0.6, "grad_norm": 1.988926917538912, "learning_rate": 3.6278148427367154e-06, "loss": 0.7315, "step": 9387 }, { "epoch": 0.6, "grad_norm": 1.5312059038560784, "learning_rate": 3.626818115126301e-06, "loss": 0.749, "step": 9388 }, { "epoch": 0.6, "grad_norm": 2.3513895198891976, "learning_rate": 3.6258214465323604e-06, "loss": 0.9314, "step": 9389 }, { "epoch": 0.6, "grad_norm": 1.7736849234680865, "learning_rate": 3.6248248369977247e-06, "loss": 0.7701, "step": 9390 }, { "epoch": 0.6, "grad_norm": 1.655934580926124, "learning_rate": 3.6238282865652304e-06, "loss": 0.7013, "step": 9391 }, { "epoch": 0.6, "grad_norm": 1.6742793376365943, "learning_rate": 3.622831795277702e-06, "loss": 0.6856, "step": 9392 }, { "epoch": 0.6, "grad_norm": 1.6003269745436006, "learning_rate": 3.62183536317797e-06, "loss": 0.865, "step": 9393 }, { "epoch": 0.6, "grad_norm": 1.512348311188035, "learning_rate": 3.6208389903088578e-06, "loss": 0.6907, "step": 9394 }, { "epoch": 0.6, "grad_norm": 1.4646008557645975, "learning_rate": 3.6198426767131893e-06, "loss": 0.6703, "step": 9395 }, { "epoch": 0.6, "grad_norm": 1.3145353741781254, "learning_rate": 3.6188464224337804e-06, "loss": 0.6935, "step": 9396 }, { "epoch": 0.6, "grad_norm": 1.4536633818077103, "learning_rate": 3.6178502275134507e-06, "loss": 0.658, "step": 9397 }, { "epoch": 0.6, "grad_norm": 2.0173503937815753, "learning_rate": 3.6168540919950156e-06, "loss": 0.6131, "step": 9398 }, { "epoch": 0.6, "grad_norm": 1.8472828493745064, "learning_rate": 3.6158580159212833e-06, "loss": 0.7364, "step": 9399 }, { "epoch": 0.6, "grad_norm": 1.7661388933270625, "learning_rate": 3.6148619993350653e-06, "loss": 0.881, "step": 9400 }, { "epoch": 0.6, "grad_norm": 1.8981111330835798, "learning_rate": 3.6138660422791673e-06, "loss": 0.8169, "step": 9401 }, { "epoch": 0.6, "grad_norm": 1.820297542374596, "learning_rate": 3.6128701447963963e-06, "loss": 0.8018, "step": 9402 }, { "epoch": 0.6, "grad_norm": 1.2817627071220925, "learning_rate": 3.6118743069295503e-06, "loss": 0.6671, "step": 9403 }, { "epoch": 0.6, "grad_norm": 1.9828103958115815, "learning_rate": 3.610878528721431e-06, "loss": 0.7856, "step": 9404 }, { "epoch": 0.6, "grad_norm": 1.7410260464266964, "learning_rate": 3.609882810214832e-06, "loss": 0.6778, "step": 9405 }, { "epoch": 0.6, "grad_norm": 2.3788867635687745, "learning_rate": 3.608887151452548e-06, "loss": 0.8461, "step": 9406 }, { "epoch": 0.6, "grad_norm": 1.7349604972496764, "learning_rate": 3.6078915524773726e-06, "loss": 0.7261, "step": 9407 }, { "epoch": 0.6, "grad_norm": 1.039172606383538, "learning_rate": 3.6068960133320924e-06, "loss": 0.712, "step": 9408 }, { "epoch": 0.6, "grad_norm": 2.112049144358017, "learning_rate": 3.605900534059496e-06, "loss": 0.6699, "step": 9409 }, { "epoch": 0.6, "grad_norm": 1.8146682887217367, "learning_rate": 3.604905114702363e-06, "loss": 0.6829, "step": 9410 }, { "epoch": 0.6, "grad_norm": 1.7946929519041925, "learning_rate": 3.603909755303479e-06, "loss": 0.7186, "step": 9411 }, { "epoch": 0.6, "grad_norm": 1.9370562459046008, "learning_rate": 3.602914455905618e-06, "loss": 0.9184, "step": 9412 }, { "epoch": 0.6, "grad_norm": 1.9943102104945478, "learning_rate": 3.6019192165515595e-06, "loss": 0.7658, "step": 9413 }, { "epoch": 0.6, "grad_norm": 1.620570828541993, "learning_rate": 3.600924037284073e-06, "loss": 0.7205, "step": 9414 }, { "epoch": 0.6, "grad_norm": 1.7188031115093465, "learning_rate": 3.5999289181459346e-06, "loss": 0.7534, "step": 9415 }, { "epoch": 0.6, "grad_norm": 1.63543397752895, "learning_rate": 3.5989338591799073e-06, "loss": 0.6957, "step": 9416 }, { "epoch": 0.6, "grad_norm": 1.1388586215216578, "learning_rate": 3.597938860428758e-06, "loss": 0.6941, "step": 9417 }, { "epoch": 0.6, "grad_norm": 1.9190909420535713, "learning_rate": 3.596943921935253e-06, "loss": 0.732, "step": 9418 }, { "epoch": 0.6, "grad_norm": 1.7135057238138085, "learning_rate": 3.5959490437421473e-06, "loss": 0.7935, "step": 9419 }, { "epoch": 0.6, "grad_norm": 1.7910704235890091, "learning_rate": 3.5949542258922033e-06, "loss": 0.6696, "step": 9420 }, { "epoch": 0.6, "grad_norm": 1.6831633782804762, "learning_rate": 3.5939594684281736e-06, "loss": 0.7189, "step": 9421 }, { "epoch": 0.6, "grad_norm": 1.8285551331509031, "learning_rate": 3.592964771392812e-06, "loss": 0.7093, "step": 9422 }, { "epoch": 0.6, "grad_norm": 1.450777894306149, "learning_rate": 3.591970134828866e-06, "loss": 0.7485, "step": 9423 }, { "epoch": 0.6, "grad_norm": 2.0654287187866633, "learning_rate": 3.5909755587790883e-06, "loss": 0.7363, "step": 9424 }, { "epoch": 0.6, "grad_norm": 1.6066265860043107, "learning_rate": 3.589981043286217e-06, "loss": 0.6709, "step": 9425 }, { "epoch": 0.6, "grad_norm": 1.5740307408916843, "learning_rate": 3.5889865883929986e-06, "loss": 0.7148, "step": 9426 }, { "epoch": 0.6, "grad_norm": 1.6538211635936204, "learning_rate": 3.587992194142173e-06, "loss": 0.6842, "step": 9427 }, { "epoch": 0.6, "grad_norm": 1.0968238623756954, "learning_rate": 3.5869978605764745e-06, "loss": 0.6955, "step": 9428 }, { "epoch": 0.6, "grad_norm": 1.6869749086556571, "learning_rate": 3.586003587738639e-06, "loss": 0.8801, "step": 9429 }, { "epoch": 0.6, "grad_norm": 1.7261308638134563, "learning_rate": 3.585009375671398e-06, "loss": 0.7528, "step": 9430 }, { "epoch": 0.6, "grad_norm": 1.9641371894982516, "learning_rate": 3.584015224417482e-06, "loss": 0.6418, "step": 9431 }, { "epoch": 0.6, "grad_norm": 1.6572658846524337, "learning_rate": 3.583021134019614e-06, "loss": 0.6387, "step": 9432 }, { "epoch": 0.6, "grad_norm": 1.0873262284825265, "learning_rate": 3.5820271045205227e-06, "loss": 0.7157, "step": 9433 }, { "epoch": 0.6, "grad_norm": 1.586791600460797, "learning_rate": 3.5810331359629245e-06, "loss": 0.6443, "step": 9434 }, { "epoch": 0.6, "grad_norm": 1.06358086163613, "learning_rate": 3.580039228389541e-06, "loss": 0.7373, "step": 9435 }, { "epoch": 0.6, "grad_norm": 2.9489619683581734, "learning_rate": 3.5790453818430893e-06, "loss": 0.5998, "step": 9436 }, { "epoch": 0.6, "grad_norm": 1.6347564197058337, "learning_rate": 3.578051596366279e-06, "loss": 0.7986, "step": 9437 }, { "epoch": 0.6, "grad_norm": 1.061133546816794, "learning_rate": 3.5770578720018254e-06, "loss": 0.6886, "step": 9438 }, { "epoch": 0.6, "grad_norm": 1.9728905574189686, "learning_rate": 3.576064208792433e-06, "loss": 0.7423, "step": 9439 }, { "epoch": 0.6, "grad_norm": 1.6498065824953012, "learning_rate": 3.5750706067808104e-06, "loss": 0.6745, "step": 9440 }, { "epoch": 0.6, "grad_norm": 1.9339024804285836, "learning_rate": 3.574077066009657e-06, "loss": 0.7698, "step": 9441 }, { "epoch": 0.6, "grad_norm": 2.1067074109931716, "learning_rate": 3.5730835865216763e-06, "loss": 0.6999, "step": 9442 }, { "epoch": 0.6, "grad_norm": 1.5791292492272233, "learning_rate": 3.5720901683595633e-06, "loss": 0.7502, "step": 9443 }, { "epoch": 0.6, "grad_norm": 1.5686581794836283, "learning_rate": 3.5710968115660145e-06, "loss": 0.6613, "step": 9444 }, { "epoch": 0.6, "grad_norm": 2.0014703422574214, "learning_rate": 3.570103516183724e-06, "loss": 0.7864, "step": 9445 }, { "epoch": 0.6, "grad_norm": 1.6447742739402578, "learning_rate": 3.569110282255378e-06, "loss": 0.7682, "step": 9446 }, { "epoch": 0.6, "grad_norm": 2.1289190104505993, "learning_rate": 3.568117109823668e-06, "loss": 0.7337, "step": 9447 }, { "epoch": 0.6, "grad_norm": 1.6567384436042505, "learning_rate": 3.5671239989312726e-06, "loss": 0.7025, "step": 9448 }, { "epoch": 0.6, "grad_norm": 1.6119057627455227, "learning_rate": 3.5661309496208785e-06, "loss": 0.7196, "step": 9449 }, { "epoch": 0.6, "grad_norm": 1.6547139983487142, "learning_rate": 3.5651379619351624e-06, "loss": 0.6688, "step": 9450 }, { "epoch": 0.6, "grad_norm": 1.7327597345248245, "learning_rate": 3.564145035916803e-06, "loss": 0.7895, "step": 9451 }, { "epoch": 0.6, "grad_norm": 1.226090596917198, "learning_rate": 3.5631521716084715e-06, "loss": 0.7059, "step": 9452 }, { "epoch": 0.61, "grad_norm": 1.4952310619188638, "learning_rate": 3.56215936905284e-06, "loss": 0.7716, "step": 9453 }, { "epoch": 0.61, "grad_norm": 1.5810104905316211, "learning_rate": 3.5611666282925795e-06, "loss": 0.7576, "step": 9454 }, { "epoch": 0.61, "grad_norm": 1.7895300134735108, "learning_rate": 3.5601739493703517e-06, "loss": 0.8405, "step": 9455 }, { "epoch": 0.61, "grad_norm": 1.7518610412433575, "learning_rate": 3.559181332328824e-06, "loss": 0.6297, "step": 9456 }, { "epoch": 0.61, "grad_norm": 2.5788158539473995, "learning_rate": 3.5581887772106536e-06, "loss": 0.7432, "step": 9457 }, { "epoch": 0.61, "grad_norm": 1.5806806272188667, "learning_rate": 3.5571962840585013e-06, "loss": 0.633, "step": 9458 }, { "epoch": 0.61, "grad_norm": 1.7502381855583333, "learning_rate": 3.5562038529150187e-06, "loss": 0.7368, "step": 9459 }, { "epoch": 0.61, "grad_norm": 1.758976807145499, "learning_rate": 3.5552114838228634e-06, "loss": 0.6227, "step": 9460 }, { "epoch": 0.61, "grad_norm": 2.07758798251976, "learning_rate": 3.554219176824681e-06, "loss": 0.8039, "step": 9461 }, { "epoch": 0.61, "grad_norm": 1.2999031810863269, "learning_rate": 3.5532269319631198e-06, "loss": 0.7325, "step": 9462 }, { "epoch": 0.61, "grad_norm": 1.8621176369887937, "learning_rate": 3.5522347492808272e-06, "loss": 0.868, "step": 9463 }, { "epoch": 0.61, "grad_norm": 1.7641416301164419, "learning_rate": 3.5512426288204404e-06, "loss": 0.5965, "step": 9464 }, { "epoch": 0.61, "grad_norm": 1.6816296964063473, "learning_rate": 3.5502505706246027e-06, "loss": 0.6898, "step": 9465 }, { "epoch": 0.61, "grad_norm": 1.4817392528351276, "learning_rate": 3.5492585747359483e-06, "loss": 0.6333, "step": 9466 }, { "epoch": 0.61, "grad_norm": 1.1315967862437244, "learning_rate": 3.5482666411971134e-06, "loss": 0.6123, "step": 9467 }, { "epoch": 0.61, "grad_norm": 0.9629471503818856, "learning_rate": 3.547274770050726e-06, "loss": 0.6075, "step": 9468 }, { "epoch": 0.61, "grad_norm": 1.7237248391277926, "learning_rate": 3.5462829613394186e-06, "loss": 0.7769, "step": 9469 }, { "epoch": 0.61, "grad_norm": 2.805899107668096, "learning_rate": 3.545291215105813e-06, "loss": 0.7191, "step": 9470 }, { "epoch": 0.61, "grad_norm": 1.738293850655879, "learning_rate": 3.544299531392533e-06, "loss": 0.7021, "step": 9471 }, { "epoch": 0.61, "grad_norm": 1.6932618130246204, "learning_rate": 3.5433079102422024e-06, "loss": 0.932, "step": 9472 }, { "epoch": 0.61, "grad_norm": 1.8462384532542482, "learning_rate": 3.5423163516974356e-06, "loss": 0.679, "step": 9473 }, { "epoch": 0.61, "grad_norm": 1.6322735939172788, "learning_rate": 3.5413248558008505e-06, "loss": 0.7524, "step": 9474 }, { "epoch": 0.61, "grad_norm": 1.619581360948592, "learning_rate": 3.540333422595057e-06, "loss": 0.6406, "step": 9475 }, { "epoch": 0.61, "grad_norm": 2.1455955755158165, "learning_rate": 3.5393420521226675e-06, "loss": 0.7975, "step": 9476 }, { "epoch": 0.61, "grad_norm": 0.972033723451764, "learning_rate": 3.538350744426285e-06, "loss": 0.5786, "step": 9477 }, { "epoch": 0.61, "grad_norm": 1.7787180023479925, "learning_rate": 3.5373594995485173e-06, "loss": 0.7816, "step": 9478 }, { "epoch": 0.61, "grad_norm": 2.4142702508138303, "learning_rate": 3.5363683175319637e-06, "loss": 0.6576, "step": 9479 }, { "epoch": 0.61, "grad_norm": 1.6416176055628555, "learning_rate": 3.5353771984192243e-06, "loss": 0.7751, "step": 9480 }, { "epoch": 0.61, "grad_norm": 1.1204188912690458, "learning_rate": 3.5343861422528973e-06, "loss": 0.6315, "step": 9481 }, { "epoch": 0.61, "grad_norm": 1.5114089961912498, "learning_rate": 3.5333951490755723e-06, "loss": 0.7532, "step": 9482 }, { "epoch": 0.61, "grad_norm": 1.9937417851106656, "learning_rate": 3.5324042189298445e-06, "loss": 0.7837, "step": 9483 }, { "epoch": 0.61, "grad_norm": 2.6686743338104817, "learning_rate": 3.5314133518582972e-06, "loss": 0.6, "step": 9484 }, { "epoch": 0.61, "grad_norm": 1.858680017914238, "learning_rate": 3.5304225479035193e-06, "loss": 0.6589, "step": 9485 }, { "epoch": 0.61, "grad_norm": 1.5403339540856325, "learning_rate": 3.5294318071080906e-06, "loss": 0.7885, "step": 9486 }, { "epoch": 0.61, "grad_norm": 1.7000723040659427, "learning_rate": 3.5284411295145954e-06, "loss": 0.701, "step": 9487 }, { "epoch": 0.61, "grad_norm": 1.6297153853809927, "learning_rate": 3.527450515165606e-06, "loss": 0.7743, "step": 9488 }, { "epoch": 0.61, "grad_norm": 1.5413972229220847, "learning_rate": 3.526459964103701e-06, "loss": 0.7158, "step": 9489 }, { "epoch": 0.61, "grad_norm": 1.5149433186255745, "learning_rate": 3.525469476371449e-06, "loss": 0.6171, "step": 9490 }, { "epoch": 0.61, "grad_norm": 1.856817527998794, "learning_rate": 3.5244790520114202e-06, "loss": 0.891, "step": 9491 }, { "epoch": 0.61, "grad_norm": 1.2170563631105804, "learning_rate": 3.5234886910661825e-06, "loss": 0.7156, "step": 9492 }, { "epoch": 0.61, "grad_norm": 1.2876664438351004, "learning_rate": 3.5224983935782975e-06, "loss": 0.5617, "step": 9493 }, { "epoch": 0.61, "grad_norm": 1.1474739116078443, "learning_rate": 3.521508159590328e-06, "loss": 0.7348, "step": 9494 }, { "epoch": 0.61, "grad_norm": 2.044039261302142, "learning_rate": 3.5205179891448294e-06, "loss": 0.709, "step": 9495 }, { "epoch": 0.61, "grad_norm": 1.6736682248472197, "learning_rate": 3.5195278822843605e-06, "loss": 0.7095, "step": 9496 }, { "epoch": 0.61, "grad_norm": 1.6037627975313504, "learning_rate": 3.518537839051471e-06, "loss": 0.7068, "step": 9497 }, { "epoch": 0.61, "grad_norm": 1.506042722778361, "learning_rate": 3.5175478594887125e-06, "loss": 0.7447, "step": 9498 }, { "epoch": 0.61, "grad_norm": 1.5537456969807282, "learning_rate": 3.5165579436386323e-06, "loss": 0.6333, "step": 9499 }, { "epoch": 0.61, "grad_norm": 1.5060645733196267, "learning_rate": 3.515568091543774e-06, "loss": 0.6744, "step": 9500 }, { "epoch": 0.61, "grad_norm": 1.7243332987012785, "learning_rate": 3.51457830324668e-06, "loss": 0.7153, "step": 9501 }, { "epoch": 0.61, "grad_norm": 1.0564120684830276, "learning_rate": 3.5135885787898887e-06, "loss": 0.6339, "step": 9502 }, { "epoch": 0.61, "grad_norm": 1.6504641291052036, "learning_rate": 3.5125989182159393e-06, "loss": 0.6816, "step": 9503 }, { "epoch": 0.61, "grad_norm": 1.909286456768706, "learning_rate": 3.5116093215673603e-06, "loss": 0.753, "step": 9504 }, { "epoch": 0.61, "grad_norm": 2.4124455900356225, "learning_rate": 3.5106197888866873e-06, "loss": 0.7421, "step": 9505 }, { "epoch": 0.61, "grad_norm": 1.110280437576124, "learning_rate": 3.5096303202164437e-06, "loss": 0.5889, "step": 9506 }, { "epoch": 0.61, "grad_norm": 1.6948885775540692, "learning_rate": 3.508640915599159e-06, "loss": 0.577, "step": 9507 }, { "epoch": 0.61, "grad_norm": 1.4233293445648885, "learning_rate": 3.5076515750773533e-06, "loss": 0.5313, "step": 9508 }, { "epoch": 0.61, "grad_norm": 1.6185602325273312, "learning_rate": 3.506662298693546e-06, "loss": 0.7565, "step": 9509 }, { "epoch": 0.61, "grad_norm": 1.7988862796455851, "learning_rate": 3.5056730864902577e-06, "loss": 0.8239, "step": 9510 }, { "epoch": 0.61, "grad_norm": 1.9321780792998255, "learning_rate": 3.5046839385099977e-06, "loss": 0.745, "step": 9511 }, { "epoch": 0.61, "grad_norm": 1.7156676299419122, "learning_rate": 3.5036948547952824e-06, "loss": 0.7476, "step": 9512 }, { "epoch": 0.61, "grad_norm": 1.7018853183189149, "learning_rate": 3.502705835388616e-06, "loss": 0.708, "step": 9513 }, { "epoch": 0.61, "grad_norm": 1.2168272168273657, "learning_rate": 3.5017168803325076e-06, "loss": 0.6881, "step": 9514 }, { "epoch": 0.61, "grad_norm": 2.033780924831037, "learning_rate": 3.500727989669458e-06, "loss": 0.7458, "step": 9515 }, { "epoch": 0.61, "grad_norm": 1.9330599710878154, "learning_rate": 3.499739163441971e-06, "loss": 0.7851, "step": 9516 }, { "epoch": 0.61, "grad_norm": 1.1253005780499872, "learning_rate": 3.498750401692541e-06, "loss": 0.6999, "step": 9517 }, { "epoch": 0.61, "grad_norm": 1.847557015111128, "learning_rate": 3.4977617044636635e-06, "loss": 0.7418, "step": 9518 }, { "epoch": 0.61, "grad_norm": 2.1596665506956216, "learning_rate": 3.496773071797834e-06, "loss": 0.7237, "step": 9519 }, { "epoch": 0.61, "grad_norm": 1.7181274503326192, "learning_rate": 3.495784503737536e-06, "loss": 0.8072, "step": 9520 }, { "epoch": 0.61, "grad_norm": 1.8503421002019715, "learning_rate": 3.4947960003252614e-06, "loss": 0.6749, "step": 9521 }, { "epoch": 0.61, "grad_norm": 1.889549169855429, "learning_rate": 3.4938075616034903e-06, "loss": 0.6722, "step": 9522 }, { "epoch": 0.61, "grad_norm": 1.4524495325187503, "learning_rate": 3.492819187614707e-06, "loss": 0.7432, "step": 9523 }, { "epoch": 0.61, "grad_norm": 1.8606071865179479, "learning_rate": 3.4918308784013866e-06, "loss": 0.7077, "step": 9524 }, { "epoch": 0.61, "grad_norm": 1.5757713598540826, "learning_rate": 3.4908426340060075e-06, "loss": 0.6922, "step": 9525 }, { "epoch": 0.61, "grad_norm": 3.6595766926538955, "learning_rate": 3.489854454471039e-06, "loss": 0.7576, "step": 9526 }, { "epoch": 0.61, "grad_norm": 1.7310854297081537, "learning_rate": 3.488866339838953e-06, "loss": 0.7829, "step": 9527 }, { "epoch": 0.61, "grad_norm": 1.7070064388353907, "learning_rate": 3.487878290152217e-06, "loss": 0.8061, "step": 9528 }, { "epoch": 0.61, "grad_norm": 1.5455789578503778, "learning_rate": 3.4868903054532934e-06, "loss": 0.6869, "step": 9529 }, { "epoch": 0.61, "grad_norm": 1.5582328208082574, "learning_rate": 3.4859023857846473e-06, "loss": 0.6528, "step": 9530 }, { "epoch": 0.61, "grad_norm": 1.7645233327840744, "learning_rate": 3.4849145311887335e-06, "loss": 0.7743, "step": 9531 }, { "epoch": 0.61, "grad_norm": 0.9222452428010691, "learning_rate": 3.4839267417080113e-06, "loss": 0.5672, "step": 9532 }, { "epoch": 0.61, "grad_norm": 5.000788081272738, "learning_rate": 3.4829390173849296e-06, "loss": 0.7526, "step": 9533 }, { "epoch": 0.61, "grad_norm": 1.4022704252453395, "learning_rate": 3.481951358261942e-06, "loss": 0.6565, "step": 9534 }, { "epoch": 0.61, "grad_norm": 1.7664364843497182, "learning_rate": 3.4809637643814965e-06, "loss": 0.6303, "step": 9535 }, { "epoch": 0.61, "grad_norm": 1.9608136612613551, "learning_rate": 3.4799762357860343e-06, "loss": 0.6432, "step": 9536 }, { "epoch": 0.61, "grad_norm": 1.5883604352877072, "learning_rate": 3.478988772518001e-06, "loss": 0.7846, "step": 9537 }, { "epoch": 0.61, "grad_norm": 1.4684813086077104, "learning_rate": 3.4780013746198326e-06, "loss": 0.7026, "step": 9538 }, { "epoch": 0.61, "grad_norm": 2.472313251810277, "learning_rate": 3.47701404213397e-06, "loss": 0.7169, "step": 9539 }, { "epoch": 0.61, "grad_norm": 0.9997352868697303, "learning_rate": 3.476026775102841e-06, "loss": 0.5927, "step": 9540 }, { "epoch": 0.61, "grad_norm": 1.8419704679100712, "learning_rate": 3.475039573568881e-06, "loss": 0.7215, "step": 9541 }, { "epoch": 0.61, "grad_norm": 1.5853721239868828, "learning_rate": 3.4740524375745133e-06, "loss": 0.6395, "step": 9542 }, { "epoch": 0.61, "grad_norm": 1.8049922442904027, "learning_rate": 3.4730653671621667e-06, "loss": 0.742, "step": 9543 }, { "epoch": 0.61, "grad_norm": 2.2524371215412704, "learning_rate": 3.472078362374261e-06, "loss": 0.8402, "step": 9544 }, { "epoch": 0.61, "grad_norm": 1.5769891431154097, "learning_rate": 3.4710914232532167e-06, "loss": 0.7413, "step": 9545 }, { "epoch": 0.61, "grad_norm": 1.9757804103991172, "learning_rate": 3.470104549841452e-06, "loss": 0.7932, "step": 9546 }, { "epoch": 0.61, "grad_norm": 1.7650880663885733, "learning_rate": 3.4691177421813783e-06, "loss": 0.7375, "step": 9547 }, { "epoch": 0.61, "grad_norm": 1.6137015770699823, "learning_rate": 3.4681310003154076e-06, "loss": 0.7026, "step": 9548 }, { "epoch": 0.61, "grad_norm": 1.268404217343142, "learning_rate": 3.4671443242859465e-06, "loss": 0.6641, "step": 9549 }, { "epoch": 0.61, "grad_norm": 1.6259435318073119, "learning_rate": 3.466157714135402e-06, "loss": 0.7109, "step": 9550 }, { "epoch": 0.61, "grad_norm": 1.7712424802197966, "learning_rate": 3.465171169906175e-06, "loss": 0.8315, "step": 9551 }, { "epoch": 0.61, "grad_norm": 1.4165740333411203, "learning_rate": 3.4641846916406685e-06, "loss": 0.6726, "step": 9552 }, { "epoch": 0.61, "grad_norm": 1.648128888270021, "learning_rate": 3.4631982793812745e-06, "loss": 0.6431, "step": 9553 }, { "epoch": 0.61, "grad_norm": 1.6721043883837812, "learning_rate": 3.4622119331703884e-06, "loss": 0.7802, "step": 9554 }, { "epoch": 0.61, "grad_norm": 1.8245654170374561, "learning_rate": 3.461225653050405e-06, "loss": 0.6258, "step": 9555 }, { "epoch": 0.61, "grad_norm": 1.9597381873165916, "learning_rate": 3.4602394390637074e-06, "loss": 0.6443, "step": 9556 }, { "epoch": 0.61, "grad_norm": 1.2072217253065782, "learning_rate": 3.4592532912526845e-06, "loss": 0.6267, "step": 9557 }, { "epoch": 0.61, "grad_norm": 1.2669566169081647, "learning_rate": 3.458267209659716e-06, "loss": 0.688, "step": 9558 }, { "epoch": 0.61, "grad_norm": 1.987147166168119, "learning_rate": 3.4572811943271856e-06, "loss": 0.7886, "step": 9559 }, { "epoch": 0.61, "grad_norm": 2.056357297336852, "learning_rate": 3.456295245297465e-06, "loss": 0.829, "step": 9560 }, { "epoch": 0.61, "grad_norm": 1.6498077590763913, "learning_rate": 3.4553093626129343e-06, "loss": 0.7568, "step": 9561 }, { "epoch": 0.61, "grad_norm": 2.051877136598466, "learning_rate": 3.4543235463159587e-06, "loss": 0.8019, "step": 9562 }, { "epoch": 0.61, "grad_norm": 1.4998480254383382, "learning_rate": 3.45333779644891e-06, "loss": 0.696, "step": 9563 }, { "epoch": 0.61, "grad_norm": 1.6469014232655361, "learning_rate": 3.4523521130541517e-06, "loss": 0.7664, "step": 9564 }, { "epoch": 0.61, "grad_norm": 1.133054318290636, "learning_rate": 3.451366496174048e-06, "loss": 0.7291, "step": 9565 }, { "epoch": 0.61, "grad_norm": 1.7240239141659637, "learning_rate": 3.4503809458509596e-06, "loss": 0.6959, "step": 9566 }, { "epoch": 0.61, "grad_norm": 1.8907805952021142, "learning_rate": 3.4493954621272407e-06, "loss": 0.7258, "step": 9567 }, { "epoch": 0.61, "grad_norm": 1.5850638674022064, "learning_rate": 3.448410045045248e-06, "loss": 0.6631, "step": 9568 }, { "epoch": 0.61, "grad_norm": 1.7498284858550233, "learning_rate": 3.447424694647329e-06, "loss": 0.7813, "step": 9569 }, { "epoch": 0.61, "grad_norm": 1.6672884130248151, "learning_rate": 3.446439410975836e-06, "loss": 0.7413, "step": 9570 }, { "epoch": 0.61, "grad_norm": 2.2999398638272903, "learning_rate": 3.445454194073111e-06, "loss": 0.7752, "step": 9571 }, { "epoch": 0.61, "grad_norm": 2.225979032461261, "learning_rate": 3.4444690439815005e-06, "loss": 0.7965, "step": 9572 }, { "epoch": 0.61, "grad_norm": 1.5518477565031807, "learning_rate": 3.4434839607433396e-06, "loss": 0.784, "step": 9573 }, { "epoch": 0.61, "grad_norm": 1.7243150162494938, "learning_rate": 3.4424989444009677e-06, "loss": 0.7998, "step": 9574 }, { "epoch": 0.61, "grad_norm": 1.578437985624386, "learning_rate": 3.4415139949967203e-06, "loss": 0.7717, "step": 9575 }, { "epoch": 0.61, "grad_norm": 1.7061230637961993, "learning_rate": 3.4405291125729247e-06, "loss": 0.6971, "step": 9576 }, { "epoch": 0.61, "grad_norm": 1.5494917005076496, "learning_rate": 3.439544297171913e-06, "loss": 0.8754, "step": 9577 }, { "epoch": 0.61, "grad_norm": 1.0966007438380965, "learning_rate": 3.4385595488360056e-06, "loss": 0.595, "step": 9578 }, { "epoch": 0.61, "grad_norm": 1.4828271034489724, "learning_rate": 3.437574867607529e-06, "loss": 0.5491, "step": 9579 }, { "epoch": 0.61, "grad_norm": 1.192375140005737, "learning_rate": 3.436590253528801e-06, "loss": 0.7022, "step": 9580 }, { "epoch": 0.61, "grad_norm": 1.6557228220826323, "learning_rate": 3.435605706642141e-06, "loss": 0.9867, "step": 9581 }, { "epoch": 0.61, "grad_norm": 1.4524568633881616, "learning_rate": 3.434621226989858e-06, "loss": 0.712, "step": 9582 }, { "epoch": 0.61, "grad_norm": 1.9212036711615246, "learning_rate": 3.4336368146142653e-06, "loss": 0.6237, "step": 9583 }, { "epoch": 0.61, "grad_norm": 1.674210742347457, "learning_rate": 3.4326524695576734e-06, "loss": 0.6724, "step": 9584 }, { "epoch": 0.61, "grad_norm": 1.6856254399669146, "learning_rate": 3.4316681918623825e-06, "loss": 0.6289, "step": 9585 }, { "epoch": 0.61, "grad_norm": 2.243618888994748, "learning_rate": 3.4306839815706985e-06, "loss": 0.7826, "step": 9586 }, { "epoch": 0.61, "grad_norm": 1.5268502225357103, "learning_rate": 3.4296998387249175e-06, "loss": 0.7181, "step": 9587 }, { "epoch": 0.61, "grad_norm": 1.1533989652998375, "learning_rate": 3.428715763367341e-06, "loss": 0.6543, "step": 9588 }, { "epoch": 0.61, "grad_norm": 1.8488090436660196, "learning_rate": 3.427731755540256e-06, "loss": 0.73, "step": 9589 }, { "epoch": 0.61, "grad_norm": 1.7620725891114781, "learning_rate": 3.426747815285959e-06, "loss": 0.8237, "step": 9590 }, { "epoch": 0.61, "grad_norm": 1.2445250763483093, "learning_rate": 3.425763942646733e-06, "loss": 0.6208, "step": 9591 }, { "epoch": 0.61, "grad_norm": 2.6711400314204856, "learning_rate": 3.424780137664865e-06, "loss": 0.8088, "step": 9592 }, { "epoch": 0.61, "grad_norm": 2.092164954172822, "learning_rate": 3.423796400382637e-06, "loss": 0.7409, "step": 9593 }, { "epoch": 0.61, "grad_norm": 1.701597741376765, "learning_rate": 3.4228127308423276e-06, "loss": 0.7078, "step": 9594 }, { "epoch": 0.61, "grad_norm": 1.6011189077533956, "learning_rate": 3.421829129086215e-06, "loss": 0.6414, "step": 9595 }, { "epoch": 0.61, "grad_norm": 1.6586242468232046, "learning_rate": 3.420845595156568e-06, "loss": 0.8158, "step": 9596 }, { "epoch": 0.61, "grad_norm": 1.678673980801288, "learning_rate": 3.4198621290956603e-06, "loss": 0.7785, "step": 9597 }, { "epoch": 0.61, "grad_norm": 1.9547137951486, "learning_rate": 3.4188787309457565e-06, "loss": 0.7403, "step": 9598 }, { "epoch": 0.61, "grad_norm": 1.5002364570586983, "learning_rate": 3.4178954007491237e-06, "loss": 0.6919, "step": 9599 }, { "epoch": 0.61, "grad_norm": 1.6371026588624078, "learning_rate": 3.416912138548021e-06, "loss": 0.7518, "step": 9600 }, { "epoch": 0.61, "grad_norm": 1.6666898634259137, "learning_rate": 3.4159289443847077e-06, "loss": 0.7031, "step": 9601 }, { "epoch": 0.61, "grad_norm": 1.0712767956031313, "learning_rate": 3.4149458183014418e-06, "loss": 0.6609, "step": 9602 }, { "epoch": 0.61, "grad_norm": 1.4350978159129382, "learning_rate": 3.4139627603404724e-06, "loss": 0.5236, "step": 9603 }, { "epoch": 0.61, "grad_norm": 1.9530870378067893, "learning_rate": 3.4129797705440525e-06, "loss": 0.6193, "step": 9604 }, { "epoch": 0.61, "grad_norm": 1.6431643769486066, "learning_rate": 3.4119968489544254e-06, "loss": 0.6752, "step": 9605 }, { "epoch": 0.61, "grad_norm": 1.5615781665009214, "learning_rate": 3.4110139956138377e-06, "loss": 0.7505, "step": 9606 }, { "epoch": 0.61, "grad_norm": 1.1848334692482463, "learning_rate": 3.4100312105645283e-06, "loss": 0.6445, "step": 9607 }, { "epoch": 0.61, "grad_norm": 1.7165829166747315, "learning_rate": 3.409048493848739e-06, "loss": 0.7853, "step": 9608 }, { "epoch": 0.62, "grad_norm": 1.4827739015387293, "learning_rate": 3.4080658455087e-06, "loss": 0.6719, "step": 9609 }, { "epoch": 0.62, "grad_norm": 1.0570505249177953, "learning_rate": 3.4070832655866467e-06, "loss": 0.6215, "step": 9610 }, { "epoch": 0.62, "grad_norm": 1.4472786441056742, "learning_rate": 3.4061007541248093e-06, "loss": 0.6931, "step": 9611 }, { "epoch": 0.62, "grad_norm": 1.1748090761174, "learning_rate": 3.4051183111654097e-06, "loss": 0.6442, "step": 9612 }, { "epoch": 0.62, "grad_norm": 1.597650441577392, "learning_rate": 3.404135936750674e-06, "loss": 0.7011, "step": 9613 }, { "epoch": 0.62, "grad_norm": 1.3801540639964063, "learning_rate": 3.403153630922824e-06, "loss": 0.664, "step": 9614 }, { "epoch": 0.62, "grad_norm": 1.7699186125770852, "learning_rate": 3.4021713937240748e-06, "loss": 0.763, "step": 9615 }, { "epoch": 0.62, "grad_norm": 1.939010530454831, "learning_rate": 3.4011892251966403e-06, "loss": 0.685, "step": 9616 }, { "epoch": 0.62, "grad_norm": 1.8402885851175221, "learning_rate": 3.4002071253827356e-06, "loss": 0.6784, "step": 9617 }, { "epoch": 0.62, "grad_norm": 1.7577781006658377, "learning_rate": 3.3992250943245654e-06, "loss": 0.7409, "step": 9618 }, { "epoch": 0.62, "grad_norm": 1.5741845365988316, "learning_rate": 3.398243132064336e-06, "loss": 0.774, "step": 9619 }, { "epoch": 0.62, "grad_norm": 1.6574892017498177, "learning_rate": 3.397261238644254e-06, "loss": 0.723, "step": 9620 }, { "epoch": 0.62, "grad_norm": 1.7419039085965862, "learning_rate": 3.3962794141065136e-06, "loss": 0.648, "step": 9621 }, { "epoch": 0.62, "grad_norm": 1.0532938171929358, "learning_rate": 3.395297658493315e-06, "loss": 0.6363, "step": 9622 }, { "epoch": 0.62, "grad_norm": 1.902301474741716, "learning_rate": 3.3943159718468503e-06, "loss": 0.6812, "step": 9623 }, { "epoch": 0.62, "grad_norm": 1.581633848751703, "learning_rate": 3.3933343542093134e-06, "loss": 0.8239, "step": 9624 }, { "epoch": 0.62, "grad_norm": 1.178070589540287, "learning_rate": 3.392352805622888e-06, "loss": 0.6813, "step": 9625 }, { "epoch": 0.62, "grad_norm": 1.8059969845451698, "learning_rate": 3.391371326129762e-06, "loss": 0.76, "step": 9626 }, { "epoch": 0.62, "grad_norm": 2.108218021215444, "learning_rate": 3.3903899157721156e-06, "loss": 0.7592, "step": 9627 }, { "epoch": 0.62, "grad_norm": 1.621713030448759, "learning_rate": 3.3894085745921278e-06, "loss": 0.8355, "step": 9628 }, { "epoch": 0.62, "grad_norm": 1.6122954027602223, "learning_rate": 3.388427302631975e-06, "loss": 0.7274, "step": 9629 }, { "epoch": 0.62, "grad_norm": 1.6606508124868424, "learning_rate": 3.3874460999338297e-06, "loss": 0.7482, "step": 9630 }, { "epoch": 0.62, "grad_norm": 1.712891021961002, "learning_rate": 3.386464966539865e-06, "loss": 0.814, "step": 9631 }, { "epoch": 0.62, "grad_norm": 1.839963327743016, "learning_rate": 3.385483902492244e-06, "loss": 0.7126, "step": 9632 }, { "epoch": 0.62, "grad_norm": 1.1594361072650368, "learning_rate": 3.3845029078331344e-06, "loss": 0.6147, "step": 9633 }, { "epoch": 0.62, "grad_norm": 4.093797616725728, "learning_rate": 3.383521982604693e-06, "loss": 0.6499, "step": 9634 }, { "epoch": 0.62, "grad_norm": 1.6016214048540809, "learning_rate": 3.3825411268490803e-06, "loss": 0.7541, "step": 9635 }, { "epoch": 0.62, "grad_norm": 1.6964651087184761, "learning_rate": 3.3815603406084505e-06, "loss": 0.709, "step": 9636 }, { "epoch": 0.62, "grad_norm": 1.9548833825325689, "learning_rate": 3.380579623924959e-06, "loss": 0.7469, "step": 9637 }, { "epoch": 0.62, "grad_norm": 1.8984730862734938, "learning_rate": 3.37959897684075e-06, "loss": 0.7409, "step": 9638 }, { "epoch": 0.62, "grad_norm": 1.94739549326048, "learning_rate": 3.378618399397972e-06, "loss": 0.803, "step": 9639 }, { "epoch": 0.62, "grad_norm": 2.7615541471424954, "learning_rate": 3.377637891638771e-06, "loss": 0.7746, "step": 9640 }, { "epoch": 0.62, "grad_norm": 1.9293455607708876, "learning_rate": 3.3766574536052808e-06, "loss": 0.6892, "step": 9641 }, { "epoch": 0.62, "grad_norm": 1.5476205864497223, "learning_rate": 3.375677085339645e-06, "loss": 0.6767, "step": 9642 }, { "epoch": 0.62, "grad_norm": 1.199097414531893, "learning_rate": 3.374696786883992e-06, "loss": 0.6946, "step": 9643 }, { "epoch": 0.62, "grad_norm": 1.574914731758665, "learning_rate": 3.3737165582804587e-06, "loss": 0.7449, "step": 9644 }, { "epoch": 0.62, "grad_norm": 1.6271627183465476, "learning_rate": 3.3727363995711695e-06, "loss": 0.7641, "step": 9645 }, { "epoch": 0.62, "grad_norm": 0.9478714186219104, "learning_rate": 3.371756310798251e-06, "loss": 0.5874, "step": 9646 }, { "epoch": 0.62, "grad_norm": 1.8315182249730342, "learning_rate": 3.3707762920038235e-06, "loss": 0.7483, "step": 9647 }, { "epoch": 0.62, "grad_norm": 1.7292982116764062, "learning_rate": 3.3697963432300074e-06, "loss": 0.8971, "step": 9648 }, { "epoch": 0.62, "grad_norm": 2.353484602048459, "learning_rate": 3.3688164645189198e-06, "loss": 0.8208, "step": 9649 }, { "epoch": 0.62, "grad_norm": 1.5686314503320475, "learning_rate": 3.3678366559126728e-06, "loss": 0.6898, "step": 9650 }, { "epoch": 0.62, "grad_norm": 1.5446311820659262, "learning_rate": 3.366856917453377e-06, "loss": 0.8306, "step": 9651 }, { "epoch": 0.62, "grad_norm": 1.5477672836799166, "learning_rate": 3.365877249183138e-06, "loss": 0.6083, "step": 9652 }, { "epoch": 0.62, "grad_norm": 1.5836881063345318, "learning_rate": 3.3648976511440636e-06, "loss": 0.7914, "step": 9653 }, { "epoch": 0.62, "grad_norm": 1.611877029970015, "learning_rate": 3.3639181233782496e-06, "loss": 0.7363, "step": 9654 }, { "epoch": 0.62, "grad_norm": 1.8078839699905749, "learning_rate": 3.3629386659277984e-06, "loss": 0.7133, "step": 9655 }, { "epoch": 0.62, "grad_norm": 1.9886505694637033, "learning_rate": 3.361959278834803e-06, "loss": 0.7694, "step": 9656 }, { "epoch": 0.62, "grad_norm": 1.5214190079738095, "learning_rate": 3.3609799621413554e-06, "loss": 0.8276, "step": 9657 }, { "epoch": 0.62, "grad_norm": 1.4728637963468216, "learning_rate": 3.3600007158895453e-06, "loss": 0.7837, "step": 9658 }, { "epoch": 0.62, "grad_norm": 1.4978859914287233, "learning_rate": 3.359021540121457e-06, "loss": 0.7094, "step": 9659 }, { "epoch": 0.62, "grad_norm": 1.708400471400347, "learning_rate": 3.3580424348791773e-06, "loss": 0.5975, "step": 9660 }, { "epoch": 0.62, "grad_norm": 2.4192215702314397, "learning_rate": 3.3570634002047815e-06, "loss": 0.7711, "step": 9661 }, { "epoch": 0.62, "grad_norm": 1.52433177099388, "learning_rate": 3.3560844361403506e-06, "loss": 0.6678, "step": 9662 }, { "epoch": 0.62, "grad_norm": 1.0874548533311246, "learning_rate": 3.355105542727954e-06, "loss": 0.6204, "step": 9663 }, { "epoch": 0.62, "grad_norm": 1.796660846552236, "learning_rate": 3.354126720009666e-06, "loss": 0.8195, "step": 9664 }, { "epoch": 0.62, "grad_norm": 1.6708362417899085, "learning_rate": 3.353147968027552e-06, "loss": 0.6263, "step": 9665 }, { "epoch": 0.62, "grad_norm": 1.255685020839998, "learning_rate": 3.3521692868236777e-06, "loss": 0.6, "step": 9666 }, { "epoch": 0.62, "grad_norm": 1.7952726548134756, "learning_rate": 3.351190676440107e-06, "loss": 0.7507, "step": 9667 }, { "epoch": 0.62, "grad_norm": 1.87017925485025, "learning_rate": 3.3502121369188945e-06, "loss": 0.7905, "step": 9668 }, { "epoch": 0.62, "grad_norm": 1.8096240264947854, "learning_rate": 3.3492336683021e-06, "loss": 0.738, "step": 9669 }, { "epoch": 0.62, "grad_norm": 1.225235190459417, "learning_rate": 3.3482552706317705e-06, "loss": 0.5741, "step": 9670 }, { "epoch": 0.62, "grad_norm": 1.8154879133516766, "learning_rate": 3.347276943949961e-06, "loss": 0.7162, "step": 9671 }, { "epoch": 0.62, "grad_norm": 1.151567640279287, "learning_rate": 3.3462986882987137e-06, "loss": 0.5736, "step": 9672 }, { "epoch": 0.62, "grad_norm": 1.8560383045320394, "learning_rate": 3.3453205037200766e-06, "loss": 0.6373, "step": 9673 }, { "epoch": 0.62, "grad_norm": 1.964299617761105, "learning_rate": 3.3443423902560845e-06, "loss": 0.8012, "step": 9674 }, { "epoch": 0.62, "grad_norm": 1.9324014904336329, "learning_rate": 3.3433643479487777e-06, "loss": 0.8632, "step": 9675 }, { "epoch": 0.62, "grad_norm": 2.061970999169557, "learning_rate": 3.342386376840193e-06, "loss": 0.6967, "step": 9676 }, { "epoch": 0.62, "grad_norm": 1.4325562689714422, "learning_rate": 3.3414084769723554e-06, "loss": 0.6784, "step": 9677 }, { "epoch": 0.62, "grad_norm": 1.6975795504365876, "learning_rate": 3.3404306483872982e-06, "loss": 0.7104, "step": 9678 }, { "epoch": 0.62, "grad_norm": 1.6432899528691187, "learning_rate": 3.339452891127043e-06, "loss": 0.7938, "step": 9679 }, { "epoch": 0.62, "grad_norm": 1.651030074361749, "learning_rate": 3.3384752052336155e-06, "loss": 0.7057, "step": 9680 }, { "epoch": 0.62, "grad_norm": 1.580756822885765, "learning_rate": 3.33749759074903e-06, "loss": 0.6963, "step": 9681 }, { "epoch": 0.62, "grad_norm": 1.8969680122007437, "learning_rate": 3.336520047715307e-06, "loss": 0.7086, "step": 9682 }, { "epoch": 0.62, "grad_norm": 1.5682392718104756, "learning_rate": 3.3355425761744553e-06, "loss": 0.7234, "step": 9683 }, { "epoch": 0.62, "grad_norm": 2.1096537045260186, "learning_rate": 3.3345651761684856e-06, "loss": 0.7292, "step": 9684 }, { "epoch": 0.62, "grad_norm": 1.4616047367293366, "learning_rate": 3.3335878477394058e-06, "loss": 0.7218, "step": 9685 }, { "epoch": 0.62, "grad_norm": 1.9772795732305164, "learning_rate": 3.3326105909292194e-06, "loss": 0.7003, "step": 9686 }, { "epoch": 0.62, "grad_norm": 1.7176474156715154, "learning_rate": 3.331633405779926e-06, "loss": 0.6989, "step": 9687 }, { "epoch": 0.62, "grad_norm": 1.7228462918888894, "learning_rate": 3.3306562923335218e-06, "loss": 0.6835, "step": 9688 }, { "epoch": 0.62, "grad_norm": 1.873926154519509, "learning_rate": 3.3296792506320043e-06, "loss": 0.7795, "step": 9689 }, { "epoch": 0.62, "grad_norm": 1.5659423925065243, "learning_rate": 3.328702280717361e-06, "loss": 0.7365, "step": 9690 }, { "epoch": 0.62, "grad_norm": 1.6566746475537082, "learning_rate": 3.3277253826315824e-06, "loss": 0.6603, "step": 9691 }, { "epoch": 0.62, "grad_norm": 1.6581698016394546, "learning_rate": 3.3267485564166536e-06, "loss": 0.7638, "step": 9692 }, { "epoch": 0.62, "grad_norm": 1.9760332273450396, "learning_rate": 3.325771802114555e-06, "loss": 0.7204, "step": 9693 }, { "epoch": 0.62, "grad_norm": 1.6343748903409427, "learning_rate": 3.3247951197672663e-06, "loss": 0.695, "step": 9694 }, { "epoch": 0.62, "grad_norm": 2.177870818921682, "learning_rate": 3.323818509416763e-06, "loss": 0.7978, "step": 9695 }, { "epoch": 0.62, "grad_norm": 1.0286859508898132, "learning_rate": 3.32284197110502e-06, "loss": 0.6362, "step": 9696 }, { "epoch": 0.62, "grad_norm": 1.7107678845179697, "learning_rate": 3.321865504874002e-06, "loss": 0.7467, "step": 9697 }, { "epoch": 0.62, "grad_norm": 1.6150652104054433, "learning_rate": 3.3208891107656817e-06, "loss": 0.6614, "step": 9698 }, { "epoch": 0.62, "grad_norm": 1.851260979567344, "learning_rate": 3.3199127888220162e-06, "loss": 0.7341, "step": 9699 }, { "epoch": 0.62, "grad_norm": 1.184465719517968, "learning_rate": 3.31893653908497e-06, "loss": 0.8405, "step": 9700 }, { "epoch": 0.62, "grad_norm": 1.9070971281463431, "learning_rate": 3.317960361596498e-06, "loss": 0.6339, "step": 9701 }, { "epoch": 0.62, "grad_norm": 1.6147794974952505, "learning_rate": 3.3169842563985568e-06, "loss": 0.6987, "step": 9702 }, { "epoch": 0.62, "grad_norm": 1.7890963145344738, "learning_rate": 3.3160082235330937e-06, "loss": 0.7088, "step": 9703 }, { "epoch": 0.62, "grad_norm": 1.6158700192881856, "learning_rate": 3.3150322630420597e-06, "loss": 0.7384, "step": 9704 }, { "epoch": 0.62, "grad_norm": 1.717759990565104, "learning_rate": 3.3140563749673994e-06, "loss": 0.7725, "step": 9705 }, { "epoch": 0.62, "grad_norm": 1.5836501999231458, "learning_rate": 3.313080559351052e-06, "loss": 0.6474, "step": 9706 }, { "epoch": 0.62, "grad_norm": 1.1109065534147093, "learning_rate": 3.3121048162349577e-06, "loss": 0.6598, "step": 9707 }, { "epoch": 0.62, "grad_norm": 1.8666190349031446, "learning_rate": 3.3111291456610517e-06, "loss": 0.7488, "step": 9708 }, { "epoch": 0.62, "grad_norm": 1.61580275906462, "learning_rate": 3.3101535476712675e-06, "loss": 0.7987, "step": 9709 }, { "epoch": 0.62, "grad_norm": 1.5265715384870524, "learning_rate": 3.309178022307531e-06, "loss": 0.6814, "step": 9710 }, { "epoch": 0.62, "grad_norm": 1.827372225271639, "learning_rate": 3.3082025696117723e-06, "loss": 0.7308, "step": 9711 }, { "epoch": 0.62, "grad_norm": 1.7408991593623822, "learning_rate": 3.30722718962591e-06, "loss": 0.7177, "step": 9712 }, { "epoch": 0.62, "grad_norm": 1.77150168396271, "learning_rate": 3.3062518823918664e-06, "loss": 0.725, "step": 9713 }, { "epoch": 0.62, "grad_norm": 1.679856096997496, "learning_rate": 3.3052766479515585e-06, "loss": 0.5714, "step": 9714 }, { "epoch": 0.62, "grad_norm": 1.5848647861722347, "learning_rate": 3.3043014863468985e-06, "loss": 0.5713, "step": 9715 }, { "epoch": 0.62, "grad_norm": 2.3627928912506895, "learning_rate": 3.303326397619799e-06, "loss": 0.7501, "step": 9716 }, { "epoch": 0.62, "grad_norm": 0.9568598915631983, "learning_rate": 3.3023513818121645e-06, "loss": 0.5567, "step": 9717 }, { "epoch": 0.62, "grad_norm": 1.0594895208610153, "learning_rate": 3.301376438965902e-06, "loss": 0.6208, "step": 9718 }, { "epoch": 0.62, "grad_norm": 1.5242650656494119, "learning_rate": 3.3004015691229086e-06, "loss": 0.7215, "step": 9719 }, { "epoch": 0.62, "grad_norm": 1.191842140098218, "learning_rate": 3.299426772325086e-06, "loss": 0.7033, "step": 9720 }, { "epoch": 0.62, "grad_norm": 1.7476097533931256, "learning_rate": 3.298452048614326e-06, "loss": 0.7126, "step": 9721 }, { "epoch": 0.62, "grad_norm": 1.6789724875234726, "learning_rate": 3.2974773980325216e-06, "loss": 0.7135, "step": 9722 }, { "epoch": 0.62, "grad_norm": 1.829896847273044, "learning_rate": 3.2965028206215627e-06, "loss": 0.7269, "step": 9723 }, { "epoch": 0.62, "grad_norm": 1.7286271192687621, "learning_rate": 3.295528316423331e-06, "loss": 0.6905, "step": 9724 }, { "epoch": 0.62, "grad_norm": 1.7041502270694866, "learning_rate": 3.2945538854797134e-06, "loss": 0.6362, "step": 9725 }, { "epoch": 0.62, "grad_norm": 2.462483785071844, "learning_rate": 3.293579527832584e-06, "loss": 0.7638, "step": 9726 }, { "epoch": 0.62, "grad_norm": 1.9696211156966865, "learning_rate": 3.292605243523821e-06, "loss": 0.7337, "step": 9727 }, { "epoch": 0.62, "grad_norm": 0.929618310003621, "learning_rate": 3.2916310325952976e-06, "loss": 0.7043, "step": 9728 }, { "epoch": 0.62, "grad_norm": 1.5493627556129332, "learning_rate": 3.2906568950888827e-06, "loss": 0.7833, "step": 9729 }, { "epoch": 0.62, "grad_norm": 1.5238118705780597, "learning_rate": 3.2896828310464412e-06, "loss": 0.5874, "step": 9730 }, { "epoch": 0.62, "grad_norm": 1.1061345272911327, "learning_rate": 3.288708840509838e-06, "loss": 0.6124, "step": 9731 }, { "epoch": 0.62, "grad_norm": 1.25044009767545, "learning_rate": 3.2877349235209355e-06, "loss": 0.7038, "step": 9732 }, { "epoch": 0.62, "grad_norm": 1.8439602059451015, "learning_rate": 3.286761080121585e-06, "loss": 0.9079, "step": 9733 }, { "epoch": 0.62, "grad_norm": 1.724898766369118, "learning_rate": 3.285787310353646e-06, "loss": 0.6525, "step": 9734 }, { "epoch": 0.62, "grad_norm": 1.2053909266722993, "learning_rate": 3.2848136142589637e-06, "loss": 0.6401, "step": 9735 }, { "epoch": 0.62, "grad_norm": 1.1524046435141364, "learning_rate": 3.2838399918793893e-06, "loss": 0.6587, "step": 9736 }, { "epoch": 0.62, "grad_norm": 1.9839369672641434, "learning_rate": 3.282866443256765e-06, "loss": 0.7596, "step": 9737 }, { "epoch": 0.62, "grad_norm": 1.9325155752033305, "learning_rate": 3.2818929684329352e-06, "loss": 0.7652, "step": 9738 }, { "epoch": 0.62, "grad_norm": 1.507761766885698, "learning_rate": 3.280919567449733e-06, "loss": 0.6282, "step": 9739 }, { "epoch": 0.62, "grad_norm": 1.6017552194434, "learning_rate": 3.2799462403489955e-06, "loss": 0.7994, "step": 9740 }, { "epoch": 0.62, "grad_norm": 1.6322924115344062, "learning_rate": 3.278972987172556e-06, "loss": 0.6673, "step": 9741 }, { "epoch": 0.62, "grad_norm": 1.6022535094911, "learning_rate": 3.27799980796224e-06, "loss": 0.7144, "step": 9742 }, { "epoch": 0.62, "grad_norm": 2.197313185437587, "learning_rate": 3.277026702759874e-06, "loss": 0.7772, "step": 9743 }, { "epoch": 0.62, "grad_norm": 1.0568613960389122, "learning_rate": 3.276053671607279e-06, "loss": 0.7556, "step": 9744 }, { "epoch": 0.62, "grad_norm": 1.6889539010199146, "learning_rate": 3.275080714546277e-06, "loss": 1.0728, "step": 9745 }, { "epoch": 0.62, "grad_norm": 1.189476363097244, "learning_rate": 3.274107831618679e-06, "loss": 0.5704, "step": 9746 }, { "epoch": 0.62, "grad_norm": 1.6852087937503564, "learning_rate": 3.2731350228663024e-06, "loss": 0.8776, "step": 9747 }, { "epoch": 0.62, "grad_norm": 1.1427984749139055, "learning_rate": 3.2721622883309512e-06, "loss": 0.6113, "step": 9748 }, { "epoch": 0.62, "grad_norm": 1.941978852653936, "learning_rate": 3.2711896280544343e-06, "loss": 0.7009, "step": 9749 }, { "epoch": 0.62, "grad_norm": 1.8839100543524672, "learning_rate": 3.2702170420785558e-06, "loss": 0.8866, "step": 9750 }, { "epoch": 0.62, "grad_norm": 1.781423680550614, "learning_rate": 3.2692445304451128e-06, "loss": 0.7439, "step": 9751 }, { "epoch": 0.62, "grad_norm": 1.9599291196242508, "learning_rate": 3.2682720931959043e-06, "loss": 0.7224, "step": 9752 }, { "epoch": 0.62, "grad_norm": 1.6768218214034885, "learning_rate": 3.2672997303727217e-06, "loss": 0.7912, "step": 9753 }, { "epoch": 0.62, "grad_norm": 1.7488091472527008, "learning_rate": 3.2663274420173576e-06, "loss": 0.6735, "step": 9754 }, { "epoch": 0.62, "grad_norm": 1.8479326469340929, "learning_rate": 3.265355228171594e-06, "loss": 0.7609, "step": 9755 }, { "epoch": 0.62, "grad_norm": 1.720215216730523, "learning_rate": 3.26438308887722e-06, "loss": 0.6898, "step": 9756 }, { "epoch": 0.62, "grad_norm": 1.6441250747659684, "learning_rate": 3.263411024176012e-06, "loss": 0.7122, "step": 9757 }, { "epoch": 0.62, "grad_norm": 1.6231242018909318, "learning_rate": 3.262439034109749e-06, "loss": 0.8461, "step": 9758 }, { "epoch": 0.62, "grad_norm": 1.592646196588509, "learning_rate": 3.2614671187202075e-06, "loss": 0.694, "step": 9759 }, { "epoch": 0.62, "grad_norm": 1.5415311603553798, "learning_rate": 3.2604952780491537e-06, "loss": 0.7365, "step": 9760 }, { "epoch": 0.62, "grad_norm": 1.6548269279172108, "learning_rate": 3.2595235121383608e-06, "loss": 0.6603, "step": 9761 }, { "epoch": 0.62, "grad_norm": 1.559838982880404, "learning_rate": 3.2585518210295873e-06, "loss": 0.7274, "step": 9762 }, { "epoch": 0.62, "grad_norm": 1.174733416052114, "learning_rate": 3.2575802047645977e-06, "loss": 0.6921, "step": 9763 }, { "epoch": 0.62, "grad_norm": 1.2033955486248689, "learning_rate": 3.2566086633851498e-06, "loss": 0.6588, "step": 9764 }, { "epoch": 0.63, "grad_norm": 1.2244177862770291, "learning_rate": 3.2556371969329992e-06, "loss": 0.7678, "step": 9765 }, { "epoch": 0.63, "grad_norm": 1.3992998131702985, "learning_rate": 3.2546658054498947e-06, "loss": 0.7382, "step": 9766 }, { "epoch": 0.63, "grad_norm": 1.6703096216996836, "learning_rate": 3.2536944889775868e-06, "loss": 0.6668, "step": 9767 }, { "epoch": 0.63, "grad_norm": 1.1651890608952198, "learning_rate": 3.252723247557822e-06, "loss": 0.7118, "step": 9768 }, { "epoch": 0.63, "grad_norm": 1.8170044740149844, "learning_rate": 3.2517520812323382e-06, "loss": 0.8709, "step": 9769 }, { "epoch": 0.63, "grad_norm": 1.829290209633981, "learning_rate": 3.2507809900428786e-06, "loss": 0.7432, "step": 9770 }, { "epoch": 0.63, "grad_norm": 1.5986715973935661, "learning_rate": 3.2498099740311752e-06, "loss": 0.6737, "step": 9771 }, { "epoch": 0.63, "grad_norm": 1.6180371922497956, "learning_rate": 3.2488390332389613e-06, "loss": 0.7133, "step": 9772 }, { "epoch": 0.63, "grad_norm": 1.8901734552011091, "learning_rate": 3.2478681677079655e-06, "loss": 0.6409, "step": 9773 }, { "epoch": 0.63, "grad_norm": 1.527752647478991, "learning_rate": 3.246897377479916e-06, "loss": 0.7161, "step": 9774 }, { "epoch": 0.63, "grad_norm": 1.6124744563787043, "learning_rate": 3.2459266625965315e-06, "loss": 0.7223, "step": 9775 }, { "epoch": 0.63, "grad_norm": 1.6529325738962795, "learning_rate": 3.2449560230995354e-06, "loss": 0.8241, "step": 9776 }, { "epoch": 0.63, "grad_norm": 1.8041837538770316, "learning_rate": 3.24398545903064e-06, "loss": 0.7177, "step": 9777 }, { "epoch": 0.63, "grad_norm": 1.7751855875084825, "learning_rate": 3.243014970431558e-06, "loss": 0.8163, "step": 9778 }, { "epoch": 0.63, "grad_norm": 1.1545092499223963, "learning_rate": 3.2420445573440027e-06, "loss": 0.575, "step": 9779 }, { "epoch": 0.63, "grad_norm": 1.0552805196089174, "learning_rate": 3.241074219809678e-06, "loss": 0.6106, "step": 9780 }, { "epoch": 0.63, "grad_norm": 2.171306275276002, "learning_rate": 3.2401039578702886e-06, "loss": 0.6433, "step": 9781 }, { "epoch": 0.63, "grad_norm": 1.8304239448122321, "learning_rate": 3.2391337715675314e-06, "loss": 0.8902, "step": 9782 }, { "epoch": 0.63, "grad_norm": 1.8633743829866645, "learning_rate": 3.2381636609431065e-06, "loss": 0.7818, "step": 9783 }, { "epoch": 0.63, "grad_norm": 1.9427820412221923, "learning_rate": 3.2371936260387027e-06, "loss": 0.6976, "step": 9784 }, { "epoch": 0.63, "grad_norm": 1.6640806612640127, "learning_rate": 3.2362236668960144e-06, "loss": 0.7416, "step": 9785 }, { "epoch": 0.63, "grad_norm": 1.1469982892332908, "learning_rate": 3.2352537835567255e-06, "loss": 0.6131, "step": 9786 }, { "epoch": 0.63, "grad_norm": 1.728498098337557, "learning_rate": 3.234283976062521e-06, "loss": 0.7937, "step": 9787 }, { "epoch": 0.63, "grad_norm": 1.7428786335122972, "learning_rate": 3.2333142444550825e-06, "loss": 0.8019, "step": 9788 }, { "epoch": 0.63, "grad_norm": 1.7971477304587926, "learning_rate": 3.2323445887760837e-06, "loss": 0.793, "step": 9789 }, { "epoch": 0.63, "grad_norm": 1.6187930494139287, "learning_rate": 3.231375009067202e-06, "loss": 0.892, "step": 9790 }, { "epoch": 0.63, "grad_norm": 1.820680579395833, "learning_rate": 3.230405505370104e-06, "loss": 0.8292, "step": 9791 }, { "epoch": 0.63, "grad_norm": 1.7406110653778117, "learning_rate": 3.2294360777264598e-06, "loss": 0.6961, "step": 9792 }, { "epoch": 0.63, "grad_norm": 1.7869203652820076, "learning_rate": 3.228466726177932e-06, "loss": 0.728, "step": 9793 }, { "epoch": 0.63, "grad_norm": 1.990279548617102, "learning_rate": 3.227497450766184e-06, "loss": 0.7383, "step": 9794 }, { "epoch": 0.63, "grad_norm": 1.7044450672398868, "learning_rate": 3.2265282515328676e-06, "loss": 0.7658, "step": 9795 }, { "epoch": 0.63, "grad_norm": 2.132305052341977, "learning_rate": 3.225559128519642e-06, "loss": 0.68, "step": 9796 }, { "epoch": 0.63, "grad_norm": 2.0467060699492627, "learning_rate": 3.2245900817681576e-06, "loss": 0.7159, "step": 9797 }, { "epoch": 0.63, "grad_norm": 1.2857868762871085, "learning_rate": 3.223621111320059e-06, "loss": 0.5868, "step": 9798 }, { "epoch": 0.63, "grad_norm": 1.11461785410538, "learning_rate": 3.2226522172169928e-06, "loss": 0.8045, "step": 9799 }, { "epoch": 0.63, "grad_norm": 1.8008466441675315, "learning_rate": 3.221683399500599e-06, "loss": 0.7842, "step": 9800 }, { "epoch": 0.63, "grad_norm": 1.475797471892171, "learning_rate": 3.220714658212518e-06, "loss": 0.6909, "step": 9801 }, { "epoch": 0.63, "grad_norm": 2.3317929100121084, "learning_rate": 3.219745993394381e-06, "loss": 0.8624, "step": 9802 }, { "epoch": 0.63, "grad_norm": 1.7118175926217594, "learning_rate": 3.2187774050878213e-06, "loss": 0.7183, "step": 9803 }, { "epoch": 0.63, "grad_norm": 1.310648875853413, "learning_rate": 3.2178088933344644e-06, "loss": 0.7098, "step": 9804 }, { "epoch": 0.63, "grad_norm": 2.013144005831622, "learning_rate": 3.2168404581759362e-06, "loss": 0.8105, "step": 9805 }, { "epoch": 0.63, "grad_norm": 1.5718772693363166, "learning_rate": 3.215872099653859e-06, "loss": 0.8464, "step": 9806 }, { "epoch": 0.63, "grad_norm": 1.8130235078273493, "learning_rate": 3.21490381780985e-06, "loss": 0.7012, "step": 9807 }, { "epoch": 0.63, "grad_norm": 1.6605465302710953, "learning_rate": 3.2139356126855235e-06, "loss": 0.6783, "step": 9808 }, { "epoch": 0.63, "grad_norm": 1.8410081017630693, "learning_rate": 3.212967484322491e-06, "loss": 0.6446, "step": 9809 }, { "epoch": 0.63, "grad_norm": 1.7521454585773848, "learning_rate": 3.211999432762363e-06, "loss": 0.8962, "step": 9810 }, { "epoch": 0.63, "grad_norm": 1.7806744936713195, "learning_rate": 3.2110314580467404e-06, "loss": 0.7974, "step": 9811 }, { "epoch": 0.63, "grad_norm": 1.5622242328544826, "learning_rate": 3.210063560217228e-06, "loss": 0.7295, "step": 9812 }, { "epoch": 0.63, "grad_norm": 1.8308016118777373, "learning_rate": 3.209095739315421e-06, "loss": 0.8054, "step": 9813 }, { "epoch": 0.63, "grad_norm": 1.7083357691966679, "learning_rate": 3.208127995382916e-06, "loss": 0.6895, "step": 9814 }, { "epoch": 0.63, "grad_norm": 1.6363879714437513, "learning_rate": 3.2071603284613052e-06, "loss": 0.6968, "step": 9815 }, { "epoch": 0.63, "grad_norm": 1.7186607738046333, "learning_rate": 3.2061927385921756e-06, "loss": 0.8465, "step": 9816 }, { "epoch": 0.63, "grad_norm": 1.5615550945217, "learning_rate": 3.2052252258171142e-06, "loss": 0.6871, "step": 9817 }, { "epoch": 0.63, "grad_norm": 1.693552661735108, "learning_rate": 3.2042577901776994e-06, "loss": 0.7664, "step": 9818 }, { "epoch": 0.63, "grad_norm": 1.6188641152153107, "learning_rate": 3.2032904317155133e-06, "loss": 0.6657, "step": 9819 }, { "epoch": 0.63, "grad_norm": 1.1063990327366318, "learning_rate": 3.2023231504721274e-06, "loss": 0.5636, "step": 9820 }, { "epoch": 0.63, "grad_norm": 1.7175345239952124, "learning_rate": 3.2013559464891162e-06, "loss": 0.7497, "step": 9821 }, { "epoch": 0.63, "grad_norm": 2.2611843991083527, "learning_rate": 3.200388819808046e-06, "loss": 0.7612, "step": 9822 }, { "epoch": 0.63, "grad_norm": 1.8321574623587198, "learning_rate": 3.199421770470482e-06, "loss": 0.7192, "step": 9823 }, { "epoch": 0.63, "grad_norm": 1.920372336106212, "learning_rate": 3.198454798517989e-06, "loss": 0.791, "step": 9824 }, { "epoch": 0.63, "grad_norm": 1.1263104491532816, "learning_rate": 3.1974879039921214e-06, "loss": 0.72, "step": 9825 }, { "epoch": 0.63, "grad_norm": 1.72080478340369, "learning_rate": 3.1965210869344385e-06, "loss": 0.6801, "step": 9826 }, { "epoch": 0.63, "grad_norm": 1.174592238725707, "learning_rate": 3.1955543473864868e-06, "loss": 0.6629, "step": 9827 }, { "epoch": 0.63, "grad_norm": 1.1322674552974497, "learning_rate": 3.1945876853898194e-06, "loss": 0.6852, "step": 9828 }, { "epoch": 0.63, "grad_norm": 1.5962514536422985, "learning_rate": 3.1936211009859786e-06, "loss": 0.7943, "step": 9829 }, { "epoch": 0.63, "grad_norm": 1.7748837375294193, "learning_rate": 3.192654594216509e-06, "loss": 0.7961, "step": 9830 }, { "epoch": 0.63, "grad_norm": 1.66406666015641, "learning_rate": 3.1916881651229447e-06, "loss": 0.7667, "step": 9831 }, { "epoch": 0.63, "grad_norm": 1.5898131294251525, "learning_rate": 3.190721813746824e-06, "loss": 0.6113, "step": 9832 }, { "epoch": 0.63, "grad_norm": 1.5591539828242424, "learning_rate": 3.189755540129679e-06, "loss": 0.6848, "step": 9833 }, { "epoch": 0.63, "grad_norm": 1.9863266308981755, "learning_rate": 3.1887893443130356e-06, "loss": 0.7732, "step": 9834 }, { "epoch": 0.63, "grad_norm": 1.6650300536942648, "learning_rate": 3.1878232263384214e-06, "loss": 0.6827, "step": 9835 }, { "epoch": 0.63, "grad_norm": 1.6544719706025952, "learning_rate": 3.1868571862473563e-06, "loss": 0.5985, "step": 9836 }, { "epoch": 0.63, "grad_norm": 1.606585539667367, "learning_rate": 3.1858912240813607e-06, "loss": 0.6862, "step": 9837 }, { "epoch": 0.63, "grad_norm": 1.47818272813099, "learning_rate": 3.1849253398819467e-06, "loss": 0.7054, "step": 9838 }, { "epoch": 0.63, "grad_norm": 1.9176501128896013, "learning_rate": 3.183959533690629e-06, "loss": 0.6049, "step": 9839 }, { "epoch": 0.63, "grad_norm": 1.509771587356797, "learning_rate": 3.1829938055489135e-06, "loss": 0.5766, "step": 9840 }, { "epoch": 0.63, "grad_norm": 1.7728902850447181, "learning_rate": 3.1820281554983056e-06, "loss": 0.7478, "step": 9841 }, { "epoch": 0.63, "grad_norm": 1.0837924442591111, "learning_rate": 3.181062583580309e-06, "loss": 0.6648, "step": 9842 }, { "epoch": 0.63, "grad_norm": 1.8009191945166025, "learning_rate": 3.1800970898364194e-06, "loss": 0.7276, "step": 9843 }, { "epoch": 0.63, "grad_norm": 1.7253351534622086, "learning_rate": 3.1791316743081333e-06, "loss": 0.6982, "step": 9844 }, { "epoch": 0.63, "grad_norm": 2.0567828605182226, "learning_rate": 3.178166337036941e-06, "loss": 0.6693, "step": 9845 }, { "epoch": 0.63, "grad_norm": 1.6588498986664362, "learning_rate": 3.177201078064333e-06, "loss": 0.7463, "step": 9846 }, { "epoch": 0.63, "grad_norm": 1.794757277407738, "learning_rate": 3.176235897431791e-06, "loss": 0.7721, "step": 9847 }, { "epoch": 0.63, "grad_norm": 1.8651009601598232, "learning_rate": 3.175270795180799e-06, "loss": 0.7424, "step": 9848 }, { "epoch": 0.63, "grad_norm": 1.6216418642362187, "learning_rate": 3.1743057713528325e-06, "loss": 0.9775, "step": 9849 }, { "epoch": 0.63, "grad_norm": 1.8922160048120977, "learning_rate": 3.1733408259893683e-06, "loss": 0.7553, "step": 9850 }, { "epoch": 0.63, "grad_norm": 1.5168882711335778, "learning_rate": 3.172375959131876e-06, "loss": 0.645, "step": 9851 }, { "epoch": 0.63, "grad_norm": 1.6215033096055713, "learning_rate": 3.1714111708218247e-06, "loss": 0.6771, "step": 9852 }, { "epoch": 0.63, "grad_norm": 1.7213105419734187, "learning_rate": 3.1704464611006813e-06, "loss": 0.6692, "step": 9853 }, { "epoch": 0.63, "grad_norm": 1.267431321670988, "learning_rate": 3.169481830009902e-06, "loss": 0.651, "step": 9854 }, { "epoch": 0.63, "grad_norm": 1.0701191464865099, "learning_rate": 3.16851727759095e-06, "loss": 0.708, "step": 9855 }, { "epoch": 0.63, "grad_norm": 1.5678056768329554, "learning_rate": 3.1675528038852743e-06, "loss": 0.757, "step": 9856 }, { "epoch": 0.63, "grad_norm": 1.105828746487985, "learning_rate": 3.1665884089343296e-06, "loss": 0.7135, "step": 9857 }, { "epoch": 0.63, "grad_norm": 1.807566854699752, "learning_rate": 3.1656240927795617e-06, "loss": 0.8662, "step": 9858 }, { "epoch": 0.63, "grad_norm": 1.8364674032681032, "learning_rate": 3.1646598554624174e-06, "loss": 0.6138, "step": 9859 }, { "epoch": 0.63, "grad_norm": 2.949232978310159, "learning_rate": 3.1636956970243347e-06, "loss": 0.7329, "step": 9860 }, { "epoch": 0.63, "grad_norm": 1.7478863744689213, "learning_rate": 3.1627316175067515e-06, "loss": 0.8797, "step": 9861 }, { "epoch": 0.63, "grad_norm": 1.7418132589874376, "learning_rate": 3.161767616951105e-06, "loss": 0.7013, "step": 9862 }, { "epoch": 0.63, "grad_norm": 1.6845482976334396, "learning_rate": 3.160803695398822e-06, "loss": 0.819, "step": 9863 }, { "epoch": 0.63, "grad_norm": 1.6004799732390054, "learning_rate": 3.1598398528913323e-06, "loss": 0.7253, "step": 9864 }, { "epoch": 0.63, "grad_norm": 1.6599468687297183, "learning_rate": 3.1588760894700575e-06, "loss": 0.5931, "step": 9865 }, { "epoch": 0.63, "grad_norm": 2.6475618005200237, "learning_rate": 3.1579124051764224e-06, "loss": 0.776, "step": 9866 }, { "epoch": 0.63, "grad_norm": 2.0893057082046895, "learning_rate": 3.156948800051839e-06, "loss": 0.6788, "step": 9867 }, { "epoch": 0.63, "grad_norm": 1.7288682110757216, "learning_rate": 3.155985274137726e-06, "loss": 0.7178, "step": 9868 }, { "epoch": 0.63, "grad_norm": 1.2511703621413641, "learning_rate": 3.1550218274754887e-06, "loss": 0.7588, "step": 9869 }, { "epoch": 0.63, "grad_norm": 1.6773462371142083, "learning_rate": 3.154058460106536e-06, "loss": 0.8067, "step": 9870 }, { "epoch": 0.63, "grad_norm": 1.5867113163789572, "learning_rate": 3.153095172072273e-06, "loss": 0.6414, "step": 9871 }, { "epoch": 0.63, "grad_norm": 1.6045608149921373, "learning_rate": 3.1521319634140978e-06, "loss": 0.8136, "step": 9872 }, { "epoch": 0.63, "grad_norm": 0.9792943028624717, "learning_rate": 3.15116883417341e-06, "loss": 0.582, "step": 9873 }, { "epoch": 0.63, "grad_norm": 1.6435610756787264, "learning_rate": 3.1502057843915983e-06, "loss": 0.6934, "step": 9874 }, { "epoch": 0.63, "grad_norm": 1.6445452325015606, "learning_rate": 3.1492428141100575e-06, "loss": 0.7487, "step": 9875 }, { "epoch": 0.63, "grad_norm": 1.597046562574487, "learning_rate": 3.1482799233701696e-06, "loss": 0.7355, "step": 9876 }, { "epoch": 0.63, "grad_norm": 1.6663346825470875, "learning_rate": 3.1473171122133207e-06, "loss": 0.7639, "step": 9877 }, { "epoch": 0.63, "grad_norm": 1.833843261950952, "learning_rate": 3.1463543806808876e-06, "loss": 0.8067, "step": 9878 }, { "epoch": 0.63, "grad_norm": 1.507534496726687, "learning_rate": 3.14539172881425e-06, "loss": 0.6921, "step": 9879 }, { "epoch": 0.63, "grad_norm": 1.7635665771368503, "learning_rate": 3.144429156654778e-06, "loss": 0.7327, "step": 9880 }, { "epoch": 0.63, "grad_norm": 1.8072896418443138, "learning_rate": 3.1434666642438416e-06, "loss": 0.689, "step": 9881 }, { "epoch": 0.63, "grad_norm": 1.7598151707718717, "learning_rate": 3.1425042516228083e-06, "loss": 0.7597, "step": 9882 }, { "epoch": 0.63, "grad_norm": 1.611446030694388, "learning_rate": 3.141541918833037e-06, "loss": 0.7578, "step": 9883 }, { "epoch": 0.63, "grad_norm": 1.5685548962218563, "learning_rate": 3.1405796659158907e-06, "loss": 0.6384, "step": 9884 }, { "epoch": 0.63, "grad_norm": 3.6434990760375405, "learning_rate": 3.139617492912722e-06, "loss": 0.7739, "step": 9885 }, { "epoch": 0.63, "grad_norm": 1.626175502092923, "learning_rate": 3.138655399864885e-06, "loss": 0.6415, "step": 9886 }, { "epoch": 0.63, "grad_norm": 1.6274821560872925, "learning_rate": 3.1376933868137267e-06, "loss": 0.7251, "step": 9887 }, { "epoch": 0.63, "grad_norm": 1.8840786237124425, "learning_rate": 3.136731453800593e-06, "loss": 0.6838, "step": 9888 }, { "epoch": 0.63, "grad_norm": 1.5783750598776523, "learning_rate": 3.1357696008668285e-06, "loss": 0.6916, "step": 9889 }, { "epoch": 0.63, "grad_norm": 1.9448326387331296, "learning_rate": 3.1348078280537676e-06, "loss": 0.6236, "step": 9890 }, { "epoch": 0.63, "grad_norm": 1.7924003965310968, "learning_rate": 3.133846135402748e-06, "loss": 0.7523, "step": 9891 }, { "epoch": 0.63, "grad_norm": 1.8561948020179972, "learning_rate": 3.1328845229550988e-06, "loss": 0.9553, "step": 9892 }, { "epoch": 0.63, "grad_norm": 2.0808643224648367, "learning_rate": 3.1319229907521502e-06, "loss": 0.7754, "step": 9893 }, { "epoch": 0.63, "grad_norm": 1.8837034851910828, "learning_rate": 3.1309615388352255e-06, "loss": 0.6009, "step": 9894 }, { "epoch": 0.63, "grad_norm": 2.0248679527217215, "learning_rate": 3.130000167245648e-06, "loss": 0.7097, "step": 9895 }, { "epoch": 0.63, "grad_norm": 2.0231483287341274, "learning_rate": 3.129038876024732e-06, "loss": 0.7926, "step": 9896 }, { "epoch": 0.63, "grad_norm": 1.817005709834846, "learning_rate": 3.128077665213794e-06, "loss": 0.6905, "step": 9897 }, { "epoch": 0.63, "grad_norm": 1.5249412304847458, "learning_rate": 3.1271165348541465e-06, "loss": 0.6745, "step": 9898 }, { "epoch": 0.63, "grad_norm": 1.0211507514235874, "learning_rate": 3.1261554849870925e-06, "loss": 0.6358, "step": 9899 }, { "epoch": 0.63, "grad_norm": 1.815563140204922, "learning_rate": 3.1251945156539394e-06, "loss": 0.7584, "step": 9900 }, { "epoch": 0.63, "grad_norm": 1.6147584860196393, "learning_rate": 3.124233626895985e-06, "loss": 0.667, "step": 9901 }, { "epoch": 0.63, "grad_norm": 1.6389528216754379, "learning_rate": 3.1232728187545303e-06, "loss": 0.6786, "step": 9902 }, { "epoch": 0.63, "grad_norm": 1.7004745078365602, "learning_rate": 3.1223120912708647e-06, "loss": 0.6852, "step": 9903 }, { "epoch": 0.63, "grad_norm": 3.5728611980289915, "learning_rate": 3.1213514444862815e-06, "loss": 0.6949, "step": 9904 }, { "epoch": 0.63, "grad_norm": 2.0846575916369705, "learning_rate": 3.1203908784420635e-06, "loss": 0.9921, "step": 9905 }, { "epoch": 0.63, "grad_norm": 1.8612531063141413, "learning_rate": 3.1194303931794958e-06, "loss": 0.7965, "step": 9906 }, { "epoch": 0.63, "grad_norm": 1.603270089488313, "learning_rate": 3.1184699887398594e-06, "loss": 0.6512, "step": 9907 }, { "epoch": 0.63, "grad_norm": 1.7509272867110828, "learning_rate": 3.117509665164428e-06, "loss": 0.7132, "step": 9908 }, { "epoch": 0.63, "grad_norm": 1.8058544641277392, "learning_rate": 3.116549422494478e-06, "loss": 0.7222, "step": 9909 }, { "epoch": 0.63, "grad_norm": 1.6129747212426178, "learning_rate": 3.115589260771274e-06, "loss": 0.809, "step": 9910 }, { "epoch": 0.63, "grad_norm": 1.8501622171611722, "learning_rate": 3.1146291800360863e-06, "loss": 0.6789, "step": 9911 }, { "epoch": 0.63, "grad_norm": 1.836910426036105, "learning_rate": 3.1136691803301717e-06, "loss": 0.8887, "step": 9912 }, { "epoch": 0.63, "grad_norm": 1.7179608705359195, "learning_rate": 3.1127092616947935e-06, "loss": 0.7584, "step": 9913 }, { "epoch": 0.63, "grad_norm": 1.5660618796019614, "learning_rate": 3.1117494241712043e-06, "loss": 0.7714, "step": 9914 }, { "epoch": 0.63, "grad_norm": 1.6836771406390185, "learning_rate": 3.1107896678006577e-06, "loss": 0.8299, "step": 9915 }, { "epoch": 0.63, "grad_norm": 1.6648873804880095, "learning_rate": 3.109829992624403e-06, "loss": 0.7452, "step": 9916 }, { "epoch": 0.63, "grad_norm": 1.6042395379171743, "learning_rate": 3.108870398683681e-06, "loss": 0.6556, "step": 9917 }, { "epoch": 0.63, "grad_norm": 1.6867380702419408, "learning_rate": 3.1079108860197382e-06, "loss": 0.6847, "step": 9918 }, { "epoch": 0.63, "grad_norm": 1.7413227894328598, "learning_rate": 3.106951454673808e-06, "loss": 0.7257, "step": 9919 }, { "epoch": 0.63, "grad_norm": 1.5880665807150036, "learning_rate": 3.105992104687126e-06, "loss": 0.8032, "step": 9920 }, { "epoch": 0.64, "grad_norm": 0.9995044254819315, "learning_rate": 3.105032836100925e-06, "loss": 0.6557, "step": 9921 }, { "epoch": 0.64, "grad_norm": 1.5367104888287253, "learning_rate": 3.1040736489564296e-06, "loss": 0.7011, "step": 9922 }, { "epoch": 0.64, "grad_norm": 1.6286903648598412, "learning_rate": 3.1031145432948644e-06, "loss": 0.6956, "step": 9923 }, { "epoch": 0.64, "grad_norm": 1.6189967492616917, "learning_rate": 3.1021555191574527e-06, "loss": 0.8422, "step": 9924 }, { "epoch": 0.64, "grad_norm": 3.0968993236648785, "learning_rate": 3.1011965765854056e-06, "loss": 0.8086, "step": 9925 }, { "epoch": 0.64, "grad_norm": 1.4709139250123946, "learning_rate": 3.100237715619941e-06, "loss": 0.7457, "step": 9926 }, { "epoch": 0.64, "grad_norm": 1.7608359659514397, "learning_rate": 3.099278936302269e-06, "loss": 0.6778, "step": 9927 }, { "epoch": 0.64, "grad_norm": 1.5500567851643086, "learning_rate": 3.0983202386735923e-06, "loss": 0.8014, "step": 9928 }, { "epoch": 0.64, "grad_norm": 1.5355862363066122, "learning_rate": 3.097361622775117e-06, "loss": 0.8068, "step": 9929 }, { "epoch": 0.64, "grad_norm": 1.5712242252108084, "learning_rate": 3.096403088648039e-06, "loss": 0.6785, "step": 9930 }, { "epoch": 0.64, "grad_norm": 1.7456214702568953, "learning_rate": 3.095444636333559e-06, "loss": 0.6838, "step": 9931 }, { "epoch": 0.64, "grad_norm": 1.7429343928286738, "learning_rate": 3.094486265872865e-06, "loss": 0.6917, "step": 9932 }, { "epoch": 0.64, "grad_norm": 1.364459438645133, "learning_rate": 3.0935279773071492e-06, "loss": 0.5606, "step": 9933 }, { "epoch": 0.64, "grad_norm": 1.7425160884190245, "learning_rate": 3.0925697706775926e-06, "loss": 0.6489, "step": 9934 }, { "epoch": 0.64, "grad_norm": 1.7906099204198904, "learning_rate": 3.0916116460253787e-06, "loss": 0.8025, "step": 9935 }, { "epoch": 0.64, "grad_norm": 1.9775774303328897, "learning_rate": 3.0906536033916878e-06, "loss": 0.7166, "step": 9936 }, { "epoch": 0.64, "grad_norm": 1.0836127254131456, "learning_rate": 3.0896956428176916e-06, "loss": 0.6732, "step": 9937 }, { "epoch": 0.64, "grad_norm": 1.6340254922707882, "learning_rate": 3.088737764344565e-06, "loss": 0.7352, "step": 9938 }, { "epoch": 0.64, "grad_norm": 1.051818335541621, "learning_rate": 3.0877799680134713e-06, "loss": 0.5983, "step": 9939 }, { "epoch": 0.64, "grad_norm": 2.284048218667491, "learning_rate": 3.086822253865578e-06, "loss": 0.8361, "step": 9940 }, { "epoch": 0.64, "grad_norm": 1.6831887130159224, "learning_rate": 3.085864621942042e-06, "loss": 0.6358, "step": 9941 }, { "epoch": 0.64, "grad_norm": 1.8616255361654688, "learning_rate": 3.0849070722840234e-06, "loss": 0.743, "step": 9942 }, { "epoch": 0.64, "grad_norm": 1.8762732727279845, "learning_rate": 3.0839496049326745e-06, "loss": 0.6637, "step": 9943 }, { "epoch": 0.64, "grad_norm": 1.0318917369167966, "learning_rate": 3.082992219929144e-06, "loss": 0.659, "step": 9944 }, { "epoch": 0.64, "grad_norm": 1.6165404258119427, "learning_rate": 3.0820349173145823e-06, "loss": 1.0853, "step": 9945 }, { "epoch": 0.64, "grad_norm": 1.2154610357159592, "learning_rate": 3.0810776971301283e-06, "loss": 0.6996, "step": 9946 }, { "epoch": 0.64, "grad_norm": 1.9596685546412989, "learning_rate": 3.080120559416924e-06, "loss": 0.7873, "step": 9947 }, { "epoch": 0.64, "grad_norm": 1.7992646981693012, "learning_rate": 3.079163504216102e-06, "loss": 1.0055, "step": 9948 }, { "epoch": 0.64, "grad_norm": 1.8178664214896136, "learning_rate": 3.0782065315687975e-06, "loss": 0.6882, "step": 9949 }, { "epoch": 0.64, "grad_norm": 1.761383067988826, "learning_rate": 3.077249641516137e-06, "loss": 0.7648, "step": 9950 }, { "epoch": 0.64, "grad_norm": 1.3821594745946608, "learning_rate": 3.0762928340992483e-06, "loss": 0.6482, "step": 9951 }, { "epoch": 0.64, "grad_norm": 1.8731351305147654, "learning_rate": 3.075336109359249e-06, "loss": 0.7671, "step": 9952 }, { "epoch": 0.64, "grad_norm": 1.5508690662306694, "learning_rate": 3.0743794673372605e-06, "loss": 0.5906, "step": 9953 }, { "epoch": 0.64, "grad_norm": 1.735750991340485, "learning_rate": 3.073422908074398e-06, "loss": 0.6236, "step": 9954 }, { "epoch": 0.64, "grad_norm": 1.7582332920235668, "learning_rate": 3.072466431611768e-06, "loss": 0.7459, "step": 9955 }, { "epoch": 0.64, "grad_norm": 1.7046284400370852, "learning_rate": 3.071510037990482e-06, "loss": 0.7142, "step": 9956 }, { "epoch": 0.64, "grad_norm": 1.5393122613580832, "learning_rate": 3.0705537272516406e-06, "loss": 0.6664, "step": 9957 }, { "epoch": 0.64, "grad_norm": 1.490879788039718, "learning_rate": 3.0695974994363487e-06, "loss": 0.6388, "step": 9958 }, { "epoch": 0.64, "grad_norm": 1.1846211379488167, "learning_rate": 3.0686413545856975e-06, "loss": 0.6858, "step": 9959 }, { "epoch": 0.64, "grad_norm": 1.6881023319480757, "learning_rate": 3.067685292740784e-06, "loss": 0.7102, "step": 9960 }, { "epoch": 0.64, "grad_norm": 1.2079470430268595, "learning_rate": 3.0667293139426944e-06, "loss": 0.7029, "step": 9961 }, { "epoch": 0.64, "grad_norm": 5.037942991332126, "learning_rate": 3.065773418232517e-06, "loss": 0.7211, "step": 9962 }, { "epoch": 0.64, "grad_norm": 1.7214699496663524, "learning_rate": 3.064817605651336e-06, "loss": 0.7503, "step": 9963 }, { "epoch": 0.64, "grad_norm": 1.8422900333316021, "learning_rate": 3.063861876240225e-06, "loss": 0.7998, "step": 9964 }, { "epoch": 0.64, "grad_norm": 1.9549229591503123, "learning_rate": 3.0629062300402636e-06, "loss": 0.6995, "step": 9965 }, { "epoch": 0.64, "grad_norm": 1.7079883275572143, "learning_rate": 3.0619506670925214e-06, "loss": 0.7151, "step": 9966 }, { "epoch": 0.64, "grad_norm": 0.9428001970187901, "learning_rate": 3.0609951874380685e-06, "loss": 0.6515, "step": 9967 }, { "epoch": 0.64, "grad_norm": 1.1796846800170926, "learning_rate": 3.0600397911179662e-06, "loss": 0.6185, "step": 9968 }, { "epoch": 0.64, "grad_norm": 1.7714066669356479, "learning_rate": 3.05908447817328e-06, "loss": 0.8134, "step": 9969 }, { "epoch": 0.64, "grad_norm": 1.2582502483036744, "learning_rate": 3.0581292486450607e-06, "loss": 0.8064, "step": 9970 }, { "epoch": 0.64, "grad_norm": 1.0673909689330023, "learning_rate": 3.057174102574367e-06, "loss": 0.7383, "step": 9971 }, { "epoch": 0.64, "grad_norm": 2.471303038113688, "learning_rate": 3.0562190400022494e-06, "loss": 0.7547, "step": 9972 }, { "epoch": 0.64, "grad_norm": 1.1544741143950221, "learning_rate": 3.0552640609697514e-06, "loss": 0.6531, "step": 9973 }, { "epoch": 0.64, "grad_norm": 1.9038011069870793, "learning_rate": 3.0543091655179202e-06, "loss": 0.8104, "step": 9974 }, { "epoch": 0.64, "grad_norm": 1.6317199014229422, "learning_rate": 3.0533543536877897e-06, "loss": 0.7917, "step": 9975 }, { "epoch": 0.64, "grad_norm": 1.8205452340233081, "learning_rate": 3.0523996255204014e-06, "loss": 0.8007, "step": 9976 }, { "epoch": 0.64, "grad_norm": 1.588418662737032, "learning_rate": 3.0514449810567827e-06, "loss": 0.708, "step": 9977 }, { "epoch": 0.64, "grad_norm": 2.6828849809531756, "learning_rate": 3.0504904203379647e-06, "loss": 0.7172, "step": 9978 }, { "epoch": 0.64, "grad_norm": 1.6764016130584989, "learning_rate": 3.049535943404971e-06, "loss": 0.6829, "step": 9979 }, { "epoch": 0.64, "grad_norm": 1.7129686761893543, "learning_rate": 3.048581550298824e-06, "loss": 0.783, "step": 9980 }, { "epoch": 0.64, "grad_norm": 1.7186168587413764, "learning_rate": 3.047627241060544e-06, "loss": 0.7795, "step": 9981 }, { "epoch": 0.64, "grad_norm": 1.5934923703064474, "learning_rate": 3.0466730157311402e-06, "loss": 0.702, "step": 9982 }, { "epoch": 0.64, "grad_norm": 1.8405715999889023, "learning_rate": 3.0457188743516275e-06, "loss": 0.8825, "step": 9983 }, { "epoch": 0.64, "grad_norm": 1.1046064876666453, "learning_rate": 3.0447648169630094e-06, "loss": 0.7251, "step": 9984 }, { "epoch": 0.64, "grad_norm": 1.0831886416955623, "learning_rate": 3.043810843606292e-06, "loss": 0.6371, "step": 9985 }, { "epoch": 0.64, "grad_norm": 2.0212782509301386, "learning_rate": 3.0428569543224727e-06, "loss": 0.7689, "step": 9986 }, { "epoch": 0.64, "grad_norm": 1.6082840784955807, "learning_rate": 3.041903149152552e-06, "loss": 0.8076, "step": 9987 }, { "epoch": 0.64, "grad_norm": 1.9838050019224027, "learning_rate": 3.0409494281375163e-06, "loss": 0.7621, "step": 9988 }, { "epoch": 0.64, "grad_norm": 1.5754800938644735, "learning_rate": 3.039995791318358e-06, "loss": 0.7133, "step": 9989 }, { "epoch": 0.64, "grad_norm": 1.6830419742676013, "learning_rate": 3.039042238736064e-06, "loss": 0.7665, "step": 9990 }, { "epoch": 0.64, "grad_norm": 1.1127089271926855, "learning_rate": 3.038088770431612e-06, "loss": 0.7782, "step": 9991 }, { "epoch": 0.64, "grad_norm": 1.8691161120168334, "learning_rate": 3.037135386445983e-06, "loss": 0.6931, "step": 9992 }, { "epoch": 0.64, "grad_norm": 1.8165258505049893, "learning_rate": 3.0361820868201497e-06, "loss": 0.7557, "step": 9993 }, { "epoch": 0.64, "grad_norm": 2.112422949112546, "learning_rate": 3.0352288715950864e-06, "loss": 0.5831, "step": 9994 }, { "epoch": 0.64, "grad_norm": 1.5822291923028922, "learning_rate": 3.0342757408117544e-06, "loss": 0.6726, "step": 9995 }, { "epoch": 0.64, "grad_norm": 1.08613770626761, "learning_rate": 3.033322694511124e-06, "loss": 0.6499, "step": 9996 }, { "epoch": 0.64, "grad_norm": 1.6581521102762975, "learning_rate": 3.0323697327341483e-06, "loss": 0.6914, "step": 9997 }, { "epoch": 0.64, "grad_norm": 1.8836069800845217, "learning_rate": 3.031416855521788e-06, "loss": 0.8697, "step": 9998 }, { "epoch": 0.64, "grad_norm": 1.7474377124228881, "learning_rate": 3.030464062914995e-06, "loss": 0.7726, "step": 9999 }, { "epoch": 0.64, "grad_norm": 1.6679041662242156, "learning_rate": 3.0295113549547174e-06, "loss": 0.8434, "step": 10000 }, { "epoch": 0.64, "grad_norm": 1.1550544627131996, "learning_rate": 3.028558731681902e-06, "loss": 0.6744, "step": 10001 }, { "epoch": 0.64, "grad_norm": 1.9739030230753916, "learning_rate": 3.0276061931374882e-06, "loss": 0.7221, "step": 10002 }, { "epoch": 0.64, "grad_norm": 3.8270232397648245, "learning_rate": 3.0266537393624185e-06, "loss": 0.7335, "step": 10003 }, { "epoch": 0.64, "grad_norm": 1.6080979926260495, "learning_rate": 3.0257013703976225e-06, "loss": 0.6379, "step": 10004 }, { "epoch": 0.64, "grad_norm": 0.9857402852676658, "learning_rate": 3.024749086284034e-06, "loss": 0.6671, "step": 10005 }, { "epoch": 0.64, "grad_norm": 1.069566146573485, "learning_rate": 3.023796887062578e-06, "loss": 0.6811, "step": 10006 }, { "epoch": 0.64, "grad_norm": 1.768001014000654, "learning_rate": 3.02284477277418e-06, "loss": 0.7828, "step": 10007 }, { "epoch": 0.64, "grad_norm": 1.9393154502086174, "learning_rate": 3.021892743459758e-06, "loss": 0.7686, "step": 10008 }, { "epoch": 0.64, "grad_norm": 1.7767955763321746, "learning_rate": 3.020940799160229e-06, "loss": 0.6489, "step": 10009 }, { "epoch": 0.64, "grad_norm": 1.7764332433081813, "learning_rate": 3.0199889399165084e-06, "loss": 0.8637, "step": 10010 }, { "epoch": 0.64, "grad_norm": 1.9116913993369604, "learning_rate": 3.0190371657695005e-06, "loss": 0.7774, "step": 10011 }, { "epoch": 0.64, "grad_norm": 1.718890136003826, "learning_rate": 3.0180854767601153e-06, "loss": 0.6948, "step": 10012 }, { "epoch": 0.64, "grad_norm": 2.0532869964737, "learning_rate": 3.017133872929249e-06, "loss": 0.6626, "step": 10013 }, { "epoch": 0.64, "grad_norm": 1.6795277080327187, "learning_rate": 3.016182354317803e-06, "loss": 0.8299, "step": 10014 }, { "epoch": 0.64, "grad_norm": 1.7325070394136766, "learning_rate": 3.0152309209666703e-06, "loss": 0.7005, "step": 10015 }, { "epoch": 0.64, "grad_norm": 1.5408615126302143, "learning_rate": 3.014279572916744e-06, "loss": 0.7104, "step": 10016 }, { "epoch": 0.64, "grad_norm": 1.8237328905214027, "learning_rate": 3.0133283102089067e-06, "loss": 0.8009, "step": 10017 }, { "epoch": 0.64, "grad_norm": 1.7620907260710326, "learning_rate": 3.0123771328840447e-06, "loss": 0.6573, "step": 10018 }, { "epoch": 0.64, "grad_norm": 1.9684983409549754, "learning_rate": 3.0114260409830386e-06, "loss": 0.7585, "step": 10019 }, { "epoch": 0.64, "grad_norm": 1.7748904750098655, "learning_rate": 3.0104750345467603e-06, "loss": 0.7285, "step": 10020 }, { "epoch": 0.64, "grad_norm": 1.7352873712684047, "learning_rate": 3.009524113616086e-06, "loss": 0.7652, "step": 10021 }, { "epoch": 0.64, "grad_norm": 1.6322501303075587, "learning_rate": 3.008573278231881e-06, "loss": 0.7622, "step": 10022 }, { "epoch": 0.64, "grad_norm": 1.0311599822205424, "learning_rate": 3.0076225284350147e-06, "loss": 0.6862, "step": 10023 }, { "epoch": 0.64, "grad_norm": 1.6243760521284114, "learning_rate": 3.006671864266344e-06, "loss": 0.5827, "step": 10024 }, { "epoch": 0.64, "grad_norm": 2.3548464142641055, "learning_rate": 3.005721285766729e-06, "loss": 0.6406, "step": 10025 }, { "epoch": 0.64, "grad_norm": 1.5433777854228972, "learning_rate": 3.004770792977021e-06, "loss": 0.762, "step": 10026 }, { "epoch": 0.64, "grad_norm": 1.819731826117869, "learning_rate": 3.0038203859380717e-06, "loss": 0.6891, "step": 10027 }, { "epoch": 0.64, "grad_norm": 1.4960916926123784, "learning_rate": 3.002870064690729e-06, "loss": 0.7214, "step": 10028 }, { "epoch": 0.64, "grad_norm": 2.131474654159861, "learning_rate": 3.0019198292758327e-06, "loss": 0.685, "step": 10029 }, { "epoch": 0.64, "grad_norm": 1.2197429411989307, "learning_rate": 3.000969679734226e-06, "loss": 0.6675, "step": 10030 }, { "epoch": 0.64, "grad_norm": 1.7995945872393453, "learning_rate": 3.0000196161067398e-06, "loss": 0.7388, "step": 10031 }, { "epoch": 0.64, "grad_norm": 1.5986374440093565, "learning_rate": 2.99906963843421e-06, "loss": 0.7232, "step": 10032 }, { "epoch": 0.64, "grad_norm": 1.888958855156876, "learning_rate": 2.998119746757461e-06, "loss": 0.8426, "step": 10033 }, { "epoch": 0.64, "grad_norm": 1.7952144116289042, "learning_rate": 2.9971699411173196e-06, "loss": 0.6769, "step": 10034 }, { "epoch": 0.64, "grad_norm": 1.632066181663904, "learning_rate": 2.9962202215546043e-06, "loss": 0.7585, "step": 10035 }, { "epoch": 0.64, "grad_norm": 1.078960353404566, "learning_rate": 2.995270588110134e-06, "loss": 0.6401, "step": 10036 }, { "epoch": 0.64, "grad_norm": 1.0848786437957427, "learning_rate": 2.994321040824722e-06, "loss": 0.6743, "step": 10037 }, { "epoch": 0.64, "grad_norm": 1.6307348006341578, "learning_rate": 2.993371579739176e-06, "loss": 0.8274, "step": 10038 }, { "epoch": 0.64, "grad_norm": 1.4931154984317765, "learning_rate": 2.9924222048943046e-06, "loss": 0.7613, "step": 10039 }, { "epoch": 0.64, "grad_norm": 1.6080473021235844, "learning_rate": 2.991472916330906e-06, "loss": 0.6957, "step": 10040 }, { "epoch": 0.64, "grad_norm": 1.9544853841954195, "learning_rate": 2.990523714089785e-06, "loss": 0.5624, "step": 10041 }, { "epoch": 0.64, "grad_norm": 0.8918901254711901, "learning_rate": 2.9895745982117287e-06, "loss": 0.6576, "step": 10042 }, { "epoch": 0.64, "grad_norm": 1.4589246357349632, "learning_rate": 2.988625568737532e-06, "loss": 0.7792, "step": 10043 }, { "epoch": 0.64, "grad_norm": 1.7270089281533705, "learning_rate": 2.987676625707982e-06, "loss": 0.6378, "step": 10044 }, { "epoch": 0.64, "grad_norm": 1.4578294493092805, "learning_rate": 2.986727769163862e-06, "loss": 0.6689, "step": 10045 }, { "epoch": 0.64, "grad_norm": 1.635877560890962, "learning_rate": 2.985778999145954e-06, "loss": 0.7312, "step": 10046 }, { "epoch": 0.64, "grad_norm": 1.7010690424145216, "learning_rate": 2.9848303156950297e-06, "loss": 0.7195, "step": 10047 }, { "epoch": 0.64, "grad_norm": 1.230127834283598, "learning_rate": 2.9838817188518664e-06, "loss": 0.6071, "step": 10048 }, { "epoch": 0.64, "grad_norm": 1.5973757495548364, "learning_rate": 2.982933208657228e-06, "loss": 0.7873, "step": 10049 }, { "epoch": 0.64, "grad_norm": 1.5631307521530973, "learning_rate": 2.9819847851518833e-06, "loss": 0.6866, "step": 10050 }, { "epoch": 0.64, "grad_norm": 1.730101612034988, "learning_rate": 2.9810364483765913e-06, "loss": 0.6943, "step": 10051 }, { "epoch": 0.64, "grad_norm": 1.0197862477545112, "learning_rate": 2.980088198372112e-06, "loss": 0.7116, "step": 10052 }, { "epoch": 0.64, "grad_norm": 1.2186486339676956, "learning_rate": 2.9791400351791954e-06, "loss": 0.6436, "step": 10053 }, { "epoch": 0.64, "grad_norm": 1.6676961449621073, "learning_rate": 2.978191958838594e-06, "loss": 0.6339, "step": 10054 }, { "epoch": 0.64, "grad_norm": 1.0731421996517814, "learning_rate": 2.9772439693910554e-06, "loss": 0.5741, "step": 10055 }, { "epoch": 0.64, "grad_norm": 1.7438568533396008, "learning_rate": 2.9762960668773187e-06, "loss": 0.763, "step": 10056 }, { "epoch": 0.64, "grad_norm": 1.0302668607715377, "learning_rate": 2.975348251338126e-06, "loss": 0.6663, "step": 10057 }, { "epoch": 0.64, "grad_norm": 2.1233479793059393, "learning_rate": 2.97440052281421e-06, "loss": 0.6736, "step": 10058 }, { "epoch": 0.64, "grad_norm": 1.800334590486477, "learning_rate": 2.973452881346305e-06, "loss": 0.7313, "step": 10059 }, { "epoch": 0.64, "grad_norm": 1.7304114373705286, "learning_rate": 2.9725053269751348e-06, "loss": 0.7155, "step": 10060 }, { "epoch": 0.64, "grad_norm": 1.1452026967624842, "learning_rate": 2.9715578597414268e-06, "loss": 0.6476, "step": 10061 }, { "epoch": 0.64, "grad_norm": 1.5698384658919298, "learning_rate": 2.970610479685898e-06, "loss": 0.6747, "step": 10062 }, { "epoch": 0.64, "grad_norm": 1.7390927496016952, "learning_rate": 2.9696631868492664e-06, "loss": 0.8152, "step": 10063 }, { "epoch": 0.64, "grad_norm": 1.1238065490205358, "learning_rate": 2.9687159812722465e-06, "loss": 0.7208, "step": 10064 }, { "epoch": 0.64, "grad_norm": 1.0949125390203323, "learning_rate": 2.9677688629955438e-06, "loss": 0.6618, "step": 10065 }, { "epoch": 0.64, "grad_norm": 1.729556814381173, "learning_rate": 2.9668218320598667e-06, "loss": 0.7494, "step": 10066 }, { "epoch": 0.64, "grad_norm": 1.8249136109872321, "learning_rate": 2.965874888505913e-06, "loss": 0.6278, "step": 10067 }, { "epoch": 0.64, "grad_norm": 1.5503006170200446, "learning_rate": 2.964928032374385e-06, "loss": 0.61, "step": 10068 }, { "epoch": 0.64, "grad_norm": 2.201387101623297, "learning_rate": 2.9639812637059717e-06, "loss": 0.6686, "step": 10069 }, { "epoch": 0.64, "grad_norm": 1.9441864679513738, "learning_rate": 2.963034582541366e-06, "loss": 0.6761, "step": 10070 }, { "epoch": 0.64, "grad_norm": 1.6450958524174697, "learning_rate": 2.962087988921253e-06, "loss": 0.7309, "step": 10071 }, { "epoch": 0.64, "grad_norm": 1.280991823924943, "learning_rate": 2.9611414828863177e-06, "loss": 0.6929, "step": 10072 }, { "epoch": 0.64, "grad_norm": 2.2258981030632556, "learning_rate": 2.9601950644772352e-06, "loss": 0.7, "step": 10073 }, { "epoch": 0.64, "grad_norm": 2.8431994481723386, "learning_rate": 2.959248733734683e-06, "loss": 0.7175, "step": 10074 }, { "epoch": 0.64, "grad_norm": 4.579538135262639, "learning_rate": 2.958302490699334e-06, "loss": 0.6996, "step": 10075 }, { "epoch": 0.64, "grad_norm": 1.5458733685898414, "learning_rate": 2.957356335411852e-06, "loss": 0.6844, "step": 10076 }, { "epoch": 0.64, "grad_norm": 1.2023414356926698, "learning_rate": 2.9564102679129027e-06, "loss": 0.6818, "step": 10077 }, { "epoch": 0.65, "grad_norm": 2.013860924528988, "learning_rate": 2.9554642882431463e-06, "loss": 0.796, "step": 10078 }, { "epoch": 0.65, "grad_norm": 1.8468443864902873, "learning_rate": 2.954518396443239e-06, "loss": 0.7562, "step": 10079 }, { "epoch": 0.65, "grad_norm": 2.047396097040136, "learning_rate": 2.9535725925538313e-06, "loss": 0.8099, "step": 10080 }, { "epoch": 0.65, "grad_norm": 1.495505854599374, "learning_rate": 2.9526268766155753e-06, "loss": 0.7477, "step": 10081 }, { "epoch": 0.65, "grad_norm": 1.8351415024727973, "learning_rate": 2.9516812486691126e-06, "loss": 0.6593, "step": 10082 }, { "epoch": 0.65, "grad_norm": 3.124952299508001, "learning_rate": 2.9507357087550857e-06, "loss": 0.6717, "step": 10083 }, { "epoch": 0.65, "grad_norm": 1.8012425865999997, "learning_rate": 2.9497902569141335e-06, "loss": 0.6979, "step": 10084 }, { "epoch": 0.65, "grad_norm": 1.8213154544342849, "learning_rate": 2.948844893186886e-06, "loss": 0.7756, "step": 10085 }, { "epoch": 0.65, "grad_norm": 2.0321171298125926, "learning_rate": 2.9478996176139765e-06, "loss": 0.7905, "step": 10086 }, { "epoch": 0.65, "grad_norm": 1.7178377035700998, "learning_rate": 2.9469544302360283e-06, "loss": 0.7222, "step": 10087 }, { "epoch": 0.65, "grad_norm": 1.8079436927663952, "learning_rate": 2.946009331093666e-06, "loss": 0.7149, "step": 10088 }, { "epoch": 0.65, "grad_norm": 1.892594092386079, "learning_rate": 2.945064320227505e-06, "loss": 0.8296, "step": 10089 }, { "epoch": 0.65, "grad_norm": 1.8768950348982232, "learning_rate": 2.9441193976781637e-06, "loss": 0.7144, "step": 10090 }, { "epoch": 0.65, "grad_norm": 1.7009341735399153, "learning_rate": 2.9431745634862484e-06, "loss": 0.7045, "step": 10091 }, { "epoch": 0.65, "grad_norm": 1.3607258854187716, "learning_rate": 2.942229817692368e-06, "loss": 0.6412, "step": 10092 }, { "epoch": 0.65, "grad_norm": 1.9595976110737603, "learning_rate": 2.9412851603371277e-06, "loss": 0.6854, "step": 10093 }, { "epoch": 0.65, "grad_norm": 1.8245761199596042, "learning_rate": 2.9403405914611243e-06, "loss": 0.7631, "step": 10094 }, { "epoch": 0.65, "grad_norm": 1.0920321149890309, "learning_rate": 2.9393961111049564e-06, "loss": 0.6835, "step": 10095 }, { "epoch": 0.65, "grad_norm": 1.4562945491702617, "learning_rate": 2.938451719309211e-06, "loss": 0.6755, "step": 10096 }, { "epoch": 0.65, "grad_norm": 1.8176601975010354, "learning_rate": 2.937507416114481e-06, "loss": 0.7328, "step": 10097 }, { "epoch": 0.65, "grad_norm": 1.8037819768799987, "learning_rate": 2.9365632015613467e-06, "loss": 0.7534, "step": 10098 }, { "epoch": 0.65, "grad_norm": 1.7360462012979254, "learning_rate": 2.9356190756903913e-06, "loss": 0.6879, "step": 10099 }, { "epoch": 0.65, "grad_norm": 1.7469477615899751, "learning_rate": 2.9346750385421887e-06, "loss": 0.7727, "step": 10100 }, { "epoch": 0.65, "grad_norm": 1.5621855600704613, "learning_rate": 2.9337310901573134e-06, "loss": 0.7443, "step": 10101 }, { "epoch": 0.65, "grad_norm": 1.2070673316860572, "learning_rate": 2.932787230576336e-06, "loss": 0.5713, "step": 10102 }, { "epoch": 0.65, "grad_norm": 2.09508028873995, "learning_rate": 2.9318434598398173e-06, "loss": 0.6456, "step": 10103 }, { "epoch": 0.65, "grad_norm": 1.7540501963606216, "learning_rate": 2.9308997779883232e-06, "loss": 0.7757, "step": 10104 }, { "epoch": 0.65, "grad_norm": 1.773482289305622, "learning_rate": 2.929956185062407e-06, "loss": 0.8624, "step": 10105 }, { "epoch": 0.65, "grad_norm": 1.703821040728962, "learning_rate": 2.929012681102625e-06, "loss": 0.8251, "step": 10106 }, { "epoch": 0.65, "grad_norm": 1.8223991903331915, "learning_rate": 2.9280692661495247e-06, "loss": 0.6808, "step": 10107 }, { "epoch": 0.65, "grad_norm": 1.7894423044244845, "learning_rate": 2.927125940243656e-06, "loss": 0.6889, "step": 10108 }, { "epoch": 0.65, "grad_norm": 1.8081381147362465, "learning_rate": 2.9261827034255575e-06, "loss": 0.6698, "step": 10109 }, { "epoch": 0.65, "grad_norm": 1.1048291624579807, "learning_rate": 2.9252395557357675e-06, "loss": 0.6636, "step": 10110 }, { "epoch": 0.65, "grad_norm": 1.6406402414202013, "learning_rate": 2.9242964972148245e-06, "loss": 0.7648, "step": 10111 }, { "epoch": 0.65, "grad_norm": 1.7880021609109176, "learning_rate": 2.923353527903254e-06, "loss": 0.8718, "step": 10112 }, { "epoch": 0.65, "grad_norm": 1.5897737084107553, "learning_rate": 2.922410647841586e-06, "loss": 0.7056, "step": 10113 }, { "epoch": 0.65, "grad_norm": 1.6251991308681875, "learning_rate": 2.9214678570703436e-06, "loss": 0.7464, "step": 10114 }, { "epoch": 0.65, "grad_norm": 1.5106596080959465, "learning_rate": 2.920525155630044e-06, "loss": 0.6904, "step": 10115 }, { "epoch": 0.65, "grad_norm": 1.3849932343941358, "learning_rate": 2.9195825435612036e-06, "loss": 0.6704, "step": 10116 }, { "epoch": 0.65, "grad_norm": 1.0642687503175514, "learning_rate": 2.918640020904334e-06, "loss": 0.5535, "step": 10117 }, { "epoch": 0.65, "grad_norm": 2.9173994394961125, "learning_rate": 2.9176975876999434e-06, "loss": 0.7104, "step": 10118 }, { "epoch": 0.65, "grad_norm": 1.0523074663349927, "learning_rate": 2.9167552439885333e-06, "loss": 0.5448, "step": 10119 }, { "epoch": 0.65, "grad_norm": 1.257408929687253, "learning_rate": 2.915812989810609e-06, "loss": 0.7358, "step": 10120 }, { "epoch": 0.65, "grad_norm": 1.6548114079229808, "learning_rate": 2.914870825206659e-06, "loss": 0.7756, "step": 10121 }, { "epoch": 0.65, "grad_norm": 1.9764541473475847, "learning_rate": 2.913928750217183e-06, "loss": 0.7511, "step": 10122 }, { "epoch": 0.65, "grad_norm": 1.5468080186373636, "learning_rate": 2.9129867648826623e-06, "loss": 0.6334, "step": 10123 }, { "epoch": 0.65, "grad_norm": 1.1698694626022763, "learning_rate": 2.9120448692435866e-06, "loss": 0.668, "step": 10124 }, { "epoch": 0.65, "grad_norm": 1.7235401078208827, "learning_rate": 2.9111030633404354e-06, "loss": 0.8526, "step": 10125 }, { "epoch": 0.65, "grad_norm": 1.7873069650604394, "learning_rate": 2.9101613472136846e-06, "loss": 0.5602, "step": 10126 }, { "epoch": 0.65, "grad_norm": 1.6099990163626698, "learning_rate": 2.9092197209038087e-06, "loss": 0.7499, "step": 10127 }, { "epoch": 0.65, "grad_norm": 2.1985220898049826, "learning_rate": 2.908278184451273e-06, "loss": 0.6535, "step": 10128 }, { "epoch": 0.65, "grad_norm": 1.7164045211399819, "learning_rate": 2.90733673789655e-06, "loss": 0.7085, "step": 10129 }, { "epoch": 0.65, "grad_norm": 1.8645256530464962, "learning_rate": 2.9063953812800925e-06, "loss": 0.8186, "step": 10130 }, { "epoch": 0.65, "grad_norm": 1.8670771183482435, "learning_rate": 2.905454114642365e-06, "loss": 0.6419, "step": 10131 }, { "epoch": 0.65, "grad_norm": 1.0911945337079862, "learning_rate": 2.9045129380238177e-06, "loss": 0.5821, "step": 10132 }, { "epoch": 0.65, "grad_norm": 1.0919043982292753, "learning_rate": 2.9035718514649013e-06, "loss": 0.6341, "step": 10133 }, { "epoch": 0.65, "grad_norm": 1.612597812107582, "learning_rate": 2.902630855006061e-06, "loss": 0.707, "step": 10134 }, { "epoch": 0.65, "grad_norm": 1.5638151068730632, "learning_rate": 2.9016899486877404e-06, "loss": 0.727, "step": 10135 }, { "epoch": 0.65, "grad_norm": 1.5160268803961268, "learning_rate": 2.9007491325503757e-06, "loss": 0.5319, "step": 10136 }, { "epoch": 0.65, "grad_norm": 1.119210079309844, "learning_rate": 2.8998084066344005e-06, "loss": 0.6504, "step": 10137 }, { "epoch": 0.65, "grad_norm": 1.839639972303712, "learning_rate": 2.8988677709802514e-06, "loss": 0.8958, "step": 10138 }, { "epoch": 0.65, "grad_norm": 1.667049177575844, "learning_rate": 2.8979272256283453e-06, "loss": 0.7493, "step": 10139 }, { "epoch": 0.65, "grad_norm": 1.5706056778836928, "learning_rate": 2.896986770619113e-06, "loss": 0.6911, "step": 10140 }, { "epoch": 0.65, "grad_norm": 1.9104835273462129, "learning_rate": 2.89604640599297e-06, "loss": 0.7698, "step": 10141 }, { "epoch": 0.65, "grad_norm": 1.6216021952460278, "learning_rate": 2.8951061317903312e-06, "loss": 0.6629, "step": 10142 }, { "epoch": 0.65, "grad_norm": 1.4561116646233527, "learning_rate": 2.8941659480516083e-06, "loss": 0.6607, "step": 10143 }, { "epoch": 0.65, "grad_norm": 1.7267962480344061, "learning_rate": 2.8932258548172077e-06, "loss": 0.7028, "step": 10144 }, { "epoch": 0.65, "grad_norm": 1.4862232553752543, "learning_rate": 2.892285852127532e-06, "loss": 0.6996, "step": 10145 }, { "epoch": 0.65, "grad_norm": 1.6584782482995388, "learning_rate": 2.8913459400229825e-06, "loss": 0.6698, "step": 10146 }, { "epoch": 0.65, "grad_norm": 1.1038267899246297, "learning_rate": 2.8904061185439513e-06, "loss": 0.5694, "step": 10147 }, { "epoch": 0.65, "grad_norm": 1.0580271324290902, "learning_rate": 2.889466387730835e-06, "loss": 0.5801, "step": 10148 }, { "epoch": 0.65, "grad_norm": 1.6328071504306712, "learning_rate": 2.8885267476240173e-06, "loss": 0.6788, "step": 10149 }, { "epoch": 0.65, "grad_norm": 1.6480431070081718, "learning_rate": 2.8875871982638843e-06, "loss": 0.7575, "step": 10150 }, { "epoch": 0.65, "grad_norm": 1.7360028403465075, "learning_rate": 2.8866477396908143e-06, "loss": 0.7042, "step": 10151 }, { "epoch": 0.65, "grad_norm": 1.6689986337186042, "learning_rate": 2.8857083719451835e-06, "loss": 0.7446, "step": 10152 }, { "epoch": 0.65, "grad_norm": 1.7376136842320358, "learning_rate": 2.8847690950673642e-06, "loss": 0.7111, "step": 10153 }, { "epoch": 0.65, "grad_norm": 3.683406036499658, "learning_rate": 2.8838299090977224e-06, "loss": 0.7424, "step": 10154 }, { "epoch": 0.65, "grad_norm": 1.5328235092551523, "learning_rate": 2.882890814076629e-06, "loss": 0.7119, "step": 10155 }, { "epoch": 0.65, "grad_norm": 2.002251091747031, "learning_rate": 2.881951810044435e-06, "loss": 0.7542, "step": 10156 }, { "epoch": 0.65, "grad_norm": 1.6043845290543541, "learning_rate": 2.8810128970415035e-06, "loss": 0.6866, "step": 10157 }, { "epoch": 0.65, "grad_norm": 1.5385560004347467, "learning_rate": 2.880074075108186e-06, "loss": 0.6283, "step": 10158 }, { "epoch": 0.65, "grad_norm": 1.7878509887869591, "learning_rate": 2.879135344284829e-06, "loss": 0.7483, "step": 10159 }, { "epoch": 0.65, "grad_norm": 1.5357549354467452, "learning_rate": 2.8781967046117794e-06, "loss": 0.6165, "step": 10160 }, { "epoch": 0.65, "grad_norm": 1.7820383078398747, "learning_rate": 2.8772581561293743e-06, "loss": 0.7302, "step": 10161 }, { "epoch": 0.65, "grad_norm": 1.154986165862113, "learning_rate": 2.8763196988779573e-06, "loss": 0.6197, "step": 10162 }, { "epoch": 0.65, "grad_norm": 1.5656290991555954, "learning_rate": 2.875381332897853e-06, "loss": 0.7155, "step": 10163 }, { "epoch": 0.65, "grad_norm": 1.0970372599896674, "learning_rate": 2.874443058229399e-06, "loss": 0.7248, "step": 10164 }, { "epoch": 0.65, "grad_norm": 1.6289076835565142, "learning_rate": 2.873504874912911e-06, "loss": 0.6543, "step": 10165 }, { "epoch": 0.65, "grad_norm": 1.5003404318315317, "learning_rate": 2.872566782988718e-06, "loss": 0.7515, "step": 10166 }, { "epoch": 0.65, "grad_norm": 1.1607600937305464, "learning_rate": 2.8716287824971344e-06, "loss": 0.726, "step": 10167 }, { "epoch": 0.65, "grad_norm": 1.680793235292014, "learning_rate": 2.8706908734784713e-06, "loss": 0.6844, "step": 10168 }, { "epoch": 0.65, "grad_norm": 1.585449846839212, "learning_rate": 2.869753055973044e-06, "loss": 0.7388, "step": 10169 }, { "epoch": 0.65, "grad_norm": 1.3140651918882216, "learning_rate": 2.8688153300211503e-06, "loss": 0.7231, "step": 10170 }, { "epoch": 0.65, "grad_norm": 1.6637386116330832, "learning_rate": 2.8678776956630995e-06, "loss": 0.669, "step": 10171 }, { "epoch": 0.65, "grad_norm": 1.8901292682301936, "learning_rate": 2.8669401529391804e-06, "loss": 0.701, "step": 10172 }, { "epoch": 0.65, "grad_norm": 1.1272441839047287, "learning_rate": 2.866002701889694e-06, "loss": 0.5811, "step": 10173 }, { "epoch": 0.65, "grad_norm": 1.7852919468724207, "learning_rate": 2.865065342554927e-06, "loss": 0.6785, "step": 10174 }, { "epoch": 0.65, "grad_norm": 3.094527782347279, "learning_rate": 2.8641280749751655e-06, "loss": 0.7659, "step": 10175 }, { "epoch": 0.65, "grad_norm": 1.7565713660064008, "learning_rate": 2.8631908991906913e-06, "loss": 0.7703, "step": 10176 }, { "epoch": 0.65, "grad_norm": 1.6834791745447704, "learning_rate": 2.8622538152417804e-06, "loss": 0.7988, "step": 10177 }, { "epoch": 0.65, "grad_norm": 0.9153071792845668, "learning_rate": 2.8613168231687126e-06, "loss": 0.5979, "step": 10178 }, { "epoch": 0.65, "grad_norm": 19.8144884360942, "learning_rate": 2.8603799230117495e-06, "loss": 0.7864, "step": 10179 }, { "epoch": 0.65, "grad_norm": 1.662024533152111, "learning_rate": 2.8594431148111658e-06, "loss": 0.8091, "step": 10180 }, { "epoch": 0.65, "grad_norm": 1.926442668774275, "learning_rate": 2.858506398607214e-06, "loss": 0.7898, "step": 10181 }, { "epoch": 0.65, "grad_norm": 1.5853949184067568, "learning_rate": 2.8575697744401596e-06, "loss": 0.679, "step": 10182 }, { "epoch": 0.65, "grad_norm": 1.6556597538275495, "learning_rate": 2.8566332423502553e-06, "loss": 0.7165, "step": 10183 }, { "epoch": 0.65, "grad_norm": 2.5311211707237873, "learning_rate": 2.8556968023777476e-06, "loss": 0.7252, "step": 10184 }, { "epoch": 0.65, "grad_norm": 1.9042091340980831, "learning_rate": 2.8547604545628903e-06, "loss": 0.8066, "step": 10185 }, { "epoch": 0.65, "grad_norm": 1.7302647607898503, "learning_rate": 2.8538241989459164e-06, "loss": 0.7231, "step": 10186 }, { "epoch": 0.65, "grad_norm": 1.7156992518280878, "learning_rate": 2.852888035567073e-06, "loss": 0.7747, "step": 10187 }, { "epoch": 0.65, "grad_norm": 1.8786410697094262, "learning_rate": 2.8519519644665853e-06, "loss": 0.7626, "step": 10188 }, { "epoch": 0.65, "grad_norm": 1.8744167586866134, "learning_rate": 2.851015985684691e-06, "loss": 0.7244, "step": 10189 }, { "epoch": 0.65, "grad_norm": 1.7628375173081945, "learning_rate": 2.850080099261614e-06, "loss": 0.6777, "step": 10190 }, { "epoch": 0.65, "grad_norm": 1.9351709649127673, "learning_rate": 2.8491443052375767e-06, "loss": 0.7868, "step": 10191 }, { "epoch": 0.65, "grad_norm": 1.7655451528873036, "learning_rate": 2.8482086036527978e-06, "loss": 0.6783, "step": 10192 }, { "epoch": 0.65, "grad_norm": 1.6807991298937572, "learning_rate": 2.84727299454749e-06, "loss": 0.7244, "step": 10193 }, { "epoch": 0.65, "grad_norm": 1.65578263798606, "learning_rate": 2.8463374779618687e-06, "loss": 0.7084, "step": 10194 }, { "epoch": 0.65, "grad_norm": 1.856634499943464, "learning_rate": 2.845402053936133e-06, "loss": 0.7293, "step": 10195 }, { "epoch": 0.65, "grad_norm": 1.7871633600065642, "learning_rate": 2.8444667225104917e-06, "loss": 0.7209, "step": 10196 }, { "epoch": 0.65, "grad_norm": 1.6440335354600089, "learning_rate": 2.8435314837251414e-06, "loss": 0.7801, "step": 10197 }, { "epoch": 0.65, "grad_norm": 1.5610347734346792, "learning_rate": 2.842596337620277e-06, "loss": 0.67, "step": 10198 }, { "epoch": 0.65, "grad_norm": 1.5433789094242714, "learning_rate": 2.841661284236088e-06, "loss": 0.6208, "step": 10199 }, { "epoch": 0.65, "grad_norm": 1.7441502414984835, "learning_rate": 2.840726323612762e-06, "loss": 0.7118, "step": 10200 }, { "epoch": 0.65, "grad_norm": 1.5041964664884546, "learning_rate": 2.8397914557904816e-06, "loss": 0.7302, "step": 10201 }, { "epoch": 0.65, "grad_norm": 1.1385558486879261, "learning_rate": 2.8388566808094226e-06, "loss": 0.6802, "step": 10202 }, { "epoch": 0.65, "grad_norm": 1.8035996250607125, "learning_rate": 2.837921998709765e-06, "loss": 0.7516, "step": 10203 }, { "epoch": 0.65, "grad_norm": 1.4568248283403502, "learning_rate": 2.8369874095316763e-06, "loss": 0.6647, "step": 10204 }, { "epoch": 0.65, "grad_norm": 1.613188394967397, "learning_rate": 2.8360529133153237e-06, "loss": 0.8349, "step": 10205 }, { "epoch": 0.65, "grad_norm": 1.5531863055677737, "learning_rate": 2.8351185101008695e-06, "loss": 0.7653, "step": 10206 }, { "epoch": 0.65, "grad_norm": 1.199549455330059, "learning_rate": 2.8341841999284725e-06, "loss": 0.619, "step": 10207 }, { "epoch": 0.65, "grad_norm": 2.1178494731298936, "learning_rate": 2.8332499828382887e-06, "loss": 0.7887, "step": 10208 }, { "epoch": 0.65, "grad_norm": 1.7159189077890111, "learning_rate": 2.832315858870467e-06, "loss": 0.7785, "step": 10209 }, { "epoch": 0.65, "grad_norm": 1.53914244360285, "learning_rate": 2.8313818280651528e-06, "loss": 0.5763, "step": 10210 }, { "epoch": 0.65, "grad_norm": 1.5367109414374123, "learning_rate": 2.830447890462492e-06, "loss": 0.7465, "step": 10211 }, { "epoch": 0.65, "grad_norm": 1.9100352118149957, "learning_rate": 2.8295140461026232e-06, "loss": 0.6382, "step": 10212 }, { "epoch": 0.65, "grad_norm": 1.7458963322733885, "learning_rate": 2.828580295025679e-06, "loss": 0.875, "step": 10213 }, { "epoch": 0.65, "grad_norm": 1.552833758898227, "learning_rate": 2.8276466372717916e-06, "loss": 0.7593, "step": 10214 }, { "epoch": 0.65, "grad_norm": 2.1413232309540176, "learning_rate": 2.8267130728810866e-06, "loss": 0.7509, "step": 10215 }, { "epoch": 0.65, "grad_norm": 2.11359535119081, "learning_rate": 2.8257796018936872e-06, "loss": 0.7185, "step": 10216 }, { "epoch": 0.65, "grad_norm": 1.8130878451587797, "learning_rate": 2.824846224349712e-06, "loss": 0.7312, "step": 10217 }, { "epoch": 0.65, "grad_norm": 1.7566656521886224, "learning_rate": 2.823912940289275e-06, "loss": 0.8666, "step": 10218 }, { "epoch": 0.65, "grad_norm": 1.6398928497136513, "learning_rate": 2.822979749752486e-06, "loss": 0.7084, "step": 10219 }, { "epoch": 0.65, "grad_norm": 1.770265881824969, "learning_rate": 2.8220466527794555e-06, "loss": 0.8223, "step": 10220 }, { "epoch": 0.65, "grad_norm": 1.690923898051491, "learning_rate": 2.821113649410281e-06, "loss": 0.7432, "step": 10221 }, { "epoch": 0.65, "grad_norm": 2.688756894070788, "learning_rate": 2.820180739685064e-06, "loss": 0.8238, "step": 10222 }, { "epoch": 0.65, "grad_norm": 1.5700991363364398, "learning_rate": 2.8192479236438987e-06, "loss": 0.7207, "step": 10223 }, { "epoch": 0.65, "grad_norm": 2.255636354289974, "learning_rate": 2.8183152013268757e-06, "loss": 0.8282, "step": 10224 }, { "epoch": 0.65, "grad_norm": 1.7799199413697566, "learning_rate": 2.8173825727740808e-06, "loss": 0.6768, "step": 10225 }, { "epoch": 0.65, "grad_norm": 1.699098260964104, "learning_rate": 2.8164500380255942e-06, "loss": 0.7379, "step": 10226 }, { "epoch": 0.65, "grad_norm": 2.097731235458984, "learning_rate": 2.815517597121501e-06, "loss": 0.7381, "step": 10227 }, { "epoch": 0.65, "grad_norm": 1.7549024793371306, "learning_rate": 2.8145852501018678e-06, "loss": 0.6742, "step": 10228 }, { "epoch": 0.65, "grad_norm": 1.5773191643977895, "learning_rate": 2.8136529970067715e-06, "loss": 0.8078, "step": 10229 }, { "epoch": 0.65, "grad_norm": 1.582771899786014, "learning_rate": 2.8127208378762715e-06, "loss": 0.7371, "step": 10230 }, { "epoch": 0.65, "grad_norm": 1.4685220897054012, "learning_rate": 2.8117887727504355e-06, "loss": 0.6175, "step": 10231 }, { "epoch": 0.65, "grad_norm": 1.6447372314683062, "learning_rate": 2.8108568016693203e-06, "loss": 0.7814, "step": 10232 }, { "epoch": 0.65, "grad_norm": 1.7106785132816573, "learning_rate": 2.809924924672978e-06, "loss": 0.7914, "step": 10233 }, { "epoch": 0.66, "grad_norm": 1.5507621727289844, "learning_rate": 2.808993141801465e-06, "loss": 0.7618, "step": 10234 }, { "epoch": 0.66, "grad_norm": 1.5813313010441032, "learning_rate": 2.8080614530948174e-06, "loss": 0.7393, "step": 10235 }, { "epoch": 0.66, "grad_norm": 1.762565366175288, "learning_rate": 2.8071298585930873e-06, "loss": 0.7014, "step": 10236 }, { "epoch": 0.66, "grad_norm": 1.8751100336363993, "learning_rate": 2.806198358336304e-06, "loss": 0.6832, "step": 10237 }, { "epoch": 0.66, "grad_norm": 1.951954533749345, "learning_rate": 2.8052669523645075e-06, "loss": 0.7671, "step": 10238 }, { "epoch": 0.66, "grad_norm": 2.0786448157902604, "learning_rate": 2.804335640717726e-06, "loss": 0.8577, "step": 10239 }, { "epoch": 0.66, "grad_norm": 1.8109934232507932, "learning_rate": 2.8034044234359824e-06, "loss": 0.7422, "step": 10240 }, { "epoch": 0.66, "grad_norm": 1.099712827371796, "learning_rate": 2.802473300559305e-06, "loss": 0.6333, "step": 10241 }, { "epoch": 0.66, "grad_norm": 1.9032318681880769, "learning_rate": 2.8015422721277036e-06, "loss": 0.7395, "step": 10242 }, { "epoch": 0.66, "grad_norm": 1.625530293052695, "learning_rate": 2.8006113381812e-06, "loss": 0.7661, "step": 10243 }, { "epoch": 0.66, "grad_norm": 1.6838389491099175, "learning_rate": 2.799680498759795e-06, "loss": 0.621, "step": 10244 }, { "epoch": 0.66, "grad_norm": 1.1492672367745094, "learning_rate": 2.798749753903501e-06, "loss": 0.6874, "step": 10245 }, { "epoch": 0.66, "grad_norm": 1.2121807539456066, "learning_rate": 2.797819103652317e-06, "loss": 0.6611, "step": 10246 }, { "epoch": 0.66, "grad_norm": 1.9112623575058265, "learning_rate": 2.7968885480462406e-06, "loss": 0.7339, "step": 10247 }, { "epoch": 0.66, "grad_norm": 1.9466405770823205, "learning_rate": 2.7959580871252657e-06, "loss": 0.8409, "step": 10248 }, { "epoch": 0.66, "grad_norm": 1.468214853646422, "learning_rate": 2.7950277209293774e-06, "loss": 0.6603, "step": 10249 }, { "epoch": 0.66, "grad_norm": 1.7205247830996855, "learning_rate": 2.7940974494985695e-06, "loss": 0.7577, "step": 10250 }, { "epoch": 0.66, "grad_norm": 1.678173927587402, "learning_rate": 2.7931672728728137e-06, "loss": 0.752, "step": 10251 }, { "epoch": 0.66, "grad_norm": 1.9917575232674705, "learning_rate": 2.7922371910920933e-06, "loss": 0.868, "step": 10252 }, { "epoch": 0.66, "grad_norm": 1.7010053288884581, "learning_rate": 2.7913072041963797e-06, "loss": 0.647, "step": 10253 }, { "epoch": 0.66, "grad_norm": 1.9752268586870647, "learning_rate": 2.7903773122256405e-06, "loss": 0.6096, "step": 10254 }, { "epoch": 0.66, "grad_norm": 1.5311880225937389, "learning_rate": 2.789447515219842e-06, "loss": 0.6898, "step": 10255 }, { "epoch": 0.66, "grad_norm": 2.5223826687005273, "learning_rate": 2.788517813218944e-06, "loss": 0.7771, "step": 10256 }, { "epoch": 0.66, "grad_norm": 1.5396059536852524, "learning_rate": 2.7875882062629033e-06, "loss": 0.7072, "step": 10257 }, { "epoch": 0.66, "grad_norm": 2.087545294814886, "learning_rate": 2.786658694391671e-06, "loss": 0.7442, "step": 10258 }, { "epoch": 0.66, "grad_norm": 1.1654082179611922, "learning_rate": 2.7857292776452003e-06, "loss": 0.5979, "step": 10259 }, { "epoch": 0.66, "grad_norm": 1.662480360638094, "learning_rate": 2.784799956063429e-06, "loss": 0.811, "step": 10260 }, { "epoch": 0.66, "grad_norm": 1.9160934242452405, "learning_rate": 2.783870729686303e-06, "loss": 0.6589, "step": 10261 }, { "epoch": 0.66, "grad_norm": 1.7497830117420294, "learning_rate": 2.7829415985537567e-06, "loss": 0.6625, "step": 10262 }, { "epoch": 0.66, "grad_norm": 1.7040528653751876, "learning_rate": 2.782012562705721e-06, "loss": 0.7024, "step": 10263 }, { "epoch": 0.66, "grad_norm": 1.979202545142664, "learning_rate": 2.781083622182125e-06, "loss": 0.6866, "step": 10264 }, { "epoch": 0.66, "grad_norm": 1.515982275202087, "learning_rate": 2.780154777022892e-06, "loss": 0.6742, "step": 10265 }, { "epoch": 0.66, "grad_norm": 1.1605797623126766, "learning_rate": 2.779226027267943e-06, "loss": 0.7277, "step": 10266 }, { "epoch": 0.66, "grad_norm": 1.4465583428665405, "learning_rate": 2.77829737295719e-06, "loss": 0.6601, "step": 10267 }, { "epoch": 0.66, "grad_norm": 1.6062668322668785, "learning_rate": 2.77736881413055e-06, "loss": 0.7361, "step": 10268 }, { "epoch": 0.66, "grad_norm": 1.7801338520974446, "learning_rate": 2.7764403508279275e-06, "loss": 0.8198, "step": 10269 }, { "epoch": 0.66, "grad_norm": 2.04468506646194, "learning_rate": 2.7755119830892263e-06, "loss": 0.6362, "step": 10270 }, { "epoch": 0.66, "grad_norm": 1.6752848299310783, "learning_rate": 2.774583710954346e-06, "loss": 0.7501, "step": 10271 }, { "epoch": 0.66, "grad_norm": 1.6547952123936345, "learning_rate": 2.7736555344631823e-06, "loss": 0.6895, "step": 10272 }, { "epoch": 0.66, "grad_norm": 1.5036571977901305, "learning_rate": 2.7727274536556248e-06, "loss": 0.6611, "step": 10273 }, { "epoch": 0.66, "grad_norm": 1.992246142637352, "learning_rate": 2.771799468571561e-06, "loss": 0.7053, "step": 10274 }, { "epoch": 0.66, "grad_norm": 1.889512276214136, "learning_rate": 2.770871579250872e-06, "loss": 0.8625, "step": 10275 }, { "epoch": 0.66, "grad_norm": 1.6185239057468042, "learning_rate": 2.769943785733441e-06, "loss": 0.7829, "step": 10276 }, { "epoch": 0.66, "grad_norm": 1.0756925782682951, "learning_rate": 2.76901608805914e-06, "loss": 0.711, "step": 10277 }, { "epoch": 0.66, "grad_norm": 1.8806018883997828, "learning_rate": 2.768088486267839e-06, "loss": 0.682, "step": 10278 }, { "epoch": 0.66, "grad_norm": 1.670867838583053, "learning_rate": 2.767160980399406e-06, "loss": 0.7872, "step": 10279 }, { "epoch": 0.66, "grad_norm": 4.044441335890086, "learning_rate": 2.7662335704937015e-06, "loss": 0.6671, "step": 10280 }, { "epoch": 0.66, "grad_norm": 1.8872683523292575, "learning_rate": 2.7653062565905842e-06, "loss": 0.608, "step": 10281 }, { "epoch": 0.66, "grad_norm": 1.6411902918899277, "learning_rate": 2.764379038729907e-06, "loss": 0.7575, "step": 10282 }, { "epoch": 0.66, "grad_norm": 1.7460639733865895, "learning_rate": 2.7634519169515237e-06, "loss": 0.7781, "step": 10283 }, { "epoch": 0.66, "grad_norm": 3.164585497857283, "learning_rate": 2.7625248912952753e-06, "loss": 0.5435, "step": 10284 }, { "epoch": 0.66, "grad_norm": 1.5839986907662444, "learning_rate": 2.7615979618010057e-06, "loss": 0.805, "step": 10285 }, { "epoch": 0.66, "grad_norm": 1.7872346611584937, "learning_rate": 2.760671128508553e-06, "loss": 0.8186, "step": 10286 }, { "epoch": 0.66, "grad_norm": 1.8204538305068416, "learning_rate": 2.759744391457749e-06, "loss": 0.6627, "step": 10287 }, { "epoch": 0.66, "grad_norm": 1.692375989113866, "learning_rate": 2.758817750688425e-06, "loss": 0.6075, "step": 10288 }, { "epoch": 0.66, "grad_norm": 1.6068158714339857, "learning_rate": 2.757891206240404e-06, "loss": 0.9302, "step": 10289 }, { "epoch": 0.66, "grad_norm": 1.242267285459101, "learning_rate": 2.7569647581535077e-06, "loss": 0.6907, "step": 10290 }, { "epoch": 0.66, "grad_norm": 1.916913365645507, "learning_rate": 2.7560384064675505e-06, "loss": 0.7228, "step": 10291 }, { "epoch": 0.66, "grad_norm": 1.091243999957091, "learning_rate": 2.755112151222351e-06, "loss": 0.6722, "step": 10292 }, { "epoch": 0.66, "grad_norm": 1.4477257656337397, "learning_rate": 2.7541859924577106e-06, "loss": 0.6001, "step": 10293 }, { "epoch": 0.66, "grad_norm": 1.8392874583927938, "learning_rate": 2.753259930213442e-06, "loss": 0.9799, "step": 10294 }, { "epoch": 0.66, "grad_norm": 1.726274456425814, "learning_rate": 2.7523339645293357e-06, "loss": 0.7806, "step": 10295 }, { "epoch": 0.66, "grad_norm": 1.6059217176309228, "learning_rate": 2.7514080954451944e-06, "loss": 0.7396, "step": 10296 }, { "epoch": 0.66, "grad_norm": 2.0655037022383107, "learning_rate": 2.750482323000808e-06, "loss": 0.776, "step": 10297 }, { "epoch": 0.66, "grad_norm": 1.5277369770862086, "learning_rate": 2.7495566472359638e-06, "loss": 0.6202, "step": 10298 }, { "epoch": 0.66, "grad_norm": 1.5513653747152878, "learning_rate": 2.748631068190449e-06, "loss": 0.7285, "step": 10299 }, { "epoch": 0.66, "grad_norm": 1.7341620079768651, "learning_rate": 2.747705585904036e-06, "loss": 0.6594, "step": 10300 }, { "epoch": 0.66, "grad_norm": 1.6170734899894696, "learning_rate": 2.746780200416509e-06, "loss": 0.629, "step": 10301 }, { "epoch": 0.66, "grad_norm": 1.1387416545467357, "learning_rate": 2.7458549117676294e-06, "loss": 0.5593, "step": 10302 }, { "epoch": 0.66, "grad_norm": 1.6230199387639628, "learning_rate": 2.7449297199971713e-06, "loss": 0.6855, "step": 10303 }, { "epoch": 0.66, "grad_norm": 1.5063382423602665, "learning_rate": 2.744004625144896e-06, "loss": 0.7511, "step": 10304 }, { "epoch": 0.66, "grad_norm": 1.4766350431327966, "learning_rate": 2.743079627250559e-06, "loss": 0.7576, "step": 10305 }, { "epoch": 0.66, "grad_norm": 1.5570000557182113, "learning_rate": 2.7421547263539205e-06, "loss": 0.6735, "step": 10306 }, { "epoch": 0.66, "grad_norm": 1.5099587356354276, "learning_rate": 2.741229922494724e-06, "loss": 0.6861, "step": 10307 }, { "epoch": 0.66, "grad_norm": 1.7011973490391823, "learning_rate": 2.740305215712723e-06, "loss": 0.7335, "step": 10308 }, { "epoch": 0.66, "grad_norm": 1.7356386153167258, "learning_rate": 2.7393806060476525e-06, "loss": 0.7755, "step": 10309 }, { "epoch": 0.66, "grad_norm": 1.8922672789979564, "learning_rate": 2.738456093539254e-06, "loss": 0.7939, "step": 10310 }, { "epoch": 0.66, "grad_norm": 2.169084279880755, "learning_rate": 2.737531678227261e-06, "loss": 0.7278, "step": 10311 }, { "epoch": 0.66, "grad_norm": 2.039567789656839, "learning_rate": 2.736607360151402e-06, "loss": 0.838, "step": 10312 }, { "epoch": 0.66, "grad_norm": 1.841217057555176, "learning_rate": 2.7356831393514033e-06, "loss": 0.7742, "step": 10313 }, { "epoch": 0.66, "grad_norm": 1.692069369797368, "learning_rate": 2.734759015866983e-06, "loss": 0.6702, "step": 10314 }, { "epoch": 0.66, "grad_norm": 1.6778032911299574, "learning_rate": 2.733834989737864e-06, "loss": 0.8833, "step": 10315 }, { "epoch": 0.66, "grad_norm": 2.068653125927275, "learning_rate": 2.732911061003752e-06, "loss": 0.8119, "step": 10316 }, { "epoch": 0.66, "grad_norm": 1.200656455676539, "learning_rate": 2.7319872297043604e-06, "loss": 0.6962, "step": 10317 }, { "epoch": 0.66, "grad_norm": 1.6931090011607433, "learning_rate": 2.7310634958793915e-06, "loss": 0.7098, "step": 10318 }, { "epoch": 0.66, "grad_norm": 1.73282350170272, "learning_rate": 2.7301398595685456e-06, "loss": 0.7747, "step": 10319 }, { "epoch": 0.66, "grad_norm": 1.5843647902126818, "learning_rate": 2.72921632081152e-06, "loss": 0.6827, "step": 10320 }, { "epoch": 0.66, "grad_norm": 1.9806409895492323, "learning_rate": 2.7282928796480047e-06, "loss": 0.8232, "step": 10321 }, { "epoch": 0.66, "grad_norm": 1.931002497912759, "learning_rate": 2.7273695361176876e-06, "loss": 0.7354, "step": 10322 }, { "epoch": 0.66, "grad_norm": 1.5588145679102063, "learning_rate": 2.726446290260251e-06, "loss": 0.6184, "step": 10323 }, { "epoch": 0.66, "grad_norm": 2.3767056306394014, "learning_rate": 2.725523142115377e-06, "loss": 0.8388, "step": 10324 }, { "epoch": 0.66, "grad_norm": 1.0166447384509083, "learning_rate": 2.72460009172274e-06, "loss": 0.6154, "step": 10325 }, { "epoch": 0.66, "grad_norm": 1.5761296710668498, "learning_rate": 2.7236771391220084e-06, "loss": 0.7369, "step": 10326 }, { "epoch": 0.66, "grad_norm": 2.0618203444304997, "learning_rate": 2.722754284352851e-06, "loss": 0.7119, "step": 10327 }, { "epoch": 0.66, "grad_norm": 1.57018980838769, "learning_rate": 2.7218315274549296e-06, "loss": 0.714, "step": 10328 }, { "epoch": 0.66, "grad_norm": 1.4703329576504736, "learning_rate": 2.7209088684679015e-06, "loss": 0.7087, "step": 10329 }, { "epoch": 0.66, "grad_norm": 1.4142371938536495, "learning_rate": 2.7199863074314216e-06, "loss": 0.6523, "step": 10330 }, { "epoch": 0.66, "grad_norm": 1.2157426494110515, "learning_rate": 2.719063844385138e-06, "loss": 0.6302, "step": 10331 }, { "epoch": 0.66, "grad_norm": 1.611129106842194, "learning_rate": 2.7181414793686966e-06, "loss": 0.7126, "step": 10332 }, { "epoch": 0.66, "grad_norm": 1.6799072362811072, "learning_rate": 2.7172192124217418e-06, "loss": 0.7233, "step": 10333 }, { "epoch": 0.66, "grad_norm": 1.2613727545389652, "learning_rate": 2.7162970435839074e-06, "loss": 0.602, "step": 10334 }, { "epoch": 0.66, "grad_norm": 1.3559312553211134, "learning_rate": 2.7153749728948286e-06, "loss": 0.6172, "step": 10335 }, { "epoch": 0.66, "grad_norm": 1.6512198063543604, "learning_rate": 2.714453000394132e-06, "loss": 0.6994, "step": 10336 }, { "epoch": 0.66, "grad_norm": 1.9231915937380593, "learning_rate": 2.7135311261214425e-06, "loss": 0.759, "step": 10337 }, { "epoch": 0.66, "grad_norm": 1.4758985597515728, "learning_rate": 2.712609350116382e-06, "loss": 0.6278, "step": 10338 }, { "epoch": 0.66, "grad_norm": 1.2128762059236986, "learning_rate": 2.711687672418564e-06, "loss": 0.6602, "step": 10339 }, { "epoch": 0.66, "grad_norm": 2.0151365463234496, "learning_rate": 2.7107660930675992e-06, "loss": 0.7245, "step": 10340 }, { "epoch": 0.66, "grad_norm": 1.92153786307016, "learning_rate": 2.7098446121031008e-06, "loss": 0.762, "step": 10341 }, { "epoch": 0.66, "grad_norm": 3.401775147394514, "learning_rate": 2.708923229564667e-06, "loss": 0.7716, "step": 10342 }, { "epoch": 0.66, "grad_norm": 1.6585936894767732, "learning_rate": 2.7080019454919e-06, "loss": 0.6673, "step": 10343 }, { "epoch": 0.66, "grad_norm": 1.559068580644759, "learning_rate": 2.7070807599243925e-06, "loss": 0.713, "step": 10344 }, { "epoch": 0.66, "grad_norm": 1.8706835066977257, "learning_rate": 2.706159672901736e-06, "loss": 0.6947, "step": 10345 }, { "epoch": 0.66, "grad_norm": 1.6920020666335862, "learning_rate": 2.705238684463517e-06, "loss": 0.6403, "step": 10346 }, { "epoch": 0.66, "grad_norm": 2.1302161101892656, "learning_rate": 2.7043177946493148e-06, "loss": 0.7907, "step": 10347 }, { "epoch": 0.66, "grad_norm": 1.8204553784050372, "learning_rate": 2.7033970034987144e-06, "loss": 0.6957, "step": 10348 }, { "epoch": 0.66, "grad_norm": 1.707513965763255, "learning_rate": 2.7024763110512808e-06, "loss": 0.617, "step": 10349 }, { "epoch": 0.66, "grad_norm": 1.7076802000840796, "learning_rate": 2.7015557173465897e-06, "loss": 0.734, "step": 10350 }, { "epoch": 0.66, "grad_norm": 1.6839382910012646, "learning_rate": 2.700635222424204e-06, "loss": 0.7481, "step": 10351 }, { "epoch": 0.66, "grad_norm": 1.8431940961707936, "learning_rate": 2.699714826323685e-06, "loss": 0.8059, "step": 10352 }, { "epoch": 0.66, "grad_norm": 1.1311953465918954, "learning_rate": 2.6987945290845884e-06, "loss": 0.7242, "step": 10353 }, { "epoch": 0.66, "grad_norm": 1.803935812943476, "learning_rate": 2.6978743307464663e-06, "loss": 0.6817, "step": 10354 }, { "epoch": 0.66, "grad_norm": 1.6667011523504716, "learning_rate": 2.6969542313488716e-06, "loss": 0.74, "step": 10355 }, { "epoch": 0.66, "grad_norm": 1.134650268409749, "learning_rate": 2.6960342309313415e-06, "loss": 0.6624, "step": 10356 }, { "epoch": 0.66, "grad_norm": 1.2185986814753722, "learning_rate": 2.695114329533422e-06, "loss": 0.6568, "step": 10357 }, { "epoch": 0.66, "grad_norm": 1.698288251354418, "learning_rate": 2.694194527194641e-06, "loss": 0.673, "step": 10358 }, { "epoch": 0.66, "grad_norm": 1.531863110307744, "learning_rate": 2.693274823954537e-06, "loss": 0.6607, "step": 10359 }, { "epoch": 0.66, "grad_norm": 1.1447331323104883, "learning_rate": 2.6923552198526337e-06, "loss": 0.6537, "step": 10360 }, { "epoch": 0.66, "grad_norm": 1.8002833271145904, "learning_rate": 2.6914357149284522e-06, "loss": 0.8031, "step": 10361 }, { "epoch": 0.66, "grad_norm": 1.8204759678931932, "learning_rate": 2.690516309221517e-06, "loss": 0.7261, "step": 10362 }, { "epoch": 0.66, "grad_norm": 1.6706347401049364, "learning_rate": 2.689597002771333e-06, "loss": 0.7231, "step": 10363 }, { "epoch": 0.66, "grad_norm": 2.506909354469126, "learning_rate": 2.688677795617419e-06, "loss": 0.7561, "step": 10364 }, { "epoch": 0.66, "grad_norm": 1.6371971643999415, "learning_rate": 2.6877586877992735e-06, "loss": 0.7034, "step": 10365 }, { "epoch": 0.66, "grad_norm": 4.338408404098978, "learning_rate": 2.686839679356402e-06, "loss": 0.787, "step": 10366 }, { "epoch": 0.66, "grad_norm": 1.7139400775773976, "learning_rate": 2.6859207703283012e-06, "loss": 0.8251, "step": 10367 }, { "epoch": 0.66, "grad_norm": 1.629967319057076, "learning_rate": 2.685001960754463e-06, "loss": 0.672, "step": 10368 }, { "epoch": 0.66, "grad_norm": 1.7092481458989306, "learning_rate": 2.6840832506743757e-06, "loss": 0.7112, "step": 10369 }, { "epoch": 0.66, "grad_norm": 1.6957931802298443, "learning_rate": 2.6831646401275226e-06, "loss": 0.7654, "step": 10370 }, { "epoch": 0.66, "grad_norm": 1.7152224687400555, "learning_rate": 2.6822461291533874e-06, "loss": 0.7031, "step": 10371 }, { "epoch": 0.66, "grad_norm": 1.2966607725285646, "learning_rate": 2.681327717791441e-06, "loss": 0.7131, "step": 10372 }, { "epoch": 0.66, "grad_norm": 1.762120396500431, "learning_rate": 2.6804094060811604e-06, "loss": 0.6393, "step": 10373 }, { "epoch": 0.66, "grad_norm": 1.1037194947226348, "learning_rate": 2.679491194062005e-06, "loss": 0.5808, "step": 10374 }, { "epoch": 0.66, "grad_norm": 1.620184882381432, "learning_rate": 2.6785730817734446e-06, "loss": 0.6966, "step": 10375 }, { "epoch": 0.66, "grad_norm": 1.6928967511571473, "learning_rate": 2.6776550692549346e-06, "loss": 0.667, "step": 10376 }, { "epoch": 0.66, "grad_norm": 1.66366703890849, "learning_rate": 2.67673715654593e-06, "loss": 0.694, "step": 10377 }, { "epoch": 0.66, "grad_norm": 1.6475450874112183, "learning_rate": 2.675819343685881e-06, "loss": 0.7044, "step": 10378 }, { "epoch": 0.66, "grad_norm": 2.1087794932216393, "learning_rate": 2.6749016307142307e-06, "loss": 0.8179, "step": 10379 }, { "epoch": 0.66, "grad_norm": 1.9064182144314288, "learning_rate": 2.673984017670426e-06, "loss": 0.7266, "step": 10380 }, { "epoch": 0.66, "grad_norm": 1.567455739807368, "learning_rate": 2.6730665045938975e-06, "loss": 0.6714, "step": 10381 }, { "epoch": 0.66, "grad_norm": 1.5780338066084953, "learning_rate": 2.6721490915240823e-06, "loss": 0.7635, "step": 10382 }, { "epoch": 0.66, "grad_norm": 1.231566326172886, "learning_rate": 2.671231778500408e-06, "loss": 0.5881, "step": 10383 }, { "epoch": 0.66, "grad_norm": 1.7291342562390946, "learning_rate": 2.670314565562298e-06, "loss": 0.7412, "step": 10384 }, { "epoch": 0.66, "grad_norm": 1.8278645932629405, "learning_rate": 2.669397452749173e-06, "loss": 0.8119, "step": 10385 }, { "epoch": 0.66, "grad_norm": 1.6775545229601616, "learning_rate": 2.668480440100448e-06, "loss": 0.7025, "step": 10386 }, { "epoch": 0.66, "grad_norm": 1.1814145015341169, "learning_rate": 2.667563527655535e-06, "loss": 0.7048, "step": 10387 }, { "epoch": 0.66, "grad_norm": 1.8879717281715238, "learning_rate": 2.6666467154538383e-06, "loss": 0.7558, "step": 10388 }, { "epoch": 0.66, "grad_norm": 1.8962245444370485, "learning_rate": 2.6657300035347643e-06, "loss": 0.7544, "step": 10389 }, { "epoch": 0.67, "grad_norm": 1.056879752165534, "learning_rate": 2.6648133919377106e-06, "loss": 0.6574, "step": 10390 }, { "epoch": 0.67, "grad_norm": 1.6215637186166387, "learning_rate": 2.6638968807020693e-06, "loss": 0.5919, "step": 10391 }, { "epoch": 0.67, "grad_norm": 2.3934784489575973, "learning_rate": 2.6629804698672325e-06, "loss": 0.5662, "step": 10392 }, { "epoch": 0.67, "grad_norm": 1.0951269823031284, "learning_rate": 2.6620641594725834e-06, "loss": 0.5813, "step": 10393 }, { "epoch": 0.67, "grad_norm": 1.718108716561199, "learning_rate": 2.6611479495575044e-06, "loss": 0.6669, "step": 10394 }, { "epoch": 0.67, "grad_norm": 1.041394248156012, "learning_rate": 2.6602318401613715e-06, "loss": 0.6257, "step": 10395 }, { "epoch": 0.67, "grad_norm": 2.29696767122708, "learning_rate": 2.6593158313235556e-06, "loss": 0.739, "step": 10396 }, { "epoch": 0.67, "grad_norm": 1.6964391606893179, "learning_rate": 2.6583999230834294e-06, "loss": 0.6975, "step": 10397 }, { "epoch": 0.67, "grad_norm": 1.238560167775118, "learning_rate": 2.6574841154803546e-06, "loss": 0.6136, "step": 10398 }, { "epoch": 0.67, "grad_norm": 1.721244667750514, "learning_rate": 2.656568408553689e-06, "loss": 0.8185, "step": 10399 }, { "epoch": 0.67, "grad_norm": 1.5637080425376615, "learning_rate": 2.655652802342789e-06, "loss": 0.6182, "step": 10400 }, { "epoch": 0.67, "grad_norm": 1.7066557161657232, "learning_rate": 2.654737296887006e-06, "loss": 0.9063, "step": 10401 }, { "epoch": 0.67, "grad_norm": 1.716847204567125, "learning_rate": 2.6538218922256857e-06, "loss": 0.7026, "step": 10402 }, { "epoch": 0.67, "grad_norm": 1.7343641670050947, "learning_rate": 2.652906588398171e-06, "loss": 0.8621, "step": 10403 }, { "epoch": 0.67, "grad_norm": 2.436865519873671, "learning_rate": 2.6519913854437984e-06, "loss": 0.8201, "step": 10404 }, { "epoch": 0.67, "grad_norm": 1.564329668595808, "learning_rate": 2.6510762834019006e-06, "loss": 0.7303, "step": 10405 }, { "epoch": 0.67, "grad_norm": 1.7479613136141259, "learning_rate": 2.65016128231181e-06, "loss": 0.8172, "step": 10406 }, { "epoch": 0.67, "grad_norm": 1.7114155791454428, "learning_rate": 2.64924638221285e-06, "loss": 0.7189, "step": 10407 }, { "epoch": 0.67, "grad_norm": 1.4932873036332577, "learning_rate": 2.6483315831443412e-06, "loss": 0.7568, "step": 10408 }, { "epoch": 0.67, "grad_norm": 1.8401675171901508, "learning_rate": 2.6474168851455995e-06, "loss": 0.7535, "step": 10409 }, { "epoch": 0.67, "grad_norm": 3.8914311470053344, "learning_rate": 2.646502288255936e-06, "loss": 0.6374, "step": 10410 }, { "epoch": 0.67, "grad_norm": 1.7871341594346892, "learning_rate": 2.645587792514659e-06, "loss": 0.6849, "step": 10411 }, { "epoch": 0.67, "grad_norm": 1.6636972208344163, "learning_rate": 2.6446733979610696e-06, "loss": 0.7009, "step": 10412 }, { "epoch": 0.67, "grad_norm": 1.742664840349299, "learning_rate": 2.6437591046344724e-06, "loss": 0.7464, "step": 10413 }, { "epoch": 0.67, "grad_norm": 1.7564763094242917, "learning_rate": 2.642844912574153e-06, "loss": 0.6273, "step": 10414 }, { "epoch": 0.67, "grad_norm": 1.9299324513023628, "learning_rate": 2.641930821819409e-06, "loss": 0.693, "step": 10415 }, { "epoch": 0.67, "grad_norm": 1.6642653836267025, "learning_rate": 2.641016832409523e-06, "loss": 0.6609, "step": 10416 }, { "epoch": 0.67, "grad_norm": 1.0679598494969782, "learning_rate": 2.6401029443837767e-06, "loss": 0.7091, "step": 10417 }, { "epoch": 0.67, "grad_norm": 1.6329152994525595, "learning_rate": 2.6391891577814468e-06, "loss": 0.6856, "step": 10418 }, { "epoch": 0.67, "grad_norm": 1.7151948121676028, "learning_rate": 2.6382754726418047e-06, "loss": 0.5869, "step": 10419 }, { "epoch": 0.67, "grad_norm": 1.6335151934259466, "learning_rate": 2.637361889004123e-06, "loss": 0.6274, "step": 10420 }, { "epoch": 0.67, "grad_norm": 1.7214534695742165, "learning_rate": 2.63644840690766e-06, "loss": 0.7473, "step": 10421 }, { "epoch": 0.67, "grad_norm": 1.7044824460700831, "learning_rate": 2.6355350263916814e-06, "loss": 0.7183, "step": 10422 }, { "epoch": 0.67, "grad_norm": 1.769225600810237, "learning_rate": 2.634621747495435e-06, "loss": 0.671, "step": 10423 }, { "epoch": 0.67, "grad_norm": 1.6541403945112279, "learning_rate": 2.6337085702581776e-06, "loss": 0.7577, "step": 10424 }, { "epoch": 0.67, "grad_norm": 1.6766711867505888, "learning_rate": 2.632795494719154e-06, "loss": 0.6692, "step": 10425 }, { "epoch": 0.67, "grad_norm": 1.7418198633876525, "learning_rate": 2.6318825209176025e-06, "loss": 0.6348, "step": 10426 }, { "epoch": 0.67, "grad_norm": 1.7827353331313833, "learning_rate": 2.6309696488927692e-06, "loss": 0.763, "step": 10427 }, { "epoch": 0.67, "grad_norm": 1.2507961821687867, "learning_rate": 2.630056878683878e-06, "loss": 0.5965, "step": 10428 }, { "epoch": 0.67, "grad_norm": 1.4675378212007686, "learning_rate": 2.6291442103301667e-06, "loss": 0.5746, "step": 10429 }, { "epoch": 0.67, "grad_norm": 1.7091957726207652, "learning_rate": 2.628231643870851e-06, "loss": 0.7969, "step": 10430 }, { "epoch": 0.67, "grad_norm": 1.931389257753846, "learning_rate": 2.6273191793451575e-06, "loss": 0.8214, "step": 10431 }, { "epoch": 0.67, "grad_norm": 1.6537507308019421, "learning_rate": 2.6264068167923003e-06, "loss": 0.6698, "step": 10432 }, { "epoch": 0.67, "grad_norm": 1.9423765899820244, "learning_rate": 2.6254945562514877e-06, "loss": 0.7585, "step": 10433 }, { "epoch": 0.67, "grad_norm": 1.8399329685215249, "learning_rate": 2.6245823977619343e-06, "loss": 0.7612, "step": 10434 }, { "epoch": 0.67, "grad_norm": 1.6626226631012626, "learning_rate": 2.623670341362834e-06, "loss": 0.7544, "step": 10435 }, { "epoch": 0.67, "grad_norm": 1.631536840368992, "learning_rate": 2.6227583870933926e-06, "loss": 0.7038, "step": 10436 }, { "epoch": 0.67, "grad_norm": 1.7926135435285762, "learning_rate": 2.621846534992797e-06, "loss": 0.7491, "step": 10437 }, { "epoch": 0.67, "grad_norm": 1.1758086376397188, "learning_rate": 2.620934785100242e-06, "loss": 0.7228, "step": 10438 }, { "epoch": 0.67, "grad_norm": 1.6572054040354824, "learning_rate": 2.62002313745491e-06, "loss": 0.7994, "step": 10439 }, { "epoch": 0.67, "grad_norm": 1.5849618060860444, "learning_rate": 2.619111592095984e-06, "loss": 0.6517, "step": 10440 }, { "epoch": 0.67, "grad_norm": 1.8798673337228158, "learning_rate": 2.6182001490626374e-06, "loss": 0.7548, "step": 10441 }, { "epoch": 0.67, "grad_norm": 1.5485256513924435, "learning_rate": 2.6172888083940444e-06, "loss": 0.7347, "step": 10442 }, { "epoch": 0.67, "grad_norm": 2.130099990608433, "learning_rate": 2.6163775701293714e-06, "loss": 0.72, "step": 10443 }, { "epoch": 0.67, "grad_norm": 1.1371927953657046, "learning_rate": 2.6154664343077798e-06, "loss": 0.5758, "step": 10444 }, { "epoch": 0.67, "grad_norm": 1.7451342477573886, "learning_rate": 2.6145554009684343e-06, "loss": 0.9073, "step": 10445 }, { "epoch": 0.67, "grad_norm": 1.6666276552601684, "learning_rate": 2.6136444701504813e-06, "loss": 0.6958, "step": 10446 }, { "epoch": 0.67, "grad_norm": 2.2150637221759224, "learning_rate": 2.612733641893076e-06, "loss": 0.7346, "step": 10447 }, { "epoch": 0.67, "grad_norm": 2.0762972918498708, "learning_rate": 2.611822916235363e-06, "loss": 0.7781, "step": 10448 }, { "epoch": 0.67, "grad_norm": 1.433038107467086, "learning_rate": 2.610912293216483e-06, "loss": 0.6249, "step": 10449 }, { "epoch": 0.67, "grad_norm": 1.7517568737939588, "learning_rate": 2.6100017728755717e-06, "loss": 0.6757, "step": 10450 }, { "epoch": 0.67, "grad_norm": 2.5370395681172733, "learning_rate": 2.6090913552517626e-06, "loss": 0.8511, "step": 10451 }, { "epoch": 0.67, "grad_norm": 2.0149293791631, "learning_rate": 2.608181040384183e-06, "loss": 0.7595, "step": 10452 }, { "epoch": 0.67, "grad_norm": 1.7107041977632222, "learning_rate": 2.6072708283119543e-06, "loss": 0.6713, "step": 10453 }, { "epoch": 0.67, "grad_norm": 1.9978027180611198, "learning_rate": 2.6063607190741997e-06, "loss": 0.8343, "step": 10454 }, { "epoch": 0.67, "grad_norm": 1.5966319455011515, "learning_rate": 2.6054507127100315e-06, "loss": 0.6071, "step": 10455 }, { "epoch": 0.67, "grad_norm": 1.8072123331559518, "learning_rate": 2.60454080925856e-06, "loss": 0.7368, "step": 10456 }, { "epoch": 0.67, "grad_norm": 1.7129063789038503, "learning_rate": 2.6036310087588913e-06, "loss": 0.7526, "step": 10457 }, { "epoch": 0.67, "grad_norm": 1.6171433762983909, "learning_rate": 2.602721311250126e-06, "loss": 0.774, "step": 10458 }, { "epoch": 0.67, "grad_norm": 2.1431930554573526, "learning_rate": 2.601811716771362e-06, "loss": 0.8123, "step": 10459 }, { "epoch": 0.67, "grad_norm": 1.8877236821266539, "learning_rate": 2.60090222536169e-06, "loss": 0.7959, "step": 10460 }, { "epoch": 0.67, "grad_norm": 1.8591870272955524, "learning_rate": 2.599992837060198e-06, "loss": 0.7432, "step": 10461 }, { "epoch": 0.67, "grad_norm": 1.4894611373977866, "learning_rate": 2.5990835519059724e-06, "loss": 0.8023, "step": 10462 }, { "epoch": 0.67, "grad_norm": 1.047327887304436, "learning_rate": 2.598174369938091e-06, "loss": 0.5945, "step": 10463 }, { "epoch": 0.67, "grad_norm": 1.689467324748465, "learning_rate": 2.5972652911956287e-06, "loss": 0.7326, "step": 10464 }, { "epoch": 0.67, "grad_norm": 1.7242632695354378, "learning_rate": 2.596356315717654e-06, "loss": 0.8655, "step": 10465 }, { "epoch": 0.67, "grad_norm": 1.7888085448041113, "learning_rate": 2.5954474435432353e-06, "loss": 0.7057, "step": 10466 }, { "epoch": 0.67, "grad_norm": 1.1980146652715442, "learning_rate": 2.594538674711432e-06, "loss": 0.679, "step": 10467 }, { "epoch": 0.67, "grad_norm": 1.6402174117784698, "learning_rate": 2.593630009261301e-06, "loss": 0.7037, "step": 10468 }, { "epoch": 0.67, "grad_norm": 1.712739229362937, "learning_rate": 2.5927214472318983e-06, "loss": 0.7969, "step": 10469 }, { "epoch": 0.67, "grad_norm": 1.7722768549693202, "learning_rate": 2.5918129886622667e-06, "loss": 0.6524, "step": 10470 }, { "epoch": 0.67, "grad_norm": 1.6099554608224387, "learning_rate": 2.5909046335914533e-06, "loss": 0.7456, "step": 10471 }, { "epoch": 0.67, "grad_norm": 1.5757393742742332, "learning_rate": 2.5899963820584973e-06, "loss": 0.7537, "step": 10472 }, { "epoch": 0.67, "grad_norm": 1.8173233667470599, "learning_rate": 2.5890882341024328e-06, "loss": 0.6663, "step": 10473 }, { "epoch": 0.67, "grad_norm": 1.721649779220694, "learning_rate": 2.5881801897622895e-06, "loss": 0.7133, "step": 10474 }, { "epoch": 0.67, "grad_norm": 2.322430678346655, "learning_rate": 2.5872722490770917e-06, "loss": 0.686, "step": 10475 }, { "epoch": 0.67, "grad_norm": 1.766785817948101, "learning_rate": 2.5863644120858675e-06, "loss": 0.8575, "step": 10476 }, { "epoch": 0.67, "grad_norm": 1.7435282879724472, "learning_rate": 2.5854566788276247e-06, "loss": 0.7919, "step": 10477 }, { "epoch": 0.67, "grad_norm": 1.5071153887199347, "learning_rate": 2.584549049341384e-06, "loss": 0.6898, "step": 10478 }, { "epoch": 0.67, "grad_norm": 1.9758646900415198, "learning_rate": 2.583641523666146e-06, "loss": 0.7647, "step": 10479 }, { "epoch": 0.67, "grad_norm": 1.7536347692991805, "learning_rate": 2.5827341018409197e-06, "loss": 0.753, "step": 10480 }, { "epoch": 0.67, "grad_norm": 1.7198444586330883, "learning_rate": 2.5818267839047018e-06, "loss": 0.7115, "step": 10481 }, { "epoch": 0.67, "grad_norm": 1.9261112400288258, "learning_rate": 2.5809195698964885e-06, "loss": 0.7005, "step": 10482 }, { "epoch": 0.67, "grad_norm": 1.6829034550715667, "learning_rate": 2.5800124598552676e-06, "loss": 0.8747, "step": 10483 }, { "epoch": 0.67, "grad_norm": 1.7568886475927084, "learning_rate": 2.5791054538200255e-06, "loss": 0.7163, "step": 10484 }, { "epoch": 0.67, "grad_norm": 4.176879818236095, "learning_rate": 2.5781985518297468e-06, "loss": 0.7119, "step": 10485 }, { "epoch": 0.67, "grad_norm": 1.7270853093975613, "learning_rate": 2.577291753923402e-06, "loss": 0.825, "step": 10486 }, { "epoch": 0.67, "grad_norm": 1.9540528714138086, "learning_rate": 2.576385060139971e-06, "loss": 0.7139, "step": 10487 }, { "epoch": 0.67, "grad_norm": 2.009952076862381, "learning_rate": 2.575478470518413e-06, "loss": 0.6753, "step": 10488 }, { "epoch": 0.67, "grad_norm": 1.8876737921887203, "learning_rate": 2.574571985097698e-06, "loss": 0.6691, "step": 10489 }, { "epoch": 0.67, "grad_norm": 1.6578535097003444, "learning_rate": 2.5736656039167817e-06, "loss": 0.6967, "step": 10490 }, { "epoch": 0.67, "grad_norm": 1.7651007952801263, "learning_rate": 2.572759327014618e-06, "loss": 0.5987, "step": 10491 }, { "epoch": 0.67, "grad_norm": 1.8259455981392605, "learning_rate": 2.5718531544301623e-06, "loss": 0.7154, "step": 10492 }, { "epoch": 0.67, "grad_norm": 1.9212105013209546, "learning_rate": 2.5709470862023517e-06, "loss": 0.7275, "step": 10493 }, { "epoch": 0.67, "grad_norm": 1.6386693916275246, "learning_rate": 2.570041122370135e-06, "loss": 0.6486, "step": 10494 }, { "epoch": 0.67, "grad_norm": 1.952620726721253, "learning_rate": 2.5691352629724407e-06, "loss": 0.85, "step": 10495 }, { "epoch": 0.67, "grad_norm": 1.8080796935409427, "learning_rate": 2.5682295080482073e-06, "loss": 0.6366, "step": 10496 }, { "epoch": 0.67, "grad_norm": 1.1237493953444708, "learning_rate": 2.567323857636359e-06, "loss": 0.6012, "step": 10497 }, { "epoch": 0.67, "grad_norm": 1.415589090871729, "learning_rate": 2.5664183117758184e-06, "loss": 0.7226, "step": 10498 }, { "epoch": 0.67, "grad_norm": 1.6752677810407048, "learning_rate": 2.565512870505509e-06, "loss": 0.7643, "step": 10499 }, { "epoch": 0.67, "grad_norm": 1.6096680748849836, "learning_rate": 2.5646075338643362e-06, "loss": 0.7728, "step": 10500 }, { "epoch": 0.67, "grad_norm": 1.7926537014806863, "learning_rate": 2.5637023018912187e-06, "loss": 0.7431, "step": 10501 }, { "epoch": 0.67, "grad_norm": 1.6286269322344364, "learning_rate": 2.5627971746250523e-06, "loss": 0.6499, "step": 10502 }, { "epoch": 0.67, "grad_norm": 1.541894668248813, "learning_rate": 2.561892152104745e-06, "loss": 0.8103, "step": 10503 }, { "epoch": 0.67, "grad_norm": 1.5762787833502048, "learning_rate": 2.560987234369189e-06, "loss": 0.6625, "step": 10504 }, { "epoch": 0.67, "grad_norm": 1.574216154538552, "learning_rate": 2.5600824214572774e-06, "loss": 0.6482, "step": 10505 }, { "epoch": 0.67, "grad_norm": 2.1919381785536096, "learning_rate": 2.5591777134078954e-06, "loss": 0.6272, "step": 10506 }, { "epoch": 0.67, "grad_norm": 2.0036484326360555, "learning_rate": 2.558273110259925e-06, "loss": 0.6455, "step": 10507 }, { "epoch": 0.67, "grad_norm": 1.604512744305081, "learning_rate": 2.5573686120522496e-06, "loss": 0.8202, "step": 10508 }, { "epoch": 0.67, "grad_norm": 1.0199424865173197, "learning_rate": 2.5564642188237345e-06, "loss": 0.5039, "step": 10509 }, { "epoch": 0.67, "grad_norm": 1.2152590505804866, "learning_rate": 2.5555599306132547e-06, "loss": 0.6077, "step": 10510 }, { "epoch": 0.67, "grad_norm": 1.6419569672745997, "learning_rate": 2.5546557474596725e-06, "loss": 0.7461, "step": 10511 }, { "epoch": 0.67, "grad_norm": 2.0968010150319305, "learning_rate": 2.5537516694018476e-06, "loss": 0.699, "step": 10512 }, { "epoch": 0.67, "grad_norm": 2.4800399686976524, "learning_rate": 2.5528476964786365e-06, "loss": 0.6437, "step": 10513 }, { "epoch": 0.67, "grad_norm": 1.7680991837111921, "learning_rate": 2.551943828728888e-06, "loss": 0.7893, "step": 10514 }, { "epoch": 0.67, "grad_norm": 1.7541034613654916, "learning_rate": 2.5510400661914503e-06, "loss": 0.7629, "step": 10515 }, { "epoch": 0.67, "grad_norm": 1.7926813311457335, "learning_rate": 2.550136408905165e-06, "loss": 0.8612, "step": 10516 }, { "epoch": 0.67, "grad_norm": 1.6632060602161263, "learning_rate": 2.5492328569088666e-06, "loss": 0.7456, "step": 10517 }, { "epoch": 0.67, "grad_norm": 1.0783144098527673, "learning_rate": 2.5483294102413925e-06, "loss": 0.7043, "step": 10518 }, { "epoch": 0.67, "grad_norm": 1.5051124467329213, "learning_rate": 2.5474260689415687e-06, "loss": 0.7033, "step": 10519 }, { "epoch": 0.67, "grad_norm": 1.5743491893106825, "learning_rate": 2.5465228330482184e-06, "loss": 0.6297, "step": 10520 }, { "epoch": 0.67, "grad_norm": 1.5823814039999091, "learning_rate": 2.5456197026001613e-06, "loss": 0.6558, "step": 10521 }, { "epoch": 0.67, "grad_norm": 2.5153657624593095, "learning_rate": 2.544716677636212e-06, "loss": 0.6621, "step": 10522 }, { "epoch": 0.67, "grad_norm": 1.7854004628649487, "learning_rate": 2.543813758195181e-06, "loss": 0.7378, "step": 10523 }, { "epoch": 0.67, "grad_norm": 1.8467774849035898, "learning_rate": 2.5429109443158735e-06, "loss": 0.9527, "step": 10524 }, { "epoch": 0.67, "grad_norm": 1.1237067713737452, "learning_rate": 2.54200823603709e-06, "loss": 0.641, "step": 10525 }, { "epoch": 0.67, "grad_norm": 4.444706859982213, "learning_rate": 2.5411056333976266e-06, "loss": 0.7489, "step": 10526 }, { "epoch": 0.67, "grad_norm": 1.612200019792298, "learning_rate": 2.5402031364362772e-06, "loss": 0.6837, "step": 10527 }, { "epoch": 0.67, "grad_norm": 1.9530192064303065, "learning_rate": 2.539300745191829e-06, "loss": 0.7019, "step": 10528 }, { "epoch": 0.67, "grad_norm": 1.4390031795005953, "learning_rate": 2.5383984597030638e-06, "loss": 0.7347, "step": 10529 }, { "epoch": 0.67, "grad_norm": 1.1899198043338692, "learning_rate": 2.53749628000876e-06, "loss": 0.6686, "step": 10530 }, { "epoch": 0.67, "grad_norm": 1.5807777426813039, "learning_rate": 2.5365942061476924e-06, "loss": 0.7981, "step": 10531 }, { "epoch": 0.67, "grad_norm": 1.7921253330053881, "learning_rate": 2.5356922381586287e-06, "loss": 0.683, "step": 10532 }, { "epoch": 0.67, "grad_norm": 1.6002937412071119, "learning_rate": 2.5347903760803327e-06, "loss": 0.6995, "step": 10533 }, { "epoch": 0.67, "grad_norm": 1.6666378549059078, "learning_rate": 2.53388861995157e-06, "loss": 0.6261, "step": 10534 }, { "epoch": 0.67, "grad_norm": 1.7595983840271356, "learning_rate": 2.532986969811089e-06, "loss": 0.7173, "step": 10535 }, { "epoch": 0.67, "grad_norm": 1.7628180697553721, "learning_rate": 2.532085425697646e-06, "loss": 0.7909, "step": 10536 }, { "epoch": 0.67, "grad_norm": 1.533544072788719, "learning_rate": 2.5311839876499843e-06, "loss": 0.6812, "step": 10537 }, { "epoch": 0.67, "grad_norm": 1.620735354390883, "learning_rate": 2.530282655706848e-06, "loss": 0.7928, "step": 10538 }, { "epoch": 0.67, "grad_norm": 1.0597424210802433, "learning_rate": 2.5293814299069742e-06, "loss": 0.6285, "step": 10539 }, { "epoch": 0.67, "grad_norm": 2.2709734340023853, "learning_rate": 2.528480310289093e-06, "loss": 0.68, "step": 10540 }, { "epoch": 0.67, "grad_norm": 2.0691307591164114, "learning_rate": 2.5275792968919376e-06, "loss": 0.7613, "step": 10541 }, { "epoch": 0.67, "grad_norm": 1.7488485112003411, "learning_rate": 2.526678389754226e-06, "loss": 0.6456, "step": 10542 }, { "epoch": 0.67, "grad_norm": 1.6315681113078753, "learning_rate": 2.525777588914683e-06, "loss": 0.7841, "step": 10543 }, { "epoch": 0.67, "grad_norm": 1.9859981452410267, "learning_rate": 2.524876894412017e-06, "loss": 0.7242, "step": 10544 }, { "epoch": 0.67, "grad_norm": 1.7198130357096413, "learning_rate": 2.5239763062849424e-06, "loss": 0.7205, "step": 10545 }, { "epoch": 0.68, "grad_norm": 1.8578531108125311, "learning_rate": 2.5230758245721636e-06, "loss": 0.736, "step": 10546 }, { "epoch": 0.68, "grad_norm": 1.0274701322597464, "learning_rate": 2.5221754493123798e-06, "loss": 0.6651, "step": 10547 }, { "epoch": 0.68, "grad_norm": 1.631837008178408, "learning_rate": 2.5212751805442916e-06, "loss": 0.6436, "step": 10548 }, { "epoch": 0.68, "grad_norm": 1.496171738040921, "learning_rate": 2.520375018306584e-06, "loss": 0.6805, "step": 10549 }, { "epoch": 0.68, "grad_norm": 1.6545670452811327, "learning_rate": 2.5194749626379515e-06, "loss": 0.6828, "step": 10550 }, { "epoch": 0.68, "grad_norm": 2.1146075759616956, "learning_rate": 2.5185750135770694e-06, "loss": 0.791, "step": 10551 }, { "epoch": 0.68, "grad_norm": 2.2404312329373313, "learning_rate": 2.51767517116262e-06, "loss": 0.6861, "step": 10552 }, { "epoch": 0.68, "grad_norm": 2.0627470565297346, "learning_rate": 2.516775435433276e-06, "loss": 0.8228, "step": 10553 }, { "epoch": 0.68, "grad_norm": 1.5781528936400189, "learning_rate": 2.5158758064277068e-06, "loss": 0.692, "step": 10554 }, { "epoch": 0.68, "grad_norm": 1.705221296499766, "learning_rate": 2.514976284184575e-06, "loss": 0.7175, "step": 10555 }, { "epoch": 0.68, "grad_norm": 1.7268843999931838, "learning_rate": 2.514076868742538e-06, "loss": 0.8059, "step": 10556 }, { "epoch": 0.68, "grad_norm": 1.5818744495403085, "learning_rate": 2.5131775601402588e-06, "loss": 0.642, "step": 10557 }, { "epoch": 0.68, "grad_norm": 1.6274410135243826, "learning_rate": 2.5122783584163774e-06, "loss": 0.6137, "step": 10558 }, { "epoch": 0.68, "grad_norm": 2.3330208710730824, "learning_rate": 2.511379263609547e-06, "loss": 0.8145, "step": 10559 }, { "epoch": 0.68, "grad_norm": 1.623424357558957, "learning_rate": 2.5104802757584067e-06, "loss": 0.6134, "step": 10560 }, { "epoch": 0.68, "grad_norm": 1.609236626482998, "learning_rate": 2.5095813949015934e-06, "loss": 0.6455, "step": 10561 }, { "epoch": 0.68, "grad_norm": 1.9672709034503144, "learning_rate": 2.5086826210777383e-06, "loss": 0.7097, "step": 10562 }, { "epoch": 0.68, "grad_norm": 1.5925153733874027, "learning_rate": 2.507783954325467e-06, "loss": 0.6863, "step": 10563 }, { "epoch": 0.68, "grad_norm": 1.6898548185532063, "learning_rate": 2.506885394683408e-06, "loss": 0.6597, "step": 10564 }, { "epoch": 0.68, "grad_norm": 1.5014612612975209, "learning_rate": 2.505986942190173e-06, "loss": 0.7463, "step": 10565 }, { "epoch": 0.68, "grad_norm": 2.009828115412889, "learning_rate": 2.5050885968843817e-06, "loss": 0.7756, "step": 10566 }, { "epoch": 0.68, "grad_norm": 1.6529380234543858, "learning_rate": 2.504190358804637e-06, "loss": 0.7288, "step": 10567 }, { "epoch": 0.68, "grad_norm": 1.2611642683620292, "learning_rate": 2.5032922279895476e-06, "loss": 0.703, "step": 10568 }, { "epoch": 0.68, "grad_norm": 6.554716317744197, "learning_rate": 2.5023942044777114e-06, "loss": 0.728, "step": 10569 }, { "epoch": 0.68, "grad_norm": 1.777647532513758, "learning_rate": 2.5014962883077256e-06, "loss": 0.7877, "step": 10570 }, { "epoch": 0.68, "grad_norm": 1.1105444569000977, "learning_rate": 2.5005984795181783e-06, "loss": 0.6467, "step": 10571 }, { "epoch": 0.68, "grad_norm": 2.155250980698469, "learning_rate": 2.499700778147654e-06, "loss": 0.711, "step": 10572 }, { "epoch": 0.68, "grad_norm": 2.085390367181912, "learning_rate": 2.498803184234741e-06, "loss": 0.7058, "step": 10573 }, { "epoch": 0.68, "grad_norm": 1.5307737955900085, "learning_rate": 2.4979056978180073e-06, "loss": 0.6392, "step": 10574 }, { "epoch": 0.68, "grad_norm": 1.1534275857939222, "learning_rate": 2.497008318936031e-06, "loss": 0.6198, "step": 10575 }, { "epoch": 0.68, "grad_norm": 2.452204183564028, "learning_rate": 2.4961110476273775e-06, "loss": 0.6828, "step": 10576 }, { "epoch": 0.68, "grad_norm": 1.1704763716496178, "learning_rate": 2.49521388393061e-06, "loss": 0.6721, "step": 10577 }, { "epoch": 0.68, "grad_norm": 1.1922253035792683, "learning_rate": 2.4943168278842865e-06, "loss": 0.6727, "step": 10578 }, { "epoch": 0.68, "grad_norm": 1.3717135590716802, "learning_rate": 2.4934198795269603e-06, "loss": 0.645, "step": 10579 }, { "epoch": 0.68, "grad_norm": 1.805367447112204, "learning_rate": 2.492523038897181e-06, "loss": 0.6638, "step": 10580 }, { "epoch": 0.68, "grad_norm": 1.0911506590835232, "learning_rate": 2.4916263060334903e-06, "loss": 0.6762, "step": 10581 }, { "epoch": 0.68, "grad_norm": 1.6820694066344564, "learning_rate": 2.4907296809744324e-06, "loss": 0.6477, "step": 10582 }, { "epoch": 0.68, "grad_norm": 1.108853838033233, "learning_rate": 2.48983316375854e-06, "loss": 0.5954, "step": 10583 }, { "epoch": 0.68, "grad_norm": 1.5891663214386422, "learning_rate": 2.4889367544243437e-06, "loss": 0.7607, "step": 10584 }, { "epoch": 0.68, "grad_norm": 2.3374999665892635, "learning_rate": 2.488040453010369e-06, "loss": 0.7831, "step": 10585 }, { "epoch": 0.68, "grad_norm": 2.0030560842826506, "learning_rate": 2.4871442595551378e-06, "loss": 0.7001, "step": 10586 }, { "epoch": 0.68, "grad_norm": 1.9526300458368149, "learning_rate": 2.486248174097165e-06, "loss": 0.7126, "step": 10587 }, { "epoch": 0.68, "grad_norm": 1.1993073006589026, "learning_rate": 2.4853521966749643e-06, "loss": 0.7291, "step": 10588 }, { "epoch": 0.68, "grad_norm": 1.715879761847729, "learning_rate": 2.4844563273270404e-06, "loss": 0.7162, "step": 10589 }, { "epoch": 0.68, "grad_norm": 1.7330983423271145, "learning_rate": 2.483560566091901e-06, "loss": 0.7214, "step": 10590 }, { "epoch": 0.68, "grad_norm": 1.6035822938919226, "learning_rate": 2.482664913008037e-06, "loss": 0.7008, "step": 10591 }, { "epoch": 0.68, "grad_norm": 1.6493995725328114, "learning_rate": 2.4817693681139475e-06, "loss": 0.6843, "step": 10592 }, { "epoch": 0.68, "grad_norm": 1.1085857218788582, "learning_rate": 2.4808739314481196e-06, "loss": 0.705, "step": 10593 }, { "epoch": 0.68, "grad_norm": 1.5020060056213187, "learning_rate": 2.479978603049036e-06, "loss": 0.7532, "step": 10594 }, { "epoch": 0.68, "grad_norm": 1.6335261631102294, "learning_rate": 2.4790833829551764e-06, "loss": 0.7942, "step": 10595 }, { "epoch": 0.68, "grad_norm": 1.7079114453467132, "learning_rate": 2.4781882712050164e-06, "loss": 0.7189, "step": 10596 }, { "epoch": 0.68, "grad_norm": 1.7174927090109127, "learning_rate": 2.4772932678370255e-06, "loss": 0.6497, "step": 10597 }, { "epoch": 0.68, "grad_norm": 1.6451836538643236, "learning_rate": 2.476398372889667e-06, "loss": 0.7593, "step": 10598 }, { "epoch": 0.68, "grad_norm": 1.7235302741968166, "learning_rate": 2.475503586401407e-06, "loss": 0.6454, "step": 10599 }, { "epoch": 0.68, "grad_norm": 1.6925307573232822, "learning_rate": 2.474608908410695e-06, "loss": 0.7467, "step": 10600 }, { "epoch": 0.68, "grad_norm": 1.7622474574556162, "learning_rate": 2.473714338955987e-06, "loss": 0.7666, "step": 10601 }, { "epoch": 0.68, "grad_norm": 1.6590251161135117, "learning_rate": 2.4728198780757283e-06, "loss": 0.6415, "step": 10602 }, { "epoch": 0.68, "grad_norm": 1.7757362248878255, "learning_rate": 2.4719255258083614e-06, "loss": 0.7523, "step": 10603 }, { "epoch": 0.68, "grad_norm": 1.769090508430764, "learning_rate": 2.4710312821923225e-06, "loss": 0.7155, "step": 10604 }, { "epoch": 0.68, "grad_norm": 1.8002367089896714, "learning_rate": 2.4701371472660433e-06, "loss": 0.7813, "step": 10605 }, { "epoch": 0.68, "grad_norm": 1.777256015226217, "learning_rate": 2.4692431210679575e-06, "loss": 0.6424, "step": 10606 }, { "epoch": 0.68, "grad_norm": 1.65687882294274, "learning_rate": 2.4683492036364805e-06, "loss": 0.7674, "step": 10607 }, { "epoch": 0.68, "grad_norm": 1.6421390025756908, "learning_rate": 2.4674553950100388e-06, "loss": 0.7441, "step": 10608 }, { "epoch": 0.68, "grad_norm": 1.7514384434631625, "learning_rate": 2.466561695227039e-06, "loss": 0.7437, "step": 10609 }, { "epoch": 0.68, "grad_norm": 1.714426793568641, "learning_rate": 2.465668104325896e-06, "loss": 0.7788, "step": 10610 }, { "epoch": 0.68, "grad_norm": 1.188294918385381, "learning_rate": 2.4647746223450123e-06, "loss": 0.5728, "step": 10611 }, { "epoch": 0.68, "grad_norm": 1.7016116702000468, "learning_rate": 2.4638812493227864e-06, "loss": 0.8448, "step": 10612 }, { "epoch": 0.68, "grad_norm": 2.0788934052718804, "learning_rate": 2.462987985297619e-06, "loss": 0.7093, "step": 10613 }, { "epoch": 0.68, "grad_norm": 1.9740590348882556, "learning_rate": 2.462094830307893e-06, "loss": 0.7558, "step": 10614 }, { "epoch": 0.68, "grad_norm": 1.7574415467302986, "learning_rate": 2.461201784392002e-06, "loss": 0.7998, "step": 10615 }, { "epoch": 0.68, "grad_norm": 1.501000779568918, "learning_rate": 2.4603088475883203e-06, "loss": 0.6276, "step": 10616 }, { "epoch": 0.68, "grad_norm": 1.8574033184093925, "learning_rate": 2.459416019935229e-06, "loss": 0.8744, "step": 10617 }, { "epoch": 0.68, "grad_norm": 1.4788057171168032, "learning_rate": 2.458523301471099e-06, "loss": 0.8149, "step": 10618 }, { "epoch": 0.68, "grad_norm": 1.745670711870188, "learning_rate": 2.4576306922342956e-06, "loss": 0.7388, "step": 10619 }, { "epoch": 0.68, "grad_norm": 1.6003649647447424, "learning_rate": 2.456738192263186e-06, "loss": 0.6245, "step": 10620 }, { "epoch": 0.68, "grad_norm": 1.0030063775784295, "learning_rate": 2.4558458015961213e-06, "loss": 0.6696, "step": 10621 }, { "epoch": 0.68, "grad_norm": 1.5088298363952575, "learning_rate": 2.454953520271462e-06, "loss": 0.6623, "step": 10622 }, { "epoch": 0.68, "grad_norm": 1.7273672423628685, "learning_rate": 2.4540613483275484e-06, "loss": 0.7814, "step": 10623 }, { "epoch": 0.68, "grad_norm": 1.5856502171998341, "learning_rate": 2.45316928580273e-06, "loss": 0.7151, "step": 10624 }, { "epoch": 0.68, "grad_norm": 1.5910228422165236, "learning_rate": 2.4522773327353446e-06, "loss": 0.6497, "step": 10625 }, { "epoch": 0.68, "grad_norm": 1.7405372228826963, "learning_rate": 2.4513854891637253e-06, "loss": 0.7341, "step": 10626 }, { "epoch": 0.68, "grad_norm": 1.682900241995513, "learning_rate": 2.4504937551262024e-06, "loss": 0.7725, "step": 10627 }, { "epoch": 0.68, "grad_norm": 1.7019149872277308, "learning_rate": 2.4496021306610996e-06, "loss": 0.6607, "step": 10628 }, { "epoch": 0.68, "grad_norm": 1.6961217752697895, "learning_rate": 2.448710615806741e-06, "loss": 0.8619, "step": 10629 }, { "epoch": 0.68, "grad_norm": 1.8582500733456082, "learning_rate": 2.4478192106014354e-06, "loss": 0.8005, "step": 10630 }, { "epoch": 0.68, "grad_norm": 1.1109341434574909, "learning_rate": 2.4469279150834996e-06, "loss": 0.6852, "step": 10631 }, { "epoch": 0.68, "grad_norm": 1.6468477589262938, "learning_rate": 2.4460367292912367e-06, "loss": 0.7393, "step": 10632 }, { "epoch": 0.68, "grad_norm": 1.0011663038261456, "learning_rate": 2.445145653262949e-06, "loss": 0.6009, "step": 10633 }, { "epoch": 0.68, "grad_norm": 1.7436438124869973, "learning_rate": 2.444254687036933e-06, "loss": 0.6881, "step": 10634 }, { "epoch": 0.68, "grad_norm": 1.136874355143883, "learning_rate": 2.4433638306514794e-06, "loss": 0.6339, "step": 10635 }, { "epoch": 0.68, "grad_norm": 1.6137686330857859, "learning_rate": 2.4424730841448766e-06, "loss": 0.7213, "step": 10636 }, { "epoch": 0.68, "grad_norm": 1.9024295363978665, "learning_rate": 2.4415824475554046e-06, "loss": 0.8304, "step": 10637 }, { "epoch": 0.68, "grad_norm": 1.546431490430132, "learning_rate": 2.4406919209213466e-06, "loss": 0.6396, "step": 10638 }, { "epoch": 0.68, "grad_norm": 1.4497829275595253, "learning_rate": 2.439801504280968e-06, "loss": 0.6243, "step": 10639 }, { "epoch": 0.68, "grad_norm": 1.5137822028029344, "learning_rate": 2.4389111976725437e-06, "loss": 0.6272, "step": 10640 }, { "epoch": 0.68, "grad_norm": 1.7549874184740661, "learning_rate": 2.438021001134334e-06, "loss": 0.8621, "step": 10641 }, { "epoch": 0.68, "grad_norm": 1.5760184361287068, "learning_rate": 2.437130914704598e-06, "loss": 0.7359, "step": 10642 }, { "epoch": 0.68, "grad_norm": 1.5661786458211904, "learning_rate": 2.436240938421591e-06, "loss": 0.7383, "step": 10643 }, { "epoch": 0.68, "grad_norm": 3.3835208808018287, "learning_rate": 2.43535107232356e-06, "loss": 0.8126, "step": 10644 }, { "epoch": 0.68, "grad_norm": 1.2540713405724253, "learning_rate": 2.4344613164487513e-06, "loss": 0.6706, "step": 10645 }, { "epoch": 0.68, "grad_norm": 1.1764119293994748, "learning_rate": 2.433571670835402e-06, "loss": 0.6199, "step": 10646 }, { "epoch": 0.68, "grad_norm": 1.878702070359928, "learning_rate": 2.432682135521752e-06, "loss": 0.7959, "step": 10647 }, { "epoch": 0.68, "grad_norm": 1.8845126872735225, "learning_rate": 2.4317927105460294e-06, "loss": 0.7115, "step": 10648 }, { "epoch": 0.68, "grad_norm": 1.5812399070063787, "learning_rate": 2.430903395946459e-06, "loss": 0.7084, "step": 10649 }, { "epoch": 0.68, "grad_norm": 2.0681215916060856, "learning_rate": 2.4300141917612623e-06, "loss": 0.7318, "step": 10650 }, { "epoch": 0.68, "grad_norm": 1.6420178650351744, "learning_rate": 2.429125098028655e-06, "loss": 0.6579, "step": 10651 }, { "epoch": 0.68, "grad_norm": 1.7207135505019002, "learning_rate": 2.4282361147868494e-06, "loss": 0.7369, "step": 10652 }, { "epoch": 0.68, "grad_norm": 1.5348174048447976, "learning_rate": 2.427347242074051e-06, "loss": 0.7367, "step": 10653 }, { "epoch": 0.68, "grad_norm": 1.7709467892382644, "learning_rate": 2.4264584799284614e-06, "loss": 0.871, "step": 10654 }, { "epoch": 0.68, "grad_norm": 1.732291468573473, "learning_rate": 2.4255698283882794e-06, "loss": 0.7218, "step": 10655 }, { "epoch": 0.68, "grad_norm": 1.212373774072572, "learning_rate": 2.424681287491697e-06, "loss": 0.5704, "step": 10656 }, { "epoch": 0.68, "grad_norm": 2.2889625930853716, "learning_rate": 2.4237928572769016e-06, "loss": 0.8023, "step": 10657 }, { "epoch": 0.68, "grad_norm": 1.6638864766141142, "learning_rate": 2.4229045377820752e-06, "loss": 0.6942, "step": 10658 }, { "epoch": 0.68, "grad_norm": 1.8538052091389794, "learning_rate": 2.4220163290453973e-06, "loss": 0.6242, "step": 10659 }, { "epoch": 0.68, "grad_norm": 2.0583107858370377, "learning_rate": 2.4211282311050397e-06, "loss": 0.7112, "step": 10660 }, { "epoch": 0.68, "grad_norm": 1.6364095377009875, "learning_rate": 2.4202402439991708e-06, "loss": 0.6632, "step": 10661 }, { "epoch": 0.68, "grad_norm": 1.6836537812606422, "learning_rate": 2.4193523677659585e-06, "loss": 0.6801, "step": 10662 }, { "epoch": 0.68, "grad_norm": 1.9851124776164564, "learning_rate": 2.4184646024435547e-06, "loss": 0.7444, "step": 10663 }, { "epoch": 0.68, "grad_norm": 1.9838226420545053, "learning_rate": 2.4175769480701222e-06, "loss": 0.6789, "step": 10664 }, { "epoch": 0.68, "grad_norm": 1.705772359293435, "learning_rate": 2.416689404683802e-06, "loss": 0.8004, "step": 10665 }, { "epoch": 0.68, "grad_norm": 5.930737517429468, "learning_rate": 2.4158019723227447e-06, "loss": 0.6931, "step": 10666 }, { "epoch": 0.68, "grad_norm": 1.7344186597367977, "learning_rate": 2.4149146510250885e-06, "loss": 0.8125, "step": 10667 }, { "epoch": 0.68, "grad_norm": 1.8721560604008076, "learning_rate": 2.414027440828967e-06, "loss": 0.7369, "step": 10668 }, { "epoch": 0.68, "grad_norm": 1.6406999386459802, "learning_rate": 2.4131403417725156e-06, "loss": 0.6748, "step": 10669 }, { "epoch": 0.68, "grad_norm": 1.8458993016332048, "learning_rate": 2.412253353893853e-06, "loss": 0.807, "step": 10670 }, { "epoch": 0.68, "grad_norm": 1.8187457327424352, "learning_rate": 2.4113664772311076e-06, "loss": 0.689, "step": 10671 }, { "epoch": 0.68, "grad_norm": 2.1158218366292396, "learning_rate": 2.410479711822388e-06, "loss": 0.7211, "step": 10672 }, { "epoch": 0.68, "grad_norm": 1.6915812131600028, "learning_rate": 2.40959305770581e-06, "loss": 0.8219, "step": 10673 }, { "epoch": 0.68, "grad_norm": 2.2277734531394695, "learning_rate": 2.40870651491948e-06, "loss": 0.7124, "step": 10674 }, { "epoch": 0.68, "grad_norm": 1.6651640745813763, "learning_rate": 2.4078200835014983e-06, "loss": 0.6024, "step": 10675 }, { "epoch": 0.68, "grad_norm": 3.5908457560704607, "learning_rate": 2.4069337634899624e-06, "loss": 0.7766, "step": 10676 }, { "epoch": 0.68, "grad_norm": 1.7117740131775885, "learning_rate": 2.4060475549229634e-06, "loss": 0.6658, "step": 10677 }, { "epoch": 0.68, "grad_norm": 1.5794125198625066, "learning_rate": 2.4051614578385937e-06, "loss": 0.7437, "step": 10678 }, { "epoch": 0.68, "grad_norm": 1.8710048318453911, "learning_rate": 2.4042754722749274e-06, "loss": 0.8228, "step": 10679 }, { "epoch": 0.68, "grad_norm": 2.2651197965651573, "learning_rate": 2.403389598270051e-06, "loss": 0.7463, "step": 10680 }, { "epoch": 0.68, "grad_norm": 2.7864315948971567, "learning_rate": 2.40250383586203e-06, "loss": 0.7138, "step": 10681 }, { "epoch": 0.68, "grad_norm": 1.7002100701433267, "learning_rate": 2.4016181850889383e-06, "loss": 0.7398, "step": 10682 }, { "epoch": 0.68, "grad_norm": 1.9139820338540214, "learning_rate": 2.400732645988836e-06, "loss": 0.8295, "step": 10683 }, { "epoch": 0.68, "grad_norm": 1.615770717196149, "learning_rate": 2.3998472185997813e-06, "loss": 0.7289, "step": 10684 }, { "epoch": 0.68, "grad_norm": 1.6188898647166439, "learning_rate": 2.398961902959833e-06, "loss": 0.6282, "step": 10685 }, { "epoch": 0.68, "grad_norm": 1.3346460236778535, "learning_rate": 2.398076699107034e-06, "loss": 0.7165, "step": 10686 }, { "epoch": 0.68, "grad_norm": 2.6579850775189473, "learning_rate": 2.3971916070794337e-06, "loss": 0.8303, "step": 10687 }, { "epoch": 0.68, "grad_norm": 1.7502722487382985, "learning_rate": 2.3963066269150653e-06, "loss": 0.6916, "step": 10688 }, { "epoch": 0.68, "grad_norm": 1.2305594974574001, "learning_rate": 2.3954217586519692e-06, "loss": 0.5937, "step": 10689 }, { "epoch": 0.68, "grad_norm": 1.1225416042698408, "learning_rate": 2.394537002328172e-06, "loss": 0.6359, "step": 10690 }, { "epoch": 0.68, "grad_norm": 1.6460277231794902, "learning_rate": 2.3936523579817005e-06, "loss": 0.6884, "step": 10691 }, { "epoch": 0.68, "grad_norm": 1.723084975039364, "learning_rate": 2.3927678256505737e-06, "loss": 0.7157, "step": 10692 }, { "epoch": 0.68, "grad_norm": 1.6484269700710914, "learning_rate": 2.391883405372805e-06, "loss": 0.7137, "step": 10693 }, { "epoch": 0.68, "grad_norm": 1.694864671001242, "learning_rate": 2.3909990971864117e-06, "loss": 0.722, "step": 10694 }, { "epoch": 0.68, "grad_norm": 1.3363227218146891, "learning_rate": 2.3901149011293906e-06, "loss": 0.7038, "step": 10695 }, { "epoch": 0.68, "grad_norm": 1.732939634528286, "learning_rate": 2.3892308172397487e-06, "loss": 0.7222, "step": 10696 }, { "epoch": 0.68, "grad_norm": 1.4297285663211272, "learning_rate": 2.388346845555481e-06, "loss": 0.7453, "step": 10697 }, { "epoch": 0.68, "grad_norm": 1.8061190979956798, "learning_rate": 2.3874629861145775e-06, "loss": 0.7766, "step": 10698 }, { "epoch": 0.68, "grad_norm": 1.799201725728818, "learning_rate": 2.386579238955025e-06, "loss": 0.6939, "step": 10699 }, { "epoch": 0.68, "grad_norm": 4.035595322728664, "learning_rate": 2.3856956041148056e-06, "loss": 0.7943, "step": 10700 }, { "epoch": 0.68, "grad_norm": 1.7853164451628154, "learning_rate": 2.384812081631895e-06, "loss": 0.7279, "step": 10701 }, { "epoch": 0.68, "grad_norm": 2.012008367757041, "learning_rate": 2.383928671544265e-06, "loss": 0.7035, "step": 10702 }, { "epoch": 0.69, "grad_norm": 1.6543147056398797, "learning_rate": 2.3830453738898852e-06, "loss": 0.8082, "step": 10703 }, { "epoch": 0.69, "grad_norm": 2.3717368573844126, "learning_rate": 2.382162188706716e-06, "loss": 0.6636, "step": 10704 }, { "epoch": 0.69, "grad_norm": 1.9558654583302888, "learning_rate": 2.3812791160327154e-06, "loss": 0.7217, "step": 10705 }, { "epoch": 0.69, "grad_norm": 1.6488509946124033, "learning_rate": 2.3803961559058355e-06, "loss": 0.7358, "step": 10706 }, { "epoch": 0.69, "grad_norm": 1.7562394090275975, "learning_rate": 2.379513308364024e-06, "loss": 0.7556, "step": 10707 }, { "epoch": 0.69, "grad_norm": 1.6078128431954657, "learning_rate": 2.378630573445225e-06, "loss": 0.678, "step": 10708 }, { "epoch": 0.69, "grad_norm": 1.5645120743851406, "learning_rate": 2.377747951187375e-06, "loss": 0.6397, "step": 10709 }, { "epoch": 0.69, "grad_norm": 1.721096270091385, "learning_rate": 2.376865441628406e-06, "loss": 0.8262, "step": 10710 }, { "epoch": 0.69, "grad_norm": 2.638410157777238, "learning_rate": 2.3759830448062497e-06, "loss": 0.6518, "step": 10711 }, { "epoch": 0.69, "grad_norm": 1.7198235845247416, "learning_rate": 2.3751007607588294e-06, "loss": 0.7418, "step": 10712 }, { "epoch": 0.69, "grad_norm": 1.0217493244899354, "learning_rate": 2.3742185895240623e-06, "loss": 0.5785, "step": 10713 }, { "epoch": 0.69, "grad_norm": 3.079687304326308, "learning_rate": 2.3733365311398628e-06, "loss": 0.6474, "step": 10714 }, { "epoch": 0.69, "grad_norm": 1.775476483944783, "learning_rate": 2.3724545856441394e-06, "loss": 0.7157, "step": 10715 }, { "epoch": 0.69, "grad_norm": 2.1648739105054338, "learning_rate": 2.3715727530747973e-06, "loss": 0.6686, "step": 10716 }, { "epoch": 0.69, "grad_norm": 1.9096562611459038, "learning_rate": 2.370691033469735e-06, "loss": 0.7892, "step": 10717 }, { "epoch": 0.69, "grad_norm": 1.8461342315029148, "learning_rate": 2.3698094268668466e-06, "loss": 0.7542, "step": 10718 }, { "epoch": 0.69, "grad_norm": 2.1955168666467237, "learning_rate": 2.368927933304021e-06, "loss": 0.7344, "step": 10719 }, { "epoch": 0.69, "grad_norm": 1.852234508083585, "learning_rate": 2.368046552819146e-06, "loss": 0.794, "step": 10720 }, { "epoch": 0.69, "grad_norm": 1.9556789360719358, "learning_rate": 2.3671652854500995e-06, "loss": 0.9107, "step": 10721 }, { "epoch": 0.69, "grad_norm": 2.3275641424512683, "learning_rate": 2.3662841312347563e-06, "loss": 0.7, "step": 10722 }, { "epoch": 0.69, "grad_norm": 1.8567896970930915, "learning_rate": 2.3654030902109877e-06, "loss": 0.8126, "step": 10723 }, { "epoch": 0.69, "grad_norm": 1.6586254893460013, "learning_rate": 2.3645221624166577e-06, "loss": 0.7553, "step": 10724 }, { "epoch": 0.69, "grad_norm": 1.648602384757649, "learning_rate": 2.3636413478896276e-06, "loss": 0.9508, "step": 10725 }, { "epoch": 0.69, "grad_norm": 1.5955153580180712, "learning_rate": 2.3627606466677506e-06, "loss": 0.6716, "step": 10726 }, { "epoch": 0.69, "grad_norm": 1.1773500169279436, "learning_rate": 2.361880058788883e-06, "loss": 0.6699, "step": 10727 }, { "epoch": 0.69, "grad_norm": 1.1188558303387108, "learning_rate": 2.3609995842908636e-06, "loss": 0.6884, "step": 10728 }, { "epoch": 0.69, "grad_norm": 1.7807162589737626, "learning_rate": 2.3601192232115376e-06, "loss": 0.791, "step": 10729 }, { "epoch": 0.69, "grad_norm": 1.7505560988814484, "learning_rate": 2.359238975588741e-06, "loss": 0.711, "step": 10730 }, { "epoch": 0.69, "grad_norm": 2.046559714436597, "learning_rate": 2.3583588414603046e-06, "loss": 0.7837, "step": 10731 }, { "epoch": 0.69, "grad_norm": 1.9423632965562836, "learning_rate": 2.357478820864054e-06, "loss": 0.7652, "step": 10732 }, { "epoch": 0.69, "grad_norm": 1.795255380482426, "learning_rate": 2.3565989138378094e-06, "loss": 0.7136, "step": 10733 }, { "epoch": 0.69, "grad_norm": 1.6894281117458079, "learning_rate": 2.3557191204193923e-06, "loss": 0.6839, "step": 10734 }, { "epoch": 0.69, "grad_norm": 1.1932438832811116, "learning_rate": 2.3548394406466074e-06, "loss": 0.6652, "step": 10735 }, { "epoch": 0.69, "grad_norm": 1.756424046994092, "learning_rate": 2.353959874557269e-06, "loss": 0.7249, "step": 10736 }, { "epoch": 0.69, "grad_norm": 1.559500469622784, "learning_rate": 2.3530804221891707e-06, "loss": 0.7413, "step": 10737 }, { "epoch": 0.69, "grad_norm": 1.596502025826093, "learning_rate": 2.3522010835801163e-06, "loss": 0.7357, "step": 10738 }, { "epoch": 0.69, "grad_norm": 0.9759573671319942, "learning_rate": 2.3513218587678952e-06, "loss": 0.7179, "step": 10739 }, { "epoch": 0.69, "grad_norm": 1.5320239581164328, "learning_rate": 2.3504427477902937e-06, "loss": 0.704, "step": 10740 }, { "epoch": 0.69, "grad_norm": 1.7271222585338393, "learning_rate": 2.349563750685099e-06, "loss": 0.7021, "step": 10741 }, { "epoch": 0.69, "grad_norm": 1.6645993092376912, "learning_rate": 2.348684867490081e-06, "loss": 0.7424, "step": 10742 }, { "epoch": 0.69, "grad_norm": 1.722432949075612, "learning_rate": 2.3478060982430193e-06, "loss": 0.6881, "step": 10743 }, { "epoch": 0.69, "grad_norm": 1.6068405878138718, "learning_rate": 2.3469274429816758e-06, "loss": 0.6226, "step": 10744 }, { "epoch": 0.69, "grad_norm": 1.6092941494805633, "learning_rate": 2.3460489017438176e-06, "loss": 0.7693, "step": 10745 }, { "epoch": 0.69, "grad_norm": 0.9931400576348693, "learning_rate": 2.3451704745672006e-06, "loss": 0.6481, "step": 10746 }, { "epoch": 0.69, "grad_norm": 1.818388213160554, "learning_rate": 2.3442921614895783e-06, "loss": 0.7365, "step": 10747 }, { "epoch": 0.69, "grad_norm": 1.702487985381036, "learning_rate": 2.3434139625486985e-06, "loss": 0.6613, "step": 10748 }, { "epoch": 0.69, "grad_norm": 1.721463910675681, "learning_rate": 2.342535877782302e-06, "loss": 0.6468, "step": 10749 }, { "epoch": 0.69, "grad_norm": 1.8323536660219573, "learning_rate": 2.3416579072281333e-06, "loss": 0.8046, "step": 10750 }, { "epoch": 0.69, "grad_norm": 1.180130748356216, "learning_rate": 2.340780050923918e-06, "loss": 0.6724, "step": 10751 }, { "epoch": 0.69, "grad_norm": 1.7379427921101844, "learning_rate": 2.339902308907391e-06, "loss": 0.6906, "step": 10752 }, { "epoch": 0.69, "grad_norm": 1.585834567871323, "learning_rate": 2.339024681216272e-06, "loss": 0.7315, "step": 10753 }, { "epoch": 0.69, "grad_norm": 1.500897704863733, "learning_rate": 2.338147167888281e-06, "loss": 0.7738, "step": 10754 }, { "epoch": 0.69, "grad_norm": 1.814049034665018, "learning_rate": 2.3372697689611317e-06, "loss": 0.6345, "step": 10755 }, { "epoch": 0.69, "grad_norm": 1.5196244843780127, "learning_rate": 2.3363924844725326e-06, "loss": 0.6137, "step": 10756 }, { "epoch": 0.69, "grad_norm": 1.957481386114158, "learning_rate": 2.3355153144601873e-06, "loss": 0.7239, "step": 10757 }, { "epoch": 0.69, "grad_norm": 1.785234564867128, "learning_rate": 2.3346382589617933e-06, "loss": 0.6813, "step": 10758 }, { "epoch": 0.69, "grad_norm": 1.6717205851420993, "learning_rate": 2.3337613180150497e-06, "loss": 0.6677, "step": 10759 }, { "epoch": 0.69, "grad_norm": 1.7613901027647458, "learning_rate": 2.332884491657639e-06, "loss": 0.9479, "step": 10760 }, { "epoch": 0.69, "grad_norm": 1.6144255687098248, "learning_rate": 2.3320077799272493e-06, "loss": 0.857, "step": 10761 }, { "epoch": 0.69, "grad_norm": 1.6184207076297488, "learning_rate": 2.3311311828615594e-06, "loss": 0.7692, "step": 10762 }, { "epoch": 0.69, "grad_norm": 1.8772233544749868, "learning_rate": 2.3302547004982434e-06, "loss": 0.7505, "step": 10763 }, { "epoch": 0.69, "grad_norm": 3.091231749542249, "learning_rate": 2.3293783328749705e-06, "loss": 0.6368, "step": 10764 }, { "epoch": 0.69, "grad_norm": 1.7139834435504047, "learning_rate": 2.328502080029405e-06, "loss": 0.7939, "step": 10765 }, { "epoch": 0.69, "grad_norm": 1.8036572779433129, "learning_rate": 2.3276259419992066e-06, "loss": 0.7473, "step": 10766 }, { "epoch": 0.69, "grad_norm": 1.9267169315462611, "learning_rate": 2.326749918822028e-06, "loss": 0.7148, "step": 10767 }, { "epoch": 0.69, "grad_norm": 2.2137615858810658, "learning_rate": 2.325874010535522e-06, "loss": 0.7922, "step": 10768 }, { "epoch": 0.69, "grad_norm": 1.7477021165157227, "learning_rate": 2.3249982171773322e-06, "loss": 0.6995, "step": 10769 }, { "epoch": 0.69, "grad_norm": 1.5601978822395324, "learning_rate": 2.324122538785098e-06, "loss": 0.6672, "step": 10770 }, { "epoch": 0.69, "grad_norm": 0.9626954558035998, "learning_rate": 2.323246975396454e-06, "loss": 0.6281, "step": 10771 }, { "epoch": 0.69, "grad_norm": 1.605936667278439, "learning_rate": 2.322371527049031e-06, "loss": 0.6549, "step": 10772 }, { "epoch": 0.69, "grad_norm": 2.012454979176065, "learning_rate": 2.321496193780453e-06, "loss": 0.6676, "step": 10773 }, { "epoch": 0.69, "grad_norm": 1.2091385370340961, "learning_rate": 2.3206209756283403e-06, "loss": 0.6977, "step": 10774 }, { "epoch": 0.69, "grad_norm": 1.035418842453688, "learning_rate": 2.3197458726303064e-06, "loss": 0.5964, "step": 10775 }, { "epoch": 0.69, "grad_norm": 1.3411022427358605, "learning_rate": 2.3188708848239638e-06, "loss": 0.7699, "step": 10776 }, { "epoch": 0.69, "grad_norm": 1.8297308946001218, "learning_rate": 2.3179960122469173e-06, "loss": 0.8299, "step": 10777 }, { "epoch": 0.69, "grad_norm": 5.113792897674842, "learning_rate": 2.317121254936767e-06, "loss": 0.7308, "step": 10778 }, { "epoch": 0.69, "grad_norm": 2.286591388602406, "learning_rate": 2.3162466129311074e-06, "loss": 0.7307, "step": 10779 }, { "epoch": 0.69, "grad_norm": 2.171407244383724, "learning_rate": 2.3153720862675286e-06, "loss": 0.7001, "step": 10780 }, { "epoch": 0.69, "grad_norm": 1.6546999266248164, "learning_rate": 2.314497674983617e-06, "loss": 0.6903, "step": 10781 }, { "epoch": 0.69, "grad_norm": 1.7395294178878549, "learning_rate": 2.31362337911695e-06, "loss": 0.6363, "step": 10782 }, { "epoch": 0.69, "grad_norm": 1.4984565303771769, "learning_rate": 2.3127491987051086e-06, "loss": 0.6469, "step": 10783 }, { "epoch": 0.69, "grad_norm": 1.724042332830025, "learning_rate": 2.311875133785657e-06, "loss": 0.7426, "step": 10784 }, { "epoch": 0.69, "grad_norm": 1.4519024811385715, "learning_rate": 2.311001184396165e-06, "loss": 0.6148, "step": 10785 }, { "epoch": 0.69, "grad_norm": 2.9568412046974646, "learning_rate": 2.3101273505741922e-06, "loss": 0.8238, "step": 10786 }, { "epoch": 0.69, "grad_norm": 1.5428090080606853, "learning_rate": 2.3092536323572933e-06, "loss": 0.6679, "step": 10787 }, { "epoch": 0.69, "grad_norm": 1.9960346033692435, "learning_rate": 2.3083800297830194e-06, "loss": 0.6365, "step": 10788 }, { "epoch": 0.69, "grad_norm": 1.7196879601585855, "learning_rate": 2.307506542888916e-06, "loss": 0.7674, "step": 10789 }, { "epoch": 0.69, "grad_norm": 1.8504445487517573, "learning_rate": 2.3066331717125235e-06, "loss": 0.8717, "step": 10790 }, { "epoch": 0.69, "grad_norm": 1.7524049325041549, "learning_rate": 2.305759916291376e-06, "loss": 0.6526, "step": 10791 }, { "epoch": 0.69, "grad_norm": 1.5171880112893117, "learning_rate": 2.3048867766630096e-06, "loss": 0.6364, "step": 10792 }, { "epoch": 0.69, "grad_norm": 1.6301980356054713, "learning_rate": 2.3040137528649424e-06, "loss": 0.724, "step": 10793 }, { "epoch": 0.69, "grad_norm": 1.699582135782186, "learning_rate": 2.3031408449347017e-06, "loss": 0.68, "step": 10794 }, { "epoch": 0.69, "grad_norm": 1.3399147517049987, "learning_rate": 2.3022680529097995e-06, "loss": 0.6278, "step": 10795 }, { "epoch": 0.69, "grad_norm": 2.316625437312138, "learning_rate": 2.301395376827748e-06, "loss": 0.7545, "step": 10796 }, { "epoch": 0.69, "grad_norm": 1.5093469113548266, "learning_rate": 2.300522816726053e-06, "loss": 0.7397, "step": 10797 }, { "epoch": 0.69, "grad_norm": 2.1683461962758215, "learning_rate": 2.2996503726422133e-06, "loss": 0.7404, "step": 10798 }, { "epoch": 0.69, "grad_norm": 1.0241639693316442, "learning_rate": 2.298778044613729e-06, "loss": 0.5924, "step": 10799 }, { "epoch": 0.69, "grad_norm": 1.611233121890662, "learning_rate": 2.2979058326780855e-06, "loss": 0.6703, "step": 10800 }, { "epoch": 0.69, "grad_norm": 1.6291300701981308, "learning_rate": 2.2970337368727753e-06, "loss": 0.6973, "step": 10801 }, { "epoch": 0.69, "grad_norm": 1.6542685240119157, "learning_rate": 2.2961617572352712e-06, "loss": 0.7549, "step": 10802 }, { "epoch": 0.69, "grad_norm": 2.2602780914702154, "learning_rate": 2.2952898938030554e-06, "loss": 0.6776, "step": 10803 }, { "epoch": 0.69, "grad_norm": 1.873505906061784, "learning_rate": 2.294418146613596e-06, "loss": 0.8109, "step": 10804 }, { "epoch": 0.69, "grad_norm": 1.8171141212634474, "learning_rate": 2.293546515704358e-06, "loss": 0.5541, "step": 10805 }, { "epoch": 0.69, "grad_norm": 1.0777133780701078, "learning_rate": 2.2926750011128074e-06, "loss": 0.5727, "step": 10806 }, { "epoch": 0.69, "grad_norm": 2.0061042520535692, "learning_rate": 2.2918036028763928e-06, "loss": 0.7957, "step": 10807 }, { "epoch": 0.69, "grad_norm": 1.7946528588978226, "learning_rate": 2.2909323210325724e-06, "loss": 0.9666, "step": 10808 }, { "epoch": 0.69, "grad_norm": 1.9312847552108972, "learning_rate": 2.290061155618784e-06, "loss": 0.8348, "step": 10809 }, { "epoch": 0.69, "grad_norm": 1.0355750231431085, "learning_rate": 2.2891901066724755e-06, "loss": 0.6247, "step": 10810 }, { "epoch": 0.69, "grad_norm": 1.1513002166642943, "learning_rate": 2.2883191742310795e-06, "loss": 0.6058, "step": 10811 }, { "epoch": 0.69, "grad_norm": 1.4881783548684333, "learning_rate": 2.2874483583320274e-06, "loss": 0.8823, "step": 10812 }, { "epoch": 0.69, "grad_norm": 4.069409149863399, "learning_rate": 2.2865776590127447e-06, "loss": 0.8309, "step": 10813 }, { "epoch": 0.69, "grad_norm": 1.695433744159023, "learning_rate": 2.285707076310651e-06, "loss": 0.8026, "step": 10814 }, { "epoch": 0.69, "grad_norm": 1.8087869522362525, "learning_rate": 2.2848366102631674e-06, "loss": 0.6959, "step": 10815 }, { "epoch": 0.69, "grad_norm": 2.067849729213905, "learning_rate": 2.2839662609076975e-06, "loss": 0.7816, "step": 10816 }, { "epoch": 0.69, "grad_norm": 1.6390498292778275, "learning_rate": 2.283096028281652e-06, "loss": 0.6151, "step": 10817 }, { "epoch": 0.69, "grad_norm": 1.7792043929102483, "learning_rate": 2.28222591242243e-06, "loss": 0.5933, "step": 10818 }, { "epoch": 0.69, "grad_norm": 1.6147300461917955, "learning_rate": 2.281355913367428e-06, "loss": 0.7808, "step": 10819 }, { "epoch": 0.69, "grad_norm": 1.7915747168501652, "learning_rate": 2.2804860311540358e-06, "loss": 0.7323, "step": 10820 }, { "epoch": 0.69, "grad_norm": 1.7723069664549354, "learning_rate": 2.2796162658196397e-06, "loss": 0.6777, "step": 10821 }, { "epoch": 0.69, "grad_norm": 1.7905045309883616, "learning_rate": 2.2787466174016197e-06, "loss": 0.7221, "step": 10822 }, { "epoch": 0.69, "grad_norm": 1.6138218609447261, "learning_rate": 2.2778770859373504e-06, "loss": 0.7137, "step": 10823 }, { "epoch": 0.69, "grad_norm": 1.0466419505941764, "learning_rate": 2.2770076714642066e-06, "loss": 0.7331, "step": 10824 }, { "epoch": 0.69, "grad_norm": 1.8442977990825913, "learning_rate": 2.27613837401955e-06, "loss": 0.6814, "step": 10825 }, { "epoch": 0.69, "grad_norm": 1.6811919024514563, "learning_rate": 2.2752691936407436e-06, "loss": 0.6975, "step": 10826 }, { "epoch": 0.69, "grad_norm": 1.6252801435194182, "learning_rate": 2.2744001303651407e-06, "loss": 0.6186, "step": 10827 }, { "epoch": 0.69, "grad_norm": 1.6356169255833322, "learning_rate": 2.273531184230094e-06, "loss": 0.7617, "step": 10828 }, { "epoch": 0.69, "grad_norm": 1.7529685747529893, "learning_rate": 2.2726623552729473e-06, "loss": 0.6992, "step": 10829 }, { "epoch": 0.69, "grad_norm": 2.260693596880676, "learning_rate": 2.2717936435310417e-06, "loss": 0.6786, "step": 10830 }, { "epoch": 0.69, "grad_norm": 1.1573428388858567, "learning_rate": 2.2709250490417124e-06, "loss": 0.6667, "step": 10831 }, { "epoch": 0.69, "grad_norm": 1.7603818160757343, "learning_rate": 2.2700565718422885e-06, "loss": 0.7434, "step": 10832 }, { "epoch": 0.69, "grad_norm": 1.719718912902059, "learning_rate": 2.2691882119700983e-06, "loss": 0.8169, "step": 10833 }, { "epoch": 0.69, "grad_norm": 1.569946892120029, "learning_rate": 2.2683199694624604e-06, "loss": 0.6984, "step": 10834 }, { "epoch": 0.69, "grad_norm": 1.702716609859048, "learning_rate": 2.2674518443566908e-06, "loss": 0.7187, "step": 10835 }, { "epoch": 0.69, "grad_norm": 1.7551077713368675, "learning_rate": 2.2665838366900983e-06, "loss": 0.7831, "step": 10836 }, { "epoch": 0.69, "grad_norm": 1.5993400127324442, "learning_rate": 2.265715946499989e-06, "loss": 0.6497, "step": 10837 }, { "epoch": 0.69, "grad_norm": 2.2301172376275775, "learning_rate": 2.264848173823663e-06, "loss": 0.7799, "step": 10838 }, { "epoch": 0.69, "grad_norm": 1.6213940573890364, "learning_rate": 2.2639805186984148e-06, "loss": 0.7053, "step": 10839 }, { "epoch": 0.69, "grad_norm": 1.7267370663510797, "learning_rate": 2.2631129811615334e-06, "loss": 0.6433, "step": 10840 }, { "epoch": 0.69, "grad_norm": 1.8002828453257007, "learning_rate": 2.2622455612503064e-06, "loss": 0.8275, "step": 10841 }, { "epoch": 0.69, "grad_norm": 1.7928552655797327, "learning_rate": 2.2613782590020126e-06, "loss": 0.8698, "step": 10842 }, { "epoch": 0.69, "grad_norm": 1.4381543932834693, "learning_rate": 2.2605110744539266e-06, "loss": 0.7414, "step": 10843 }, { "epoch": 0.69, "grad_norm": 1.5732899696533391, "learning_rate": 2.2596440076433177e-06, "loss": 0.7113, "step": 10844 }, { "epoch": 0.69, "grad_norm": 1.8286350256853086, "learning_rate": 2.258777058607452e-06, "loss": 0.7452, "step": 10845 }, { "epoch": 0.69, "grad_norm": 1.5108223373237133, "learning_rate": 2.2579102273835877e-06, "loss": 0.5971, "step": 10846 }, { "epoch": 0.69, "grad_norm": 1.6511856139820802, "learning_rate": 2.257043514008978e-06, "loss": 0.6832, "step": 10847 }, { "epoch": 0.69, "grad_norm": 1.7095168509831136, "learning_rate": 2.2561769185208783e-06, "loss": 0.7013, "step": 10848 }, { "epoch": 0.69, "grad_norm": 1.7322055844936146, "learning_rate": 2.255310440956525e-06, "loss": 0.6826, "step": 10849 }, { "epoch": 0.69, "grad_norm": 1.8056717655539443, "learning_rate": 2.2544440813531637e-06, "loss": 0.6937, "step": 10850 }, { "epoch": 0.69, "grad_norm": 1.4454316786631052, "learning_rate": 2.2535778397480263e-06, "loss": 0.6851, "step": 10851 }, { "epoch": 0.69, "grad_norm": 1.637100975772524, "learning_rate": 2.2527117161783426e-06, "loss": 0.7104, "step": 10852 }, { "epoch": 0.69, "grad_norm": 1.8544767906266806, "learning_rate": 2.251845710681336e-06, "loss": 0.8084, "step": 10853 }, { "epoch": 0.69, "grad_norm": 0.8737918565019482, "learning_rate": 2.250979823294225e-06, "loss": 0.6381, "step": 10854 }, { "epoch": 0.69, "grad_norm": 1.9681526819385418, "learning_rate": 2.250114054054228e-06, "loss": 0.6765, "step": 10855 }, { "epoch": 0.69, "grad_norm": 1.655710727161028, "learning_rate": 2.249248402998547e-06, "loss": 0.7469, "step": 10856 }, { "epoch": 0.69, "grad_norm": 1.8476747534464921, "learning_rate": 2.2483828701643933e-06, "loss": 0.7703, "step": 10857 }, { "epoch": 0.69, "grad_norm": 1.7297823475182188, "learning_rate": 2.2475174555889577e-06, "loss": 0.81, "step": 10858 }, { "epoch": 0.7, "grad_norm": 1.862554940814663, "learning_rate": 2.2466521593094404e-06, "loss": 0.7481, "step": 10859 }, { "epoch": 0.7, "grad_norm": 1.6313448515372837, "learning_rate": 2.245786981363028e-06, "loss": 0.6895, "step": 10860 }, { "epoch": 0.7, "grad_norm": 1.5807493980568925, "learning_rate": 2.2449219217869013e-06, "loss": 0.7375, "step": 10861 }, { "epoch": 0.7, "grad_norm": 1.7392521627030344, "learning_rate": 2.244056980618245e-06, "loss": 0.7814, "step": 10862 }, { "epoch": 0.7, "grad_norm": 1.5839284765715262, "learning_rate": 2.243192157894225e-06, "loss": 0.745, "step": 10863 }, { "epoch": 0.7, "grad_norm": 1.2850429080764967, "learning_rate": 2.2423274536520172e-06, "loss": 0.7044, "step": 10864 }, { "epoch": 0.7, "grad_norm": 1.652744609347793, "learning_rate": 2.241462867928777e-06, "loss": 0.7579, "step": 10865 }, { "epoch": 0.7, "grad_norm": 1.647463313338856, "learning_rate": 2.2405984007616676e-06, "loss": 0.6294, "step": 10866 }, { "epoch": 0.7, "grad_norm": 1.8987531962514805, "learning_rate": 2.239734052187841e-06, "loss": 0.7322, "step": 10867 }, { "epoch": 0.7, "grad_norm": 1.6542217645367363, "learning_rate": 2.238869822244445e-06, "loss": 0.6989, "step": 10868 }, { "epoch": 0.7, "grad_norm": 1.7809845985598591, "learning_rate": 2.2380057109686213e-06, "loss": 0.699, "step": 10869 }, { "epoch": 0.7, "grad_norm": 2.397239489052469, "learning_rate": 2.2371417183975076e-06, "loss": 0.8079, "step": 10870 }, { "epoch": 0.7, "grad_norm": 1.5205831482134478, "learning_rate": 2.2362778445682403e-06, "loss": 0.622, "step": 10871 }, { "epoch": 0.7, "grad_norm": 1.4788452868836441, "learning_rate": 2.2354140895179403e-06, "loss": 0.6494, "step": 10872 }, { "epoch": 0.7, "grad_norm": 1.6394887914468204, "learning_rate": 2.234550453283737e-06, "loss": 0.8103, "step": 10873 }, { "epoch": 0.7, "grad_norm": 1.5451344888750098, "learning_rate": 2.2336869359027406e-06, "loss": 0.6572, "step": 10874 }, { "epoch": 0.7, "grad_norm": 1.6358511610768356, "learning_rate": 2.2328235374120694e-06, "loss": 0.6822, "step": 10875 }, { "epoch": 0.7, "grad_norm": 2.138667843332364, "learning_rate": 2.231960257848827e-06, "loss": 0.7996, "step": 10876 }, { "epoch": 0.7, "grad_norm": 1.6476400679730756, "learning_rate": 2.231097097250115e-06, "loss": 0.7492, "step": 10877 }, { "epoch": 0.7, "grad_norm": 1.2076814522917392, "learning_rate": 2.230234055653035e-06, "loss": 0.6237, "step": 10878 }, { "epoch": 0.7, "grad_norm": 1.4893369465978759, "learning_rate": 2.229371133094671e-06, "loss": 0.6156, "step": 10879 }, { "epoch": 0.7, "grad_norm": 1.5709290571481582, "learning_rate": 2.2285083296121185e-06, "loss": 0.7486, "step": 10880 }, { "epoch": 0.7, "grad_norm": 1.2591351222784186, "learning_rate": 2.22764564524245e-06, "loss": 0.7034, "step": 10881 }, { "epoch": 0.7, "grad_norm": 1.7727243122443035, "learning_rate": 2.226783080022748e-06, "loss": 0.7527, "step": 10882 }, { "epoch": 0.7, "grad_norm": 1.956067420339436, "learning_rate": 2.225920633990082e-06, "loss": 0.7916, "step": 10883 }, { "epoch": 0.7, "grad_norm": 1.7484652485475471, "learning_rate": 2.225058307181518e-06, "loss": 0.8223, "step": 10884 }, { "epoch": 0.7, "grad_norm": 1.4361982530328505, "learning_rate": 2.2241960996341166e-06, "loss": 0.6795, "step": 10885 }, { "epoch": 0.7, "grad_norm": 1.1773280057138849, "learning_rate": 2.2233340113849343e-06, "loss": 0.5677, "step": 10886 }, { "epoch": 0.7, "grad_norm": 1.7414872708918705, "learning_rate": 2.2224720424710222e-06, "loss": 0.749, "step": 10887 }, { "epoch": 0.7, "grad_norm": 1.7411057072625185, "learning_rate": 2.221610192929423e-06, "loss": 0.6793, "step": 10888 }, { "epoch": 0.7, "grad_norm": 1.7009813823420186, "learning_rate": 2.2207484627971817e-06, "loss": 0.7382, "step": 10889 }, { "epoch": 0.7, "grad_norm": 1.6690804463148583, "learning_rate": 2.219886852111331e-06, "loss": 0.7304, "step": 10890 }, { "epoch": 0.7, "grad_norm": 1.6988482609888518, "learning_rate": 2.2190253609089014e-06, "loss": 0.7486, "step": 10891 }, { "epoch": 0.7, "grad_norm": 2.5616961722497544, "learning_rate": 2.2181639892269183e-06, "loss": 0.7531, "step": 10892 }, { "epoch": 0.7, "grad_norm": 1.5873911640245921, "learning_rate": 2.217302737102402e-06, "loss": 0.5821, "step": 10893 }, { "epoch": 0.7, "grad_norm": 1.630778367503158, "learning_rate": 2.2164416045723662e-06, "loss": 0.7427, "step": 10894 }, { "epoch": 0.7, "grad_norm": 1.5956003656526614, "learning_rate": 2.2155805916738215e-06, "loss": 0.7387, "step": 10895 }, { "epoch": 0.7, "grad_norm": 1.2134640463863764, "learning_rate": 2.21471969844377e-06, "loss": 0.7659, "step": 10896 }, { "epoch": 0.7, "grad_norm": 1.974026474183126, "learning_rate": 2.2138589249192156e-06, "loss": 0.8257, "step": 10897 }, { "epoch": 0.7, "grad_norm": 1.6202203878223065, "learning_rate": 2.2129982711371495e-06, "loss": 0.5945, "step": 10898 }, { "epoch": 0.7, "grad_norm": 1.742922377620356, "learning_rate": 2.212137737134562e-06, "loss": 0.8133, "step": 10899 }, { "epoch": 0.7, "grad_norm": 1.883857248140653, "learning_rate": 2.211277322948436e-06, "loss": 0.7236, "step": 10900 }, { "epoch": 0.7, "grad_norm": 2.6343055327376015, "learning_rate": 2.2104170286157506e-06, "loss": 0.7744, "step": 10901 }, { "epoch": 0.7, "grad_norm": 1.6743296774509366, "learning_rate": 2.2095568541734804e-06, "loss": 0.7262, "step": 10902 }, { "epoch": 0.7, "grad_norm": 1.3761368023904677, "learning_rate": 2.20869679965859e-06, "loss": 0.5936, "step": 10903 }, { "epoch": 0.7, "grad_norm": 1.7432670502328849, "learning_rate": 2.2078368651080506e-06, "loss": 0.6129, "step": 10904 }, { "epoch": 0.7, "grad_norm": 2.387871909453718, "learning_rate": 2.206977050558811e-06, "loss": 0.7086, "step": 10905 }, { "epoch": 0.7, "grad_norm": 1.7598011839411387, "learning_rate": 2.2061173560478317e-06, "loss": 0.6342, "step": 10906 }, { "epoch": 0.7, "grad_norm": 1.7583014963564183, "learning_rate": 2.205257781612057e-06, "loss": 0.8189, "step": 10907 }, { "epoch": 0.7, "grad_norm": 0.981551384664088, "learning_rate": 2.204398327288431e-06, "loss": 0.6224, "step": 10908 }, { "epoch": 0.7, "grad_norm": 1.8553906084473921, "learning_rate": 2.2035389931138896e-06, "loss": 0.7083, "step": 10909 }, { "epoch": 0.7, "grad_norm": 1.0402586431435887, "learning_rate": 2.202679779125366e-06, "loss": 0.6158, "step": 10910 }, { "epoch": 0.7, "grad_norm": 1.0512919387078579, "learning_rate": 2.201820685359788e-06, "loss": 0.6951, "step": 10911 }, { "epoch": 0.7, "grad_norm": 2.1810134035670345, "learning_rate": 2.2009617118540755e-06, "loss": 0.746, "step": 10912 }, { "epoch": 0.7, "grad_norm": 1.6783882543524764, "learning_rate": 2.20010285864515e-06, "loss": 0.6314, "step": 10913 }, { "epoch": 0.7, "grad_norm": 2.506747971855297, "learning_rate": 2.199244125769917e-06, "loss": 0.7627, "step": 10914 }, { "epoch": 0.7, "grad_norm": 1.5795719745376706, "learning_rate": 2.198385513265289e-06, "loss": 0.631, "step": 10915 }, { "epoch": 0.7, "grad_norm": 1.7360984171368223, "learning_rate": 2.1975270211681634e-06, "loss": 0.8386, "step": 10916 }, { "epoch": 0.7, "grad_norm": 1.6250136897763978, "learning_rate": 2.1966686495154375e-06, "loss": 0.6816, "step": 10917 }, { "epoch": 0.7, "grad_norm": 1.7082379603458122, "learning_rate": 2.1958103983440034e-06, "loss": 0.7276, "step": 10918 }, { "epoch": 0.7, "grad_norm": 2.0309588297456456, "learning_rate": 2.194952267690744e-06, "loss": 0.7331, "step": 10919 }, { "epoch": 0.7, "grad_norm": 1.7162582024671826, "learning_rate": 2.194094257592545e-06, "loss": 0.6276, "step": 10920 }, { "epoch": 0.7, "grad_norm": 1.8658744617219292, "learning_rate": 2.193236368086275e-06, "loss": 0.6855, "step": 10921 }, { "epoch": 0.7, "grad_norm": 1.6766075571137606, "learning_rate": 2.1923785992088126e-06, "loss": 0.6715, "step": 10922 }, { "epoch": 0.7, "grad_norm": 1.0638400038511795, "learning_rate": 2.191520950997014e-06, "loss": 0.6344, "step": 10923 }, { "epoch": 0.7, "grad_norm": 1.6430185128776165, "learning_rate": 2.1906634234877453e-06, "loss": 0.6172, "step": 10924 }, { "epoch": 0.7, "grad_norm": 1.6262194512357508, "learning_rate": 2.1898060167178604e-06, "loss": 0.8226, "step": 10925 }, { "epoch": 0.7, "grad_norm": 1.4963235701426587, "learning_rate": 2.1889487307242054e-06, "loss": 0.6378, "step": 10926 }, { "epoch": 0.7, "grad_norm": 1.6848409935996271, "learning_rate": 2.18809156554363e-06, "loss": 0.6269, "step": 10927 }, { "epoch": 0.7, "grad_norm": 1.651389939872268, "learning_rate": 2.187234521212968e-06, "loss": 0.6888, "step": 10928 }, { "epoch": 0.7, "grad_norm": 2.5211595338722943, "learning_rate": 2.1863775977690588e-06, "loss": 0.7388, "step": 10929 }, { "epoch": 0.7, "grad_norm": 1.8297064548900792, "learning_rate": 2.185520795248725e-06, "loss": 0.7995, "step": 10930 }, { "epoch": 0.7, "grad_norm": 1.723018004141761, "learning_rate": 2.1846641136887947e-06, "loss": 0.6994, "step": 10931 }, { "epoch": 0.7, "grad_norm": 1.685681279445443, "learning_rate": 2.1838075531260854e-06, "loss": 0.6441, "step": 10932 }, { "epoch": 0.7, "grad_norm": 1.5855619331381348, "learning_rate": 2.182951113597408e-06, "loss": 0.7284, "step": 10933 }, { "epoch": 0.7, "grad_norm": 1.7882678794582842, "learning_rate": 2.182094795139576e-06, "loss": 0.8486, "step": 10934 }, { "epoch": 0.7, "grad_norm": 1.5953302413307275, "learning_rate": 2.1812385977893844e-06, "loss": 0.7614, "step": 10935 }, { "epoch": 0.7, "grad_norm": 1.6228085204995197, "learning_rate": 2.1803825215836387e-06, "loss": 0.6835, "step": 10936 }, { "epoch": 0.7, "grad_norm": 1.955606634667778, "learning_rate": 2.1795265665591236e-06, "loss": 0.7686, "step": 10937 }, { "epoch": 0.7, "grad_norm": 1.7310666321627555, "learning_rate": 2.1786707327526325e-06, "loss": 0.7159, "step": 10938 }, { "epoch": 0.7, "grad_norm": 1.6683631662251086, "learning_rate": 2.177815020200944e-06, "loss": 0.5961, "step": 10939 }, { "epoch": 0.7, "grad_norm": 1.3641843941244638, "learning_rate": 2.176959428940836e-06, "loss": 0.7287, "step": 10940 }, { "epoch": 0.7, "grad_norm": 1.5709616922225307, "learning_rate": 2.17610395900908e-06, "loss": 0.7042, "step": 10941 }, { "epoch": 0.7, "grad_norm": 1.8462129480768887, "learning_rate": 2.1752486104424403e-06, "loss": 0.8575, "step": 10942 }, { "epoch": 0.7, "grad_norm": 1.7230269613156435, "learning_rate": 2.174393383277683e-06, "loss": 0.625, "step": 10943 }, { "epoch": 0.7, "grad_norm": 1.6328706169382123, "learning_rate": 2.173538277551557e-06, "loss": 0.6674, "step": 10944 }, { "epoch": 0.7, "grad_norm": 1.6541391388652256, "learning_rate": 2.1726832933008185e-06, "loss": 0.7708, "step": 10945 }, { "epoch": 0.7, "grad_norm": 1.87934924917725, "learning_rate": 2.171828430562211e-06, "loss": 0.8993, "step": 10946 }, { "epoch": 0.7, "grad_norm": 1.828562261099561, "learning_rate": 2.170973689372475e-06, "loss": 0.7467, "step": 10947 }, { "epoch": 0.7, "grad_norm": 1.7125419746249557, "learning_rate": 2.170119069768345e-06, "loss": 0.7531, "step": 10948 }, { "epoch": 0.7, "grad_norm": 1.610797674802134, "learning_rate": 2.1692645717865515e-06, "loss": 0.7093, "step": 10949 }, { "epoch": 0.7, "grad_norm": 1.120732333460158, "learning_rate": 2.1684101954638176e-06, "loss": 0.6692, "step": 10950 }, { "epoch": 0.7, "grad_norm": 1.172339833890898, "learning_rate": 2.1675559408368623e-06, "loss": 0.7426, "step": 10951 }, { "epoch": 0.7, "grad_norm": 1.8279045270639067, "learning_rate": 2.166701807942404e-06, "loss": 0.7444, "step": 10952 }, { "epoch": 0.7, "grad_norm": 1.8405161194004696, "learning_rate": 2.165847796817145e-06, "loss": 0.8109, "step": 10953 }, { "epoch": 0.7, "grad_norm": 1.8632288215559898, "learning_rate": 2.1649939074977945e-06, "loss": 0.6953, "step": 10954 }, { "epoch": 0.7, "grad_norm": 1.4589444535243883, "learning_rate": 2.164140140021049e-06, "loss": 0.8761, "step": 10955 }, { "epoch": 0.7, "grad_norm": 1.7653790364234747, "learning_rate": 2.1632864944236004e-06, "loss": 0.7787, "step": 10956 }, { "epoch": 0.7, "grad_norm": 1.6888231636017286, "learning_rate": 2.1624329707421374e-06, "loss": 0.7074, "step": 10957 }, { "epoch": 0.7, "grad_norm": 1.7837972605025645, "learning_rate": 2.161579569013344e-06, "loss": 0.6914, "step": 10958 }, { "epoch": 0.7, "grad_norm": 1.9297390679193624, "learning_rate": 2.1607262892738956e-06, "loss": 0.7845, "step": 10959 }, { "epoch": 0.7, "grad_norm": 1.7992505653492992, "learning_rate": 2.1598731315604647e-06, "loss": 0.7093, "step": 10960 }, { "epoch": 0.7, "grad_norm": 1.6646761075916845, "learning_rate": 2.1590200959097173e-06, "loss": 0.7253, "step": 10961 }, { "epoch": 0.7, "grad_norm": 1.156270231075282, "learning_rate": 2.1581671823583183e-06, "loss": 0.6863, "step": 10962 }, { "epoch": 0.7, "grad_norm": 1.9201465641437805, "learning_rate": 2.157314390942923e-06, "loss": 0.7526, "step": 10963 }, { "epoch": 0.7, "grad_norm": 1.0350276618074596, "learning_rate": 2.156461721700181e-06, "loss": 0.6382, "step": 10964 }, { "epoch": 0.7, "grad_norm": 1.0115106531594267, "learning_rate": 2.1556091746667392e-06, "loss": 0.6692, "step": 10965 }, { "epoch": 0.7, "grad_norm": 0.9845377533856635, "learning_rate": 2.1547567498792382e-06, "loss": 0.6591, "step": 10966 }, { "epoch": 0.7, "grad_norm": 1.7388397421862465, "learning_rate": 2.1539044473743136e-06, "loss": 0.8087, "step": 10967 }, { "epoch": 0.7, "grad_norm": 1.7577185970532625, "learning_rate": 2.1530522671885935e-06, "loss": 0.7677, "step": 10968 }, { "epoch": 0.7, "grad_norm": 1.810048491017457, "learning_rate": 2.152200209358708e-06, "loss": 0.6419, "step": 10969 }, { "epoch": 0.7, "grad_norm": 2.243707696771618, "learning_rate": 2.15134827392127e-06, "loss": 0.7584, "step": 10970 }, { "epoch": 0.7, "grad_norm": 1.0069028612807078, "learning_rate": 2.1504964609128994e-06, "loss": 0.6322, "step": 10971 }, { "epoch": 0.7, "grad_norm": 1.6917255508429614, "learning_rate": 2.1496447703702035e-06, "loss": 0.6452, "step": 10972 }, { "epoch": 0.7, "grad_norm": 1.2102738820117453, "learning_rate": 2.1487932023297853e-06, "loss": 0.6766, "step": 10973 }, { "epoch": 0.7, "grad_norm": 1.615435781789293, "learning_rate": 2.147941756828244e-06, "loss": 0.7231, "step": 10974 }, { "epoch": 0.7, "grad_norm": 2.027545027876938, "learning_rate": 2.147090433902172e-06, "loss": 0.7975, "step": 10975 }, { "epoch": 0.7, "grad_norm": 1.8698070362840975, "learning_rate": 2.146239233588161e-06, "loss": 0.7502, "step": 10976 }, { "epoch": 0.7, "grad_norm": 2.1591634643208586, "learning_rate": 2.1453881559227883e-06, "loss": 0.7986, "step": 10977 }, { "epoch": 0.7, "grad_norm": 1.7295682334467122, "learning_rate": 2.1445372009426374e-06, "loss": 0.8815, "step": 10978 }, { "epoch": 0.7, "grad_norm": 1.7006537729124243, "learning_rate": 2.143686368684274e-06, "loss": 0.657, "step": 10979 }, { "epoch": 0.7, "grad_norm": 1.5279582824526563, "learning_rate": 2.1428356591842707e-06, "loss": 0.6361, "step": 10980 }, { "epoch": 0.7, "grad_norm": 1.626435955115534, "learning_rate": 2.141985072479187e-06, "loss": 0.6663, "step": 10981 }, { "epoch": 0.7, "grad_norm": 1.972311301544426, "learning_rate": 2.141134608605579e-06, "loss": 0.7439, "step": 10982 }, { "epoch": 0.7, "grad_norm": 1.1737465877339794, "learning_rate": 2.1402842675999978e-06, "loss": 0.6127, "step": 10983 }, { "epoch": 0.7, "grad_norm": 1.1484166179524222, "learning_rate": 2.139434049498989e-06, "loss": 0.6349, "step": 10984 }, { "epoch": 0.7, "grad_norm": 1.7097059358057445, "learning_rate": 2.1385839543390967e-06, "loss": 0.6018, "step": 10985 }, { "epoch": 0.7, "grad_norm": 1.5143155324311781, "learning_rate": 2.13773398215685e-06, "loss": 0.6776, "step": 10986 }, { "epoch": 0.7, "grad_norm": 1.8123431348984709, "learning_rate": 2.136884132988784e-06, "loss": 0.6516, "step": 10987 }, { "epoch": 0.7, "grad_norm": 1.8875373564533524, "learning_rate": 2.1360344068714216e-06, "loss": 0.6363, "step": 10988 }, { "epoch": 0.7, "grad_norm": 2.063598651881485, "learning_rate": 2.1351848038412832e-06, "loss": 0.6848, "step": 10989 }, { "epoch": 0.7, "grad_norm": 1.9425751123551438, "learning_rate": 2.1343353239348812e-06, "loss": 0.6447, "step": 10990 }, { "epoch": 0.7, "grad_norm": 1.66217205344835, "learning_rate": 2.1334859671887236e-06, "loss": 0.7731, "step": 10991 }, { "epoch": 0.7, "grad_norm": 2.1296438537635227, "learning_rate": 2.13263673363932e-06, "loss": 0.7533, "step": 10992 }, { "epoch": 0.7, "grad_norm": 1.4834700351338266, "learning_rate": 2.1317876233231606e-06, "loss": 0.7094, "step": 10993 }, { "epoch": 0.7, "grad_norm": 1.6139429514303911, "learning_rate": 2.1309386362767453e-06, "loss": 0.7122, "step": 10994 }, { "epoch": 0.7, "grad_norm": 1.0821699401160945, "learning_rate": 2.1300897725365555e-06, "loss": 0.6906, "step": 10995 }, { "epoch": 0.7, "grad_norm": 2.328718088947144, "learning_rate": 2.1292410321390786e-06, "loss": 0.7135, "step": 10996 }, { "epoch": 0.7, "grad_norm": 1.9042670555583534, "learning_rate": 2.12839241512079e-06, "loss": 0.6746, "step": 10997 }, { "epoch": 0.7, "grad_norm": 1.723813943905398, "learning_rate": 2.127543921518159e-06, "loss": 0.734, "step": 10998 }, { "epoch": 0.7, "grad_norm": 1.5169954472333702, "learning_rate": 2.1266955513676584e-06, "loss": 0.6625, "step": 10999 }, { "epoch": 0.7, "grad_norm": 1.9682610960761286, "learning_rate": 2.1258473047057416e-06, "loss": 0.5707, "step": 11000 }, { "epoch": 0.7, "grad_norm": 1.7475865821602579, "learning_rate": 2.124999181568872e-06, "loss": 0.7308, "step": 11001 }, { "epoch": 0.7, "grad_norm": 1.721097865064862, "learning_rate": 2.1241511819934923e-06, "loss": 0.8366, "step": 11002 }, { "epoch": 0.7, "grad_norm": 1.75166119792158, "learning_rate": 2.1233033060160542e-06, "loss": 0.7494, "step": 11003 }, { "epoch": 0.7, "grad_norm": 1.9746619370666334, "learning_rate": 2.1224555536729952e-06, "loss": 0.7369, "step": 11004 }, { "epoch": 0.7, "grad_norm": 1.8077985588747714, "learning_rate": 2.121607925000749e-06, "loss": 0.8289, "step": 11005 }, { "epoch": 0.7, "grad_norm": 1.702873239324883, "learning_rate": 2.1207604200357466e-06, "loss": 0.7851, "step": 11006 }, { "epoch": 0.7, "grad_norm": 1.6722516792050632, "learning_rate": 2.1199130388144098e-06, "loss": 0.7065, "step": 11007 }, { "epoch": 0.7, "grad_norm": 1.70235144677009, "learning_rate": 2.119065781373162e-06, "loss": 0.6101, "step": 11008 }, { "epoch": 0.7, "grad_norm": 1.7240431885811807, "learning_rate": 2.1182186477484094e-06, "loss": 0.7822, "step": 11009 }, { "epoch": 0.7, "grad_norm": 1.7057487646321077, "learning_rate": 2.1173716379765656e-06, "loss": 0.7521, "step": 11010 }, { "epoch": 0.7, "grad_norm": 1.555884642072164, "learning_rate": 2.1165247520940317e-06, "loss": 0.7336, "step": 11011 }, { "epoch": 0.7, "grad_norm": 1.6256227366454028, "learning_rate": 2.1156779901372053e-06, "loss": 0.7041, "step": 11012 }, { "epoch": 0.7, "grad_norm": 1.7921612761740768, "learning_rate": 2.114831352142478e-06, "loss": 0.7424, "step": 11013 }, { "epoch": 0.7, "grad_norm": 1.7422800288584235, "learning_rate": 2.1139848381462363e-06, "loss": 0.6731, "step": 11014 }, { "epoch": 0.71, "grad_norm": 1.6831211855763575, "learning_rate": 2.1131384481848614e-06, "loss": 0.7649, "step": 11015 }, { "epoch": 0.71, "grad_norm": 1.6262663905119747, "learning_rate": 2.112292182294729e-06, "loss": 0.6366, "step": 11016 }, { "epoch": 0.71, "grad_norm": 1.1666424695230107, "learning_rate": 2.111446040512212e-06, "loss": 0.6462, "step": 11017 }, { "epoch": 0.71, "grad_norm": 1.8369988484415947, "learning_rate": 2.110600022873675e-06, "loss": 0.7519, "step": 11018 }, { "epoch": 0.71, "grad_norm": 1.5719867907366682, "learning_rate": 2.1097541294154773e-06, "loss": 0.7344, "step": 11019 }, { "epoch": 0.71, "grad_norm": 1.8867787885673215, "learning_rate": 2.1089083601739735e-06, "loss": 0.6831, "step": 11020 }, { "epoch": 0.71, "grad_norm": 2.0198363411495, "learning_rate": 2.108062715185514e-06, "loss": 0.7713, "step": 11021 }, { "epoch": 0.71, "grad_norm": 1.4730140310005877, "learning_rate": 2.1072171944864415e-06, "loss": 0.7441, "step": 11022 }, { "epoch": 0.71, "grad_norm": 1.4278442494141788, "learning_rate": 2.1063717981130952e-06, "loss": 0.6384, "step": 11023 }, { "epoch": 0.71, "grad_norm": 1.6621183573246625, "learning_rate": 2.10552652610181e-06, "loss": 0.8081, "step": 11024 }, { "epoch": 0.71, "grad_norm": 1.8116953251064445, "learning_rate": 2.10468137848891e-06, "loss": 0.7786, "step": 11025 }, { "epoch": 0.71, "grad_norm": 1.5332900903693407, "learning_rate": 2.103836355310722e-06, "loss": 0.7176, "step": 11026 }, { "epoch": 0.71, "grad_norm": 1.9486116686323804, "learning_rate": 2.102991456603562e-06, "loss": 0.84, "step": 11027 }, { "epoch": 0.71, "grad_norm": 1.7432930616100646, "learning_rate": 2.102146682403742e-06, "loss": 0.7024, "step": 11028 }, { "epoch": 0.71, "grad_norm": 1.980243227372212, "learning_rate": 2.1013020327475683e-06, "loss": 0.7493, "step": 11029 }, { "epoch": 0.71, "grad_norm": 1.7789228953687435, "learning_rate": 2.100457507671341e-06, "loss": 0.9645, "step": 11030 }, { "epoch": 0.71, "grad_norm": 1.6587574190684524, "learning_rate": 2.099613107211359e-06, "loss": 0.6701, "step": 11031 }, { "epoch": 0.71, "grad_norm": 2.000035947433861, "learning_rate": 2.09876883140391e-06, "loss": 0.8665, "step": 11032 }, { "epoch": 0.71, "grad_norm": 1.6958327483311189, "learning_rate": 2.0979246802852794e-06, "loss": 0.6549, "step": 11033 }, { "epoch": 0.71, "grad_norm": 2.31624564241777, "learning_rate": 2.0970806538917506e-06, "loss": 0.751, "step": 11034 }, { "epoch": 0.71, "grad_norm": 1.5692524826615597, "learning_rate": 2.096236752259592e-06, "loss": 0.6795, "step": 11035 }, { "epoch": 0.71, "grad_norm": 1.710772927846449, "learning_rate": 2.0953929754250783e-06, "loss": 0.7045, "step": 11036 }, { "epoch": 0.71, "grad_norm": 1.9075219370578693, "learning_rate": 2.0945493234244714e-06, "loss": 0.7583, "step": 11037 }, { "epoch": 0.71, "grad_norm": 1.696146274396779, "learning_rate": 2.0937057962940287e-06, "loss": 0.7395, "step": 11038 }, { "epoch": 0.71, "grad_norm": 1.6289315881207667, "learning_rate": 2.0928623940700044e-06, "loss": 0.8239, "step": 11039 }, { "epoch": 0.71, "grad_norm": 1.1534844767214492, "learning_rate": 2.0920191167886435e-06, "loss": 0.6984, "step": 11040 }, { "epoch": 0.71, "grad_norm": 2.225288910478805, "learning_rate": 2.091175964486194e-06, "loss": 0.8684, "step": 11041 }, { "epoch": 0.71, "grad_norm": 1.7101873718068659, "learning_rate": 2.0903329371988864e-06, "loss": 0.8101, "step": 11042 }, { "epoch": 0.71, "grad_norm": 1.7574566848450186, "learning_rate": 2.0894900349629576e-06, "loss": 0.698, "step": 11043 }, { "epoch": 0.71, "grad_norm": 1.7809272760399832, "learning_rate": 2.0886472578146284e-06, "loss": 0.8124, "step": 11044 }, { "epoch": 0.71, "grad_norm": 2.0101305761873505, "learning_rate": 2.087804605790124e-06, "loss": 0.6949, "step": 11045 }, { "epoch": 0.71, "grad_norm": 2.255323095741255, "learning_rate": 2.0869620789256583e-06, "loss": 0.6796, "step": 11046 }, { "epoch": 0.71, "grad_norm": 1.6696109392256535, "learning_rate": 2.08611967725744e-06, "loss": 0.8104, "step": 11047 }, { "epoch": 0.71, "grad_norm": 1.8671464319513393, "learning_rate": 2.0852774008216782e-06, "loss": 0.7362, "step": 11048 }, { "epoch": 0.71, "grad_norm": 1.366970637183689, "learning_rate": 2.0844352496545652e-06, "loss": 0.7062, "step": 11049 }, { "epoch": 0.71, "grad_norm": 1.6570702066885103, "learning_rate": 2.0835932237923027e-06, "loss": 0.7092, "step": 11050 }, { "epoch": 0.71, "grad_norm": 1.6841546471512605, "learning_rate": 2.0827513232710716e-06, "loss": 0.825, "step": 11051 }, { "epoch": 0.71, "grad_norm": 2.005936341838298, "learning_rate": 2.0819095481270603e-06, "loss": 0.8984, "step": 11052 }, { "epoch": 0.71, "grad_norm": 2.1044325364726992, "learning_rate": 2.081067898396445e-06, "loss": 0.7649, "step": 11053 }, { "epoch": 0.71, "grad_norm": 1.7203846404017542, "learning_rate": 2.080226374115396e-06, "loss": 0.6424, "step": 11054 }, { "epoch": 0.71, "grad_norm": 1.6461263338775074, "learning_rate": 2.0793849753200855e-06, "loss": 0.8245, "step": 11055 }, { "epoch": 0.71, "grad_norm": 1.7001657099473422, "learning_rate": 2.0785437020466686e-06, "loss": 0.6928, "step": 11056 }, { "epoch": 0.71, "grad_norm": 1.7990139191518284, "learning_rate": 2.077702554331308e-06, "loss": 0.7119, "step": 11057 }, { "epoch": 0.71, "grad_norm": 1.7514161380781947, "learning_rate": 2.076861532210148e-06, "loss": 0.7226, "step": 11058 }, { "epoch": 0.71, "grad_norm": 1.7499929753407268, "learning_rate": 2.0760206357193373e-06, "loss": 0.7318, "step": 11059 }, { "epoch": 0.71, "grad_norm": 1.3946475951260846, "learning_rate": 2.075179864895017e-06, "loss": 0.6736, "step": 11060 }, { "epoch": 0.71, "grad_norm": 1.8592950116733398, "learning_rate": 2.0743392197733193e-06, "loss": 0.7622, "step": 11061 }, { "epoch": 0.71, "grad_norm": 1.6450794332734802, "learning_rate": 2.0734987003903747e-06, "loss": 0.6692, "step": 11062 }, { "epoch": 0.71, "grad_norm": 1.822288973009874, "learning_rate": 2.0726583067823046e-06, "loss": 0.7563, "step": 11063 }, { "epoch": 0.71, "grad_norm": 1.5490860788434155, "learning_rate": 2.0718180389852325e-06, "loss": 0.7932, "step": 11064 }, { "epoch": 0.71, "grad_norm": 1.680005283704735, "learning_rate": 2.0709778970352657e-06, "loss": 0.6844, "step": 11065 }, { "epoch": 0.71, "grad_norm": 1.2274005227852998, "learning_rate": 2.070137880968517e-06, "loss": 0.5911, "step": 11066 }, { "epoch": 0.71, "grad_norm": 1.8350032818857682, "learning_rate": 2.069297990821082e-06, "loss": 0.7555, "step": 11067 }, { "epoch": 0.71, "grad_norm": 1.7374371870301069, "learning_rate": 2.0684582266290626e-06, "loss": 0.7594, "step": 11068 }, { "epoch": 0.71, "grad_norm": 1.5703056025785846, "learning_rate": 2.0676185884285495e-06, "loss": 0.662, "step": 11069 }, { "epoch": 0.71, "grad_norm": 1.5265847539532653, "learning_rate": 2.0667790762556267e-06, "loss": 0.6925, "step": 11070 }, { "epoch": 0.71, "grad_norm": 1.7082407997343165, "learning_rate": 2.0659396901463764e-06, "loss": 0.686, "step": 11071 }, { "epoch": 0.71, "grad_norm": 1.6965599308647514, "learning_rate": 2.0651004301368712e-06, "loss": 0.6995, "step": 11072 }, { "epoch": 0.71, "grad_norm": 1.6321284210250997, "learning_rate": 2.064261296263185e-06, "loss": 0.6791, "step": 11073 }, { "epoch": 0.71, "grad_norm": 1.9544170932913016, "learning_rate": 2.063422288561377e-06, "loss": 0.6261, "step": 11074 }, { "epoch": 0.71, "grad_norm": 1.5464268179040752, "learning_rate": 2.0625834070675094e-06, "loss": 0.6283, "step": 11075 }, { "epoch": 0.71, "grad_norm": 1.547177397064989, "learning_rate": 2.0617446518176354e-06, "loss": 0.6866, "step": 11076 }, { "epoch": 0.71, "grad_norm": 1.0610454877620181, "learning_rate": 2.0609060228478017e-06, "loss": 0.7134, "step": 11077 }, { "epoch": 0.71, "grad_norm": 1.8371303239182233, "learning_rate": 2.0600675201940513e-06, "loss": 0.7215, "step": 11078 }, { "epoch": 0.71, "grad_norm": 1.228457046977463, "learning_rate": 2.0592291438924213e-06, "loss": 0.6937, "step": 11079 }, { "epoch": 0.71, "grad_norm": 1.6248269147655627, "learning_rate": 2.0583908939789426e-06, "loss": 0.7358, "step": 11080 }, { "epoch": 0.71, "grad_norm": 1.4027301680633342, "learning_rate": 2.0575527704896414e-06, "loss": 0.6413, "step": 11081 }, { "epoch": 0.71, "grad_norm": 1.608759174132962, "learning_rate": 2.05671477346054e-06, "loss": 0.7876, "step": 11082 }, { "epoch": 0.71, "grad_norm": 1.602774104646564, "learning_rate": 2.055876902927654e-06, "loss": 0.7842, "step": 11083 }, { "epoch": 0.71, "grad_norm": 1.562879442523913, "learning_rate": 2.0550391589269913e-06, "loss": 0.7381, "step": 11084 }, { "epoch": 0.71, "grad_norm": 1.7906515436673776, "learning_rate": 2.0542015414945577e-06, "loss": 0.6696, "step": 11085 }, { "epoch": 0.71, "grad_norm": 1.3050626628177435, "learning_rate": 2.0533640506663523e-06, "loss": 0.5612, "step": 11086 }, { "epoch": 0.71, "grad_norm": 2.4121423157614905, "learning_rate": 2.0525266864783676e-06, "loss": 0.7912, "step": 11087 }, { "epoch": 0.71, "grad_norm": 1.6226579539952026, "learning_rate": 2.051689448966593e-06, "loss": 0.7015, "step": 11088 }, { "epoch": 0.71, "grad_norm": 2.476469787685936, "learning_rate": 2.050852338167008e-06, "loss": 0.675, "step": 11089 }, { "epoch": 0.71, "grad_norm": 1.6797099099032127, "learning_rate": 2.050015354115595e-06, "loss": 0.7238, "step": 11090 }, { "epoch": 0.71, "grad_norm": 1.7351457145143359, "learning_rate": 2.049178496848323e-06, "loss": 0.6731, "step": 11091 }, { "epoch": 0.71, "grad_norm": 1.6990527251391272, "learning_rate": 2.048341766401159e-06, "loss": 0.7567, "step": 11092 }, { "epoch": 0.71, "grad_norm": 1.1695961244733526, "learning_rate": 2.0475051628100635e-06, "loss": 0.7347, "step": 11093 }, { "epoch": 0.71, "grad_norm": 1.6244146093449188, "learning_rate": 2.0466686861109913e-06, "loss": 0.7171, "step": 11094 }, { "epoch": 0.71, "grad_norm": 1.4920840620802605, "learning_rate": 2.045832336339894e-06, "loss": 0.7405, "step": 11095 }, { "epoch": 0.71, "grad_norm": 1.841282027946464, "learning_rate": 2.0449961135327135e-06, "loss": 0.7773, "step": 11096 }, { "epoch": 0.71, "grad_norm": 1.6092575760152676, "learning_rate": 2.044160017725394e-06, "loss": 0.7834, "step": 11097 }, { "epoch": 0.71, "grad_norm": 1.787700834751813, "learning_rate": 2.0433240489538624e-06, "loss": 0.7823, "step": 11098 }, { "epoch": 0.71, "grad_norm": 2.9322258432285278, "learning_rate": 2.042488207254054e-06, "loss": 0.7456, "step": 11099 }, { "epoch": 0.71, "grad_norm": 1.8097578068653306, "learning_rate": 2.041652492661884e-06, "loss": 0.6911, "step": 11100 }, { "epoch": 0.71, "grad_norm": 1.7163033680406319, "learning_rate": 2.040816905213276e-06, "loss": 0.7671, "step": 11101 }, { "epoch": 0.71, "grad_norm": 1.7282267162899099, "learning_rate": 2.0399814449441385e-06, "loss": 0.8356, "step": 11102 }, { "epoch": 0.71, "grad_norm": 1.6916683098961895, "learning_rate": 2.0391461118903788e-06, "loss": 0.754, "step": 11103 }, { "epoch": 0.71, "grad_norm": 1.8302370774080332, "learning_rate": 2.038310906087898e-06, "loss": 0.7317, "step": 11104 }, { "epoch": 0.71, "grad_norm": 1.7732369888924508, "learning_rate": 2.0374758275725893e-06, "loss": 0.8968, "step": 11105 }, { "epoch": 0.71, "grad_norm": 1.6264231493201962, "learning_rate": 2.0366408763803476e-06, "loss": 0.6281, "step": 11106 }, { "epoch": 0.71, "grad_norm": 2.2290952391952636, "learning_rate": 2.0358060525470507e-06, "loss": 0.6929, "step": 11107 }, { "epoch": 0.71, "grad_norm": 1.6895671789736204, "learning_rate": 2.034971356108585e-06, "loss": 0.6295, "step": 11108 }, { "epoch": 0.71, "grad_norm": 1.4251971351117232, "learning_rate": 2.0341367871008154e-06, "loss": 0.644, "step": 11109 }, { "epoch": 0.71, "grad_norm": 1.7448566493379198, "learning_rate": 2.0333023455596173e-06, "loss": 0.7668, "step": 11110 }, { "epoch": 0.71, "grad_norm": 2.029107194372651, "learning_rate": 2.0324680315208505e-06, "loss": 0.7547, "step": 11111 }, { "epoch": 0.71, "grad_norm": 1.6826601341124114, "learning_rate": 2.03163384502037e-06, "loss": 0.7044, "step": 11112 }, { "epoch": 0.71, "grad_norm": 1.7352272865720078, "learning_rate": 2.0307997860940333e-06, "loss": 0.6671, "step": 11113 }, { "epoch": 0.71, "grad_norm": 1.716998959572017, "learning_rate": 2.0299658547776784e-06, "loss": 0.8396, "step": 11114 }, { "epoch": 0.71, "grad_norm": 1.7502424682735196, "learning_rate": 2.0291320511071544e-06, "loss": 0.6584, "step": 11115 }, { "epoch": 0.71, "grad_norm": 1.8861222020922226, "learning_rate": 2.0282983751182884e-06, "loss": 0.8332, "step": 11116 }, { "epoch": 0.71, "grad_norm": 1.6723356411151578, "learning_rate": 2.0274648268469154e-06, "loss": 0.7127, "step": 11117 }, { "epoch": 0.71, "grad_norm": 1.7804864992543024, "learning_rate": 2.026631406328858e-06, "loss": 0.7216, "step": 11118 }, { "epoch": 0.71, "grad_norm": 1.8331349418192522, "learning_rate": 2.025798113599933e-06, "loss": 0.9071, "step": 11119 }, { "epoch": 0.71, "grad_norm": 1.7144037476850673, "learning_rate": 2.0249649486959595e-06, "loss": 0.6898, "step": 11120 }, { "epoch": 0.71, "grad_norm": 1.5232027751272157, "learning_rate": 2.0241319116527376e-06, "loss": 0.7956, "step": 11121 }, { "epoch": 0.71, "grad_norm": 2.091611726837744, "learning_rate": 2.0232990025060757e-06, "loss": 0.6805, "step": 11122 }, { "epoch": 0.71, "grad_norm": 1.8050375404013488, "learning_rate": 2.022466221291765e-06, "loss": 0.6617, "step": 11123 }, { "epoch": 0.71, "grad_norm": 1.6704166123644533, "learning_rate": 2.0216335680456005e-06, "loss": 0.6703, "step": 11124 }, { "epoch": 0.71, "grad_norm": 1.5679558295963856, "learning_rate": 2.0208010428033675e-06, "loss": 0.8286, "step": 11125 }, { "epoch": 0.71, "grad_norm": 1.8237188586656783, "learning_rate": 2.0199686456008456e-06, "loss": 0.7455, "step": 11126 }, { "epoch": 0.71, "grad_norm": 1.5580998509185622, "learning_rate": 2.0191363764738087e-06, "loss": 0.6962, "step": 11127 }, { "epoch": 0.71, "grad_norm": 1.7812341945590215, "learning_rate": 2.018304235458025e-06, "loss": 0.721, "step": 11128 }, { "epoch": 0.71, "grad_norm": 1.5279723266159018, "learning_rate": 2.017472222589264e-06, "loss": 0.5832, "step": 11129 }, { "epoch": 0.71, "grad_norm": 1.612251895490938, "learning_rate": 2.0166403379032755e-06, "loss": 0.7023, "step": 11130 }, { "epoch": 0.71, "grad_norm": 1.7263508727403178, "learning_rate": 2.0158085814358187e-06, "loss": 0.6913, "step": 11131 }, { "epoch": 0.71, "grad_norm": 1.697344105101964, "learning_rate": 2.0149769532226383e-06, "loss": 0.6807, "step": 11132 }, { "epoch": 0.71, "grad_norm": 1.0615167930109708, "learning_rate": 2.014145453299476e-06, "loss": 0.6644, "step": 11133 }, { "epoch": 0.71, "grad_norm": 1.2571388999452915, "learning_rate": 2.013314081702068e-06, "loss": 0.7284, "step": 11134 }, { "epoch": 0.71, "grad_norm": 1.1468181431955087, "learning_rate": 2.012482838466145e-06, "loss": 0.5907, "step": 11135 }, { "epoch": 0.71, "grad_norm": 1.666386726493931, "learning_rate": 2.0116517236274312e-06, "loss": 0.6876, "step": 11136 }, { "epoch": 0.71, "grad_norm": 1.8153917312769732, "learning_rate": 2.010820737221646e-06, "loss": 0.8252, "step": 11137 }, { "epoch": 0.71, "grad_norm": 1.6425048783749976, "learning_rate": 2.0099898792845057e-06, "loss": 0.7508, "step": 11138 }, { "epoch": 0.71, "grad_norm": 1.6812573492850216, "learning_rate": 2.0091591498517184e-06, "loss": 0.5994, "step": 11139 }, { "epoch": 0.71, "grad_norm": 1.7734916141002457, "learning_rate": 2.008328548958985e-06, "loss": 0.7064, "step": 11140 }, { "epoch": 0.71, "grad_norm": 1.5767785851916516, "learning_rate": 2.007498076642005e-06, "loss": 0.6474, "step": 11141 }, { "epoch": 0.71, "grad_norm": 2.1696181897635545, "learning_rate": 2.006667732936469e-06, "loss": 0.7528, "step": 11142 }, { "epoch": 0.71, "grad_norm": 1.7005913004212891, "learning_rate": 2.0058375178780644e-06, "loss": 0.6225, "step": 11143 }, { "epoch": 0.71, "grad_norm": 1.971499801057309, "learning_rate": 2.0050074315024716e-06, "loss": 0.8136, "step": 11144 }, { "epoch": 0.71, "grad_norm": 2.2151766753473066, "learning_rate": 2.004177473845366e-06, "loss": 0.7059, "step": 11145 }, { "epoch": 0.71, "grad_norm": 1.6447125263558193, "learning_rate": 2.0033476449424156e-06, "loss": 0.6973, "step": 11146 }, { "epoch": 0.71, "grad_norm": 1.4438962631290404, "learning_rate": 2.0025179448292886e-06, "loss": 0.6599, "step": 11147 }, { "epoch": 0.71, "grad_norm": 1.0532676428663386, "learning_rate": 2.0016883735416415e-06, "loss": 0.5714, "step": 11148 }, { "epoch": 0.71, "grad_norm": 1.054909047655265, "learning_rate": 2.000858931115128e-06, "loss": 0.7093, "step": 11149 }, { "epoch": 0.71, "grad_norm": 2.661190263599757, "learning_rate": 2.0000296175853956e-06, "loss": 0.6853, "step": 11150 }, { "epoch": 0.71, "grad_norm": 1.8221381825214806, "learning_rate": 1.999200432988086e-06, "loss": 0.8535, "step": 11151 }, { "epoch": 0.71, "grad_norm": 2.1067541226697384, "learning_rate": 1.9983713773588367e-06, "loss": 0.6502, "step": 11152 }, { "epoch": 0.71, "grad_norm": 2.21319403318184, "learning_rate": 1.997542450733278e-06, "loss": 0.704, "step": 11153 }, { "epoch": 0.71, "grad_norm": 1.6683935845978914, "learning_rate": 1.9967136531470345e-06, "loss": 0.7305, "step": 11154 }, { "epoch": 0.71, "grad_norm": 2.6817722012672864, "learning_rate": 1.9958849846357287e-06, "loss": 0.6381, "step": 11155 }, { "epoch": 0.71, "grad_norm": 1.2192650575466097, "learning_rate": 1.9950564452349733e-06, "loss": 0.6034, "step": 11156 }, { "epoch": 0.71, "grad_norm": 1.8698742334617016, "learning_rate": 1.994228034980378e-06, "loss": 0.7513, "step": 11157 }, { "epoch": 0.71, "grad_norm": 1.7986753558999107, "learning_rate": 1.9933997539075468e-06, "loss": 0.6887, "step": 11158 }, { "epoch": 0.71, "grad_norm": 1.855618612805934, "learning_rate": 1.992571602052075e-06, "loss": 0.7171, "step": 11159 }, { "epoch": 0.71, "grad_norm": 1.5967329447757046, "learning_rate": 1.991743579449557e-06, "loss": 0.6881, "step": 11160 }, { "epoch": 0.71, "grad_norm": 1.7745548999744047, "learning_rate": 1.9909156861355767e-06, "loss": 0.7585, "step": 11161 }, { "epoch": 0.71, "grad_norm": 1.5282106489237073, "learning_rate": 1.990087922145721e-06, "loss": 0.7617, "step": 11162 }, { "epoch": 0.71, "grad_norm": 1.5286463856913828, "learning_rate": 1.9892602875155582e-06, "loss": 0.6813, "step": 11163 }, { "epoch": 0.71, "grad_norm": 1.7348631646221582, "learning_rate": 1.988432782280663e-06, "loss": 0.8554, "step": 11164 }, { "epoch": 0.71, "grad_norm": 2.0154697077806696, "learning_rate": 1.9876054064765993e-06, "loss": 0.5695, "step": 11165 }, { "epoch": 0.71, "grad_norm": 1.675221210254862, "learning_rate": 1.9867781601389254e-06, "loss": 0.6504, "step": 11166 }, { "epoch": 0.71, "grad_norm": 1.8806896619833484, "learning_rate": 1.9859510433031943e-06, "loss": 0.6035, "step": 11167 }, { "epoch": 0.71, "grad_norm": 1.6217771064462898, "learning_rate": 1.9851240560049516e-06, "loss": 0.815, "step": 11168 }, { "epoch": 0.71, "grad_norm": 1.573050574306797, "learning_rate": 1.984297198279746e-06, "loss": 0.7414, "step": 11169 }, { "epoch": 0.71, "grad_norm": 1.8557646686289688, "learning_rate": 1.9834704701631063e-06, "loss": 0.8111, "step": 11170 }, { "epoch": 0.72, "grad_norm": 2.086266327759189, "learning_rate": 1.982643871690571e-06, "loss": 0.6792, "step": 11171 }, { "epoch": 0.72, "grad_norm": 1.0940414170248995, "learning_rate": 1.9818174028976576e-06, "loss": 0.6484, "step": 11172 }, { "epoch": 0.72, "grad_norm": 1.577813336043298, "learning_rate": 1.980991063819893e-06, "loss": 0.654, "step": 11173 }, { "epoch": 0.72, "grad_norm": 1.6732763847611936, "learning_rate": 1.9801648544927876e-06, "loss": 0.8998, "step": 11174 }, { "epoch": 0.72, "grad_norm": 2.0724112765708878, "learning_rate": 1.9793387749518517e-06, "loss": 0.7947, "step": 11175 }, { "epoch": 0.72, "grad_norm": 1.8199836480776383, "learning_rate": 1.9785128252325877e-06, "loss": 0.6495, "step": 11176 }, { "epoch": 0.72, "grad_norm": 1.0795821693335623, "learning_rate": 1.9776870053704917e-06, "loss": 0.5538, "step": 11177 }, { "epoch": 0.72, "grad_norm": 1.5830370989611189, "learning_rate": 1.9768613154010612e-06, "loss": 0.7708, "step": 11178 }, { "epoch": 0.72, "grad_norm": 1.810846360126351, "learning_rate": 1.976035755359775e-06, "loss": 0.7769, "step": 11179 }, { "epoch": 0.72, "grad_norm": 1.1408475194458625, "learning_rate": 1.9752103252821202e-06, "loss": 0.6426, "step": 11180 }, { "epoch": 0.72, "grad_norm": 2.172841120199503, "learning_rate": 1.974385025203569e-06, "loss": 0.6727, "step": 11181 }, { "epoch": 0.72, "grad_norm": 1.8376594005772893, "learning_rate": 1.9735598551595927e-06, "loss": 0.729, "step": 11182 }, { "epoch": 0.72, "grad_norm": 9.87464179100724, "learning_rate": 1.9727348151856535e-06, "loss": 0.7422, "step": 11183 }, { "epoch": 0.72, "grad_norm": 1.4908815932010657, "learning_rate": 1.971909905317209e-06, "loss": 0.6239, "step": 11184 }, { "epoch": 0.72, "grad_norm": 1.8517975205934754, "learning_rate": 1.9710851255897173e-06, "loss": 0.7516, "step": 11185 }, { "epoch": 0.72, "grad_norm": 1.6851310434786007, "learning_rate": 1.9702604760386194e-06, "loss": 0.733, "step": 11186 }, { "epoch": 0.72, "grad_norm": 1.6874915520258733, "learning_rate": 1.969435956699363e-06, "loss": 0.6311, "step": 11187 }, { "epoch": 0.72, "grad_norm": 1.6944567835674795, "learning_rate": 1.9686115676073775e-06, "loss": 0.8548, "step": 11188 }, { "epoch": 0.72, "grad_norm": 1.7682023739787143, "learning_rate": 1.967787308798099e-06, "loss": 0.8394, "step": 11189 }, { "epoch": 0.72, "grad_norm": 1.6122664129743438, "learning_rate": 1.9669631803069506e-06, "loss": 0.6536, "step": 11190 }, { "epoch": 0.72, "grad_norm": 1.6332251350290927, "learning_rate": 1.966139182169351e-06, "loss": 0.8184, "step": 11191 }, { "epoch": 0.72, "grad_norm": 1.5573434970155506, "learning_rate": 1.9653153144207144e-06, "loss": 0.7648, "step": 11192 }, { "epoch": 0.72, "grad_norm": 1.522784013054539, "learning_rate": 1.9644915770964472e-06, "loss": 0.6152, "step": 11193 }, { "epoch": 0.72, "grad_norm": 1.5298359804443713, "learning_rate": 1.9636679702319566e-06, "loss": 0.8131, "step": 11194 }, { "epoch": 0.72, "grad_norm": 1.7131004065930087, "learning_rate": 1.9628444938626336e-06, "loss": 0.7408, "step": 11195 }, { "epoch": 0.72, "grad_norm": 1.7420400424261557, "learning_rate": 1.9620211480238737e-06, "loss": 0.5852, "step": 11196 }, { "epoch": 0.72, "grad_norm": 1.7092743647729, "learning_rate": 1.9611979327510617e-06, "loss": 0.6818, "step": 11197 }, { "epoch": 0.72, "grad_norm": 1.87936126509389, "learning_rate": 1.9603748480795763e-06, "loss": 0.787, "step": 11198 }, { "epoch": 0.72, "grad_norm": 1.6424908376941119, "learning_rate": 1.9595518940447933e-06, "loss": 0.6567, "step": 11199 }, { "epoch": 0.72, "grad_norm": 1.4856531281761012, "learning_rate": 1.958729070682081e-06, "loss": 0.7044, "step": 11200 }, { "epoch": 0.72, "grad_norm": 1.0561169587376416, "learning_rate": 1.9579063780268026e-06, "loss": 0.7005, "step": 11201 }, { "epoch": 0.72, "grad_norm": 5.154822436796968, "learning_rate": 1.957083816114314e-06, "loss": 0.7479, "step": 11202 }, { "epoch": 0.72, "grad_norm": 1.7236781783232393, "learning_rate": 1.9562613849799704e-06, "loss": 0.6064, "step": 11203 }, { "epoch": 0.72, "grad_norm": 1.7677323321758593, "learning_rate": 1.955439084659117e-06, "loss": 0.6901, "step": 11204 }, { "epoch": 0.72, "grad_norm": 1.8614397182754256, "learning_rate": 1.9546169151870943e-06, "loss": 0.7936, "step": 11205 }, { "epoch": 0.72, "grad_norm": 1.4915474104427595, "learning_rate": 1.953794876599237e-06, "loss": 0.7142, "step": 11206 }, { "epoch": 0.72, "grad_norm": 1.53622925315327, "learning_rate": 1.9529729689308756e-06, "loss": 0.7061, "step": 11207 }, { "epoch": 0.72, "grad_norm": 1.7788178144917017, "learning_rate": 1.952151192217333e-06, "loss": 0.7955, "step": 11208 }, { "epoch": 0.72, "grad_norm": 1.541544493986656, "learning_rate": 1.9513295464939274e-06, "loss": 0.7221, "step": 11209 }, { "epoch": 0.72, "grad_norm": 1.5973099276588947, "learning_rate": 1.9505080317959702e-06, "loss": 0.7706, "step": 11210 }, { "epoch": 0.72, "grad_norm": 1.8141016534016272, "learning_rate": 1.9496866481587717e-06, "loss": 0.871, "step": 11211 }, { "epoch": 0.72, "grad_norm": 1.5670957381776323, "learning_rate": 1.948865395617632e-06, "loss": 0.7004, "step": 11212 }, { "epoch": 0.72, "grad_norm": 2.0422310106455703, "learning_rate": 1.9480442742078455e-06, "loss": 0.819, "step": 11213 }, { "epoch": 0.72, "grad_norm": 2.210579710676004, "learning_rate": 1.9472232839647032e-06, "loss": 0.7733, "step": 11214 }, { "epoch": 0.72, "grad_norm": 1.9710047783572027, "learning_rate": 1.9464024249234895e-06, "loss": 0.733, "step": 11215 }, { "epoch": 0.72, "grad_norm": 1.6792034922788492, "learning_rate": 1.9455816971194834e-06, "loss": 0.7002, "step": 11216 }, { "epoch": 0.72, "grad_norm": 1.8627965519835172, "learning_rate": 1.9447611005879573e-06, "loss": 0.7868, "step": 11217 }, { "epoch": 0.72, "grad_norm": 1.0065533317719337, "learning_rate": 1.943940635364179e-06, "loss": 0.5845, "step": 11218 }, { "epoch": 0.72, "grad_norm": 1.6804694854098043, "learning_rate": 1.9431203014834093e-06, "loss": 0.7231, "step": 11219 }, { "epoch": 0.72, "grad_norm": 1.5497249765493784, "learning_rate": 1.942300098980907e-06, "loss": 0.7055, "step": 11220 }, { "epoch": 0.72, "grad_norm": 1.656023091800675, "learning_rate": 1.9414800278919223e-06, "loss": 0.8252, "step": 11221 }, { "epoch": 0.72, "grad_norm": 1.6372719017020718, "learning_rate": 1.940660088251698e-06, "loss": 0.8052, "step": 11222 }, { "epoch": 0.72, "grad_norm": 1.774626187428944, "learning_rate": 1.9398402800954746e-06, "loss": 0.7402, "step": 11223 }, { "epoch": 0.72, "grad_norm": 2.001575679710364, "learning_rate": 1.939020603458486e-06, "loss": 0.8426, "step": 11224 }, { "epoch": 0.72, "grad_norm": 1.7109876071363324, "learning_rate": 1.9382010583759604e-06, "loss": 0.6672, "step": 11225 }, { "epoch": 0.72, "grad_norm": 1.6745874271245167, "learning_rate": 1.937381644883117e-06, "loss": 0.6711, "step": 11226 }, { "epoch": 0.72, "grad_norm": 1.9922739371230054, "learning_rate": 1.936562363015179e-06, "loss": 0.7599, "step": 11227 }, { "epoch": 0.72, "grad_norm": 1.8498377198590668, "learning_rate": 1.93574321280735e-06, "loss": 0.6901, "step": 11228 }, { "epoch": 0.72, "grad_norm": 1.6395530279875148, "learning_rate": 1.9349241942948405e-06, "loss": 0.8122, "step": 11229 }, { "epoch": 0.72, "grad_norm": 1.5559243869177317, "learning_rate": 1.934105307512848e-06, "loss": 0.7598, "step": 11230 }, { "epoch": 0.72, "grad_norm": 1.8371402939152153, "learning_rate": 1.9332865524965677e-06, "loss": 0.8031, "step": 11231 }, { "epoch": 0.72, "grad_norm": 1.5239267587602667, "learning_rate": 1.932467929281187e-06, "loss": 0.6214, "step": 11232 }, { "epoch": 0.72, "grad_norm": 1.8616508812812351, "learning_rate": 1.9316494379018876e-06, "loss": 0.7282, "step": 11233 }, { "epoch": 0.72, "grad_norm": 1.5570385569048006, "learning_rate": 1.9308310783938505e-06, "loss": 0.749, "step": 11234 }, { "epoch": 0.72, "grad_norm": 1.961732502484338, "learning_rate": 1.9300128507922417e-06, "loss": 0.7732, "step": 11235 }, { "epoch": 0.72, "grad_norm": 1.7017743207137659, "learning_rate": 1.9291947551322327e-06, "loss": 0.7693, "step": 11236 }, { "epoch": 0.72, "grad_norm": 1.8886304285519522, "learning_rate": 1.9283767914489777e-06, "loss": 0.8494, "step": 11237 }, { "epoch": 0.72, "grad_norm": 1.0283321374799304, "learning_rate": 1.9275589597776346e-06, "loss": 0.683, "step": 11238 }, { "epoch": 0.72, "grad_norm": 1.8237659101563621, "learning_rate": 1.926741260153352e-06, "loss": 0.694, "step": 11239 }, { "epoch": 0.72, "grad_norm": 1.9242234844476391, "learning_rate": 1.9259236926112702e-06, "loss": 0.704, "step": 11240 }, { "epoch": 0.72, "grad_norm": 1.5141591698825811, "learning_rate": 1.925106257186532e-06, "loss": 0.727, "step": 11241 }, { "epoch": 0.72, "grad_norm": 2.499606733999025, "learning_rate": 1.9242889539142624e-06, "loss": 0.6231, "step": 11242 }, { "epoch": 0.72, "grad_norm": 2.240420330907048, "learning_rate": 1.923471782829594e-06, "loss": 0.739, "step": 11243 }, { "epoch": 0.72, "grad_norm": 1.6941762574809474, "learning_rate": 1.9226547439676404e-06, "loss": 0.7306, "step": 11244 }, { "epoch": 0.72, "grad_norm": 2.181562229850409, "learning_rate": 1.921837837363521e-06, "loss": 0.6329, "step": 11245 }, { "epoch": 0.72, "grad_norm": 4.3948115134339565, "learning_rate": 1.921021063052343e-06, "loss": 0.6083, "step": 11246 }, { "epoch": 0.72, "grad_norm": 1.159360352876568, "learning_rate": 1.92020442106921e-06, "loss": 0.5515, "step": 11247 }, { "epoch": 0.72, "grad_norm": 1.0196577235206707, "learning_rate": 1.9193879114492198e-06, "loss": 0.5946, "step": 11248 }, { "epoch": 0.72, "grad_norm": 1.2567657137013233, "learning_rate": 1.918571534227462e-06, "loss": 0.6635, "step": 11249 }, { "epoch": 0.72, "grad_norm": 1.6135098806195696, "learning_rate": 1.917755289439028e-06, "loss": 0.6675, "step": 11250 }, { "epoch": 0.72, "grad_norm": 1.998677117814949, "learning_rate": 1.9169391771189915e-06, "loss": 0.6639, "step": 11251 }, { "epoch": 0.72, "grad_norm": 0.9984391533361187, "learning_rate": 1.916123197302433e-06, "loss": 0.6789, "step": 11252 }, { "epoch": 0.72, "grad_norm": 1.7202349871523699, "learning_rate": 1.915307350024419e-06, "loss": 0.7233, "step": 11253 }, { "epoch": 0.72, "grad_norm": 2.4994810199589645, "learning_rate": 1.914491635320013e-06, "loss": 0.7286, "step": 11254 }, { "epoch": 0.72, "grad_norm": 1.7821754594553636, "learning_rate": 1.913676053224273e-06, "loss": 0.5869, "step": 11255 }, { "epoch": 0.72, "grad_norm": 1.9985879874911274, "learning_rate": 1.9128606037722512e-06, "loss": 0.707, "step": 11256 }, { "epoch": 0.72, "grad_norm": 1.8577378975446193, "learning_rate": 1.9120452869989943e-06, "loss": 0.6357, "step": 11257 }, { "epoch": 0.72, "grad_norm": 1.7850718673761572, "learning_rate": 1.9112301029395397e-06, "loss": 0.7624, "step": 11258 }, { "epoch": 0.72, "grad_norm": 1.1572160203109245, "learning_rate": 1.9104150516289283e-06, "loss": 0.6593, "step": 11259 }, { "epoch": 0.72, "grad_norm": 1.7746094323992516, "learning_rate": 1.909600133102183e-06, "loss": 0.6934, "step": 11260 }, { "epoch": 0.72, "grad_norm": 1.6156367330796229, "learning_rate": 1.9087853473943313e-06, "loss": 0.5986, "step": 11261 }, { "epoch": 0.72, "grad_norm": 1.5082689510164085, "learning_rate": 1.9079706945403905e-06, "loss": 0.8171, "step": 11262 }, { "epoch": 0.72, "grad_norm": 1.5426670033215255, "learning_rate": 1.9071561745753715e-06, "loss": 0.6474, "step": 11263 }, { "epoch": 0.72, "grad_norm": 1.6604833150968379, "learning_rate": 1.906341787534281e-06, "loss": 0.6516, "step": 11264 }, { "epoch": 0.72, "grad_norm": 1.8450354698177445, "learning_rate": 1.90552753345212e-06, "loss": 0.9714, "step": 11265 }, { "epoch": 0.72, "grad_norm": 1.5480479669048592, "learning_rate": 1.9047134123638833e-06, "loss": 0.6926, "step": 11266 }, { "epoch": 0.72, "grad_norm": 1.2856812688465924, "learning_rate": 1.9038994243045582e-06, "loss": 0.7076, "step": 11267 }, { "epoch": 0.72, "grad_norm": 1.0464172715810311, "learning_rate": 1.9030855693091316e-06, "loss": 0.6269, "step": 11268 }, { "epoch": 0.72, "grad_norm": 2.0017403860177656, "learning_rate": 1.90227184741258e-06, "loss": 0.7306, "step": 11269 }, { "epoch": 0.72, "grad_norm": 2.0163495845279464, "learning_rate": 1.9014582586498754e-06, "loss": 0.7362, "step": 11270 }, { "epoch": 0.72, "grad_norm": 1.5563270778265617, "learning_rate": 1.9006448030559832e-06, "loss": 0.7074, "step": 11271 }, { "epoch": 0.72, "grad_norm": 2.0061444039181318, "learning_rate": 1.8998314806658652e-06, "loss": 0.6439, "step": 11272 }, { "epoch": 0.72, "grad_norm": 1.5325150817090947, "learning_rate": 1.899018291514476e-06, "loss": 0.7735, "step": 11273 }, { "epoch": 0.72, "grad_norm": 1.8407215575897302, "learning_rate": 1.8982052356367641e-06, "loss": 0.642, "step": 11274 }, { "epoch": 0.72, "grad_norm": 1.1338248626693124, "learning_rate": 1.897392313067672e-06, "loss": 0.598, "step": 11275 }, { "epoch": 0.72, "grad_norm": 1.831719117253517, "learning_rate": 1.8965795238421408e-06, "loss": 0.6832, "step": 11276 }, { "epoch": 0.72, "grad_norm": 1.6743660946570533, "learning_rate": 1.8957668679950997e-06, "loss": 0.6622, "step": 11277 }, { "epoch": 0.72, "grad_norm": 1.8642845275568205, "learning_rate": 1.8949543455614767e-06, "loss": 0.7635, "step": 11278 }, { "epoch": 0.72, "grad_norm": 1.778196609700339, "learning_rate": 1.8941419565761903e-06, "loss": 0.7679, "step": 11279 }, { "epoch": 0.72, "grad_norm": 1.8272296320801493, "learning_rate": 1.8933297010741569e-06, "loss": 0.7912, "step": 11280 }, { "epoch": 0.72, "grad_norm": 1.6105352306014729, "learning_rate": 1.892517579090285e-06, "loss": 0.6717, "step": 11281 }, { "epoch": 0.72, "grad_norm": 1.0525674056681669, "learning_rate": 1.8917055906594755e-06, "loss": 0.6498, "step": 11282 }, { "epoch": 0.72, "grad_norm": 1.957546909214822, "learning_rate": 1.8908937358166323e-06, "loss": 0.7168, "step": 11283 }, { "epoch": 0.72, "grad_norm": 2.019250959749575, "learning_rate": 1.8900820145966397e-06, "loss": 0.6815, "step": 11284 }, { "epoch": 0.72, "grad_norm": 2.018736679090472, "learning_rate": 1.8892704270343887e-06, "loss": 0.8092, "step": 11285 }, { "epoch": 0.72, "grad_norm": 1.9397257514317772, "learning_rate": 1.8884589731647584e-06, "loss": 0.7388, "step": 11286 }, { "epoch": 0.72, "grad_norm": 1.6072614170430073, "learning_rate": 1.8876476530226235e-06, "loss": 0.7787, "step": 11287 }, { "epoch": 0.72, "grad_norm": 2.2405972482569676, "learning_rate": 1.8868364666428523e-06, "loss": 0.7682, "step": 11288 }, { "epoch": 0.72, "grad_norm": 1.0127039431580929, "learning_rate": 1.8860254140603063e-06, "loss": 0.5486, "step": 11289 }, { "epoch": 0.72, "grad_norm": 1.9634830796968572, "learning_rate": 1.8852144953098478e-06, "loss": 0.7803, "step": 11290 }, { "epoch": 0.72, "grad_norm": 1.0584743949959718, "learning_rate": 1.8844037104263225e-06, "loss": 0.7006, "step": 11291 }, { "epoch": 0.72, "grad_norm": 1.1905493291171363, "learning_rate": 1.8835930594445817e-06, "loss": 0.6695, "step": 11292 }, { "epoch": 0.72, "grad_norm": 1.771641057333405, "learning_rate": 1.8827825423994595e-06, "loss": 0.6987, "step": 11293 }, { "epoch": 0.72, "grad_norm": 1.9300693077861173, "learning_rate": 1.881972159325795e-06, "loss": 0.7392, "step": 11294 }, { "epoch": 0.72, "grad_norm": 1.6086173448186725, "learning_rate": 1.8811619102584155e-06, "loss": 0.6834, "step": 11295 }, { "epoch": 0.72, "grad_norm": 1.696366443976838, "learning_rate": 1.8803517952321438e-06, "loss": 0.681, "step": 11296 }, { "epoch": 0.72, "grad_norm": 1.8568734050876912, "learning_rate": 1.8795418142817962e-06, "loss": 0.6344, "step": 11297 }, { "epoch": 0.72, "grad_norm": 2.2899258515016596, "learning_rate": 1.8787319674421827e-06, "loss": 0.7873, "step": 11298 }, { "epoch": 0.72, "grad_norm": 1.732142364800068, "learning_rate": 1.877922254748114e-06, "loss": 0.7764, "step": 11299 }, { "epoch": 0.72, "grad_norm": 1.0388485498315407, "learning_rate": 1.8771126762343834e-06, "loss": 0.7343, "step": 11300 }, { "epoch": 0.72, "grad_norm": 1.7529994203746886, "learning_rate": 1.876303231935791e-06, "loss": 0.7648, "step": 11301 }, { "epoch": 0.72, "grad_norm": 2.824744320769347, "learning_rate": 1.8754939218871183e-06, "loss": 0.7613, "step": 11302 }, { "epoch": 0.72, "grad_norm": 1.7145915736432793, "learning_rate": 1.8746847461231533e-06, "loss": 0.7701, "step": 11303 }, { "epoch": 0.72, "grad_norm": 1.6190942895463647, "learning_rate": 1.8738757046786705e-06, "loss": 0.6561, "step": 11304 }, { "epoch": 0.72, "grad_norm": 1.7211583752441162, "learning_rate": 1.8730667975884398e-06, "loss": 0.7283, "step": 11305 }, { "epoch": 0.72, "grad_norm": 1.6342298007181244, "learning_rate": 1.8722580248872302e-06, "loss": 0.7527, "step": 11306 }, { "epoch": 0.72, "grad_norm": 1.7851488720538196, "learning_rate": 1.8714493866097955e-06, "loss": 0.6285, "step": 11307 }, { "epoch": 0.72, "grad_norm": 1.7556664334484182, "learning_rate": 1.8706408827908956e-06, "loss": 0.8255, "step": 11308 }, { "epoch": 0.72, "grad_norm": 1.7566838311201016, "learning_rate": 1.8698325134652711e-06, "loss": 0.5915, "step": 11309 }, { "epoch": 0.72, "grad_norm": 1.6747434860493418, "learning_rate": 1.86902427866767e-06, "loss": 0.7178, "step": 11310 }, { "epoch": 0.72, "grad_norm": 1.8053630043217266, "learning_rate": 1.8682161784328262e-06, "loss": 0.7404, "step": 11311 }, { "epoch": 0.72, "grad_norm": 1.6774100429176335, "learning_rate": 1.8674082127954684e-06, "loss": 0.8178, "step": 11312 }, { "epoch": 0.72, "grad_norm": 1.8045610256452391, "learning_rate": 1.8666003817903267e-06, "loss": 0.7422, "step": 11313 }, { "epoch": 0.72, "grad_norm": 2.126025128500264, "learning_rate": 1.8657926854521125e-06, "loss": 0.7459, "step": 11314 }, { "epoch": 0.72, "grad_norm": 1.0973319996613828, "learning_rate": 1.8649851238155465e-06, "loss": 0.5777, "step": 11315 }, { "epoch": 0.72, "grad_norm": 1.8120907384952052, "learning_rate": 1.864177696915329e-06, "loss": 0.6552, "step": 11316 }, { "epoch": 0.72, "grad_norm": 1.651289699018709, "learning_rate": 1.8633704047861667e-06, "loss": 0.8081, "step": 11317 }, { "epoch": 0.72, "grad_norm": 1.632397678639915, "learning_rate": 1.862563247462753e-06, "loss": 0.7417, "step": 11318 }, { "epoch": 0.72, "grad_norm": 1.7796992188579472, "learning_rate": 1.8617562249797788e-06, "loss": 0.6474, "step": 11319 }, { "epoch": 0.72, "grad_norm": 1.8507458813377347, "learning_rate": 1.8609493373719273e-06, "loss": 0.6719, "step": 11320 }, { "epoch": 0.72, "grad_norm": 1.6256646314604875, "learning_rate": 1.8601425846738775e-06, "loss": 0.6595, "step": 11321 }, { "epoch": 0.72, "grad_norm": 1.5383463392380592, "learning_rate": 1.859335966920301e-06, "loss": 0.7152, "step": 11322 }, { "epoch": 0.72, "grad_norm": 1.9908176051298434, "learning_rate": 1.858529484145864e-06, "loss": 0.7785, "step": 11323 }, { "epoch": 0.72, "grad_norm": 1.2347970845230698, "learning_rate": 1.8577231363852305e-06, "loss": 0.7111, "step": 11324 }, { "epoch": 0.72, "grad_norm": 1.567400310527822, "learning_rate": 1.8569169236730533e-06, "loss": 0.559, "step": 11325 }, { "epoch": 0.72, "grad_norm": 1.7882666434599799, "learning_rate": 1.8561108460439825e-06, "loss": 0.6431, "step": 11326 }, { "epoch": 0.72, "grad_norm": 1.9484249370387559, "learning_rate": 1.8553049035326615e-06, "loss": 0.7553, "step": 11327 }, { "epoch": 0.73, "grad_norm": 1.753625302547307, "learning_rate": 1.8544990961737274e-06, "loss": 0.6849, "step": 11328 }, { "epoch": 0.73, "grad_norm": 1.5415218135989635, "learning_rate": 1.8536934240018129e-06, "loss": 0.9739, "step": 11329 }, { "epoch": 0.73, "grad_norm": 2.073716825800393, "learning_rate": 1.8528878870515433e-06, "loss": 0.7103, "step": 11330 }, { "epoch": 0.73, "grad_norm": 2.4264772077176726, "learning_rate": 1.852082485357538e-06, "loss": 0.6799, "step": 11331 }, { "epoch": 0.73, "grad_norm": 1.7218449153982844, "learning_rate": 1.8512772189544142e-06, "loss": 0.5578, "step": 11332 }, { "epoch": 0.73, "grad_norm": 1.6717710559261343, "learning_rate": 1.8504720878767797e-06, "loss": 0.8136, "step": 11333 }, { "epoch": 0.73, "grad_norm": 1.0407180361999042, "learning_rate": 1.8496670921592364e-06, "loss": 0.626, "step": 11334 }, { "epoch": 0.73, "grad_norm": 2.361063076732098, "learning_rate": 1.8488622318363814e-06, "loss": 0.645, "step": 11335 }, { "epoch": 0.73, "grad_norm": 1.5513176410505798, "learning_rate": 1.848057506942807e-06, "loss": 0.6697, "step": 11336 }, { "epoch": 0.73, "grad_norm": 1.8859516320353642, "learning_rate": 1.847252917513097e-06, "loss": 0.7204, "step": 11337 }, { "epoch": 0.73, "grad_norm": 2.3643149294031027, "learning_rate": 1.8464484635818326e-06, "loss": 0.7837, "step": 11338 }, { "epoch": 0.73, "grad_norm": 1.7563041334092306, "learning_rate": 1.845644145183586e-06, "loss": 0.7447, "step": 11339 }, { "epoch": 0.73, "grad_norm": 1.775993989491267, "learning_rate": 1.8448399623529246e-06, "loss": 0.7435, "step": 11340 }, { "epoch": 0.73, "grad_norm": 1.8840001773131994, "learning_rate": 1.844035915124413e-06, "loss": 0.7742, "step": 11341 }, { "epoch": 0.73, "grad_norm": 1.6347722680029408, "learning_rate": 1.8432320035326062e-06, "loss": 0.622, "step": 11342 }, { "epoch": 0.73, "grad_norm": 1.882634127730973, "learning_rate": 1.8424282276120547e-06, "loss": 0.7427, "step": 11343 }, { "epoch": 0.73, "grad_norm": 1.0594622706754795, "learning_rate": 1.8416245873973031e-06, "loss": 0.7054, "step": 11344 }, { "epoch": 0.73, "grad_norm": 1.861445008114734, "learning_rate": 1.84082108292289e-06, "loss": 0.6585, "step": 11345 }, { "epoch": 0.73, "grad_norm": 1.6530489204390173, "learning_rate": 1.8400177142233489e-06, "loss": 0.6719, "step": 11346 }, { "epoch": 0.73, "grad_norm": 1.5344084317075344, "learning_rate": 1.8392144813332041e-06, "loss": 0.5668, "step": 11347 }, { "epoch": 0.73, "grad_norm": 1.6599651888119566, "learning_rate": 1.8384113842869827e-06, "loss": 0.6486, "step": 11348 }, { "epoch": 0.73, "grad_norm": 1.8374741114365605, "learning_rate": 1.8376084231191932e-06, "loss": 0.7678, "step": 11349 }, { "epoch": 0.73, "grad_norm": 1.0584921515347752, "learning_rate": 1.8368055978643501e-06, "loss": 0.5931, "step": 11350 }, { "epoch": 0.73, "grad_norm": 1.7810845319310034, "learning_rate": 1.8360029085569558e-06, "loss": 0.6965, "step": 11351 }, { "epoch": 0.73, "grad_norm": 1.1221436741996056, "learning_rate": 1.8352003552315078e-06, "loss": 0.6374, "step": 11352 }, { "epoch": 0.73, "grad_norm": 2.1230425786550504, "learning_rate": 1.8343979379224991e-06, "loss": 0.7092, "step": 11353 }, { "epoch": 0.73, "grad_norm": 1.5726223150535512, "learning_rate": 1.8335956566644125e-06, "loss": 0.6548, "step": 11354 }, { "epoch": 0.73, "grad_norm": 1.5602391540646585, "learning_rate": 1.832793511491735e-06, "loss": 0.6527, "step": 11355 }, { "epoch": 0.73, "grad_norm": 1.8625810641295284, "learning_rate": 1.831991502438934e-06, "loss": 0.8039, "step": 11356 }, { "epoch": 0.73, "grad_norm": 1.9113264868241835, "learning_rate": 1.831189629540484e-06, "loss": 0.7011, "step": 11357 }, { "epoch": 0.73, "grad_norm": 1.5229859033548638, "learning_rate": 1.8303878928308421e-06, "loss": 0.6774, "step": 11358 }, { "epoch": 0.73, "grad_norm": 1.828791029794198, "learning_rate": 1.8295862923444702e-06, "loss": 0.7734, "step": 11359 }, { "epoch": 0.73, "grad_norm": 1.112865222954811, "learning_rate": 1.8287848281158178e-06, "loss": 0.6827, "step": 11360 }, { "epoch": 0.73, "grad_norm": 1.9761305679338492, "learning_rate": 1.8279835001793272e-06, "loss": 0.812, "step": 11361 }, { "epoch": 0.73, "grad_norm": 1.6995977859333493, "learning_rate": 1.8271823085694446e-06, "loss": 0.7575, "step": 11362 }, { "epoch": 0.73, "grad_norm": 1.06116728910963, "learning_rate": 1.8263812533205955e-06, "loss": 0.6239, "step": 11363 }, { "epoch": 0.73, "grad_norm": 1.7477757617629033, "learning_rate": 1.825580334467215e-06, "loss": 0.7097, "step": 11364 }, { "epoch": 0.73, "grad_norm": 1.1893979028410404, "learning_rate": 1.8247795520437177e-06, "loss": 0.7802, "step": 11365 }, { "epoch": 0.73, "grad_norm": 1.2756316096422686, "learning_rate": 1.8239789060845254e-06, "loss": 0.6593, "step": 11366 }, { "epoch": 0.73, "grad_norm": 1.1601416162760902, "learning_rate": 1.8231783966240458e-06, "loss": 0.6692, "step": 11367 }, { "epoch": 0.73, "grad_norm": 1.8629298416171611, "learning_rate": 1.8223780236966832e-06, "loss": 0.711, "step": 11368 }, { "epoch": 0.73, "grad_norm": 2.118340160330948, "learning_rate": 1.8215777873368363e-06, "loss": 0.6815, "step": 11369 }, { "epoch": 0.73, "grad_norm": 1.6797561489660853, "learning_rate": 1.8207776875788952e-06, "loss": 0.6342, "step": 11370 }, { "epoch": 0.73, "grad_norm": 1.6843647709129879, "learning_rate": 1.8199777244572525e-06, "loss": 0.7236, "step": 11371 }, { "epoch": 0.73, "grad_norm": 1.796409721748534, "learning_rate": 1.8191778980062813e-06, "loss": 0.8242, "step": 11372 }, { "epoch": 0.73, "grad_norm": 1.7633802815887583, "learning_rate": 1.8183782082603618e-06, "loss": 0.7651, "step": 11373 }, { "epoch": 0.73, "grad_norm": 1.5062724991130187, "learning_rate": 1.8175786552538616e-06, "loss": 0.6312, "step": 11374 }, { "epoch": 0.73, "grad_norm": 2.3597664735941253, "learning_rate": 1.8167792390211435e-06, "loss": 0.954, "step": 11375 }, { "epoch": 0.73, "grad_norm": 1.74539758535333, "learning_rate": 1.815979959596565e-06, "loss": 0.7486, "step": 11376 }, { "epoch": 0.73, "grad_norm": 1.5300085467258495, "learning_rate": 1.8151808170144751e-06, "loss": 0.7092, "step": 11377 }, { "epoch": 0.73, "grad_norm": 1.7352164479652539, "learning_rate": 1.8143818113092254e-06, "loss": 0.6354, "step": 11378 }, { "epoch": 0.73, "grad_norm": 1.673484559232145, "learning_rate": 1.813582942515148e-06, "loss": 0.7668, "step": 11379 }, { "epoch": 0.73, "grad_norm": 1.785739608177273, "learning_rate": 1.8127842106665837e-06, "loss": 0.6563, "step": 11380 }, { "epoch": 0.73, "grad_norm": 1.1369175641677032, "learning_rate": 1.8119856157978534e-06, "loss": 0.624, "step": 11381 }, { "epoch": 0.73, "grad_norm": 2.050961460888117, "learning_rate": 1.8111871579432839e-06, "loss": 0.8186, "step": 11382 }, { "epoch": 0.73, "grad_norm": 1.665570215897299, "learning_rate": 1.8103888371371898e-06, "loss": 0.7128, "step": 11383 }, { "epoch": 0.73, "grad_norm": 1.8890528088953902, "learning_rate": 1.8095906534138813e-06, "loss": 0.7291, "step": 11384 }, { "epoch": 0.73, "grad_norm": 1.7014021576665255, "learning_rate": 1.8087926068076622e-06, "loss": 0.6516, "step": 11385 }, { "epoch": 0.73, "grad_norm": 1.585920516048225, "learning_rate": 1.8079946973528295e-06, "loss": 0.688, "step": 11386 }, { "epoch": 0.73, "grad_norm": 1.6482243424202303, "learning_rate": 1.8071969250836813e-06, "loss": 0.7686, "step": 11387 }, { "epoch": 0.73, "grad_norm": 1.7061192410330113, "learning_rate": 1.8063992900344962e-06, "loss": 0.6843, "step": 11388 }, { "epoch": 0.73, "grad_norm": 1.826755018260604, "learning_rate": 1.8056017922395607e-06, "loss": 0.7196, "step": 11389 }, { "epoch": 0.73, "grad_norm": 1.9603671104646339, "learning_rate": 1.804804431733148e-06, "loss": 0.7331, "step": 11390 }, { "epoch": 0.73, "grad_norm": 1.700058737774352, "learning_rate": 1.8040072085495276e-06, "loss": 0.7252, "step": 11391 }, { "epoch": 0.73, "grad_norm": 1.5618735630781022, "learning_rate": 1.8032101227229604e-06, "loss": 0.6498, "step": 11392 }, { "epoch": 0.73, "grad_norm": 1.8663081453139883, "learning_rate": 1.8024131742877054e-06, "loss": 0.7649, "step": 11393 }, { "epoch": 0.73, "grad_norm": 1.888924841710798, "learning_rate": 1.8016163632780132e-06, "loss": 0.7523, "step": 11394 }, { "epoch": 0.73, "grad_norm": 1.6441635812594795, "learning_rate": 1.8008196897281287e-06, "loss": 0.7709, "step": 11395 }, { "epoch": 0.73, "grad_norm": 1.1460777039905554, "learning_rate": 1.8000231536722896e-06, "loss": 0.6046, "step": 11396 }, { "epoch": 0.73, "grad_norm": 1.7660520295186979, "learning_rate": 1.7992267551447334e-06, "loss": 0.7122, "step": 11397 }, { "epoch": 0.73, "grad_norm": 1.7878386446854293, "learning_rate": 1.7984304941796854e-06, "loss": 0.8539, "step": 11398 }, { "epoch": 0.73, "grad_norm": 1.2314749619552245, "learning_rate": 1.7976343708113675e-06, "loss": 0.6316, "step": 11399 }, { "epoch": 0.73, "grad_norm": 1.9686192607726236, "learning_rate": 1.796838385073995e-06, "loss": 0.7456, "step": 11400 }, { "epoch": 0.73, "grad_norm": 1.2071264195168359, "learning_rate": 1.7960425370017782e-06, "loss": 0.7535, "step": 11401 }, { "epoch": 0.73, "grad_norm": 1.731920885957786, "learning_rate": 1.7952468266289203e-06, "loss": 0.7418, "step": 11402 }, { "epoch": 0.73, "grad_norm": 1.5749682052162655, "learning_rate": 1.794451253989618e-06, "loss": 0.8192, "step": 11403 }, { "epoch": 0.73, "grad_norm": 1.5215972969605942, "learning_rate": 1.7936558191180686e-06, "loss": 0.6549, "step": 11404 }, { "epoch": 0.73, "grad_norm": 1.5768991564636887, "learning_rate": 1.7928605220484513e-06, "loss": 0.7436, "step": 11405 }, { "epoch": 0.73, "grad_norm": 1.7187077203696448, "learning_rate": 1.7920653628149515e-06, "loss": 0.7482, "step": 11406 }, { "epoch": 0.73, "grad_norm": 1.6794692339530315, "learning_rate": 1.7912703414517413e-06, "loss": 0.6758, "step": 11407 }, { "epoch": 0.73, "grad_norm": 1.9080265864849013, "learning_rate": 1.7904754579929895e-06, "loss": 0.7818, "step": 11408 }, { "epoch": 0.73, "grad_norm": 1.8129412207190645, "learning_rate": 1.7896807124728582e-06, "loss": 0.6927, "step": 11409 }, { "epoch": 0.73, "grad_norm": 1.6559766466852264, "learning_rate": 1.7888861049255041e-06, "loss": 0.6935, "step": 11410 }, { "epoch": 0.73, "grad_norm": 1.6757362702017038, "learning_rate": 1.788091635385078e-06, "loss": 0.601, "step": 11411 }, { "epoch": 0.73, "grad_norm": 1.8010153545895538, "learning_rate": 1.7872973038857221e-06, "loss": 0.677, "step": 11412 }, { "epoch": 0.73, "grad_norm": 1.8441977992705745, "learning_rate": 1.7865031104615809e-06, "loss": 0.6678, "step": 11413 }, { "epoch": 0.73, "grad_norm": 1.6047260648136927, "learning_rate": 1.7857090551467805e-06, "loss": 0.8838, "step": 11414 }, { "epoch": 0.73, "grad_norm": 1.6966614239769604, "learning_rate": 1.784915137975452e-06, "loss": 0.7573, "step": 11415 }, { "epoch": 0.73, "grad_norm": 1.2548030833104138, "learning_rate": 1.7841213589817152e-06, "loss": 0.7167, "step": 11416 }, { "epoch": 0.73, "grad_norm": 1.9106609281825742, "learning_rate": 1.783327718199685e-06, "loss": 0.8738, "step": 11417 }, { "epoch": 0.73, "grad_norm": 1.4404273293654328, "learning_rate": 1.7825342156634701e-06, "loss": 0.6915, "step": 11418 }, { "epoch": 0.73, "grad_norm": 1.7478249494887887, "learning_rate": 1.7817408514071722e-06, "loss": 0.7721, "step": 11419 }, { "epoch": 0.73, "grad_norm": 1.660639800906483, "learning_rate": 1.7809476254648932e-06, "loss": 0.7154, "step": 11420 }, { "epoch": 0.73, "grad_norm": 1.9732463726918805, "learning_rate": 1.780154537870718e-06, "loss": 0.6778, "step": 11421 }, { "epoch": 0.73, "grad_norm": 1.1973566711886698, "learning_rate": 1.7793615886587384e-06, "loss": 0.6448, "step": 11422 }, { "epoch": 0.73, "grad_norm": 4.838162694722882, "learning_rate": 1.7785687778630268e-06, "loss": 0.8413, "step": 11423 }, { "epoch": 0.73, "grad_norm": 1.5685989468915547, "learning_rate": 1.7777761055176612e-06, "loss": 0.651, "step": 11424 }, { "epoch": 0.73, "grad_norm": 2.1319600810057313, "learning_rate": 1.776983571656708e-06, "loss": 0.769, "step": 11425 }, { "epoch": 0.73, "grad_norm": 1.794053494124472, "learning_rate": 1.776191176314227e-06, "loss": 0.6659, "step": 11426 }, { "epoch": 0.73, "grad_norm": 1.5452526700943896, "learning_rate": 1.7753989195242782e-06, "loss": 0.7383, "step": 11427 }, { "epoch": 0.73, "grad_norm": 1.4364590092190677, "learning_rate": 1.7746068013209045e-06, "loss": 0.654, "step": 11428 }, { "epoch": 0.73, "grad_norm": 1.730714396727603, "learning_rate": 1.7738148217381568e-06, "loss": 0.7489, "step": 11429 }, { "epoch": 0.73, "grad_norm": 1.471825862064207, "learning_rate": 1.7730229808100652e-06, "loss": 0.6503, "step": 11430 }, { "epoch": 0.73, "grad_norm": 1.5881321791184446, "learning_rate": 1.772231278570667e-06, "loss": 0.7264, "step": 11431 }, { "epoch": 0.73, "grad_norm": 1.0737714377977265, "learning_rate": 1.7714397150539853e-06, "loss": 0.5854, "step": 11432 }, { "epoch": 0.73, "grad_norm": 1.638179916326303, "learning_rate": 1.7706482902940397e-06, "loss": 0.653, "step": 11433 }, { "epoch": 0.73, "grad_norm": 1.7831828002776315, "learning_rate": 1.7698570043248476e-06, "loss": 0.7851, "step": 11434 }, { "epoch": 0.73, "grad_norm": 2.1113382054681744, "learning_rate": 1.7690658571804109e-06, "loss": 0.8764, "step": 11435 }, { "epoch": 0.73, "grad_norm": 1.7688977145722649, "learning_rate": 1.7682748488947377e-06, "loss": 0.8325, "step": 11436 }, { "epoch": 0.73, "grad_norm": 1.6171817620665137, "learning_rate": 1.7674839795018173e-06, "loss": 0.6297, "step": 11437 }, { "epoch": 0.73, "grad_norm": 1.5878946128051488, "learning_rate": 1.7666932490356448e-06, "loss": 0.7763, "step": 11438 }, { "epoch": 0.73, "grad_norm": 1.1745893860512744, "learning_rate": 1.7659026575302025e-06, "loss": 0.5696, "step": 11439 }, { "epoch": 0.73, "grad_norm": 1.5333456668716776, "learning_rate": 1.765112205019468e-06, "loss": 0.7247, "step": 11440 }, { "epoch": 0.73, "grad_norm": 1.4581108888718974, "learning_rate": 1.764321891537414e-06, "loss": 0.6651, "step": 11441 }, { "epoch": 0.73, "grad_norm": 1.86145758087036, "learning_rate": 1.7635317171180032e-06, "loss": 0.8077, "step": 11442 }, { "epoch": 0.73, "grad_norm": 1.9600336709983062, "learning_rate": 1.7627416817952032e-06, "loss": 0.6606, "step": 11443 }, { "epoch": 0.73, "grad_norm": 1.8057862602379213, "learning_rate": 1.7619517856029589e-06, "loss": 0.7014, "step": 11444 }, { "epoch": 0.73, "grad_norm": 1.6366435687075587, "learning_rate": 1.7611620285752246e-06, "loss": 0.6651, "step": 11445 }, { "epoch": 0.73, "grad_norm": 1.8608229704519226, "learning_rate": 1.7603724107459408e-06, "loss": 0.6172, "step": 11446 }, { "epoch": 0.73, "grad_norm": 1.8010346365159489, "learning_rate": 1.7595829321490437e-06, "loss": 0.7127, "step": 11447 }, { "epoch": 0.73, "grad_norm": 1.5884991830374255, "learning_rate": 1.7587935928184624e-06, "loss": 0.7584, "step": 11448 }, { "epoch": 0.73, "grad_norm": 1.1450127528305938, "learning_rate": 1.7580043927881224e-06, "loss": 0.5969, "step": 11449 }, { "epoch": 0.73, "grad_norm": 1.686625584409141, "learning_rate": 1.757215332091941e-06, "loss": 0.578, "step": 11450 }, { "epoch": 0.73, "grad_norm": 1.8041971986711713, "learning_rate": 1.756426410763829e-06, "loss": 0.7418, "step": 11451 }, { "epoch": 0.73, "grad_norm": 1.259256968017843, "learning_rate": 1.755637628837697e-06, "loss": 0.7206, "step": 11452 }, { "epoch": 0.73, "grad_norm": 2.050002066098552, "learning_rate": 1.7548489863474393e-06, "loss": 0.7123, "step": 11453 }, { "epoch": 0.73, "grad_norm": 1.6619218136575225, "learning_rate": 1.7540604833269553e-06, "loss": 0.6825, "step": 11454 }, { "epoch": 0.73, "grad_norm": 2.019468056464534, "learning_rate": 1.753272119810131e-06, "loss": 0.7274, "step": 11455 }, { "epoch": 0.73, "grad_norm": 1.614963810326829, "learning_rate": 1.7524838958308481e-06, "loss": 0.7691, "step": 11456 }, { "epoch": 0.73, "grad_norm": 1.9685704433074833, "learning_rate": 1.7516958114229837e-06, "loss": 0.6476, "step": 11457 }, { "epoch": 0.73, "grad_norm": 1.7531939507797831, "learning_rate": 1.7509078666204076e-06, "loss": 0.8089, "step": 11458 }, { "epoch": 0.73, "grad_norm": 2.241799318989963, "learning_rate": 1.7501200614569847e-06, "loss": 0.6552, "step": 11459 }, { "epoch": 0.73, "grad_norm": 1.6276102790513192, "learning_rate": 1.7493323959665703e-06, "loss": 0.6854, "step": 11460 }, { "epoch": 0.73, "grad_norm": 1.9558538639253669, "learning_rate": 1.7485448701830205e-06, "loss": 0.7422, "step": 11461 }, { "epoch": 0.73, "grad_norm": 1.6056682126878987, "learning_rate": 1.74775748414018e-06, "loss": 0.8602, "step": 11462 }, { "epoch": 0.73, "grad_norm": 1.6687905288256406, "learning_rate": 1.7469702378718894e-06, "loss": 0.7249, "step": 11463 }, { "epoch": 0.73, "grad_norm": 1.680770152486273, "learning_rate": 1.746183131411982e-06, "loss": 0.7325, "step": 11464 }, { "epoch": 0.73, "grad_norm": 1.5840936292991907, "learning_rate": 1.7453961647942868e-06, "loss": 0.6459, "step": 11465 }, { "epoch": 0.73, "grad_norm": 1.7012789712958012, "learning_rate": 1.7446093380526253e-06, "loss": 0.73, "step": 11466 }, { "epoch": 0.73, "grad_norm": 1.5344174273373998, "learning_rate": 1.743822651220814e-06, "loss": 0.6736, "step": 11467 }, { "epoch": 0.73, "grad_norm": 1.5606185482294115, "learning_rate": 1.743036104332661e-06, "loss": 0.7027, "step": 11468 }, { "epoch": 0.73, "grad_norm": 1.725639747751314, "learning_rate": 1.7422496974219761e-06, "loss": 0.7381, "step": 11469 }, { "epoch": 0.73, "grad_norm": 1.0632755082266176, "learning_rate": 1.7414634305225504e-06, "loss": 0.6592, "step": 11470 }, { "epoch": 0.73, "grad_norm": 1.0569624455202358, "learning_rate": 1.7406773036681807e-06, "loss": 0.5456, "step": 11471 }, { "epoch": 0.73, "grad_norm": 1.704265138846725, "learning_rate": 1.7398913168926523e-06, "loss": 0.6242, "step": 11472 }, { "epoch": 0.73, "grad_norm": 1.7370674789433576, "learning_rate": 1.7391054702297439e-06, "loss": 0.8072, "step": 11473 }, { "epoch": 0.73, "grad_norm": 1.953680797804883, "learning_rate": 1.738319763713231e-06, "loss": 0.6887, "step": 11474 }, { "epoch": 0.73, "grad_norm": 1.639902019638946, "learning_rate": 1.737534197376879e-06, "loss": 0.6986, "step": 11475 }, { "epoch": 0.73, "grad_norm": 1.919865298025219, "learning_rate": 1.736748771254455e-06, "loss": 0.6546, "step": 11476 }, { "epoch": 0.73, "grad_norm": 1.9962679650186226, "learning_rate": 1.7359634853797081e-06, "loss": 0.6822, "step": 11477 }, { "epoch": 0.73, "grad_norm": 1.5208934683622246, "learning_rate": 1.7351783397863958e-06, "loss": 0.7551, "step": 11478 }, { "epoch": 0.73, "grad_norm": 1.6022310489291294, "learning_rate": 1.7343933345082547e-06, "loss": 0.6085, "step": 11479 }, { "epoch": 0.73, "grad_norm": 1.5867902148965622, "learning_rate": 1.7336084695790278e-06, "loss": 0.6873, "step": 11480 }, { "epoch": 0.73, "grad_norm": 1.6509612170120194, "learning_rate": 1.7328237450324454e-06, "loss": 0.7014, "step": 11481 }, { "epoch": 0.73, "grad_norm": 1.6943258596773043, "learning_rate": 1.7320391609022308e-06, "loss": 0.7777, "step": 11482 }, { "epoch": 0.73, "grad_norm": 1.6095888366381377, "learning_rate": 1.73125471722211e-06, "loss": 0.6995, "step": 11483 }, { "epoch": 0.74, "grad_norm": 1.6212599745882337, "learning_rate": 1.7304704140257894e-06, "loss": 0.7826, "step": 11484 }, { "epoch": 0.74, "grad_norm": 1.9689195298998476, "learning_rate": 1.7296862513469836e-06, "loss": 0.8274, "step": 11485 }, { "epoch": 0.74, "grad_norm": 1.5287174626529978, "learning_rate": 1.7289022292193875e-06, "loss": 0.6767, "step": 11486 }, { "epoch": 0.74, "grad_norm": 1.6489260779446755, "learning_rate": 1.7281183476767016e-06, "loss": 0.7495, "step": 11487 }, { "epoch": 0.74, "grad_norm": 1.2011065907427565, "learning_rate": 1.7273346067526143e-06, "loss": 0.6072, "step": 11488 }, { "epoch": 0.74, "grad_norm": 1.1723769827527302, "learning_rate": 1.7265510064808084e-06, "loss": 0.6848, "step": 11489 }, { "epoch": 0.74, "grad_norm": 1.6860028633296662, "learning_rate": 1.725767546894962e-06, "loss": 0.8453, "step": 11490 }, { "epoch": 0.74, "grad_norm": 1.5896157319642796, "learning_rate": 1.7249842280287442e-06, "loss": 0.711, "step": 11491 }, { "epoch": 0.74, "grad_norm": 1.9879029817587108, "learning_rate": 1.7242010499158263e-06, "loss": 0.7673, "step": 11492 }, { "epoch": 0.74, "grad_norm": 1.5675411239555606, "learning_rate": 1.7234180125898608e-06, "loss": 0.6887, "step": 11493 }, { "epoch": 0.74, "grad_norm": 1.270542662143509, "learning_rate": 1.7226351160845067e-06, "loss": 0.6948, "step": 11494 }, { "epoch": 0.74, "grad_norm": 1.6782716118675622, "learning_rate": 1.721852360433406e-06, "loss": 0.7501, "step": 11495 }, { "epoch": 0.74, "grad_norm": 1.8658684609862846, "learning_rate": 1.7210697456702036e-06, "loss": 0.7135, "step": 11496 }, { "epoch": 0.74, "grad_norm": 1.6953324733144912, "learning_rate": 1.7202872718285341e-06, "loss": 0.6969, "step": 11497 }, { "epoch": 0.74, "grad_norm": 1.6076760672974708, "learning_rate": 1.7195049389420238e-06, "loss": 0.6479, "step": 11498 }, { "epoch": 0.74, "grad_norm": 1.719575929953584, "learning_rate": 1.7187227470443013e-06, "loss": 0.6995, "step": 11499 }, { "epoch": 0.74, "grad_norm": 1.567844731656186, "learning_rate": 1.717940696168977e-06, "loss": 0.7123, "step": 11500 }, { "epoch": 0.74, "grad_norm": 2.055353395994905, "learning_rate": 1.7171587863496686e-06, "loss": 0.6622, "step": 11501 }, { "epoch": 0.74, "grad_norm": 1.9529368524754311, "learning_rate": 1.716377017619974e-06, "loss": 0.7942, "step": 11502 }, { "epoch": 0.74, "grad_norm": 1.7200883904927282, "learning_rate": 1.715595390013497e-06, "loss": 0.677, "step": 11503 }, { "epoch": 0.74, "grad_norm": 1.8145067088441005, "learning_rate": 1.7148139035638283e-06, "loss": 0.7385, "step": 11504 }, { "epoch": 0.74, "grad_norm": 1.1891382096105825, "learning_rate": 1.7140325583045553e-06, "loss": 0.6844, "step": 11505 }, { "epoch": 0.74, "grad_norm": 1.084719493590098, "learning_rate": 1.7132513542692581e-06, "loss": 0.6582, "step": 11506 }, { "epoch": 0.74, "grad_norm": 2.121184311212885, "learning_rate": 1.7124702914915097e-06, "loss": 0.7245, "step": 11507 }, { "epoch": 0.74, "grad_norm": 1.7940504637000156, "learning_rate": 1.7116893700048832e-06, "loss": 0.8137, "step": 11508 }, { "epoch": 0.74, "grad_norm": 1.7612259794302503, "learning_rate": 1.7109085898429345e-06, "loss": 0.7548, "step": 11509 }, { "epoch": 0.74, "grad_norm": 1.5609049994330104, "learning_rate": 1.7101279510392255e-06, "loss": 0.7437, "step": 11510 }, { "epoch": 0.74, "grad_norm": 1.6993622441186849, "learning_rate": 1.7093474536273037e-06, "loss": 0.6903, "step": 11511 }, { "epoch": 0.74, "grad_norm": 1.7998238041094174, "learning_rate": 1.7085670976407137e-06, "loss": 1.0176, "step": 11512 }, { "epoch": 0.74, "grad_norm": 1.967387534270951, "learning_rate": 1.7077868831129935e-06, "loss": 0.6987, "step": 11513 }, { "epoch": 0.74, "grad_norm": 2.3385040908059636, "learning_rate": 1.7070068100776755e-06, "loss": 0.7836, "step": 11514 }, { "epoch": 0.74, "grad_norm": 1.7728239604072336, "learning_rate": 1.7062268785682852e-06, "loss": 0.8368, "step": 11515 }, { "epoch": 0.74, "grad_norm": 2.1976086639884254, "learning_rate": 1.7054470886183405e-06, "loss": 0.6775, "step": 11516 }, { "epoch": 0.74, "grad_norm": 1.770633248939586, "learning_rate": 1.7046674402613594e-06, "loss": 0.615, "step": 11517 }, { "epoch": 0.74, "grad_norm": 1.6868837533273897, "learning_rate": 1.7038879335308466e-06, "loss": 0.6184, "step": 11518 }, { "epoch": 0.74, "grad_norm": 1.9719657101765145, "learning_rate": 1.703108568460305e-06, "loss": 0.6942, "step": 11519 }, { "epoch": 0.74, "grad_norm": 1.6184104645825814, "learning_rate": 1.702329345083229e-06, "loss": 0.6007, "step": 11520 }, { "epoch": 0.74, "grad_norm": 1.2081903482317105, "learning_rate": 1.7015502634331083e-06, "loss": 0.6703, "step": 11521 }, { "epoch": 0.74, "grad_norm": 1.6578064050339012, "learning_rate": 1.7007713235434264e-06, "loss": 0.7742, "step": 11522 }, { "epoch": 0.74, "grad_norm": 1.736912702278915, "learning_rate": 1.6999925254476606e-06, "loss": 0.6627, "step": 11523 }, { "epoch": 0.74, "grad_norm": 1.8905043861111037, "learning_rate": 1.6992138691792798e-06, "loss": 0.6792, "step": 11524 }, { "epoch": 0.74, "grad_norm": 1.7568058139599758, "learning_rate": 1.698435354771753e-06, "loss": 0.6935, "step": 11525 }, { "epoch": 0.74, "grad_norm": 1.7634142449892947, "learning_rate": 1.6976569822585366e-06, "loss": 0.7275, "step": 11526 }, { "epoch": 0.74, "grad_norm": 1.022491168654342, "learning_rate": 1.6968787516730845e-06, "loss": 0.5816, "step": 11527 }, { "epoch": 0.74, "grad_norm": 1.5209044418173803, "learning_rate": 1.6961006630488424e-06, "loss": 0.7363, "step": 11528 }, { "epoch": 0.74, "grad_norm": 1.8873487205418684, "learning_rate": 1.6953227164192516e-06, "loss": 0.8629, "step": 11529 }, { "epoch": 0.74, "grad_norm": 1.9804278383475664, "learning_rate": 1.694544911817746e-06, "loss": 0.7789, "step": 11530 }, { "epoch": 0.74, "grad_norm": 1.5034386646370537, "learning_rate": 1.6937672492777547e-06, "loss": 0.722, "step": 11531 }, { "epoch": 0.74, "grad_norm": 1.5599067135005102, "learning_rate": 1.6929897288326996e-06, "loss": 0.7496, "step": 11532 }, { "epoch": 0.74, "grad_norm": 1.1312794433956574, "learning_rate": 1.6922123505159955e-06, "loss": 0.5929, "step": 11533 }, { "epoch": 0.74, "grad_norm": 1.0959026354723542, "learning_rate": 1.6914351143610553e-06, "loss": 0.6844, "step": 11534 }, { "epoch": 0.74, "grad_norm": 1.5666960872177775, "learning_rate": 1.6906580204012818e-06, "loss": 0.706, "step": 11535 }, { "epoch": 0.74, "grad_norm": 1.6800685442915007, "learning_rate": 1.6898810686700728e-06, "loss": 0.7515, "step": 11536 }, { "epoch": 0.74, "grad_norm": 1.6994915669582136, "learning_rate": 1.68910425920082e-06, "loss": 0.7045, "step": 11537 }, { "epoch": 0.74, "grad_norm": 1.842950916887669, "learning_rate": 1.6883275920269087e-06, "loss": 0.6497, "step": 11538 }, { "epoch": 0.74, "grad_norm": 1.2273221949499713, "learning_rate": 1.6875510671817186e-06, "loss": 0.6391, "step": 11539 }, { "epoch": 0.74, "grad_norm": 2.1716322609671534, "learning_rate": 1.6867746846986215e-06, "loss": 0.7411, "step": 11540 }, { "epoch": 0.74, "grad_norm": 1.8532034183760537, "learning_rate": 1.6859984446109906e-06, "loss": 0.74, "step": 11541 }, { "epoch": 0.74, "grad_norm": 1.764469835952714, "learning_rate": 1.6852223469521789e-06, "loss": 0.7259, "step": 11542 }, { "epoch": 0.74, "grad_norm": 2.4093394314864383, "learning_rate": 1.6844463917555487e-06, "loss": 0.8063, "step": 11543 }, { "epoch": 0.74, "grad_norm": 1.439668573908196, "learning_rate": 1.6836705790544422e-06, "loss": 0.6559, "step": 11544 }, { "epoch": 0.74, "grad_norm": 1.6396104715401667, "learning_rate": 1.6828949088822077e-06, "loss": 0.6494, "step": 11545 }, { "epoch": 0.74, "grad_norm": 1.110287485948483, "learning_rate": 1.6821193812721797e-06, "loss": 0.6293, "step": 11546 }, { "epoch": 0.74, "grad_norm": 1.6709091833493792, "learning_rate": 1.6813439962576872e-06, "loss": 0.7447, "step": 11547 }, { "epoch": 0.74, "grad_norm": 1.7032944411206348, "learning_rate": 1.6805687538720595e-06, "loss": 0.6433, "step": 11548 }, { "epoch": 0.74, "grad_norm": 1.7506083433115858, "learning_rate": 1.6797936541486082e-06, "loss": 0.7579, "step": 11549 }, { "epoch": 0.74, "grad_norm": 1.7740718694887194, "learning_rate": 1.6790186971206523e-06, "loss": 0.7899, "step": 11550 }, { "epoch": 0.74, "grad_norm": 1.566050707625084, "learning_rate": 1.6782438828214913e-06, "loss": 0.6543, "step": 11551 }, { "epoch": 0.74, "grad_norm": 1.9312280640093118, "learning_rate": 1.6774692112844303e-06, "loss": 0.7001, "step": 11552 }, { "epoch": 0.74, "grad_norm": 1.3046865502425904, "learning_rate": 1.6766946825427605e-06, "loss": 0.5937, "step": 11553 }, { "epoch": 0.74, "grad_norm": 2.362048936073744, "learning_rate": 1.6759202966297682e-06, "loss": 0.716, "step": 11554 }, { "epoch": 0.74, "grad_norm": 1.5752651510668056, "learning_rate": 1.6751460535787407e-06, "loss": 0.7131, "step": 11555 }, { "epoch": 0.74, "grad_norm": 1.1235813604495148, "learning_rate": 1.6743719534229452e-06, "loss": 0.6462, "step": 11556 }, { "epoch": 0.74, "grad_norm": 1.1812817354055394, "learning_rate": 1.6735979961956588e-06, "loss": 0.6488, "step": 11557 }, { "epoch": 0.74, "grad_norm": 1.710214219530626, "learning_rate": 1.6728241819301377e-06, "loss": 0.6784, "step": 11558 }, { "epoch": 0.74, "grad_norm": 1.5680581827498006, "learning_rate": 1.6720505106596429e-06, "loss": 0.6985, "step": 11559 }, { "epoch": 0.74, "grad_norm": 1.4903380900866519, "learning_rate": 1.6712769824174245e-06, "loss": 0.6874, "step": 11560 }, { "epoch": 0.74, "grad_norm": 1.0760472394404104, "learning_rate": 1.6705035972367272e-06, "loss": 0.7082, "step": 11561 }, { "epoch": 0.74, "grad_norm": 2.0486069253102475, "learning_rate": 1.6697303551507888e-06, "loss": 0.7629, "step": 11562 }, { "epoch": 0.74, "grad_norm": 1.0687904328784992, "learning_rate": 1.6689572561928397e-06, "loss": 0.6303, "step": 11563 }, { "epoch": 0.74, "grad_norm": 2.5517276817101417, "learning_rate": 1.6681843003961124e-06, "loss": 0.8509, "step": 11564 }, { "epoch": 0.74, "grad_norm": 1.8868285719691746, "learning_rate": 1.6674114877938185e-06, "loss": 0.7234, "step": 11565 }, { "epoch": 0.74, "grad_norm": 1.9069993798878713, "learning_rate": 1.6666388184191784e-06, "loss": 0.711, "step": 11566 }, { "epoch": 0.74, "grad_norm": 1.825625957878106, "learning_rate": 1.6658662923053974e-06, "loss": 0.786, "step": 11567 }, { "epoch": 0.74, "grad_norm": 1.8491485887784191, "learning_rate": 1.6650939094856772e-06, "loss": 0.7871, "step": 11568 }, { "epoch": 0.74, "grad_norm": 1.844742428775582, "learning_rate": 1.664321669993213e-06, "loss": 0.7056, "step": 11569 }, { "epoch": 0.74, "grad_norm": 1.6671438672012773, "learning_rate": 1.6635495738611945e-06, "loss": 0.7936, "step": 11570 }, { "epoch": 0.74, "grad_norm": 1.8361346986152702, "learning_rate": 1.6627776211228041e-06, "loss": 0.9117, "step": 11571 }, { "epoch": 0.74, "grad_norm": 1.7185118631298064, "learning_rate": 1.6620058118112182e-06, "loss": 0.7199, "step": 11572 }, { "epoch": 0.74, "grad_norm": 1.6171669513418974, "learning_rate": 1.661234145959611e-06, "loss": 0.6676, "step": 11573 }, { "epoch": 0.74, "grad_norm": 1.7136659800699934, "learning_rate": 1.6604626236011413e-06, "loss": 0.8438, "step": 11574 }, { "epoch": 0.74, "grad_norm": 1.742751516148593, "learning_rate": 1.6596912447689723e-06, "loss": 0.7612, "step": 11575 }, { "epoch": 0.74, "grad_norm": 1.695977860288862, "learning_rate": 1.658920009496255e-06, "loss": 0.6059, "step": 11576 }, { "epoch": 0.74, "grad_norm": 2.1893092500375273, "learning_rate": 1.6581489178161348e-06, "loss": 0.7829, "step": 11577 }, { "epoch": 0.74, "grad_norm": 1.5965598595517054, "learning_rate": 1.657377969761752e-06, "loss": 0.6349, "step": 11578 }, { "epoch": 0.74, "grad_norm": 1.8690721943328608, "learning_rate": 1.6566071653662403e-06, "loss": 0.6782, "step": 11579 }, { "epoch": 0.74, "grad_norm": 1.6560402170599502, "learning_rate": 1.6558365046627268e-06, "loss": 0.6125, "step": 11580 }, { "epoch": 0.74, "grad_norm": 1.6430108858090553, "learning_rate": 1.6550659876843317e-06, "loss": 0.6754, "step": 11581 }, { "epoch": 0.74, "grad_norm": 1.6713063124104628, "learning_rate": 1.6542956144641742e-06, "loss": 0.7296, "step": 11582 }, { "epoch": 0.74, "grad_norm": 1.7217241660001386, "learning_rate": 1.65352538503536e-06, "loss": 0.8112, "step": 11583 }, { "epoch": 0.74, "grad_norm": 3.092637128637326, "learning_rate": 1.652755299430993e-06, "loss": 0.6523, "step": 11584 }, { "epoch": 0.74, "grad_norm": 1.6552236162242284, "learning_rate": 1.6519853576841698e-06, "loss": 0.6222, "step": 11585 }, { "epoch": 0.74, "grad_norm": 1.7963271103344904, "learning_rate": 1.651215559827981e-06, "loss": 0.7531, "step": 11586 }, { "epoch": 0.74, "grad_norm": 1.6828372466519363, "learning_rate": 1.6504459058955108e-06, "loss": 0.7324, "step": 11587 }, { "epoch": 0.74, "grad_norm": 1.830947639026214, "learning_rate": 1.649676395919837e-06, "loss": 0.6962, "step": 11588 }, { "epoch": 0.74, "grad_norm": 1.8593513733984819, "learning_rate": 1.6489070299340298e-06, "loss": 0.9793, "step": 11589 }, { "epoch": 0.74, "grad_norm": 1.8559519126799646, "learning_rate": 1.6481378079711586e-06, "loss": 0.8289, "step": 11590 }, { "epoch": 0.74, "grad_norm": 1.081175928604098, "learning_rate": 1.6473687300642815e-06, "loss": 0.6129, "step": 11591 }, { "epoch": 0.74, "grad_norm": 12.92215563797339, "learning_rate": 1.6465997962464514e-06, "loss": 0.7654, "step": 11592 }, { "epoch": 0.74, "grad_norm": 1.0883743289658983, "learning_rate": 1.6458310065507154e-06, "loss": 0.5972, "step": 11593 }, { "epoch": 0.74, "grad_norm": 1.7824083265905315, "learning_rate": 1.6450623610101147e-06, "loss": 0.6954, "step": 11594 }, { "epoch": 0.74, "grad_norm": 1.7498503709498803, "learning_rate": 1.6442938596576842e-06, "loss": 0.7015, "step": 11595 }, { "epoch": 0.74, "grad_norm": 0.9613298659525976, "learning_rate": 1.6435255025264502e-06, "loss": 0.6189, "step": 11596 }, { "epoch": 0.74, "grad_norm": 2.051295554908208, "learning_rate": 1.6427572896494408e-06, "loss": 0.6467, "step": 11597 }, { "epoch": 0.74, "grad_norm": 1.8806315905171354, "learning_rate": 1.6419892210596655e-06, "loss": 0.6006, "step": 11598 }, { "epoch": 0.74, "grad_norm": 1.6840231288231122, "learning_rate": 1.6412212967901386e-06, "loss": 0.736, "step": 11599 }, { "epoch": 0.74, "grad_norm": 1.0465399630547831, "learning_rate": 1.6404535168738629e-06, "loss": 0.6863, "step": 11600 }, { "epoch": 0.74, "grad_norm": 1.6811251523456234, "learning_rate": 1.6396858813438355e-06, "loss": 0.7605, "step": 11601 }, { "epoch": 0.74, "grad_norm": 1.743376734662532, "learning_rate": 1.6389183902330475e-06, "loss": 0.6589, "step": 11602 }, { "epoch": 0.74, "grad_norm": 1.7790396923825498, "learning_rate": 1.638151043574485e-06, "loss": 0.8266, "step": 11603 }, { "epoch": 0.74, "grad_norm": 1.884456493099315, "learning_rate": 1.6373838414011261e-06, "loss": 0.7237, "step": 11604 }, { "epoch": 0.74, "grad_norm": 1.594357956678892, "learning_rate": 1.6366167837459429e-06, "loss": 0.742, "step": 11605 }, { "epoch": 0.74, "grad_norm": 1.3510544517979253, "learning_rate": 1.6358498706419056e-06, "loss": 0.7083, "step": 11606 }, { "epoch": 0.74, "grad_norm": 1.581726600917093, "learning_rate": 1.6350831021219686e-06, "loss": 0.6084, "step": 11607 }, { "epoch": 0.74, "grad_norm": 1.7681280565138926, "learning_rate": 1.6343164782190906e-06, "loss": 0.753, "step": 11608 }, { "epoch": 0.74, "grad_norm": 1.08881578421627, "learning_rate": 1.6335499989662185e-06, "loss": 0.7319, "step": 11609 }, { "epoch": 0.74, "grad_norm": 1.3975032747866154, "learning_rate": 1.6327836643962936e-06, "loss": 0.6829, "step": 11610 }, { "epoch": 0.74, "grad_norm": 1.6827633424832438, "learning_rate": 1.6320174745422513e-06, "loss": 0.8143, "step": 11611 }, { "epoch": 0.74, "grad_norm": 1.6605540912246428, "learning_rate": 1.631251429437019e-06, "loss": 0.7908, "step": 11612 }, { "epoch": 0.74, "grad_norm": 1.6894769987448983, "learning_rate": 1.6304855291135247e-06, "loss": 0.8088, "step": 11613 }, { "epoch": 0.74, "grad_norm": 1.535013217013212, "learning_rate": 1.629719773604679e-06, "loss": 0.6422, "step": 11614 }, { "epoch": 0.74, "grad_norm": 1.5532789497139736, "learning_rate": 1.6289541629433986e-06, "loss": 0.6782, "step": 11615 }, { "epoch": 0.74, "grad_norm": 1.7001809248067785, "learning_rate": 1.6281886971625815e-06, "loss": 0.6884, "step": 11616 }, { "epoch": 0.74, "grad_norm": 1.4139219373898118, "learning_rate": 1.6274233762951308e-06, "loss": 0.6547, "step": 11617 }, { "epoch": 0.74, "grad_norm": 1.9589448106847058, "learning_rate": 1.6266582003739367e-06, "loss": 0.7126, "step": 11618 }, { "epoch": 0.74, "grad_norm": 1.6558504816450386, "learning_rate": 1.6258931694318831e-06, "loss": 0.828, "step": 11619 }, { "epoch": 0.74, "grad_norm": 2.545063675995461, "learning_rate": 1.625128283501854e-06, "loss": 0.6721, "step": 11620 }, { "epoch": 0.74, "grad_norm": 1.843822285542483, "learning_rate": 1.624363542616717e-06, "loss": 0.7082, "step": 11621 }, { "epoch": 0.74, "grad_norm": 1.5961657399469835, "learning_rate": 1.6235989468093444e-06, "loss": 0.7, "step": 11622 }, { "epoch": 0.74, "grad_norm": 1.650594685351836, "learning_rate": 1.6228344961125914e-06, "loss": 0.895, "step": 11623 }, { "epoch": 0.74, "grad_norm": 1.6607294998531086, "learning_rate": 1.6220701905593168e-06, "loss": 0.7091, "step": 11624 }, { "epoch": 0.74, "grad_norm": 1.6231597640314952, "learning_rate": 1.6213060301823674e-06, "loss": 0.6691, "step": 11625 }, { "epoch": 0.74, "grad_norm": 1.1059915761864483, "learning_rate": 1.6205420150145857e-06, "loss": 0.5898, "step": 11626 }, { "epoch": 0.74, "grad_norm": 1.5845567808427015, "learning_rate": 1.6197781450888067e-06, "loss": 0.7055, "step": 11627 }, { "epoch": 0.74, "grad_norm": 1.6047417819578709, "learning_rate": 1.6190144204378583e-06, "loss": 0.8099, "step": 11628 }, { "epoch": 0.74, "grad_norm": 1.909897739113282, "learning_rate": 1.618250841094569e-06, "loss": 0.8427, "step": 11629 }, { "epoch": 0.74, "grad_norm": 1.7750361135770116, "learning_rate": 1.6174874070917495e-06, "loss": 0.7808, "step": 11630 }, { "epoch": 0.74, "grad_norm": 1.134506130497282, "learning_rate": 1.616724118462215e-06, "loss": 0.6391, "step": 11631 }, { "epoch": 0.74, "grad_norm": 2.2013393557695133, "learning_rate": 1.615960975238769e-06, "loss": 0.7532, "step": 11632 }, { "epoch": 0.74, "grad_norm": 1.668608317558878, "learning_rate": 1.6151979774542087e-06, "loss": 0.6907, "step": 11633 }, { "epoch": 0.74, "grad_norm": 1.5871470945189743, "learning_rate": 1.6144351251413277e-06, "loss": 0.697, "step": 11634 }, { "epoch": 0.74, "grad_norm": 1.9810100948231724, "learning_rate": 1.6136724183329106e-06, "loss": 0.754, "step": 11635 }, { "epoch": 0.74, "grad_norm": 1.5074966775603091, "learning_rate": 1.6129098570617374e-06, "loss": 0.7493, "step": 11636 }, { "epoch": 0.74, "grad_norm": 1.0378609957475948, "learning_rate": 1.6121474413605792e-06, "loss": 0.5316, "step": 11637 }, { "epoch": 0.74, "grad_norm": 1.5820333706752079, "learning_rate": 1.6113851712622076e-06, "loss": 0.6321, "step": 11638 }, { "epoch": 0.74, "grad_norm": 1.5472448863046477, "learning_rate": 1.61062304679938e-06, "loss": 0.5632, "step": 11639 }, { "epoch": 0.75, "grad_norm": 1.463108564202528, "learning_rate": 1.6098610680048527e-06, "loss": 0.5932, "step": 11640 }, { "epoch": 0.75, "grad_norm": 1.6594777235367053, "learning_rate": 1.609099234911372e-06, "loss": 0.7363, "step": 11641 }, { "epoch": 0.75, "grad_norm": 1.628736153462717, "learning_rate": 1.6083375475516815e-06, "loss": 0.7449, "step": 11642 }, { "epoch": 0.75, "grad_norm": 1.8375341582183506, "learning_rate": 1.6075760059585166e-06, "loss": 0.7672, "step": 11643 }, { "epoch": 0.75, "grad_norm": 1.5717920757998378, "learning_rate": 1.6068146101646059e-06, "loss": 0.751, "step": 11644 }, { "epoch": 0.75, "grad_norm": 1.893480405218534, "learning_rate": 1.6060533602026734e-06, "loss": 0.6674, "step": 11645 }, { "epoch": 0.75, "grad_norm": 1.4625330187395489, "learning_rate": 1.6052922561054346e-06, "loss": 0.6247, "step": 11646 }, { "epoch": 0.75, "grad_norm": 1.662583925784778, "learning_rate": 1.6045312979056027e-06, "loss": 0.7624, "step": 11647 }, { "epoch": 0.75, "grad_norm": 1.604054215694201, "learning_rate": 1.6037704856358805e-06, "loss": 0.7134, "step": 11648 }, { "epoch": 0.75, "grad_norm": 2.028396244868318, "learning_rate": 1.6030098193289667e-06, "loss": 0.9076, "step": 11649 }, { "epoch": 0.75, "grad_norm": 2.073531815913631, "learning_rate": 1.6022492990175525e-06, "loss": 0.6661, "step": 11650 }, { "epoch": 0.75, "grad_norm": 1.275247128891915, "learning_rate": 1.601488924734324e-06, "loss": 0.5884, "step": 11651 }, { "epoch": 0.75, "grad_norm": 1.5357568556089956, "learning_rate": 1.6007286965119602e-06, "loss": 0.6508, "step": 11652 }, { "epoch": 0.75, "grad_norm": 1.7811962039887463, "learning_rate": 1.5999686143831344e-06, "loss": 0.6609, "step": 11653 }, { "epoch": 0.75, "grad_norm": 1.077203441799848, "learning_rate": 1.5992086783805111e-06, "loss": 0.5566, "step": 11654 }, { "epoch": 0.75, "grad_norm": 1.7027626236182998, "learning_rate": 1.5984488885367543e-06, "loss": 0.6412, "step": 11655 }, { "epoch": 0.75, "grad_norm": 1.708418108130936, "learning_rate": 1.5976892448845172e-06, "loss": 0.7646, "step": 11656 }, { "epoch": 0.75, "grad_norm": 1.5644096020239286, "learning_rate": 1.5969297474564465e-06, "loss": 0.6922, "step": 11657 }, { "epoch": 0.75, "grad_norm": 1.1383992614730198, "learning_rate": 1.5961703962851848e-06, "loss": 0.6597, "step": 11658 }, { "epoch": 0.75, "grad_norm": 1.7994201744914335, "learning_rate": 1.595411191403367e-06, "loss": 0.7716, "step": 11659 }, { "epoch": 0.75, "grad_norm": 1.6607163408950463, "learning_rate": 1.5946521328436221e-06, "loss": 0.6985, "step": 11660 }, { "epoch": 0.75, "grad_norm": 1.1601235598453303, "learning_rate": 1.5938932206385716e-06, "loss": 0.6523, "step": 11661 }, { "epoch": 0.75, "grad_norm": 1.603393016642913, "learning_rate": 1.593134454820836e-06, "loss": 0.7063, "step": 11662 }, { "epoch": 0.75, "grad_norm": 2.0872596189132717, "learning_rate": 1.5923758354230196e-06, "loss": 0.8099, "step": 11663 }, { "epoch": 0.75, "grad_norm": 1.9224576073215178, "learning_rate": 1.5916173624777308e-06, "loss": 0.7367, "step": 11664 }, { "epoch": 0.75, "grad_norm": 1.782453236017292, "learning_rate": 1.5908590360175663e-06, "loss": 0.7759, "step": 11665 }, { "epoch": 0.75, "grad_norm": 1.5592944368222263, "learning_rate": 1.590100856075117e-06, "loss": 0.7145, "step": 11666 }, { "epoch": 0.75, "grad_norm": 1.6133443515248387, "learning_rate": 1.5893428226829672e-06, "loss": 0.7353, "step": 11667 }, { "epoch": 0.75, "grad_norm": 1.4738692278751875, "learning_rate": 1.5885849358736944e-06, "loss": 0.5813, "step": 11668 }, { "epoch": 0.75, "grad_norm": 1.8491600323625828, "learning_rate": 1.5878271956798762e-06, "loss": 0.7818, "step": 11669 }, { "epoch": 0.75, "grad_norm": 1.6637166644130543, "learning_rate": 1.5870696021340714e-06, "loss": 0.6804, "step": 11670 }, { "epoch": 0.75, "grad_norm": 1.7871459304395036, "learning_rate": 1.5863121552688477e-06, "loss": 0.8507, "step": 11671 }, { "epoch": 0.75, "grad_norm": 1.3781847212479441, "learning_rate": 1.5855548551167505e-06, "loss": 0.6532, "step": 11672 }, { "epoch": 0.75, "grad_norm": 1.6100918805830533, "learning_rate": 1.5847977017103327e-06, "loss": 0.6928, "step": 11673 }, { "epoch": 0.75, "grad_norm": 1.6636456270126199, "learning_rate": 1.5840406950821335e-06, "loss": 0.9167, "step": 11674 }, { "epoch": 0.75, "grad_norm": 1.9855727965745982, "learning_rate": 1.5832838352646856e-06, "loss": 0.7634, "step": 11675 }, { "epoch": 0.75, "grad_norm": 1.6120261320732363, "learning_rate": 1.582527122290522e-06, "loss": 0.8118, "step": 11676 }, { "epoch": 0.75, "grad_norm": 1.811330786308119, "learning_rate": 1.5817705561921587e-06, "loss": 0.8649, "step": 11677 }, { "epoch": 0.75, "grad_norm": 1.6293047074949092, "learning_rate": 1.581014137002117e-06, "loss": 0.6823, "step": 11678 }, { "epoch": 0.75, "grad_norm": 1.7183256480871942, "learning_rate": 1.5802578647529005e-06, "loss": 0.7675, "step": 11679 }, { "epoch": 0.75, "grad_norm": 1.6276800992646776, "learning_rate": 1.5795017394770167e-06, "loss": 0.5111, "step": 11680 }, { "epoch": 0.75, "grad_norm": 2.576506702050152, "learning_rate": 1.5787457612069607e-06, "loss": 0.5928, "step": 11681 }, { "epoch": 0.75, "grad_norm": 1.5356822067781744, "learning_rate": 1.5779899299752227e-06, "loss": 0.6809, "step": 11682 }, { "epoch": 0.75, "grad_norm": 1.8796841726141191, "learning_rate": 1.577234245814287e-06, "loss": 0.6657, "step": 11683 }, { "epoch": 0.75, "grad_norm": 1.8070783714679826, "learning_rate": 1.576478708756629e-06, "loss": 0.6992, "step": 11684 }, { "epoch": 0.75, "grad_norm": 1.8704649577668837, "learning_rate": 1.5757233188347265e-06, "loss": 0.7184, "step": 11685 }, { "epoch": 0.75, "grad_norm": 1.4907582328261504, "learning_rate": 1.5749680760810365e-06, "loss": 0.782, "step": 11686 }, { "epoch": 0.75, "grad_norm": 2.2089507138897817, "learning_rate": 1.5742129805280249e-06, "loss": 0.7573, "step": 11687 }, { "epoch": 0.75, "grad_norm": 1.0450987397273477, "learning_rate": 1.5734580322081377e-06, "loss": 0.7042, "step": 11688 }, { "epoch": 0.75, "grad_norm": 1.1793882082358285, "learning_rate": 1.572703231153826e-06, "loss": 0.6345, "step": 11689 }, { "epoch": 0.75, "grad_norm": 1.6612751266580534, "learning_rate": 1.5719485773975275e-06, "loss": 0.7165, "step": 11690 }, { "epoch": 0.75, "grad_norm": 1.086996093437598, "learning_rate": 1.5711940709716755e-06, "loss": 0.6763, "step": 11691 }, { "epoch": 0.75, "grad_norm": 1.6771767255302579, "learning_rate": 1.5704397119086974e-06, "loss": 0.6633, "step": 11692 }, { "epoch": 0.75, "grad_norm": 1.6414088225663044, "learning_rate": 1.5696855002410127e-06, "loss": 0.9299, "step": 11693 }, { "epoch": 0.75, "grad_norm": 1.4974757821656097, "learning_rate": 1.5689314360010404e-06, "loss": 0.5941, "step": 11694 }, { "epoch": 0.75, "grad_norm": 1.654630067251506, "learning_rate": 1.5681775192211819e-06, "loss": 0.6525, "step": 11695 }, { "epoch": 0.75, "grad_norm": 2.0149629291876034, "learning_rate": 1.567423749933844e-06, "loss": 0.8255, "step": 11696 }, { "epoch": 0.75, "grad_norm": 1.144793884717763, "learning_rate": 1.5666701281714202e-06, "loss": 0.615, "step": 11697 }, { "epoch": 0.75, "grad_norm": 2.0095760478746554, "learning_rate": 1.5659166539663001e-06, "loss": 0.8257, "step": 11698 }, { "epoch": 0.75, "grad_norm": 2.056873297455482, "learning_rate": 1.5651633273508666e-06, "loss": 0.7015, "step": 11699 }, { "epoch": 0.75, "grad_norm": 4.192984988323936, "learning_rate": 1.5644101483574953e-06, "loss": 0.68, "step": 11700 }, { "epoch": 0.75, "grad_norm": 1.7915779576645188, "learning_rate": 1.5636571170185565e-06, "loss": 0.7747, "step": 11701 }, { "epoch": 0.75, "grad_norm": 1.679602244230251, "learning_rate": 1.562904233366413e-06, "loss": 0.6691, "step": 11702 }, { "epoch": 0.75, "grad_norm": 1.8306437641116202, "learning_rate": 1.5621514974334246e-06, "loss": 0.6295, "step": 11703 }, { "epoch": 0.75, "grad_norm": 1.7860282968487462, "learning_rate": 1.5613989092519406e-06, "loss": 0.652, "step": 11704 }, { "epoch": 0.75, "grad_norm": 1.622513533287796, "learning_rate": 1.5606464688543066e-06, "loss": 0.5588, "step": 11705 }, { "epoch": 0.75, "grad_norm": 1.7718900035273446, "learning_rate": 1.5598941762728597e-06, "loss": 0.6352, "step": 11706 }, { "epoch": 0.75, "grad_norm": 1.567417120752693, "learning_rate": 1.5591420315399324e-06, "loss": 0.7327, "step": 11707 }, { "epoch": 0.75, "grad_norm": 2.57693403081348, "learning_rate": 1.5583900346878505e-06, "loss": 0.7757, "step": 11708 }, { "epoch": 0.75, "grad_norm": 1.5734754468982817, "learning_rate": 1.5576381857489337e-06, "loss": 0.6055, "step": 11709 }, { "epoch": 0.75, "grad_norm": 1.8037692453123995, "learning_rate": 1.5568864847554922e-06, "loss": 0.754, "step": 11710 }, { "epoch": 0.75, "grad_norm": 1.8782439128096997, "learning_rate": 1.556134931739836e-06, "loss": 0.622, "step": 11711 }, { "epoch": 0.75, "grad_norm": 1.7904027261333244, "learning_rate": 1.5553835267342642e-06, "loss": 0.6782, "step": 11712 }, { "epoch": 0.75, "grad_norm": 1.7598471917628076, "learning_rate": 1.5546322697710697e-06, "loss": 0.7708, "step": 11713 }, { "epoch": 0.75, "grad_norm": 1.6723727402060513, "learning_rate": 1.5538811608825411e-06, "loss": 0.762, "step": 11714 }, { "epoch": 0.75, "grad_norm": 1.5623820967661548, "learning_rate": 1.553130200100959e-06, "loss": 0.6848, "step": 11715 }, { "epoch": 0.75, "grad_norm": 1.7973737207409566, "learning_rate": 1.5523793874585979e-06, "loss": 0.6516, "step": 11716 }, { "epoch": 0.75, "grad_norm": 2.472233361897403, "learning_rate": 1.5516287229877242e-06, "loss": 0.7717, "step": 11717 }, { "epoch": 0.75, "grad_norm": 1.716362752514767, "learning_rate": 1.5508782067206056e-06, "loss": 0.6649, "step": 11718 }, { "epoch": 0.75, "grad_norm": 2.0863325376070985, "learning_rate": 1.5501278386894907e-06, "loss": 0.7684, "step": 11719 }, { "epoch": 0.75, "grad_norm": 1.1341289499310732, "learning_rate": 1.549377618926634e-06, "loss": 0.6663, "step": 11720 }, { "epoch": 0.75, "grad_norm": 2.2849715499125662, "learning_rate": 1.5486275474642765e-06, "loss": 0.6572, "step": 11721 }, { "epoch": 0.75, "grad_norm": 1.896752654919832, "learning_rate": 1.5478776243346544e-06, "loss": 0.6708, "step": 11722 }, { "epoch": 0.75, "grad_norm": 1.6468169741148533, "learning_rate": 1.5471278495699982e-06, "loss": 0.7638, "step": 11723 }, { "epoch": 0.75, "grad_norm": 1.6165715411623545, "learning_rate": 1.5463782232025315e-06, "loss": 0.6901, "step": 11724 }, { "epoch": 0.75, "grad_norm": 1.6962721306024515, "learning_rate": 1.545628745264472e-06, "loss": 0.7355, "step": 11725 }, { "epoch": 0.75, "grad_norm": 1.6279780507659174, "learning_rate": 1.5448794157880282e-06, "loss": 0.7461, "step": 11726 }, { "epoch": 0.75, "grad_norm": 1.6304436312982635, "learning_rate": 1.5441302348054105e-06, "loss": 0.6651, "step": 11727 }, { "epoch": 0.75, "grad_norm": 1.6611526832995447, "learning_rate": 1.5433812023488104e-06, "loss": 0.7383, "step": 11728 }, { "epoch": 0.75, "grad_norm": 1.6186318198980487, "learning_rate": 1.5426323184504244e-06, "loss": 0.6273, "step": 11729 }, { "epoch": 0.75, "grad_norm": 1.1932416382474802, "learning_rate": 1.541883583142436e-06, "loss": 0.6718, "step": 11730 }, { "epoch": 0.75, "grad_norm": 1.5472553829813522, "learning_rate": 1.5411349964570254e-06, "loss": 0.5984, "step": 11731 }, { "epoch": 0.75, "grad_norm": 1.573576995292129, "learning_rate": 1.5403865584263638e-06, "loss": 0.6856, "step": 11732 }, { "epoch": 0.75, "grad_norm": 1.5486547427415092, "learning_rate": 1.5396382690826173e-06, "loss": 0.6005, "step": 11733 }, { "epoch": 0.75, "grad_norm": 1.680876824771432, "learning_rate": 1.53889012845795e-06, "loss": 0.7207, "step": 11734 }, { "epoch": 0.75, "grad_norm": 1.8222908786589245, "learning_rate": 1.5381421365845083e-06, "loss": 0.687, "step": 11735 }, { "epoch": 0.75, "grad_norm": 1.705973669223655, "learning_rate": 1.5373942934944468e-06, "loss": 0.7752, "step": 11736 }, { "epoch": 0.75, "grad_norm": 1.722990407920775, "learning_rate": 1.5366465992198997e-06, "loss": 0.6965, "step": 11737 }, { "epoch": 0.75, "grad_norm": 2.0871434229600707, "learning_rate": 1.5358990537930058e-06, "loss": 0.7135, "step": 11738 }, { "epoch": 0.75, "grad_norm": 1.7021115562811844, "learning_rate": 1.5351516572458913e-06, "loss": 0.6405, "step": 11739 }, { "epoch": 0.75, "grad_norm": 1.7451299603464812, "learning_rate": 1.5344044096106763e-06, "loss": 0.7112, "step": 11740 }, { "epoch": 0.75, "grad_norm": 2.2277664791203597, "learning_rate": 1.5336573109194807e-06, "loss": 0.8204, "step": 11741 }, { "epoch": 0.75, "grad_norm": 2.095721694492987, "learning_rate": 1.5329103612044072e-06, "loss": 0.7002, "step": 11742 }, { "epoch": 0.75, "grad_norm": 1.8325679788890006, "learning_rate": 1.5321635604975637e-06, "loss": 0.8098, "step": 11743 }, { "epoch": 0.75, "grad_norm": 1.5994363029344698, "learning_rate": 1.5314169088310405e-06, "loss": 0.7138, "step": 11744 }, { "epoch": 0.75, "grad_norm": 1.7187516077222358, "learning_rate": 1.530670406236932e-06, "loss": 0.6555, "step": 11745 }, { "epoch": 0.75, "grad_norm": 1.7060347783962533, "learning_rate": 1.5299240527473191e-06, "loss": 0.7863, "step": 11746 }, { "epoch": 0.75, "grad_norm": 2.2549238444338804, "learning_rate": 1.5291778483942771e-06, "loss": 0.8313, "step": 11747 }, { "epoch": 0.75, "grad_norm": 1.6819424857029752, "learning_rate": 1.5284317932098813e-06, "loss": 0.7055, "step": 11748 }, { "epoch": 0.75, "grad_norm": 1.6692704674760481, "learning_rate": 1.527685887226189e-06, "loss": 0.6761, "step": 11749 }, { "epoch": 0.75, "grad_norm": 1.6855807699902692, "learning_rate": 1.526940130475264e-06, "loss": 0.6663, "step": 11750 }, { "epoch": 0.75, "grad_norm": 1.537595034676139, "learning_rate": 1.5261945229891512e-06, "loss": 0.7807, "step": 11751 }, { "epoch": 0.75, "grad_norm": 1.6975151453961101, "learning_rate": 1.5254490647999e-06, "loss": 0.6333, "step": 11752 }, { "epoch": 0.75, "grad_norm": 1.6972703833189335, "learning_rate": 1.5247037559395467e-06, "loss": 0.7341, "step": 11753 }, { "epoch": 0.75, "grad_norm": 1.7509893825528706, "learning_rate": 1.5239585964401237e-06, "loss": 0.6615, "step": 11754 }, { "epoch": 0.75, "grad_norm": 1.7805651981113166, "learning_rate": 1.5232135863336556e-06, "loss": 0.7448, "step": 11755 }, { "epoch": 0.75, "grad_norm": 2.37629293092652, "learning_rate": 1.522468725652161e-06, "loss": 0.8092, "step": 11756 }, { "epoch": 0.75, "grad_norm": 2.86876886308634, "learning_rate": 1.5217240144276558e-06, "loss": 0.7359, "step": 11757 }, { "epoch": 0.75, "grad_norm": 1.9595851107264255, "learning_rate": 1.5209794526921406e-06, "loss": 0.7196, "step": 11758 }, { "epoch": 0.75, "grad_norm": 1.132086206935804, "learning_rate": 1.5202350404776196e-06, "loss": 0.6343, "step": 11759 }, { "epoch": 0.75, "grad_norm": 1.0673602594268776, "learning_rate": 1.519490777816085e-06, "loss": 0.6168, "step": 11760 }, { "epoch": 0.75, "grad_norm": 1.0314276047076567, "learning_rate": 1.5187466647395227e-06, "loss": 0.6197, "step": 11761 }, { "epoch": 0.75, "grad_norm": 1.8908016259276765, "learning_rate": 1.5180027012799138e-06, "loss": 0.7454, "step": 11762 }, { "epoch": 0.75, "grad_norm": 1.3563834521745834, "learning_rate": 1.5172588874692318e-06, "loss": 0.6584, "step": 11763 }, { "epoch": 0.75, "grad_norm": 1.156874243868522, "learning_rate": 1.5165152233394453e-06, "loss": 0.6553, "step": 11764 }, { "epoch": 0.75, "grad_norm": 1.7054822920002037, "learning_rate": 1.5157717089225144e-06, "loss": 0.6693, "step": 11765 }, { "epoch": 0.75, "grad_norm": 1.8044014232530228, "learning_rate": 1.5150283442503937e-06, "loss": 0.6679, "step": 11766 }, { "epoch": 0.75, "grad_norm": 1.6711744624274674, "learning_rate": 1.5142851293550303e-06, "loss": 0.7044, "step": 11767 }, { "epoch": 0.75, "grad_norm": 1.784943982259229, "learning_rate": 1.5135420642683696e-06, "loss": 0.6688, "step": 11768 }, { "epoch": 0.75, "grad_norm": 1.6465069654981823, "learning_rate": 1.5127991490223449e-06, "loss": 0.6773, "step": 11769 }, { "epoch": 0.75, "grad_norm": 1.9192093880161674, "learning_rate": 1.5120563836488844e-06, "loss": 0.7671, "step": 11770 }, { "epoch": 0.75, "grad_norm": 1.6086279364157101, "learning_rate": 1.5113137681799123e-06, "loss": 0.7295, "step": 11771 }, { "epoch": 0.75, "grad_norm": 1.766018368983987, "learning_rate": 1.5105713026473429e-06, "loss": 0.7029, "step": 11772 }, { "epoch": 0.75, "grad_norm": 1.7038081490093708, "learning_rate": 1.5098289870830869e-06, "loss": 0.8069, "step": 11773 }, { "epoch": 0.75, "grad_norm": 1.9702403423942374, "learning_rate": 1.5090868215190474e-06, "loss": 0.7327, "step": 11774 }, { "epoch": 0.75, "grad_norm": 1.809565007231733, "learning_rate": 1.508344805987119e-06, "loss": 0.8345, "step": 11775 }, { "epoch": 0.75, "grad_norm": 1.7700644807906465, "learning_rate": 1.5076029405191955e-06, "loss": 0.744, "step": 11776 }, { "epoch": 0.75, "grad_norm": 1.481470792407965, "learning_rate": 1.5068612251471592e-06, "loss": 0.5838, "step": 11777 }, { "epoch": 0.75, "grad_norm": 1.5839998488581384, "learning_rate": 1.5061196599028877e-06, "loss": 0.7608, "step": 11778 }, { "epoch": 0.75, "grad_norm": 1.9333017417465523, "learning_rate": 1.5053782448182509e-06, "loss": 0.7924, "step": 11779 }, { "epoch": 0.75, "grad_norm": 1.9157404707121943, "learning_rate": 1.5046369799251143e-06, "loss": 0.7102, "step": 11780 }, { "epoch": 0.75, "grad_norm": 1.6053713065718462, "learning_rate": 1.5038958652553354e-06, "loss": 0.8268, "step": 11781 }, { "epoch": 0.75, "grad_norm": 0.985379804352312, "learning_rate": 1.5031549008407642e-06, "loss": 0.6994, "step": 11782 }, { "epoch": 0.75, "grad_norm": 1.6395135429701817, "learning_rate": 1.502414086713251e-06, "loss": 0.7489, "step": 11783 }, { "epoch": 0.75, "grad_norm": 1.9691861320802602, "learning_rate": 1.5016734229046277e-06, "loss": 0.8029, "step": 11784 }, { "epoch": 0.75, "grad_norm": 1.9175141976255077, "learning_rate": 1.5009329094467313e-06, "loss": 0.8038, "step": 11785 }, { "epoch": 0.75, "grad_norm": 2.2525443635251565, "learning_rate": 1.5001925463713857e-06, "loss": 0.8668, "step": 11786 }, { "epoch": 0.75, "grad_norm": 1.700538188134921, "learning_rate": 1.49945233371041e-06, "loss": 0.7703, "step": 11787 }, { "epoch": 0.75, "grad_norm": 1.774103180032678, "learning_rate": 1.4987122714956177e-06, "loss": 0.6902, "step": 11788 }, { "epoch": 0.75, "grad_norm": 1.8513185266041592, "learning_rate": 1.497972359758813e-06, "loss": 0.7768, "step": 11789 }, { "epoch": 0.75, "grad_norm": 1.8351735077211937, "learning_rate": 1.4972325985318009e-06, "loss": 0.6959, "step": 11790 }, { "epoch": 0.75, "grad_norm": 1.673884911762254, "learning_rate": 1.4964929878463685e-06, "loss": 0.6478, "step": 11791 }, { "epoch": 0.75, "grad_norm": 1.9112008862031409, "learning_rate": 1.4957535277343083e-06, "loss": 0.6687, "step": 11792 }, { "epoch": 0.75, "grad_norm": 2.4193059344287944, "learning_rate": 1.4950142182273947e-06, "loss": 0.8657, "step": 11793 }, { "epoch": 0.75, "grad_norm": 1.9914043387218185, "learning_rate": 1.4942750593574073e-06, "loss": 0.719, "step": 11794 }, { "epoch": 0.75, "grad_norm": 1.1016601767712446, "learning_rate": 1.493536051156111e-06, "loss": 0.638, "step": 11795 }, { "epoch": 0.76, "grad_norm": 1.0655085879105162, "learning_rate": 1.4927971936552666e-06, "loss": 0.7541, "step": 11796 }, { "epoch": 0.76, "grad_norm": 1.7636759249720084, "learning_rate": 1.4920584868866295e-06, "loss": 0.7448, "step": 11797 }, { "epoch": 0.76, "grad_norm": 1.756131430379291, "learning_rate": 1.4913199308819458e-06, "loss": 0.7026, "step": 11798 }, { "epoch": 0.76, "grad_norm": 1.8559406284708928, "learning_rate": 1.4905815256729621e-06, "loss": 0.7244, "step": 11799 }, { "epoch": 0.76, "grad_norm": 1.6811208555913737, "learning_rate": 1.4898432712914074e-06, "loss": 0.6624, "step": 11800 }, { "epoch": 0.76, "grad_norm": 1.8526094532140287, "learning_rate": 1.4891051677690156e-06, "loss": 0.6941, "step": 11801 }, { "epoch": 0.76, "grad_norm": 1.7188523593525076, "learning_rate": 1.4883672151375029e-06, "loss": 0.6969, "step": 11802 }, { "epoch": 0.76, "grad_norm": 1.1769342491711188, "learning_rate": 1.4876294134285902e-06, "loss": 0.6681, "step": 11803 }, { "epoch": 0.76, "grad_norm": 1.9235128765013403, "learning_rate": 1.4868917626739848e-06, "loss": 0.7373, "step": 11804 }, { "epoch": 0.76, "grad_norm": 1.5717471698075334, "learning_rate": 1.4861542629053882e-06, "loss": 0.6924, "step": 11805 }, { "epoch": 0.76, "grad_norm": 1.9824896260256268, "learning_rate": 1.4854169141545004e-06, "loss": 0.7499, "step": 11806 }, { "epoch": 0.76, "grad_norm": 1.694437900619618, "learning_rate": 1.4846797164530051e-06, "loss": 0.678, "step": 11807 }, { "epoch": 0.76, "grad_norm": 1.7759394463666467, "learning_rate": 1.4839426698325927e-06, "loss": 0.7725, "step": 11808 }, { "epoch": 0.76, "grad_norm": 0.9990959927420149, "learning_rate": 1.4832057743249329e-06, "loss": 0.5621, "step": 11809 }, { "epoch": 0.76, "grad_norm": 1.9241163039873264, "learning_rate": 1.4824690299617e-06, "loss": 0.7716, "step": 11810 }, { "epoch": 0.76, "grad_norm": 1.2104054594411862, "learning_rate": 1.4817324367745573e-06, "loss": 0.664, "step": 11811 }, { "epoch": 0.76, "grad_norm": 1.738532837132582, "learning_rate": 1.4809959947951602e-06, "loss": 0.6878, "step": 11812 }, { "epoch": 0.76, "grad_norm": 1.587637131722029, "learning_rate": 1.4802597040551636e-06, "loss": 0.7625, "step": 11813 }, { "epoch": 0.76, "grad_norm": 1.1904799034417557, "learning_rate": 1.479523564586206e-06, "loss": 0.7182, "step": 11814 }, { "epoch": 0.76, "grad_norm": 1.5959087536911007, "learning_rate": 1.4787875764199312e-06, "loss": 0.6862, "step": 11815 }, { "epoch": 0.76, "grad_norm": 1.622529866369753, "learning_rate": 1.478051739587964e-06, "loss": 0.6524, "step": 11816 }, { "epoch": 0.76, "grad_norm": 1.704860674382765, "learning_rate": 1.4773160541219338e-06, "loss": 0.7819, "step": 11817 }, { "epoch": 0.76, "grad_norm": 1.7117764660038677, "learning_rate": 1.4765805200534578e-06, "loss": 0.6525, "step": 11818 }, { "epoch": 0.76, "grad_norm": 1.1995441456644043, "learning_rate": 1.4758451374141469e-06, "loss": 0.7715, "step": 11819 }, { "epoch": 0.76, "grad_norm": 1.6557541720864908, "learning_rate": 1.4751099062356073e-06, "loss": 0.8051, "step": 11820 }, { "epoch": 0.76, "grad_norm": 1.7025649590639735, "learning_rate": 1.474374826549435e-06, "loss": 0.6891, "step": 11821 }, { "epoch": 0.76, "grad_norm": 1.1838467037300247, "learning_rate": 1.473639898387228e-06, "loss": 0.6479, "step": 11822 }, { "epoch": 0.76, "grad_norm": 1.7969349899677736, "learning_rate": 1.4729051217805645e-06, "loss": 0.7142, "step": 11823 }, { "epoch": 0.76, "grad_norm": 1.6970823997643867, "learning_rate": 1.4721704967610294e-06, "loss": 0.7174, "step": 11824 }, { "epoch": 0.76, "grad_norm": 1.6435346930737003, "learning_rate": 1.4714360233601933e-06, "loss": 0.7074, "step": 11825 }, { "epoch": 0.76, "grad_norm": 1.9682956551261386, "learning_rate": 1.470701701609622e-06, "loss": 0.7238, "step": 11826 }, { "epoch": 0.76, "grad_norm": 1.6808352517019804, "learning_rate": 1.4699675315408756e-06, "loss": 0.6969, "step": 11827 }, { "epoch": 0.76, "grad_norm": 1.7354874318613502, "learning_rate": 1.4692335131855074e-06, "loss": 0.6149, "step": 11828 }, { "epoch": 0.76, "grad_norm": 2.083928761827559, "learning_rate": 1.468499646575064e-06, "loss": 0.7388, "step": 11829 }, { "epoch": 0.76, "grad_norm": 1.829461580749556, "learning_rate": 1.4677659317410826e-06, "loss": 0.7153, "step": 11830 }, { "epoch": 0.76, "grad_norm": 2.2141999810012716, "learning_rate": 1.4670323687151012e-06, "loss": 0.5729, "step": 11831 }, { "epoch": 0.76, "grad_norm": 1.7061717331686654, "learning_rate": 1.466298957528645e-06, "loss": 0.5861, "step": 11832 }, { "epoch": 0.76, "grad_norm": 1.347093124157022, "learning_rate": 1.4655656982132338e-06, "loss": 0.6993, "step": 11833 }, { "epoch": 0.76, "grad_norm": 1.8424009875257923, "learning_rate": 1.4648325908003824e-06, "loss": 0.6851, "step": 11834 }, { "epoch": 0.76, "grad_norm": 1.830422941717581, "learning_rate": 1.464099635321598e-06, "loss": 0.7283, "step": 11835 }, { "epoch": 0.76, "grad_norm": 1.8789387090604326, "learning_rate": 1.463366831808381e-06, "loss": 0.7249, "step": 11836 }, { "epoch": 0.76, "grad_norm": 1.4398688119742598, "learning_rate": 1.4626341802922262e-06, "loss": 0.6645, "step": 11837 }, { "epoch": 0.76, "grad_norm": 1.1028444637738712, "learning_rate": 1.461901680804621e-06, "loss": 0.6536, "step": 11838 }, { "epoch": 0.76, "grad_norm": 1.8476345905888862, "learning_rate": 1.461169333377047e-06, "loss": 0.6306, "step": 11839 }, { "epoch": 0.76, "grad_norm": 1.9312061023667066, "learning_rate": 1.4604371380409776e-06, "loss": 0.7667, "step": 11840 }, { "epoch": 0.76, "grad_norm": 1.5132407226187166, "learning_rate": 1.459705094827884e-06, "loss": 0.7502, "step": 11841 }, { "epoch": 0.76, "grad_norm": 1.7319881597588338, "learning_rate": 1.4589732037692262e-06, "loss": 0.6573, "step": 11842 }, { "epoch": 0.76, "grad_norm": 1.6149376870623393, "learning_rate": 1.4582414648964594e-06, "loss": 0.6477, "step": 11843 }, { "epoch": 0.76, "grad_norm": 1.681335459321694, "learning_rate": 1.4575098782410324e-06, "loss": 0.6535, "step": 11844 }, { "epoch": 0.76, "grad_norm": 1.6975788887247227, "learning_rate": 1.4567784438343868e-06, "loss": 0.7007, "step": 11845 }, { "epoch": 0.76, "grad_norm": 1.8660139058562777, "learning_rate": 1.456047161707958e-06, "loss": 0.7205, "step": 11846 }, { "epoch": 0.76, "grad_norm": 1.489311331210615, "learning_rate": 1.455316031893174e-06, "loss": 0.7134, "step": 11847 }, { "epoch": 0.76, "grad_norm": 1.1291237862666412, "learning_rate": 1.4545850544214618e-06, "loss": 0.6534, "step": 11848 }, { "epoch": 0.76, "grad_norm": 1.7245901909824366, "learning_rate": 1.4538542293242307e-06, "loss": 0.7519, "step": 11849 }, { "epoch": 0.76, "grad_norm": 1.5801350378862373, "learning_rate": 1.4531235566328954e-06, "loss": 0.7148, "step": 11850 }, { "epoch": 0.76, "grad_norm": 1.8738038695723196, "learning_rate": 1.4523930363788562e-06, "loss": 0.7672, "step": 11851 }, { "epoch": 0.76, "grad_norm": 1.1138000372405745, "learning_rate": 1.4516626685935097e-06, "loss": 0.6684, "step": 11852 }, { "epoch": 0.76, "grad_norm": 1.8274152900054539, "learning_rate": 1.450932453308246e-06, "loss": 0.7017, "step": 11853 }, { "epoch": 0.76, "grad_norm": 1.9828927066113062, "learning_rate": 1.450202390554446e-06, "loss": 0.8257, "step": 11854 }, { "epoch": 0.76, "grad_norm": 1.7849241201348678, "learning_rate": 1.4494724803634912e-06, "loss": 0.8141, "step": 11855 }, { "epoch": 0.76, "grad_norm": 2.0819150873250187, "learning_rate": 1.4487427227667456e-06, "loss": 0.7987, "step": 11856 }, { "epoch": 0.76, "grad_norm": 1.979284360386318, "learning_rate": 1.448013117795578e-06, "loss": 0.6911, "step": 11857 }, { "epoch": 0.76, "grad_norm": 1.9423535491581154, "learning_rate": 1.4472836654813405e-06, "loss": 0.6922, "step": 11858 }, { "epoch": 0.76, "grad_norm": 1.6755519118125248, "learning_rate": 1.446554365855387e-06, "loss": 0.6765, "step": 11859 }, { "epoch": 0.76, "grad_norm": 1.6561098727094639, "learning_rate": 1.4458252189490602e-06, "loss": 0.8021, "step": 11860 }, { "epoch": 0.76, "grad_norm": 1.9152140800850688, "learning_rate": 1.445096224793695e-06, "loss": 0.7325, "step": 11861 }, { "epoch": 0.76, "grad_norm": 2.4493599722334234, "learning_rate": 1.444367383420628e-06, "loss": 0.7134, "step": 11862 }, { "epoch": 0.76, "grad_norm": 1.8217095819111429, "learning_rate": 1.4436386948611763e-06, "loss": 0.7547, "step": 11863 }, { "epoch": 0.76, "grad_norm": 1.90236671266097, "learning_rate": 1.4429101591466632e-06, "loss": 0.7023, "step": 11864 }, { "epoch": 0.76, "grad_norm": 2.5217064116155425, "learning_rate": 1.442181776308394e-06, "loss": 0.8712, "step": 11865 }, { "epoch": 0.76, "grad_norm": 1.6238230878110103, "learning_rate": 1.441453546377678e-06, "loss": 0.7285, "step": 11866 }, { "epoch": 0.76, "grad_norm": 1.5648434537923026, "learning_rate": 1.4407254693858108e-06, "loss": 0.6643, "step": 11867 }, { "epoch": 0.76, "grad_norm": 1.760048306189408, "learning_rate": 1.4399975453640824e-06, "loss": 0.6104, "step": 11868 }, { "epoch": 0.76, "grad_norm": 3.0827994616864114, "learning_rate": 1.4392697743437816e-06, "loss": 0.6509, "step": 11869 }, { "epoch": 0.76, "grad_norm": 1.6537152806003819, "learning_rate": 1.4385421563561808e-06, "loss": 0.7878, "step": 11870 }, { "epoch": 0.76, "grad_norm": 1.6196291782463381, "learning_rate": 1.437814691432558e-06, "loss": 0.6284, "step": 11871 }, { "epoch": 0.76, "grad_norm": 1.5721970525551077, "learning_rate": 1.4370873796041713e-06, "loss": 0.7569, "step": 11872 }, { "epoch": 0.76, "grad_norm": 1.7019493683730842, "learning_rate": 1.4363602209022837e-06, "loss": 0.6956, "step": 11873 }, { "epoch": 0.76, "grad_norm": 1.8247655403750265, "learning_rate": 1.4356332153581454e-06, "loss": 0.6961, "step": 11874 }, { "epoch": 0.76, "grad_norm": 3.480658711931921, "learning_rate": 1.4349063630030018e-06, "loss": 0.8035, "step": 11875 }, { "epoch": 0.76, "grad_norm": 1.934766725179263, "learning_rate": 1.434179663868092e-06, "loss": 0.7863, "step": 11876 }, { "epoch": 0.76, "grad_norm": 1.789292501410027, "learning_rate": 1.4334531179846455e-06, "loss": 0.7501, "step": 11877 }, { "epoch": 0.76, "grad_norm": 1.7560144316304314, "learning_rate": 1.4327267253838928e-06, "loss": 0.6634, "step": 11878 }, { "epoch": 0.76, "grad_norm": 2.489976680579143, "learning_rate": 1.432000486097046e-06, "loss": 0.758, "step": 11879 }, { "epoch": 0.76, "grad_norm": 1.8892339464218746, "learning_rate": 1.4312744001553247e-06, "loss": 0.6511, "step": 11880 }, { "epoch": 0.76, "grad_norm": 1.6181268465140228, "learning_rate": 1.4305484675899272e-06, "loss": 0.7131, "step": 11881 }, { "epoch": 0.76, "grad_norm": 1.6717377963376254, "learning_rate": 1.4298226884320577e-06, "loss": 0.7602, "step": 11882 }, { "epoch": 0.76, "grad_norm": 1.6845653411987842, "learning_rate": 1.4290970627129075e-06, "loss": 0.6644, "step": 11883 }, { "epoch": 0.76, "grad_norm": 1.6447358136254475, "learning_rate": 1.4283715904636614e-06, "loss": 0.7648, "step": 11884 }, { "epoch": 0.76, "grad_norm": 1.8500933688733587, "learning_rate": 1.4276462717154999e-06, "loss": 0.6876, "step": 11885 }, { "epoch": 0.76, "grad_norm": 1.7313980961553728, "learning_rate": 1.4269211064995931e-06, "loss": 0.7767, "step": 11886 }, { "epoch": 0.76, "grad_norm": 1.6819127669903338, "learning_rate": 1.4261960948471122e-06, "loss": 0.6968, "step": 11887 }, { "epoch": 0.76, "grad_norm": 1.706085614828327, "learning_rate": 1.4254712367892109e-06, "loss": 0.6424, "step": 11888 }, { "epoch": 0.76, "grad_norm": 1.5304279564773835, "learning_rate": 1.424746532357046e-06, "loss": 0.6276, "step": 11889 }, { "epoch": 0.76, "grad_norm": 0.942463979991483, "learning_rate": 1.4240219815817624e-06, "loss": 0.5318, "step": 11890 }, { "epoch": 0.76, "grad_norm": 1.80394490903832, "learning_rate": 1.4232975844944997e-06, "loss": 0.6562, "step": 11891 }, { "epoch": 0.76, "grad_norm": 1.9506626661410098, "learning_rate": 1.4225733411263914e-06, "loss": 0.699, "step": 11892 }, { "epoch": 0.76, "grad_norm": 1.7505655146024868, "learning_rate": 1.4218492515085636e-06, "loss": 0.7252, "step": 11893 }, { "epoch": 0.76, "grad_norm": 1.6129146520473086, "learning_rate": 1.4211253156721366e-06, "loss": 0.6685, "step": 11894 }, { "epoch": 0.76, "grad_norm": 1.6013704326633393, "learning_rate": 1.4204015336482213e-06, "loss": 0.5708, "step": 11895 }, { "epoch": 0.76, "grad_norm": 1.7880908523095003, "learning_rate": 1.4196779054679276e-06, "loss": 0.7373, "step": 11896 }, { "epoch": 0.76, "grad_norm": 1.5808889035793585, "learning_rate": 1.418954431162355e-06, "loss": 0.707, "step": 11897 }, { "epoch": 0.76, "grad_norm": 1.5376954710263966, "learning_rate": 1.4182311107625956e-06, "loss": 0.7168, "step": 11898 }, { "epoch": 0.76, "grad_norm": 1.0667870936646668, "learning_rate": 1.417507944299737e-06, "loss": 0.5844, "step": 11899 }, { "epoch": 0.76, "grad_norm": 1.5623351499153921, "learning_rate": 1.4167849318048588e-06, "loss": 0.7833, "step": 11900 }, { "epoch": 0.76, "grad_norm": 2.0198736171789053, "learning_rate": 1.4160620733090351e-06, "loss": 0.7082, "step": 11901 }, { "epoch": 0.76, "grad_norm": 1.6406943588785692, "learning_rate": 1.4153393688433326e-06, "loss": 0.7697, "step": 11902 }, { "epoch": 0.76, "grad_norm": 1.3644875873325004, "learning_rate": 1.4146168184388099e-06, "loss": 0.6592, "step": 11903 }, { "epoch": 0.76, "grad_norm": 1.9428338007984158, "learning_rate": 1.4138944221265243e-06, "loss": 0.7897, "step": 11904 }, { "epoch": 0.76, "grad_norm": 1.7188134521979563, "learning_rate": 1.413172179937521e-06, "loss": 0.7348, "step": 11905 }, { "epoch": 0.76, "grad_norm": 1.6593058745611946, "learning_rate": 1.41245009190284e-06, "loss": 0.7466, "step": 11906 }, { "epoch": 0.76, "grad_norm": 1.7967685768598116, "learning_rate": 1.4117281580535158e-06, "loss": 0.7122, "step": 11907 }, { "epoch": 0.76, "grad_norm": 1.656338814184243, "learning_rate": 1.4110063784205751e-06, "loss": 0.7735, "step": 11908 }, { "epoch": 0.76, "grad_norm": 1.704297369424568, "learning_rate": 1.410284753035039e-06, "loss": 0.6682, "step": 11909 }, { "epoch": 0.76, "grad_norm": 1.6918530191189929, "learning_rate": 1.409563281927921e-06, "loss": 0.8538, "step": 11910 }, { "epoch": 0.76, "grad_norm": 1.8045719009310692, "learning_rate": 1.4088419651302288e-06, "loss": 0.8386, "step": 11911 }, { "epoch": 0.76, "grad_norm": 1.6293319953724446, "learning_rate": 1.4081208026729615e-06, "loss": 0.8441, "step": 11912 }, { "epoch": 0.76, "grad_norm": 1.771991306032095, "learning_rate": 1.407399794587117e-06, "loss": 0.599, "step": 11913 }, { "epoch": 0.76, "grad_norm": 1.6579342206143968, "learning_rate": 1.406678940903678e-06, "loss": 0.5337, "step": 11914 }, { "epoch": 0.76, "grad_norm": 4.766302084797625, "learning_rate": 1.4059582416536282e-06, "loss": 0.6281, "step": 11915 }, { "epoch": 0.76, "grad_norm": 1.7913701350681113, "learning_rate": 1.4052376968679416e-06, "loss": 0.7247, "step": 11916 }, { "epoch": 0.76, "grad_norm": 1.5606324212623204, "learning_rate": 1.4045173065775852e-06, "loss": 0.6756, "step": 11917 }, { "epoch": 0.76, "grad_norm": 2.5853229385958336, "learning_rate": 1.4037970708135196e-06, "loss": 0.6944, "step": 11918 }, { "epoch": 0.76, "grad_norm": 1.786166854272827, "learning_rate": 1.4030769896066975e-06, "loss": 0.6476, "step": 11919 }, { "epoch": 0.76, "grad_norm": 1.9743534329373909, "learning_rate": 1.4023570629880718e-06, "loss": 0.6342, "step": 11920 }, { "epoch": 0.76, "grad_norm": 1.9929510446386485, "learning_rate": 1.4016372909885762e-06, "loss": 0.7173, "step": 11921 }, { "epoch": 0.76, "grad_norm": 1.5561883414915452, "learning_rate": 1.4009176736391523e-06, "loss": 0.7141, "step": 11922 }, { "epoch": 0.76, "grad_norm": 1.987022012861303, "learning_rate": 1.4001982109707201e-06, "loss": 0.715, "step": 11923 }, { "epoch": 0.76, "grad_norm": 1.591822821812802, "learning_rate": 1.399478903014207e-06, "loss": 0.7547, "step": 11924 }, { "epoch": 0.76, "grad_norm": 1.5569475777653135, "learning_rate": 1.3987597498005245e-06, "loss": 0.6159, "step": 11925 }, { "epoch": 0.76, "grad_norm": 1.7038154009545787, "learning_rate": 1.3980407513605793e-06, "loss": 0.9996, "step": 11926 }, { "epoch": 0.76, "grad_norm": 2.0546873024293233, "learning_rate": 1.397321907725277e-06, "loss": 0.7562, "step": 11927 }, { "epoch": 0.76, "grad_norm": 1.5378905094613526, "learning_rate": 1.3966032189255058e-06, "loss": 0.659, "step": 11928 }, { "epoch": 0.76, "grad_norm": 1.028687776464169, "learning_rate": 1.3958846849921593e-06, "loss": 0.5804, "step": 11929 }, { "epoch": 0.76, "grad_norm": 1.6704121495529163, "learning_rate": 1.3951663059561126e-06, "loss": 0.7006, "step": 11930 }, { "epoch": 0.76, "grad_norm": 1.5537694245357616, "learning_rate": 1.3944480818482448e-06, "loss": 0.6253, "step": 11931 }, { "epoch": 0.76, "grad_norm": 1.0380333463649454, "learning_rate": 1.3937300126994223e-06, "loss": 0.727, "step": 11932 }, { "epoch": 0.76, "grad_norm": 1.576087001127779, "learning_rate": 1.393012098540505e-06, "loss": 0.6857, "step": 11933 }, { "epoch": 0.76, "grad_norm": 1.8065294282397708, "learning_rate": 1.392294339402351e-06, "loss": 0.628, "step": 11934 }, { "epoch": 0.76, "grad_norm": 1.7564475916345927, "learning_rate": 1.3915767353158022e-06, "loss": 0.6987, "step": 11935 }, { "epoch": 0.76, "grad_norm": 1.6225159056549066, "learning_rate": 1.390859286311706e-06, "loss": 0.7146, "step": 11936 }, { "epoch": 0.76, "grad_norm": 1.550666025949907, "learning_rate": 1.3901419924208908e-06, "loss": 0.6614, "step": 11937 }, { "epoch": 0.76, "grad_norm": 1.790884901379387, "learning_rate": 1.389424853674189e-06, "loss": 0.6845, "step": 11938 }, { "epoch": 0.76, "grad_norm": 1.5574525339996166, "learning_rate": 1.3887078701024204e-06, "loss": 0.7183, "step": 11939 }, { "epoch": 0.76, "grad_norm": 1.047007671700491, "learning_rate": 1.387991041736399e-06, "loss": 0.6099, "step": 11940 }, { "epoch": 0.76, "grad_norm": 1.887940963962728, "learning_rate": 1.3872743686069328e-06, "loss": 0.7687, "step": 11941 }, { "epoch": 0.76, "grad_norm": 1.8812754317607527, "learning_rate": 1.3865578507448212e-06, "loss": 0.8012, "step": 11942 }, { "epoch": 0.76, "grad_norm": 1.7941776121587099, "learning_rate": 1.3858414881808634e-06, "loss": 0.7311, "step": 11943 }, { "epoch": 0.76, "grad_norm": 1.706988756379681, "learning_rate": 1.385125280945841e-06, "loss": 0.6481, "step": 11944 }, { "epoch": 0.76, "grad_norm": 1.8634874438523827, "learning_rate": 1.3844092290705396e-06, "loss": 0.6994, "step": 11945 }, { "epoch": 0.76, "grad_norm": 1.664753909722142, "learning_rate": 1.3836933325857321e-06, "loss": 0.7158, "step": 11946 }, { "epoch": 0.76, "grad_norm": 1.6993376922100554, "learning_rate": 1.382977591522186e-06, "loss": 0.7784, "step": 11947 }, { "epoch": 0.76, "grad_norm": 1.205910878279165, "learning_rate": 1.3822620059106633e-06, "loss": 0.6097, "step": 11948 }, { "epoch": 0.76, "grad_norm": 1.7343865981021938, "learning_rate": 1.3815465757819174e-06, "loss": 0.687, "step": 11949 }, { "epoch": 0.76, "grad_norm": 1.765943024666153, "learning_rate": 1.3808313011666958e-06, "loss": 0.6735, "step": 11950 }, { "epoch": 0.76, "grad_norm": 1.8746604028417158, "learning_rate": 1.3801161820957386e-06, "loss": 0.6679, "step": 11951 }, { "epoch": 0.77, "grad_norm": 1.6564664610447328, "learning_rate": 1.3794012185997851e-06, "loss": 0.7664, "step": 11952 }, { "epoch": 0.77, "grad_norm": 1.0954905416677367, "learning_rate": 1.378686410709556e-06, "loss": 0.6595, "step": 11953 }, { "epoch": 0.77, "grad_norm": 1.8818905047315801, "learning_rate": 1.3779717584557766e-06, "loss": 0.7052, "step": 11954 }, { "epoch": 0.77, "grad_norm": 1.5605829929615465, "learning_rate": 1.3772572618691604e-06, "loss": 0.6573, "step": 11955 }, { "epoch": 0.77, "grad_norm": 1.467432100369738, "learning_rate": 1.376542920980415e-06, "loss": 0.5903, "step": 11956 }, { "epoch": 0.77, "grad_norm": 3.7646826874960286, "learning_rate": 1.3758287358202404e-06, "loss": 0.7992, "step": 11957 }, { "epoch": 0.77, "grad_norm": 1.7599600112677871, "learning_rate": 1.3751147064193315e-06, "loss": 0.811, "step": 11958 }, { "epoch": 0.77, "grad_norm": 1.0919002978397154, "learning_rate": 1.3744008328083758e-06, "loss": 0.6098, "step": 11959 }, { "epoch": 0.77, "grad_norm": 1.0523315862612066, "learning_rate": 1.373687115018052e-06, "loss": 0.641, "step": 11960 }, { "epoch": 0.77, "grad_norm": 1.4827705561615467, "learning_rate": 1.3729735530790378e-06, "loss": 0.69, "step": 11961 }, { "epoch": 0.77, "grad_norm": 1.656419133853017, "learning_rate": 1.3722601470219986e-06, "loss": 0.7988, "step": 11962 }, { "epoch": 0.77, "grad_norm": 1.0640757860135963, "learning_rate": 1.3715468968775952e-06, "loss": 0.6586, "step": 11963 }, { "epoch": 0.77, "grad_norm": 1.830878531362494, "learning_rate": 1.3708338026764823e-06, "loss": 0.7331, "step": 11964 }, { "epoch": 0.77, "grad_norm": 1.8591789238009109, "learning_rate": 1.3701208644493064e-06, "loss": 0.7094, "step": 11965 }, { "epoch": 0.77, "grad_norm": 2.168332188377729, "learning_rate": 1.369408082226708e-06, "loss": 0.7028, "step": 11966 }, { "epoch": 0.77, "grad_norm": 1.9396100544689463, "learning_rate": 1.3686954560393218e-06, "loss": 0.7746, "step": 11967 }, { "epoch": 0.77, "grad_norm": 1.0656257583025597, "learning_rate": 1.367982985917773e-06, "loss": 0.5928, "step": 11968 }, { "epoch": 0.77, "grad_norm": 1.78350010273122, "learning_rate": 1.3672706718926849e-06, "loss": 0.7573, "step": 11969 }, { "epoch": 0.77, "grad_norm": 2.063071955197312, "learning_rate": 1.3665585139946697e-06, "loss": 0.6961, "step": 11970 }, { "epoch": 0.77, "grad_norm": 0.9026058297763313, "learning_rate": 1.3658465122543346e-06, "loss": 0.5877, "step": 11971 }, { "epoch": 0.77, "grad_norm": 1.676535843435959, "learning_rate": 1.3651346667022801e-06, "loss": 0.7597, "step": 11972 }, { "epoch": 0.77, "grad_norm": 1.141277348680262, "learning_rate": 1.3644229773690997e-06, "loss": 0.6748, "step": 11973 }, { "epoch": 0.77, "grad_norm": 1.0560294742182017, "learning_rate": 1.363711444285381e-06, "loss": 0.6284, "step": 11974 }, { "epoch": 0.77, "grad_norm": 1.6525067935010416, "learning_rate": 1.3630000674817011e-06, "loss": 0.6935, "step": 11975 }, { "epoch": 0.77, "grad_norm": 1.7439033622853666, "learning_rate": 1.3622888469886391e-06, "loss": 0.7064, "step": 11976 }, { "epoch": 0.77, "grad_norm": 1.6459464584928136, "learning_rate": 1.361577782836756e-06, "loss": 0.7218, "step": 11977 }, { "epoch": 0.77, "grad_norm": 1.2269093609004011, "learning_rate": 1.3608668750566157e-06, "loss": 0.5346, "step": 11978 }, { "epoch": 0.77, "grad_norm": 1.7596667509528263, "learning_rate": 1.3601561236787702e-06, "loss": 0.7183, "step": 11979 }, { "epoch": 0.77, "grad_norm": 1.9407770155471389, "learning_rate": 1.359445528733766e-06, "loss": 0.6677, "step": 11980 }, { "epoch": 0.77, "grad_norm": 1.9149705424708796, "learning_rate": 1.3587350902521435e-06, "loss": 0.7291, "step": 11981 }, { "epoch": 0.77, "grad_norm": 1.863102384551227, "learning_rate": 1.3580248082644337e-06, "loss": 0.604, "step": 11982 }, { "epoch": 0.77, "grad_norm": 1.0763226817852685, "learning_rate": 1.357314682801168e-06, "loss": 0.6847, "step": 11983 }, { "epoch": 0.77, "grad_norm": 1.0706502892625098, "learning_rate": 1.3566047138928594e-06, "loss": 0.5416, "step": 11984 }, { "epoch": 0.77, "grad_norm": 1.713803384056627, "learning_rate": 1.3558949015700278e-06, "loss": 0.7166, "step": 11985 }, { "epoch": 0.77, "grad_norm": 1.7040349589590653, "learning_rate": 1.355185245863173e-06, "loss": 0.7225, "step": 11986 }, { "epoch": 0.77, "grad_norm": 1.746918790483905, "learning_rate": 1.3544757468027986e-06, "loss": 0.6532, "step": 11987 }, { "epoch": 0.77, "grad_norm": 1.925820296344966, "learning_rate": 1.3537664044193965e-06, "loss": 0.7706, "step": 11988 }, { "epoch": 0.77, "grad_norm": 1.86726976371654, "learning_rate": 1.3530572187434531e-06, "loss": 0.8588, "step": 11989 }, { "epoch": 0.77, "grad_norm": 1.9294193908098751, "learning_rate": 1.3523481898054463e-06, "loss": 0.8033, "step": 11990 }, { "epoch": 0.77, "grad_norm": 2.0750202822533126, "learning_rate": 1.351639317635849e-06, "loss": 0.6508, "step": 11991 }, { "epoch": 0.77, "grad_norm": 1.6315606447506246, "learning_rate": 1.3509306022651297e-06, "loss": 0.6898, "step": 11992 }, { "epoch": 0.77, "grad_norm": 1.8098866258881243, "learning_rate": 1.3502220437237429e-06, "loss": 0.8489, "step": 11993 }, { "epoch": 0.77, "grad_norm": 1.7662579496587665, "learning_rate": 1.3495136420421461e-06, "loss": 0.7443, "step": 11994 }, { "epoch": 0.77, "grad_norm": 1.2148429305519521, "learning_rate": 1.3488053972507792e-06, "loss": 0.6514, "step": 11995 }, { "epoch": 0.77, "grad_norm": 1.4682487036948686, "learning_rate": 1.3480973093800859e-06, "loss": 0.6191, "step": 11996 }, { "epoch": 0.77, "grad_norm": 1.7071977063382704, "learning_rate": 1.3473893784604963e-06, "loss": 0.6588, "step": 11997 }, { "epoch": 0.77, "grad_norm": 2.8785601276481034, "learning_rate": 1.3466816045224346e-06, "loss": 0.734, "step": 11998 }, { "epoch": 0.77, "grad_norm": 1.5580823144820308, "learning_rate": 1.345973987596324e-06, "loss": 0.7007, "step": 11999 }, { "epoch": 0.77, "grad_norm": 1.7158529439005625, "learning_rate": 1.3452665277125697e-06, "loss": 0.6417, "step": 12000 }, { "epoch": 0.77, "grad_norm": 1.830642789552292, "learning_rate": 1.3445592249015843e-06, "loss": 0.5964, "step": 12001 }, { "epoch": 0.77, "grad_norm": 1.6353579458118408, "learning_rate": 1.3438520791937587e-06, "loss": 0.6356, "step": 12002 }, { "epoch": 0.77, "grad_norm": 1.808758137871285, "learning_rate": 1.3431450906194892e-06, "loss": 0.673, "step": 12003 }, { "epoch": 0.77, "grad_norm": 1.1497953273802888, "learning_rate": 1.3424382592091606e-06, "loss": 0.7462, "step": 12004 }, { "epoch": 0.77, "grad_norm": 1.6066030820737678, "learning_rate": 1.3417315849931495e-06, "loss": 0.7515, "step": 12005 }, { "epoch": 0.77, "grad_norm": 1.90384880712869, "learning_rate": 1.3410250680018277e-06, "loss": 0.6263, "step": 12006 }, { "epoch": 0.77, "grad_norm": 1.7324532530679821, "learning_rate": 1.3403187082655584e-06, "loss": 0.7467, "step": 12007 }, { "epoch": 0.77, "grad_norm": 1.661400098773705, "learning_rate": 1.3396125058147042e-06, "loss": 0.6596, "step": 12008 }, { "epoch": 0.77, "grad_norm": 1.6853037891618323, "learning_rate": 1.3389064606796098e-06, "loss": 0.7756, "step": 12009 }, { "epoch": 0.77, "grad_norm": 1.7368288455810676, "learning_rate": 1.338200572890624e-06, "loss": 0.6557, "step": 12010 }, { "epoch": 0.77, "grad_norm": 1.5067169044728301, "learning_rate": 1.3374948424780836e-06, "loss": 0.5896, "step": 12011 }, { "epoch": 0.77, "grad_norm": 2.3801314375488225, "learning_rate": 1.3367892694723183e-06, "loss": 0.762, "step": 12012 }, { "epoch": 0.77, "grad_norm": 1.8308915949896125, "learning_rate": 1.336083853903653e-06, "loss": 0.7049, "step": 12013 }, { "epoch": 0.77, "grad_norm": 2.1256203536194502, "learning_rate": 1.3353785958024052e-06, "loss": 0.8359, "step": 12014 }, { "epoch": 0.77, "grad_norm": 1.7404185999762352, "learning_rate": 1.3346734951988844e-06, "loss": 0.6015, "step": 12015 }, { "epoch": 0.77, "grad_norm": 1.635611391120924, "learning_rate": 1.3339685521233931e-06, "loss": 0.8449, "step": 12016 }, { "epoch": 0.77, "grad_norm": 1.7058744878156131, "learning_rate": 1.333263766606232e-06, "loss": 0.6312, "step": 12017 }, { "epoch": 0.77, "grad_norm": 1.7223898525938124, "learning_rate": 1.3325591386776892e-06, "loss": 0.6997, "step": 12018 }, { "epoch": 0.77, "grad_norm": 0.9814647793543785, "learning_rate": 1.3318546683680483e-06, "loss": 0.5819, "step": 12019 }, { "epoch": 0.77, "grad_norm": 1.8777973305970814, "learning_rate": 1.3311503557075862e-06, "loss": 0.6955, "step": 12020 }, { "epoch": 0.77, "grad_norm": 1.759622749585587, "learning_rate": 1.3304462007265716e-06, "loss": 1.0813, "step": 12021 }, { "epoch": 0.77, "grad_norm": 41.48674895578774, "learning_rate": 1.3297422034552692e-06, "loss": 0.7874, "step": 12022 }, { "epoch": 0.77, "grad_norm": 1.1610809775837443, "learning_rate": 1.3290383639239347e-06, "loss": 0.5999, "step": 12023 }, { "epoch": 0.77, "grad_norm": 2.1450956717200693, "learning_rate": 1.3283346821628157e-06, "loss": 0.7131, "step": 12024 }, { "epoch": 0.77, "grad_norm": 1.376611934925221, "learning_rate": 1.3276311582021583e-06, "loss": 0.6373, "step": 12025 }, { "epoch": 0.77, "grad_norm": 1.7644284383493143, "learning_rate": 1.3269277920721975e-06, "loss": 0.8036, "step": 12026 }, { "epoch": 0.77, "grad_norm": 1.9851021242942168, "learning_rate": 1.3262245838031618e-06, "loss": 0.8315, "step": 12027 }, { "epoch": 0.77, "grad_norm": 1.6301981372710197, "learning_rate": 1.3255215334252736e-06, "loss": 0.694, "step": 12028 }, { "epoch": 0.77, "grad_norm": 1.9900534238244163, "learning_rate": 1.3248186409687491e-06, "loss": 0.8676, "step": 12029 }, { "epoch": 0.77, "grad_norm": 1.5238568689250933, "learning_rate": 1.3241159064637965e-06, "loss": 0.7509, "step": 12030 }, { "epoch": 0.77, "grad_norm": 1.7975533364999872, "learning_rate": 1.3234133299406183e-06, "loss": 0.69, "step": 12031 }, { "epoch": 0.77, "grad_norm": 1.6321466567105194, "learning_rate": 1.3227109114294096e-06, "loss": 0.6337, "step": 12032 }, { "epoch": 0.77, "grad_norm": 1.2113671397246055, "learning_rate": 1.3220086509603569e-06, "loss": 0.7154, "step": 12033 }, { "epoch": 0.77, "grad_norm": 1.5886813506902655, "learning_rate": 1.3213065485636462e-06, "loss": 0.7435, "step": 12034 }, { "epoch": 0.77, "grad_norm": 2.108951001300038, "learning_rate": 1.3206046042694493e-06, "loss": 0.6628, "step": 12035 }, { "epoch": 0.77, "grad_norm": 1.919255393746662, "learning_rate": 1.3199028181079354e-06, "loss": 0.7267, "step": 12036 }, { "epoch": 0.77, "grad_norm": 1.6518758022360625, "learning_rate": 1.3192011901092654e-06, "loss": 0.6896, "step": 12037 }, { "epoch": 0.77, "grad_norm": 1.6191109554594, "learning_rate": 1.3184997203035938e-06, "loss": 0.7604, "step": 12038 }, { "epoch": 0.77, "grad_norm": 1.816431726343738, "learning_rate": 1.3177984087210682e-06, "loss": 0.8397, "step": 12039 }, { "epoch": 0.77, "grad_norm": 1.63133599835897, "learning_rate": 1.3170972553918283e-06, "loss": 0.7398, "step": 12040 }, { "epoch": 0.77, "grad_norm": 1.714126111626433, "learning_rate": 1.3163962603460123e-06, "loss": 0.72, "step": 12041 }, { "epoch": 0.77, "grad_norm": 1.716341514715599, "learning_rate": 1.315695423613742e-06, "loss": 0.763, "step": 12042 }, { "epoch": 0.77, "grad_norm": 1.8442078696727735, "learning_rate": 1.3149947452251422e-06, "loss": 0.7013, "step": 12043 }, { "epoch": 0.77, "grad_norm": 1.6804380347898245, "learning_rate": 1.3142942252103246e-06, "loss": 0.58, "step": 12044 }, { "epoch": 0.77, "grad_norm": 1.8818395757337532, "learning_rate": 1.3135938635993966e-06, "loss": 0.8344, "step": 12045 }, { "epoch": 0.77, "grad_norm": 1.6384568361700727, "learning_rate": 1.312893660422458e-06, "loss": 0.6992, "step": 12046 }, { "epoch": 0.77, "grad_norm": 1.5283031852107316, "learning_rate": 1.312193615709601e-06, "loss": 0.6857, "step": 12047 }, { "epoch": 0.77, "grad_norm": 1.0559774989605153, "learning_rate": 1.3114937294909164e-06, "loss": 0.6659, "step": 12048 }, { "epoch": 0.77, "grad_norm": 1.0450270577120744, "learning_rate": 1.310794001796477e-06, "loss": 0.6019, "step": 12049 }, { "epoch": 0.77, "grad_norm": 1.7965582357228471, "learning_rate": 1.310094432656363e-06, "loss": 0.9486, "step": 12050 }, { "epoch": 0.77, "grad_norm": 2.1134788787317675, "learning_rate": 1.3093950221006329e-06, "loss": 0.8093, "step": 12051 }, { "epoch": 0.77, "grad_norm": 1.8575708752663735, "learning_rate": 1.3086957701593523e-06, "loss": 0.7386, "step": 12052 }, { "epoch": 0.77, "grad_norm": 1.6278191944527278, "learning_rate": 1.30799667686257e-06, "loss": 0.7245, "step": 12053 }, { "epoch": 0.77, "grad_norm": 1.3477460891906257, "learning_rate": 1.3072977422403317e-06, "loss": 0.5941, "step": 12054 }, { "epoch": 0.77, "grad_norm": 1.9476205048007786, "learning_rate": 1.3065989663226797e-06, "loss": 0.6807, "step": 12055 }, { "epoch": 0.77, "grad_norm": 1.6159589022060947, "learning_rate": 1.3059003491396405e-06, "loss": 0.7313, "step": 12056 }, { "epoch": 0.77, "grad_norm": 1.634433784194415, "learning_rate": 1.3052018907212448e-06, "loss": 0.713, "step": 12057 }, { "epoch": 0.77, "grad_norm": 1.759039185432469, "learning_rate": 1.3045035910975045e-06, "loss": 0.6805, "step": 12058 }, { "epoch": 0.77, "grad_norm": 1.5970816832045136, "learning_rate": 1.303805450298437e-06, "loss": 0.6228, "step": 12059 }, { "epoch": 0.77, "grad_norm": 1.7489620286533707, "learning_rate": 1.303107468354044e-06, "loss": 0.7636, "step": 12060 }, { "epoch": 0.77, "grad_norm": 1.9291419051985017, "learning_rate": 1.3024096452943236e-06, "loss": 0.6814, "step": 12061 }, { "epoch": 0.77, "grad_norm": 1.0341227933545365, "learning_rate": 1.3017119811492668e-06, "loss": 0.5924, "step": 12062 }, { "epoch": 0.77, "grad_norm": 1.4673021698927047, "learning_rate": 1.301014475948857e-06, "loss": 0.7552, "step": 12063 }, { "epoch": 0.77, "grad_norm": 1.7911681274215374, "learning_rate": 1.3003171297230748e-06, "loss": 0.659, "step": 12064 }, { "epoch": 0.77, "grad_norm": 1.5136454253646263, "learning_rate": 1.2996199425018858e-06, "loss": 0.6606, "step": 12065 }, { "epoch": 0.77, "grad_norm": 1.274552148597008, "learning_rate": 1.298922914315257e-06, "loss": 0.7373, "step": 12066 }, { "epoch": 0.77, "grad_norm": 1.6021192059358575, "learning_rate": 1.2982260451931445e-06, "loss": 0.7023, "step": 12067 }, { "epoch": 0.77, "grad_norm": 1.8635830937218925, "learning_rate": 1.2975293351654982e-06, "loss": 0.7673, "step": 12068 }, { "epoch": 0.77, "grad_norm": 1.9804885360541071, "learning_rate": 1.2968327842622612e-06, "loss": 0.726, "step": 12069 }, { "epoch": 0.77, "grad_norm": 1.5060142649982302, "learning_rate": 1.2961363925133696e-06, "loss": 0.5832, "step": 12070 }, { "epoch": 0.77, "grad_norm": 1.1699076754383826, "learning_rate": 1.2954401599487531e-06, "loss": 0.6024, "step": 12071 }, { "epoch": 0.77, "grad_norm": 1.7900367651971427, "learning_rate": 1.2947440865983323e-06, "loss": 0.7577, "step": 12072 }, { "epoch": 0.77, "grad_norm": 1.339901357684108, "learning_rate": 1.2940481724920284e-06, "loss": 0.714, "step": 12073 }, { "epoch": 0.77, "grad_norm": 1.5861003063933337, "learning_rate": 1.2933524176597434e-06, "loss": 0.767, "step": 12074 }, { "epoch": 0.77, "grad_norm": 1.959444187128993, "learning_rate": 1.292656822131384e-06, "loss": 0.7319, "step": 12075 }, { "epoch": 0.77, "grad_norm": 1.7332946325680392, "learning_rate": 1.2919613859368446e-06, "loss": 0.786, "step": 12076 }, { "epoch": 0.77, "grad_norm": 1.7987286553004371, "learning_rate": 1.291266109106013e-06, "loss": 0.6672, "step": 12077 }, { "epoch": 0.77, "grad_norm": 1.733630857842785, "learning_rate": 1.290570991668771e-06, "loss": 0.8793, "step": 12078 }, { "epoch": 0.77, "grad_norm": 1.6933723358792052, "learning_rate": 1.2898760336549931e-06, "loss": 0.7266, "step": 12079 }, { "epoch": 0.77, "grad_norm": 1.647062164537808, "learning_rate": 1.2891812350945476e-06, "loss": 0.6714, "step": 12080 }, { "epoch": 0.77, "grad_norm": 2.2115362736354784, "learning_rate": 1.2884865960172931e-06, "loss": 0.8027, "step": 12081 }, { "epoch": 0.77, "grad_norm": 0.9404430306363797, "learning_rate": 1.2877921164530872e-06, "loss": 0.5395, "step": 12082 }, { "epoch": 0.77, "grad_norm": 1.7320934459139967, "learning_rate": 1.287097796431776e-06, "loss": 0.7356, "step": 12083 }, { "epoch": 0.77, "grad_norm": 1.7871802096850775, "learning_rate": 1.2864036359831995e-06, "loss": 0.8409, "step": 12084 }, { "epoch": 0.77, "grad_norm": 2.0315214719809993, "learning_rate": 1.2857096351371917e-06, "loss": 0.7004, "step": 12085 }, { "epoch": 0.77, "grad_norm": 1.7458149244805037, "learning_rate": 1.2850157939235785e-06, "loss": 0.7262, "step": 12086 }, { "epoch": 0.77, "grad_norm": 1.6732378430163148, "learning_rate": 1.2843221123721804e-06, "loss": 0.8174, "step": 12087 }, { "epoch": 0.77, "grad_norm": 1.5049946146325437, "learning_rate": 1.2836285905128105e-06, "loss": 0.5997, "step": 12088 }, { "epoch": 0.77, "grad_norm": 1.7107615925493682, "learning_rate": 1.2829352283752728e-06, "loss": 0.6897, "step": 12089 }, { "epoch": 0.77, "grad_norm": 1.6356134183768343, "learning_rate": 1.2822420259893697e-06, "loss": 0.7401, "step": 12090 }, { "epoch": 0.77, "grad_norm": 2.556940924684289, "learning_rate": 1.2815489833848927e-06, "loss": 0.7253, "step": 12091 }, { "epoch": 0.77, "grad_norm": 1.6168154186850803, "learning_rate": 1.2808561005916266e-06, "loss": 0.7455, "step": 12092 }, { "epoch": 0.77, "grad_norm": 2.067452391167225, "learning_rate": 1.28016337763935e-06, "loss": 0.6504, "step": 12093 }, { "epoch": 0.77, "grad_norm": 1.952722418075804, "learning_rate": 1.2794708145578356e-06, "loss": 0.7249, "step": 12094 }, { "epoch": 0.77, "grad_norm": 2.518642749690385, "learning_rate": 1.278778411376847e-06, "loss": 0.7447, "step": 12095 }, { "epoch": 0.77, "grad_norm": 2.3688311560362663, "learning_rate": 1.278086168126142e-06, "loss": 0.7344, "step": 12096 }, { "epoch": 0.77, "grad_norm": 1.6041001666796544, "learning_rate": 1.2773940848354754e-06, "loss": 0.6741, "step": 12097 }, { "epoch": 0.77, "grad_norm": 1.7943011131429074, "learning_rate": 1.2767021615345859e-06, "loss": 0.7069, "step": 12098 }, { "epoch": 0.77, "grad_norm": 1.7787897717974115, "learning_rate": 1.2760103982532152e-06, "loss": 0.8544, "step": 12099 }, { "epoch": 0.77, "grad_norm": 1.633149075704624, "learning_rate": 1.2753187950210922e-06, "loss": 0.5925, "step": 12100 }, { "epoch": 0.77, "grad_norm": 1.1958505792356502, "learning_rate": 1.274627351867941e-06, "loss": 0.639, "step": 12101 }, { "epoch": 0.77, "grad_norm": 1.7443247307345482, "learning_rate": 1.2739360688234782e-06, "loss": 0.7999, "step": 12102 }, { "epoch": 0.77, "grad_norm": 2.625531287444673, "learning_rate": 1.2732449459174134e-06, "loss": 0.6485, "step": 12103 }, { "epoch": 0.77, "grad_norm": 1.527285952254474, "learning_rate": 1.2725539831794498e-06, "loss": 0.6532, "step": 12104 }, { "epoch": 0.77, "grad_norm": 1.597616441802847, "learning_rate": 1.2718631806392823e-06, "loss": 0.6771, "step": 12105 }, { "epoch": 0.77, "grad_norm": 1.6827737081681935, "learning_rate": 1.2711725383266044e-06, "loss": 0.7603, "step": 12106 }, { "epoch": 0.77, "grad_norm": 1.9425351519073704, "learning_rate": 1.2704820562710923e-06, "loss": 0.6552, "step": 12107 }, { "epoch": 0.77, "grad_norm": 1.9050687467317735, "learning_rate": 1.2697917345024258e-06, "loss": 0.8595, "step": 12108 }, { "epoch": 0.78, "grad_norm": 1.155217657238814, "learning_rate": 1.2691015730502732e-06, "loss": 0.6262, "step": 12109 }, { "epoch": 0.78, "grad_norm": 1.7174815012669429, "learning_rate": 1.2684115719442947e-06, "loss": 0.8162, "step": 12110 }, { "epoch": 0.78, "grad_norm": 1.6917778585051861, "learning_rate": 1.2677217312141455e-06, "loss": 0.697, "step": 12111 }, { "epoch": 0.78, "grad_norm": 1.3132322583263658, "learning_rate": 1.2670320508894724e-06, "loss": 0.6743, "step": 12112 }, { "epoch": 0.78, "grad_norm": 1.9415642619871127, "learning_rate": 1.2663425309999205e-06, "loss": 0.6976, "step": 12113 }, { "epoch": 0.78, "grad_norm": 1.5000035808808916, "learning_rate": 1.2656531715751185e-06, "loss": 0.669, "step": 12114 }, { "epoch": 0.78, "grad_norm": 1.8799060674523211, "learning_rate": 1.2649639726446994e-06, "loss": 0.6863, "step": 12115 }, { "epoch": 0.78, "grad_norm": 1.7490999544270276, "learning_rate": 1.2642749342382772e-06, "loss": 0.6128, "step": 12116 }, { "epoch": 0.78, "grad_norm": 1.8175746099595662, "learning_rate": 1.2635860563854695e-06, "loss": 0.778, "step": 12117 }, { "epoch": 0.78, "grad_norm": 1.6618596220042294, "learning_rate": 1.2628973391158821e-06, "loss": 0.6454, "step": 12118 }, { "epoch": 0.78, "grad_norm": 1.6871567436674835, "learning_rate": 1.2622087824591129e-06, "loss": 0.7002, "step": 12119 }, { "epoch": 0.78, "grad_norm": 1.6113905826070187, "learning_rate": 1.261520386444759e-06, "loss": 0.5918, "step": 12120 }, { "epoch": 0.78, "grad_norm": 1.7810993041382976, "learning_rate": 1.2608321511024007e-06, "loss": 0.8021, "step": 12121 }, { "epoch": 0.78, "grad_norm": 1.7325512195708745, "learning_rate": 1.2601440764616218e-06, "loss": 0.616, "step": 12122 }, { "epoch": 0.78, "grad_norm": 1.8814834429065856, "learning_rate": 1.2594561625519891e-06, "loss": 0.6346, "step": 12123 }, { "epoch": 0.78, "grad_norm": 1.589929854650464, "learning_rate": 1.2587684094030723e-06, "loss": 0.7345, "step": 12124 }, { "epoch": 0.78, "grad_norm": 1.7874691100228957, "learning_rate": 1.258080817044428e-06, "loss": 0.6735, "step": 12125 }, { "epoch": 0.78, "grad_norm": 2.129765353506525, "learning_rate": 1.2573933855056053e-06, "loss": 0.7103, "step": 12126 }, { "epoch": 0.78, "grad_norm": 1.8984440994534952, "learning_rate": 1.256706114816154e-06, "loss": 0.7257, "step": 12127 }, { "epoch": 0.78, "grad_norm": 1.319501280137539, "learning_rate": 1.2560190050056054e-06, "loss": 0.7365, "step": 12128 }, { "epoch": 0.78, "grad_norm": 1.9039742156422828, "learning_rate": 1.2553320561034955e-06, "loss": 0.7638, "step": 12129 }, { "epoch": 0.78, "grad_norm": 1.7196512663558912, "learning_rate": 1.2546452681393417e-06, "loss": 0.6859, "step": 12130 }, { "epoch": 0.78, "grad_norm": 1.8035574223166926, "learning_rate": 1.2539586411426664e-06, "loss": 0.7835, "step": 12131 }, { "epoch": 0.78, "grad_norm": 1.785735879750959, "learning_rate": 1.2532721751429765e-06, "loss": 0.6948, "step": 12132 }, { "epoch": 0.78, "grad_norm": 2.0054453187824293, "learning_rate": 1.2525858701697762e-06, "loss": 0.6737, "step": 12133 }, { "epoch": 0.78, "grad_norm": 1.7909749330997498, "learning_rate": 1.2518997262525605e-06, "loss": 0.6478, "step": 12134 }, { "epoch": 0.78, "grad_norm": 1.7137169644858479, "learning_rate": 1.2512137434208188e-06, "loss": 0.7347, "step": 12135 }, { "epoch": 0.78, "grad_norm": 1.6890363475817862, "learning_rate": 1.2505279217040327e-06, "loss": 0.6892, "step": 12136 }, { "epoch": 0.78, "grad_norm": 1.6832293831580725, "learning_rate": 1.2498422611316767e-06, "loss": 0.7466, "step": 12137 }, { "epoch": 0.78, "grad_norm": 1.6341095572371322, "learning_rate": 1.2491567617332218e-06, "loss": 0.7033, "step": 12138 }, { "epoch": 0.78, "grad_norm": 1.6529887903624996, "learning_rate": 1.2484714235381278e-06, "loss": 0.7424, "step": 12139 }, { "epoch": 0.78, "grad_norm": 1.6434267009338464, "learning_rate": 1.2477862465758484e-06, "loss": 0.6347, "step": 12140 }, { "epoch": 0.78, "grad_norm": 1.6872304096093984, "learning_rate": 1.2471012308758324e-06, "loss": 0.6482, "step": 12141 }, { "epoch": 0.78, "grad_norm": 1.4665668853113483, "learning_rate": 1.2464163764675185e-06, "loss": 0.6636, "step": 12142 }, { "epoch": 0.78, "grad_norm": 1.815797753994585, "learning_rate": 1.2457316833803424e-06, "loss": 0.7708, "step": 12143 }, { "epoch": 0.78, "grad_norm": 2.195523855696833, "learning_rate": 1.245047151643729e-06, "loss": 0.71, "step": 12144 }, { "epoch": 0.78, "grad_norm": 1.307712051071901, "learning_rate": 1.244362781287099e-06, "loss": 0.6625, "step": 12145 }, { "epoch": 0.78, "grad_norm": 1.7552301195527467, "learning_rate": 1.2436785723398637e-06, "loss": 0.7863, "step": 12146 }, { "epoch": 0.78, "grad_norm": 1.7465431366901891, "learning_rate": 1.2429945248314317e-06, "loss": 0.7623, "step": 12147 }, { "epoch": 0.78, "grad_norm": 1.6077335916292639, "learning_rate": 1.2423106387912003e-06, "loss": 0.7028, "step": 12148 }, { "epoch": 0.78, "grad_norm": 1.1460883145683065, "learning_rate": 1.2416269142485615e-06, "loss": 0.5698, "step": 12149 }, { "epoch": 0.78, "grad_norm": 1.7861473725200305, "learning_rate": 1.240943351232901e-06, "loss": 0.7691, "step": 12150 }, { "epoch": 0.78, "grad_norm": 1.6786700249732225, "learning_rate": 1.2402599497735961e-06, "loss": 0.7984, "step": 12151 }, { "epoch": 0.78, "grad_norm": 1.9835426814192771, "learning_rate": 1.2395767099000183e-06, "loss": 0.7761, "step": 12152 }, { "epoch": 0.78, "grad_norm": 1.7234877262932302, "learning_rate": 1.2388936316415317e-06, "loss": 0.7693, "step": 12153 }, { "epoch": 0.78, "grad_norm": 2.3381443012676386, "learning_rate": 1.238210715027492e-06, "loss": 0.7765, "step": 12154 }, { "epoch": 0.78, "grad_norm": 1.8668253295551553, "learning_rate": 1.237527960087253e-06, "loss": 0.7484, "step": 12155 }, { "epoch": 0.78, "grad_norm": 1.7972326610699858, "learning_rate": 1.2368453668501562e-06, "loss": 0.7171, "step": 12156 }, { "epoch": 0.78, "grad_norm": 1.1354638976150477, "learning_rate": 1.2361629353455378e-06, "loss": 0.6542, "step": 12157 }, { "epoch": 0.78, "grad_norm": 1.6349059355113198, "learning_rate": 1.2354806656027274e-06, "loss": 0.6735, "step": 12158 }, { "epoch": 0.78, "grad_norm": 1.8034549569702372, "learning_rate": 1.234798557651048e-06, "loss": 0.7775, "step": 12159 }, { "epoch": 0.78, "grad_norm": 2.2564583862317895, "learning_rate": 1.234116611519815e-06, "loss": 0.7347, "step": 12160 }, { "epoch": 0.78, "grad_norm": 1.145821361828579, "learning_rate": 1.233434827238335e-06, "loss": 0.6712, "step": 12161 }, { "epoch": 0.78, "grad_norm": 1.6806426208770715, "learning_rate": 1.232753204835914e-06, "loss": 0.7359, "step": 12162 }, { "epoch": 0.78, "grad_norm": 1.9598835710179061, "learning_rate": 1.2320717443418422e-06, "loss": 0.7367, "step": 12163 }, { "epoch": 0.78, "grad_norm": 1.8850789689975276, "learning_rate": 1.2313904457854104e-06, "loss": 0.7781, "step": 12164 }, { "epoch": 0.78, "grad_norm": 1.7615771237454585, "learning_rate": 1.2307093091958983e-06, "loss": 0.7457, "step": 12165 }, { "epoch": 0.78, "grad_norm": 1.6087713327505717, "learning_rate": 1.2300283346025794e-06, "loss": 0.7713, "step": 12166 }, { "epoch": 0.78, "grad_norm": 1.7723573234030114, "learning_rate": 1.2293475220347212e-06, "loss": 0.6124, "step": 12167 }, { "epoch": 0.78, "grad_norm": 2.2426930132259764, "learning_rate": 1.2286668715215817e-06, "loss": 0.9105, "step": 12168 }, { "epoch": 0.78, "grad_norm": 1.8840243687957636, "learning_rate": 1.2279863830924183e-06, "loss": 0.717, "step": 12169 }, { "epoch": 0.78, "grad_norm": 1.8719131145506633, "learning_rate": 1.2273060567764711e-06, "loss": 0.7673, "step": 12170 }, { "epoch": 0.78, "grad_norm": 3.644360716458226, "learning_rate": 1.2266258926029851e-06, "loss": 0.7388, "step": 12171 }, { "epoch": 0.78, "grad_norm": 2.101368279960549, "learning_rate": 1.225945890601186e-06, "loss": 0.7466, "step": 12172 }, { "epoch": 0.78, "grad_norm": 1.9076679109208226, "learning_rate": 1.2252660508003045e-06, "loss": 0.7079, "step": 12173 }, { "epoch": 0.78, "grad_norm": 2.1208561835990674, "learning_rate": 1.2245863732295554e-06, "loss": 0.6794, "step": 12174 }, { "epoch": 0.78, "grad_norm": 2.963863638786104, "learning_rate": 1.2239068579181497e-06, "loss": 0.5948, "step": 12175 }, { "epoch": 0.78, "grad_norm": 2.9967773756671203, "learning_rate": 1.2232275048952956e-06, "loss": 0.8212, "step": 12176 }, { "epoch": 0.78, "grad_norm": 1.200903993967293, "learning_rate": 1.222548314190184e-06, "loss": 0.6868, "step": 12177 }, { "epoch": 0.78, "grad_norm": 1.7955797915032006, "learning_rate": 1.2218692858320114e-06, "loss": 0.6986, "step": 12178 }, { "epoch": 0.78, "grad_norm": 1.6111666287607758, "learning_rate": 1.2211904198499551e-06, "loss": 0.6197, "step": 12179 }, { "epoch": 0.78, "grad_norm": 1.7377269437288398, "learning_rate": 1.2205117162731956e-06, "loss": 0.7449, "step": 12180 }, { "epoch": 0.78, "grad_norm": 1.9004495815591613, "learning_rate": 1.2198331751309006e-06, "loss": 0.7439, "step": 12181 }, { "epoch": 0.78, "grad_norm": 1.7231059704676532, "learning_rate": 1.2191547964522326e-06, "loss": 0.7493, "step": 12182 }, { "epoch": 0.78, "grad_norm": 1.9274530607254639, "learning_rate": 1.2184765802663468e-06, "loss": 0.7907, "step": 12183 }, { "epoch": 0.78, "grad_norm": 1.783155580792419, "learning_rate": 1.21779852660239e-06, "loss": 0.7266, "step": 12184 }, { "epoch": 0.78, "grad_norm": 1.801395227830999, "learning_rate": 1.2171206354895081e-06, "loss": 0.8219, "step": 12185 }, { "epoch": 0.78, "grad_norm": 1.753232735230041, "learning_rate": 1.2164429069568295e-06, "loss": 0.7252, "step": 12186 }, { "epoch": 0.78, "grad_norm": 2.0337770169495006, "learning_rate": 1.2157653410334875e-06, "loss": 0.6256, "step": 12187 }, { "epoch": 0.78, "grad_norm": 1.8173103542132074, "learning_rate": 1.2150879377485959e-06, "loss": 0.8328, "step": 12188 }, { "epoch": 0.78, "grad_norm": 1.5671579229811214, "learning_rate": 1.214410697131273e-06, "loss": 0.6995, "step": 12189 }, { "epoch": 0.78, "grad_norm": 1.112386776272287, "learning_rate": 1.213733619210624e-06, "loss": 0.5755, "step": 12190 }, { "epoch": 0.78, "grad_norm": 1.531969420034792, "learning_rate": 1.2130567040157465e-06, "loss": 0.6891, "step": 12191 }, { "epoch": 0.78, "grad_norm": 1.643079503595154, "learning_rate": 1.2123799515757372e-06, "loss": 0.75, "step": 12192 }, { "epoch": 0.78, "grad_norm": 1.945568558589018, "learning_rate": 1.2117033619196762e-06, "loss": 0.9037, "step": 12193 }, { "epoch": 0.78, "grad_norm": 1.1356103466212395, "learning_rate": 1.2110269350766469e-06, "loss": 0.7088, "step": 12194 }, { "epoch": 0.78, "grad_norm": 1.7920199344300973, "learning_rate": 1.2103506710757156e-06, "loss": 0.6822, "step": 12195 }, { "epoch": 0.78, "grad_norm": 1.50346153158305, "learning_rate": 1.2096745699459505e-06, "loss": 0.6544, "step": 12196 }, { "epoch": 0.78, "grad_norm": 1.7136434236432316, "learning_rate": 1.2089986317164075e-06, "loss": 0.753, "step": 12197 }, { "epoch": 0.78, "grad_norm": 1.7413335764853766, "learning_rate": 1.208322856416138e-06, "loss": 0.7288, "step": 12198 }, { "epoch": 0.78, "grad_norm": 1.9158907723441319, "learning_rate": 1.2076472440741844e-06, "loss": 0.8546, "step": 12199 }, { "epoch": 0.78, "grad_norm": 1.7240758833055267, "learning_rate": 1.206971794719582e-06, "loss": 0.7503, "step": 12200 }, { "epoch": 0.78, "grad_norm": 1.5525499323388638, "learning_rate": 1.2062965083813643e-06, "loss": 0.7235, "step": 12201 }, { "epoch": 0.78, "grad_norm": 1.9434222046783183, "learning_rate": 1.2056213850885485e-06, "loss": 0.7743, "step": 12202 }, { "epoch": 0.78, "grad_norm": 1.943211019363579, "learning_rate": 1.2049464248701537e-06, "loss": 0.8626, "step": 12203 }, { "epoch": 0.78, "grad_norm": 2.1466132555239263, "learning_rate": 1.204271627755187e-06, "loss": 0.7606, "step": 12204 }, { "epoch": 0.78, "grad_norm": 1.4428610046457746, "learning_rate": 1.20359699377265e-06, "loss": 0.6896, "step": 12205 }, { "epoch": 0.78, "grad_norm": 1.8944375417412596, "learning_rate": 1.2029225229515368e-06, "loss": 0.8605, "step": 12206 }, { "epoch": 0.78, "grad_norm": 1.6493404397673985, "learning_rate": 1.202248215320835e-06, "loss": 0.8166, "step": 12207 }, { "epoch": 0.78, "grad_norm": 2.131079424603202, "learning_rate": 1.201574070909524e-06, "loss": 0.7962, "step": 12208 }, { "epoch": 0.78, "grad_norm": 1.9424814787732925, "learning_rate": 1.2009000897465782e-06, "loss": 0.6923, "step": 12209 }, { "epoch": 0.78, "grad_norm": 1.7694333195036798, "learning_rate": 1.2002262718609625e-06, "loss": 0.8445, "step": 12210 }, { "epoch": 0.78, "grad_norm": 2.161516767919224, "learning_rate": 1.1995526172816385e-06, "loss": 0.6645, "step": 12211 }, { "epoch": 0.78, "grad_norm": 1.7651106903656204, "learning_rate": 1.198879126037557e-06, "loss": 0.7298, "step": 12212 }, { "epoch": 0.78, "grad_norm": 1.2984630844237646, "learning_rate": 1.1982057981576634e-06, "loss": 0.6625, "step": 12213 }, { "epoch": 0.78, "grad_norm": 1.9984462544895205, "learning_rate": 1.197532633670896e-06, "loss": 0.787, "step": 12214 }, { "epoch": 0.78, "grad_norm": 1.6737924564125168, "learning_rate": 1.196859632606186e-06, "loss": 0.7422, "step": 12215 }, { "epoch": 0.78, "grad_norm": 0.989675511581766, "learning_rate": 1.196186794992457e-06, "loss": 0.5971, "step": 12216 }, { "epoch": 0.78, "grad_norm": 1.6225870460155019, "learning_rate": 1.1955141208586257e-06, "loss": 0.6972, "step": 12217 }, { "epoch": 0.78, "grad_norm": 2.2153645164833558, "learning_rate": 1.1948416102336063e-06, "loss": 0.764, "step": 12218 }, { "epoch": 0.78, "grad_norm": 1.9337747479394385, "learning_rate": 1.1941692631462954e-06, "loss": 0.7692, "step": 12219 }, { "epoch": 0.78, "grad_norm": 1.2309934627650132, "learning_rate": 1.193497079625594e-06, "loss": 0.6257, "step": 12220 }, { "epoch": 0.78, "grad_norm": 2.2886059621384596, "learning_rate": 1.1928250597003893e-06, "loss": 0.7986, "step": 12221 }, { "epoch": 0.78, "grad_norm": 2.4403528232199467, "learning_rate": 1.1921532033995636e-06, "loss": 0.6363, "step": 12222 }, { "epoch": 0.78, "grad_norm": 1.6100500515137375, "learning_rate": 1.1914815107519922e-06, "loss": 0.7813, "step": 12223 }, { "epoch": 0.78, "grad_norm": 1.6353141127369495, "learning_rate": 1.1908099817865427e-06, "loss": 0.6977, "step": 12224 }, { "epoch": 0.78, "grad_norm": 3.5247156364781866, "learning_rate": 1.1901386165320755e-06, "loss": 0.6886, "step": 12225 }, { "epoch": 0.78, "grad_norm": 1.8817614971029533, "learning_rate": 1.189467415017444e-06, "loss": 0.6931, "step": 12226 }, { "epoch": 0.78, "grad_norm": 1.7108774041204995, "learning_rate": 1.1887963772714982e-06, "loss": 0.7059, "step": 12227 }, { "epoch": 0.78, "grad_norm": 1.8218643979048645, "learning_rate": 1.1881255033230732e-06, "loss": 0.6425, "step": 12228 }, { "epoch": 0.78, "grad_norm": 1.5568591190712573, "learning_rate": 1.1874547932010054e-06, "loss": 0.6802, "step": 12229 }, { "epoch": 0.78, "grad_norm": 1.507719755390021, "learning_rate": 1.18678424693412e-06, "loss": 0.7042, "step": 12230 }, { "epoch": 0.78, "grad_norm": 1.692458491150171, "learning_rate": 1.1861138645512343e-06, "loss": 0.8398, "step": 12231 }, { "epoch": 0.78, "grad_norm": 1.134885996965249, "learning_rate": 1.1854436460811608e-06, "loss": 0.7501, "step": 12232 }, { "epoch": 0.78, "grad_norm": 1.739172975470784, "learning_rate": 1.1847735915527026e-06, "loss": 0.6453, "step": 12233 }, { "epoch": 0.78, "grad_norm": 1.543718722370076, "learning_rate": 1.1841037009946616e-06, "loss": 0.6301, "step": 12234 }, { "epoch": 0.78, "grad_norm": 1.7841584630677845, "learning_rate": 1.183433974435822e-06, "loss": 0.7124, "step": 12235 }, { "epoch": 0.78, "grad_norm": 1.7580033114737228, "learning_rate": 1.1827644119049735e-06, "loss": 0.7885, "step": 12236 }, { "epoch": 0.78, "grad_norm": 1.7024918615106117, "learning_rate": 1.1820950134308862e-06, "loss": 0.7831, "step": 12237 }, { "epoch": 0.78, "grad_norm": 1.636609100216191, "learning_rate": 1.1814257790423345e-06, "loss": 0.6507, "step": 12238 }, { "epoch": 0.78, "grad_norm": 1.867924004507076, "learning_rate": 1.1807567087680787e-06, "loss": 0.7508, "step": 12239 }, { "epoch": 0.78, "grad_norm": 1.5286895363858466, "learning_rate": 1.1800878026368734e-06, "loss": 0.6745, "step": 12240 }, { "epoch": 0.78, "grad_norm": 1.6542099194257716, "learning_rate": 1.1794190606774696e-06, "loss": 0.6937, "step": 12241 }, { "epoch": 0.78, "grad_norm": 1.7910500058326715, "learning_rate": 1.1787504829186043e-06, "loss": 0.7387, "step": 12242 }, { "epoch": 0.78, "grad_norm": 1.2631863583586893, "learning_rate": 1.178082069389016e-06, "loss": 0.6127, "step": 12243 }, { "epoch": 0.78, "grad_norm": 2.009096193902988, "learning_rate": 1.1774138201174268e-06, "loss": 0.749, "step": 12244 }, { "epoch": 0.78, "grad_norm": 1.5896848630979137, "learning_rate": 1.1767457351325605e-06, "loss": 0.6777, "step": 12245 }, { "epoch": 0.78, "grad_norm": 1.8375962075012573, "learning_rate": 1.176077814463129e-06, "loss": 0.7962, "step": 12246 }, { "epoch": 0.78, "grad_norm": 1.616209847205421, "learning_rate": 1.1754100581378365e-06, "loss": 0.676, "step": 12247 }, { "epoch": 0.78, "grad_norm": 1.2985603449494736, "learning_rate": 1.1747424661853857e-06, "loss": 0.6834, "step": 12248 }, { "epoch": 0.78, "grad_norm": 1.8758471870204338, "learning_rate": 1.174075038634463e-06, "loss": 0.7717, "step": 12249 }, { "epoch": 0.78, "grad_norm": 1.973524175587282, "learning_rate": 1.1734077755137585e-06, "loss": 0.7944, "step": 12250 }, { "epoch": 0.78, "grad_norm": 1.8132813517602742, "learning_rate": 1.1727406768519444e-06, "loss": 0.7848, "step": 12251 }, { "epoch": 0.78, "grad_norm": 2.358593536106954, "learning_rate": 1.1720737426776952e-06, "loss": 0.8145, "step": 12252 }, { "epoch": 0.78, "grad_norm": 1.7712019253479643, "learning_rate": 1.171406973019673e-06, "loss": 0.7055, "step": 12253 }, { "epoch": 0.78, "grad_norm": 1.5666519960324852, "learning_rate": 1.1707403679065337e-06, "loss": 0.6028, "step": 12254 }, { "epoch": 0.78, "grad_norm": 1.5445047410007535, "learning_rate": 1.1700739273669277e-06, "loss": 0.7473, "step": 12255 }, { "epoch": 0.78, "grad_norm": 1.157148786898804, "learning_rate": 1.1694076514294945e-06, "loss": 0.6345, "step": 12256 }, { "epoch": 0.78, "grad_norm": 1.0834260749615137, "learning_rate": 1.168741540122874e-06, "loss": 0.5964, "step": 12257 }, { "epoch": 0.78, "grad_norm": 1.9806038903368313, "learning_rate": 1.1680755934756894e-06, "loss": 0.6873, "step": 12258 }, { "epoch": 0.78, "grad_norm": 1.7667685430642825, "learning_rate": 1.1674098115165645e-06, "loss": 0.8102, "step": 12259 }, { "epoch": 0.78, "grad_norm": 1.9116078687764457, "learning_rate": 1.1667441942741132e-06, "loss": 0.7575, "step": 12260 }, { "epoch": 0.78, "grad_norm": 1.9392988994360625, "learning_rate": 1.166078741776941e-06, "loss": 0.6754, "step": 12261 }, { "epoch": 0.78, "grad_norm": 1.7219095717356774, "learning_rate": 1.1654134540536487e-06, "loss": 0.6884, "step": 12262 }, { "epoch": 0.78, "grad_norm": 2.0979001818163225, "learning_rate": 1.1647483311328285e-06, "loss": 0.7722, "step": 12263 }, { "epoch": 0.78, "grad_norm": 2.167981358004614, "learning_rate": 1.1640833730430663e-06, "loss": 0.7521, "step": 12264 }, { "epoch": 0.79, "grad_norm": 1.9576153187000023, "learning_rate": 1.1634185798129383e-06, "loss": 0.7547, "step": 12265 }, { "epoch": 0.79, "grad_norm": 1.725022590101318, "learning_rate": 1.1627539514710213e-06, "loss": 0.721, "step": 12266 }, { "epoch": 0.79, "grad_norm": 1.5969394546290592, "learning_rate": 1.1620894880458732e-06, "loss": 0.7267, "step": 12267 }, { "epoch": 0.79, "grad_norm": 1.089877616393692, "learning_rate": 1.161425189566056e-06, "loss": 0.6532, "step": 12268 }, { "epoch": 0.79, "grad_norm": 1.5983422707512283, "learning_rate": 1.1607610560601179e-06, "loss": 0.6859, "step": 12269 }, { "epoch": 0.79, "grad_norm": 1.7106855264174736, "learning_rate": 1.1600970875566025e-06, "loss": 0.7522, "step": 12270 }, { "epoch": 0.79, "grad_norm": 1.8311109292889578, "learning_rate": 1.1594332840840455e-06, "loss": 0.8046, "step": 12271 }, { "epoch": 0.79, "grad_norm": 2.0777258045417035, "learning_rate": 1.1587696456709758e-06, "loss": 0.7716, "step": 12272 }, { "epoch": 0.79, "grad_norm": 1.7342323826892052, "learning_rate": 1.1581061723459153e-06, "loss": 0.6497, "step": 12273 }, { "epoch": 0.79, "grad_norm": 2.0698503616522825, "learning_rate": 1.1574428641373769e-06, "loss": 0.7132, "step": 12274 }, { "epoch": 0.79, "grad_norm": 1.4835486627009649, "learning_rate": 1.1567797210738713e-06, "loss": 0.661, "step": 12275 }, { "epoch": 0.79, "grad_norm": 1.7659705455095176, "learning_rate": 1.1561167431838977e-06, "loss": 0.7756, "step": 12276 }, { "epoch": 0.79, "grad_norm": 1.856934378104385, "learning_rate": 1.1554539304959494e-06, "loss": 0.7488, "step": 12277 }, { "epoch": 0.79, "grad_norm": 1.785040195372321, "learning_rate": 1.1547912830385127e-06, "loss": 0.6949, "step": 12278 }, { "epoch": 0.79, "grad_norm": 1.2773036180627264, "learning_rate": 1.1541288008400665e-06, "loss": 0.6641, "step": 12279 }, { "epoch": 0.79, "grad_norm": 1.9431019944647627, "learning_rate": 1.1534664839290832e-06, "loss": 0.7293, "step": 12280 }, { "epoch": 0.79, "grad_norm": 1.238087312342024, "learning_rate": 1.1528043323340281e-06, "loss": 0.6096, "step": 12281 }, { "epoch": 0.79, "grad_norm": 1.814272244929363, "learning_rate": 1.152142346083357e-06, "loss": 0.7791, "step": 12282 }, { "epoch": 0.79, "grad_norm": 1.1974206758123376, "learning_rate": 1.151480525205525e-06, "loss": 0.7514, "step": 12283 }, { "epoch": 0.79, "grad_norm": 1.622845592097443, "learning_rate": 1.1508188697289707e-06, "loss": 0.7602, "step": 12284 }, { "epoch": 0.79, "grad_norm": 0.9939227140390592, "learning_rate": 1.1501573796821348e-06, "loss": 0.5883, "step": 12285 }, { "epoch": 0.79, "grad_norm": 1.6711462284970886, "learning_rate": 1.1494960550934448e-06, "loss": 0.6159, "step": 12286 }, { "epoch": 0.79, "grad_norm": 1.5122994517119168, "learning_rate": 1.148834895991323e-06, "loss": 0.7942, "step": 12287 }, { "epoch": 0.79, "grad_norm": 1.6061058125899, "learning_rate": 1.1481739024041856e-06, "loss": 0.7122, "step": 12288 }, { "epoch": 0.79, "grad_norm": 1.6996822466385906, "learning_rate": 1.147513074360438e-06, "loss": 0.6936, "step": 12289 }, { "epoch": 0.79, "grad_norm": 1.1062350525013869, "learning_rate": 1.1468524118884866e-06, "loss": 0.6743, "step": 12290 }, { "epoch": 0.79, "grad_norm": 1.7113952432869088, "learning_rate": 1.1461919150167189e-06, "loss": 0.6818, "step": 12291 }, { "epoch": 0.79, "grad_norm": 2.0173493816038426, "learning_rate": 1.1455315837735276e-06, "loss": 0.729, "step": 12292 }, { "epoch": 0.79, "grad_norm": 1.814988449459017, "learning_rate": 1.1448714181872867e-06, "loss": 0.6788, "step": 12293 }, { "epoch": 0.79, "grad_norm": 1.9382442455197324, "learning_rate": 1.1442114182863728e-06, "loss": 0.6837, "step": 12294 }, { "epoch": 0.79, "grad_norm": 3.0375077803072363, "learning_rate": 1.1435515840991502e-06, "loss": 0.6199, "step": 12295 }, { "epoch": 0.79, "grad_norm": 1.6973615038769174, "learning_rate": 1.1428919156539769e-06, "loss": 0.7299, "step": 12296 }, { "epoch": 0.79, "grad_norm": 1.3631107319421516, "learning_rate": 1.1422324129792039e-06, "loss": 0.6936, "step": 12297 }, { "epoch": 0.79, "grad_norm": 1.5488386577855706, "learning_rate": 1.1415730761031745e-06, "loss": 0.6779, "step": 12298 }, { "epoch": 0.79, "grad_norm": 1.8708968920236337, "learning_rate": 1.1409139050542295e-06, "loss": 0.7829, "step": 12299 }, { "epoch": 0.79, "grad_norm": 1.863862364992071, "learning_rate": 1.1402548998606927e-06, "loss": 0.7367, "step": 12300 }, { "epoch": 0.79, "grad_norm": 1.6516143601415856, "learning_rate": 1.1395960605508916e-06, "loss": 0.6677, "step": 12301 }, { "epoch": 0.79, "grad_norm": 1.5851372793219316, "learning_rate": 1.1389373871531395e-06, "loss": 0.6455, "step": 12302 }, { "epoch": 0.79, "grad_norm": 1.354747461189677, "learning_rate": 1.1382788796957456e-06, "loss": 0.5966, "step": 12303 }, { "epoch": 0.79, "grad_norm": 2.2137757400463745, "learning_rate": 1.1376205382070105e-06, "loss": 0.7526, "step": 12304 }, { "epoch": 0.79, "grad_norm": 0.9734281042143104, "learning_rate": 1.1369623627152276e-06, "loss": 0.6147, "step": 12305 }, { "epoch": 0.79, "grad_norm": 1.0960183541195267, "learning_rate": 1.1363043532486879e-06, "loss": 0.6127, "step": 12306 }, { "epoch": 0.79, "grad_norm": 1.6151099285342518, "learning_rate": 1.1356465098356656e-06, "loss": 0.6456, "step": 12307 }, { "epoch": 0.79, "grad_norm": 2.037650215687274, "learning_rate": 1.1349888325044383e-06, "loss": 0.7256, "step": 12308 }, { "epoch": 0.79, "grad_norm": 1.0599383660119854, "learning_rate": 1.1343313212832674e-06, "loss": 0.6429, "step": 12309 }, { "epoch": 0.79, "grad_norm": 0.9237966303469619, "learning_rate": 1.1336739762004144e-06, "loss": 0.647, "step": 12310 }, { "epoch": 0.79, "grad_norm": 2.128383197321213, "learning_rate": 1.13301679728413e-06, "loss": 0.723, "step": 12311 }, { "epoch": 0.79, "grad_norm": 2.155216694420871, "learning_rate": 1.1323597845626566e-06, "loss": 0.7601, "step": 12312 }, { "epoch": 0.79, "grad_norm": 1.6369254753706477, "learning_rate": 1.1317029380642353e-06, "loss": 0.7592, "step": 12313 }, { "epoch": 0.79, "grad_norm": 1.8024565819011977, "learning_rate": 1.1310462578170906e-06, "loss": 0.6635, "step": 12314 }, { "epoch": 0.79, "grad_norm": 1.639026559363471, "learning_rate": 1.1303897438494503e-06, "loss": 0.7958, "step": 12315 }, { "epoch": 0.79, "grad_norm": 1.0842487166129755, "learning_rate": 1.129733396189525e-06, "loss": 0.7316, "step": 12316 }, { "epoch": 0.79, "grad_norm": 1.9661158627315793, "learning_rate": 1.1290772148655278e-06, "loss": 0.7052, "step": 12317 }, { "epoch": 0.79, "grad_norm": 1.7274130141864985, "learning_rate": 1.1284211999056572e-06, "loss": 0.8167, "step": 12318 }, { "epoch": 0.79, "grad_norm": 1.3885097483851574, "learning_rate": 1.1277653513381083e-06, "loss": 0.5762, "step": 12319 }, { "epoch": 0.79, "grad_norm": 1.8678817909828724, "learning_rate": 1.1271096691910682e-06, "loss": 0.7385, "step": 12320 }, { "epoch": 0.79, "grad_norm": 1.8165042327864982, "learning_rate": 1.1264541534927148e-06, "loss": 0.8697, "step": 12321 }, { "epoch": 0.79, "grad_norm": 2.4225291560808166, "learning_rate": 1.1257988042712254e-06, "loss": 0.6831, "step": 12322 }, { "epoch": 0.79, "grad_norm": 1.9220765063410796, "learning_rate": 1.1251436215547596e-06, "loss": 0.7473, "step": 12323 }, { "epoch": 0.79, "grad_norm": 1.666520784993274, "learning_rate": 1.1244886053714803e-06, "loss": 0.7399, "step": 12324 }, { "epoch": 0.79, "grad_norm": 1.5953685659740193, "learning_rate": 1.1238337557495372e-06, "loss": 0.7073, "step": 12325 }, { "epoch": 0.79, "grad_norm": 2.322245872999106, "learning_rate": 1.1231790727170744e-06, "loss": 0.779, "step": 12326 }, { "epoch": 0.79, "grad_norm": 1.9044327323740118, "learning_rate": 1.1225245563022285e-06, "loss": 0.7351, "step": 12327 }, { "epoch": 0.79, "grad_norm": 1.244680855800296, "learning_rate": 1.1218702065331295e-06, "loss": 0.7611, "step": 12328 }, { "epoch": 0.79, "grad_norm": 1.770683159598792, "learning_rate": 1.1212160234378999e-06, "loss": 0.6521, "step": 12329 }, { "epoch": 0.79, "grad_norm": 1.553728469345301, "learning_rate": 1.1205620070446537e-06, "loss": 0.694, "step": 12330 }, { "epoch": 0.79, "grad_norm": 1.1282621485439448, "learning_rate": 1.1199081573815023e-06, "loss": 0.6197, "step": 12331 }, { "epoch": 0.79, "grad_norm": 1.1655217715166855, "learning_rate": 1.1192544744765448e-06, "loss": 0.7201, "step": 12332 }, { "epoch": 0.79, "grad_norm": 1.0736971509627697, "learning_rate": 1.1186009583578761e-06, "loss": 0.6974, "step": 12333 }, { "epoch": 0.79, "grad_norm": 1.7756126401903072, "learning_rate": 1.1179476090535818e-06, "loss": 0.7169, "step": 12334 }, { "epoch": 0.79, "grad_norm": 1.647161662881163, "learning_rate": 1.1172944265917419e-06, "loss": 0.7127, "step": 12335 }, { "epoch": 0.79, "grad_norm": 1.5471924542431494, "learning_rate": 1.1166414110004286e-06, "loss": 0.6682, "step": 12336 }, { "epoch": 0.79, "grad_norm": 2.2366448191270822, "learning_rate": 1.1159885623077076e-06, "loss": 0.7301, "step": 12337 }, { "epoch": 0.79, "grad_norm": 1.6799444456006725, "learning_rate": 1.115335880541637e-06, "loss": 0.8636, "step": 12338 }, { "epoch": 0.79, "grad_norm": 1.7248714172253066, "learning_rate": 1.1146833657302659e-06, "loss": 0.9359, "step": 12339 }, { "epoch": 0.79, "grad_norm": 1.5380339427944532, "learning_rate": 1.1140310179016412e-06, "loss": 0.6612, "step": 12340 }, { "epoch": 0.79, "grad_norm": 1.439245555663837, "learning_rate": 1.1133788370837972e-06, "loss": 0.754, "step": 12341 }, { "epoch": 0.79, "grad_norm": 1.0542223682348575, "learning_rate": 1.1127268233047645e-06, "loss": 0.5982, "step": 12342 }, { "epoch": 0.79, "grad_norm": 1.6578882605133771, "learning_rate": 1.1120749765925643e-06, "loss": 0.6429, "step": 12343 }, { "epoch": 0.79, "grad_norm": 1.5991798170009404, "learning_rate": 1.111423296975212e-06, "loss": 0.7475, "step": 12344 }, { "epoch": 0.79, "grad_norm": 1.0828376534811506, "learning_rate": 1.1107717844807153e-06, "loss": 0.6991, "step": 12345 }, { "epoch": 0.79, "grad_norm": 1.864454026067198, "learning_rate": 1.110120439137075e-06, "loss": 0.6604, "step": 12346 }, { "epoch": 0.79, "grad_norm": 0.9947160308510009, "learning_rate": 1.1094692609722829e-06, "loss": 0.6494, "step": 12347 }, { "epoch": 0.79, "grad_norm": 1.2924631911227167, "learning_rate": 1.1088182500143286e-06, "loss": 0.5884, "step": 12348 }, { "epoch": 0.79, "grad_norm": 1.6454545683848432, "learning_rate": 1.108167406291189e-06, "loss": 0.7115, "step": 12349 }, { "epoch": 0.79, "grad_norm": 1.5735348705049634, "learning_rate": 1.1075167298308364e-06, "loss": 0.711, "step": 12350 }, { "epoch": 0.79, "grad_norm": 2.3320647286945686, "learning_rate": 1.1068662206612363e-06, "loss": 0.6525, "step": 12351 }, { "epoch": 0.79, "grad_norm": 1.8751228809733012, "learning_rate": 1.1062158788103444e-06, "loss": 0.9123, "step": 12352 }, { "epoch": 0.79, "grad_norm": 1.6929581778439435, "learning_rate": 1.1055657043061124e-06, "loss": 0.6554, "step": 12353 }, { "epoch": 0.79, "grad_norm": 1.7880426498547477, "learning_rate": 1.104915697176482e-06, "loss": 0.8394, "step": 12354 }, { "epoch": 0.79, "grad_norm": 1.846920387581844, "learning_rate": 1.104265857449393e-06, "loss": 0.709, "step": 12355 }, { "epoch": 0.79, "grad_norm": 1.803478143881312, "learning_rate": 1.1036161851527682e-06, "loss": 0.832, "step": 12356 }, { "epoch": 0.79, "grad_norm": 2.0326491865971077, "learning_rate": 1.1029666803145356e-06, "loss": 0.6875, "step": 12357 }, { "epoch": 0.79, "grad_norm": 1.7271024206406818, "learning_rate": 1.1023173429626032e-06, "loss": 0.7092, "step": 12358 }, { "epoch": 0.79, "grad_norm": 1.8249380122672514, "learning_rate": 1.101668173124883e-06, "loss": 0.7026, "step": 12359 }, { "epoch": 0.79, "grad_norm": 1.7091830980173055, "learning_rate": 1.1010191708292728e-06, "loss": 0.6542, "step": 12360 }, { "epoch": 0.79, "grad_norm": 1.570430320917965, "learning_rate": 1.1003703361036644e-06, "loss": 0.6919, "step": 12361 }, { "epoch": 0.79, "grad_norm": 1.6006726994084097, "learning_rate": 1.0997216689759472e-06, "loss": 0.6136, "step": 12362 }, { "epoch": 0.79, "grad_norm": 1.6004258142299073, "learning_rate": 1.0990731694739947e-06, "loss": 0.5929, "step": 12363 }, { "epoch": 0.79, "grad_norm": 1.4167039597344868, "learning_rate": 1.0984248376256835e-06, "loss": 0.6463, "step": 12364 }, { "epoch": 0.79, "grad_norm": 1.709496514069697, "learning_rate": 1.0977766734588707e-06, "loss": 0.6542, "step": 12365 }, { "epoch": 0.79, "grad_norm": 1.5568734887472393, "learning_rate": 1.097128677001419e-06, "loss": 0.6785, "step": 12366 }, { "epoch": 0.79, "grad_norm": 1.6898452335888534, "learning_rate": 1.0964808482811751e-06, "loss": 0.8123, "step": 12367 }, { "epoch": 0.79, "grad_norm": 1.0165629127126279, "learning_rate": 1.0958331873259808e-06, "loss": 0.6112, "step": 12368 }, { "epoch": 0.79, "grad_norm": 1.8629339299172065, "learning_rate": 1.0951856941636752e-06, "loss": 0.7055, "step": 12369 }, { "epoch": 0.79, "grad_norm": 1.7367271787141052, "learning_rate": 1.0945383688220807e-06, "loss": 0.7582, "step": 12370 }, { "epoch": 0.79, "grad_norm": 0.9683144196996573, "learning_rate": 1.093891211329023e-06, "loss": 0.5495, "step": 12371 }, { "epoch": 0.79, "grad_norm": 2.0233414444129028, "learning_rate": 1.0932442217123106e-06, "loss": 0.722, "step": 12372 }, { "epoch": 0.79, "grad_norm": 1.9085977063903774, "learning_rate": 1.0925973999997535e-06, "loss": 0.7757, "step": 12373 }, { "epoch": 0.79, "grad_norm": 1.750563327736193, "learning_rate": 1.0919507462191498e-06, "loss": 0.7198, "step": 12374 }, { "epoch": 0.79, "grad_norm": 1.6675100282832798, "learning_rate": 1.091304260398291e-06, "loss": 0.7265, "step": 12375 }, { "epoch": 0.79, "grad_norm": 1.73849390395077, "learning_rate": 1.0906579425649622e-06, "loss": 0.6844, "step": 12376 }, { "epoch": 0.79, "grad_norm": 1.9178736560811485, "learning_rate": 1.0900117927469384e-06, "loss": 0.7736, "step": 12377 }, { "epoch": 0.79, "grad_norm": 1.8744681841164779, "learning_rate": 1.0893658109719946e-06, "loss": 0.8115, "step": 12378 }, { "epoch": 0.79, "grad_norm": 1.0781643062057835, "learning_rate": 1.088719997267888e-06, "loss": 0.5609, "step": 12379 }, { "epoch": 0.79, "grad_norm": 1.6732045544485752, "learning_rate": 1.0880743516623804e-06, "loss": 0.7232, "step": 12380 }, { "epoch": 0.79, "grad_norm": 1.7499782075621746, "learning_rate": 1.087428874183214e-06, "loss": 0.6878, "step": 12381 }, { "epoch": 0.79, "grad_norm": 1.8627811774148297, "learning_rate": 1.0867835648581344e-06, "loss": 0.6321, "step": 12382 }, { "epoch": 0.79, "grad_norm": 1.7178731822384883, "learning_rate": 1.0861384237148749e-06, "loss": 0.7017, "step": 12383 }, { "epoch": 0.79, "grad_norm": 1.5542793516491, "learning_rate": 1.0854934507811609e-06, "loss": 0.7261, "step": 12384 }, { "epoch": 0.79, "grad_norm": 1.8438361898873736, "learning_rate": 1.0848486460847135e-06, "loss": 0.7436, "step": 12385 }, { "epoch": 0.79, "grad_norm": 1.5719795458011019, "learning_rate": 1.084204009653243e-06, "loss": 0.7855, "step": 12386 }, { "epoch": 0.79, "grad_norm": 1.8913345204540153, "learning_rate": 1.083559541514459e-06, "loss": 0.6853, "step": 12387 }, { "epoch": 0.79, "grad_norm": 1.0945227081974642, "learning_rate": 1.082915241696053e-06, "loss": 0.6123, "step": 12388 }, { "epoch": 0.79, "grad_norm": 1.7250516711438506, "learning_rate": 1.0822711102257205e-06, "loss": 0.8453, "step": 12389 }, { "epoch": 0.79, "grad_norm": 1.318344517378772, "learning_rate": 1.0816271471311434e-06, "loss": 0.5716, "step": 12390 }, { "epoch": 0.79, "grad_norm": 1.7967367152520497, "learning_rate": 1.0809833524399981e-06, "loss": 0.6683, "step": 12391 }, { "epoch": 0.79, "grad_norm": 1.5236667944616051, "learning_rate": 1.0803397261799536e-06, "loss": 0.6167, "step": 12392 }, { "epoch": 0.79, "grad_norm": 1.6853631451612934, "learning_rate": 1.079696268378671e-06, "loss": 0.6734, "step": 12393 }, { "epoch": 0.79, "grad_norm": 1.6959599186813827, "learning_rate": 1.0790529790638048e-06, "loss": 0.7587, "step": 12394 }, { "epoch": 0.79, "grad_norm": 2.0566974265576565, "learning_rate": 1.0784098582630015e-06, "loss": 0.6607, "step": 12395 }, { "epoch": 0.79, "grad_norm": 1.5624952314210232, "learning_rate": 1.077766906003903e-06, "loss": 0.6581, "step": 12396 }, { "epoch": 0.79, "grad_norm": 1.7498092780643284, "learning_rate": 1.0771241223141415e-06, "loss": 0.6776, "step": 12397 }, { "epoch": 0.79, "grad_norm": 1.6009454118267936, "learning_rate": 1.0764815072213425e-06, "loss": 0.7018, "step": 12398 }, { "epoch": 0.79, "grad_norm": 1.7092558629273011, "learning_rate": 1.0758390607531232e-06, "loss": 0.7315, "step": 12399 }, { "epoch": 0.79, "grad_norm": 2.0342538414981104, "learning_rate": 1.0751967829370957e-06, "loss": 0.643, "step": 12400 }, { "epoch": 0.79, "grad_norm": 1.929334047697817, "learning_rate": 1.0745546738008627e-06, "loss": 0.6738, "step": 12401 }, { "epoch": 0.79, "grad_norm": 1.095500897027072, "learning_rate": 1.0739127333720223e-06, "loss": 0.5526, "step": 12402 }, { "epoch": 0.79, "grad_norm": 1.764751215213385, "learning_rate": 1.0732709616781606e-06, "loss": 0.6872, "step": 12403 }, { "epoch": 0.79, "grad_norm": 1.8590020023136935, "learning_rate": 1.0726293587468634e-06, "loss": 0.6501, "step": 12404 }, { "epoch": 0.79, "grad_norm": 1.8857086828603846, "learning_rate": 1.071987924605704e-06, "loss": 0.7685, "step": 12405 }, { "epoch": 0.79, "grad_norm": 1.095989219275565, "learning_rate": 1.0713466592822498e-06, "loss": 0.6372, "step": 12406 }, { "epoch": 0.79, "grad_norm": 1.6850794865077818, "learning_rate": 1.0707055628040602e-06, "loss": 0.6504, "step": 12407 }, { "epoch": 0.79, "grad_norm": 1.8020186745288629, "learning_rate": 1.0700646351986899e-06, "loss": 0.692, "step": 12408 }, { "epoch": 0.79, "grad_norm": 1.7791787703248845, "learning_rate": 1.0694238764936827e-06, "loss": 0.7553, "step": 12409 }, { "epoch": 0.79, "grad_norm": 1.681067716638141, "learning_rate": 1.0687832867165771e-06, "loss": 0.8436, "step": 12410 }, { "epoch": 0.79, "grad_norm": 1.0822563537569574, "learning_rate": 1.0681428658949083e-06, "loss": 0.5988, "step": 12411 }, { "epoch": 0.79, "grad_norm": 1.6833597019337667, "learning_rate": 1.067502614056194e-06, "loss": 0.6889, "step": 12412 }, { "epoch": 0.79, "grad_norm": 1.6895207916829742, "learning_rate": 1.0668625312279552e-06, "loss": 0.6797, "step": 12413 }, { "epoch": 0.79, "grad_norm": 1.239041652617325, "learning_rate": 1.0662226174377005e-06, "loss": 0.6086, "step": 12414 }, { "epoch": 0.79, "grad_norm": 1.6566635117060537, "learning_rate": 1.0655828727129319e-06, "loss": 0.6783, "step": 12415 }, { "epoch": 0.79, "grad_norm": 1.1784043572564649, "learning_rate": 1.0649432970811434e-06, "loss": 0.6353, "step": 12416 }, { "epoch": 0.79, "grad_norm": 1.5935089085274723, "learning_rate": 1.0643038905698239e-06, "loss": 0.6999, "step": 12417 }, { "epoch": 0.79, "grad_norm": 1.660616694380354, "learning_rate": 1.0636646532064531e-06, "loss": 0.5994, "step": 12418 }, { "epoch": 0.79, "grad_norm": 2.243620604869737, "learning_rate": 1.0630255850185024e-06, "loss": 0.7411, "step": 12419 }, { "epoch": 0.79, "grad_norm": 1.95828920848536, "learning_rate": 1.0623866860334425e-06, "loss": 0.7317, "step": 12420 }, { "epoch": 0.8, "grad_norm": 1.5318877627393033, "learning_rate": 1.0617479562787258e-06, "loss": 0.6312, "step": 12421 }, { "epoch": 0.8, "grad_norm": 1.995419440369588, "learning_rate": 1.061109395781808e-06, "loss": 0.7672, "step": 12422 }, { "epoch": 0.8, "grad_norm": 1.8975583218432628, "learning_rate": 1.060471004570131e-06, "loss": 0.6927, "step": 12423 }, { "epoch": 0.8, "grad_norm": 1.8632611883381356, "learning_rate": 1.059832782671133e-06, "loss": 0.7953, "step": 12424 }, { "epoch": 0.8, "grad_norm": 1.618387885673153, "learning_rate": 1.0591947301122424e-06, "loss": 0.7344, "step": 12425 }, { "epoch": 0.8, "grad_norm": 1.0358043503228878, "learning_rate": 1.0585568469208795e-06, "loss": 0.584, "step": 12426 }, { "epoch": 0.8, "grad_norm": 1.766243457598551, "learning_rate": 1.0579191331244648e-06, "loss": 0.7291, "step": 12427 }, { "epoch": 0.8, "grad_norm": 1.5394563848573655, "learning_rate": 1.057281588750399e-06, "loss": 0.6377, "step": 12428 }, { "epoch": 0.8, "grad_norm": 2.537320728998877, "learning_rate": 1.0566442138260885e-06, "loss": 0.8859, "step": 12429 }, { "epoch": 0.8, "grad_norm": 1.7640453469460449, "learning_rate": 1.0560070083789214e-06, "loss": 0.7539, "step": 12430 }, { "epoch": 0.8, "grad_norm": 1.8049102100204362, "learning_rate": 1.0553699724362866e-06, "loss": 0.7705, "step": 12431 }, { "epoch": 0.8, "grad_norm": 1.7149913244797044, "learning_rate": 1.0547331060255612e-06, "loss": 0.8085, "step": 12432 }, { "epoch": 0.8, "grad_norm": 1.6636056470181615, "learning_rate": 1.0540964091741157e-06, "loss": 0.6457, "step": 12433 }, { "epoch": 0.8, "grad_norm": 2.2767499427766147, "learning_rate": 1.0534598819093178e-06, "loss": 0.7547, "step": 12434 }, { "epoch": 0.8, "grad_norm": 1.6623967702690159, "learning_rate": 1.0528235242585188e-06, "loss": 0.6513, "step": 12435 }, { "epoch": 0.8, "grad_norm": 1.1704089828547346, "learning_rate": 1.052187336249073e-06, "loss": 0.6008, "step": 12436 }, { "epoch": 0.8, "grad_norm": 1.386109705229132, "learning_rate": 1.0515513179083176e-06, "loss": 0.5545, "step": 12437 }, { "epoch": 0.8, "grad_norm": 1.507176051149259, "learning_rate": 1.0509154692635908e-06, "loss": 0.6386, "step": 12438 }, { "epoch": 0.8, "grad_norm": 1.4978155560183743, "learning_rate": 1.050279790342219e-06, "loss": 0.5413, "step": 12439 }, { "epoch": 0.8, "grad_norm": 1.7432516749326774, "learning_rate": 1.0496442811715224e-06, "loss": 0.8007, "step": 12440 }, { "epoch": 0.8, "grad_norm": 1.933264389732372, "learning_rate": 1.0490089417788135e-06, "loss": 0.7202, "step": 12441 }, { "epoch": 0.8, "grad_norm": 1.059849056987339, "learning_rate": 1.0483737721913968e-06, "loss": 0.6418, "step": 12442 }, { "epoch": 0.8, "grad_norm": 2.490250707070677, "learning_rate": 1.0477387724365752e-06, "loss": 0.7436, "step": 12443 }, { "epoch": 0.8, "grad_norm": 1.6184755318513453, "learning_rate": 1.0471039425416329e-06, "loss": 0.7221, "step": 12444 }, { "epoch": 0.8, "grad_norm": 1.560571536172683, "learning_rate": 1.0464692825338584e-06, "loss": 0.6439, "step": 12445 }, { "epoch": 0.8, "grad_norm": 1.639976220609785, "learning_rate": 1.0458347924405266e-06, "loss": 0.6852, "step": 12446 }, { "epoch": 0.8, "grad_norm": 1.5713838348316305, "learning_rate": 1.045200472288907e-06, "loss": 0.7216, "step": 12447 }, { "epoch": 0.8, "grad_norm": 2.1363602987445, "learning_rate": 1.0445663221062606e-06, "loss": 0.804, "step": 12448 }, { "epoch": 0.8, "grad_norm": 1.6821919683361364, "learning_rate": 1.0439323419198422e-06, "loss": 0.7079, "step": 12449 }, { "epoch": 0.8, "grad_norm": 1.6500214319943332, "learning_rate": 1.0432985317568988e-06, "loss": 0.7556, "step": 12450 }, { "epoch": 0.8, "grad_norm": 1.7122614936553568, "learning_rate": 1.0426648916446692e-06, "loss": 0.6165, "step": 12451 }, { "epoch": 0.8, "grad_norm": 1.7146541414686538, "learning_rate": 1.0420314216103883e-06, "loss": 0.6956, "step": 12452 }, { "epoch": 0.8, "grad_norm": 1.6698549844279775, "learning_rate": 1.0413981216812802e-06, "loss": 0.7098, "step": 12453 }, { "epoch": 0.8, "grad_norm": 1.6472241648706476, "learning_rate": 1.0407649918845624e-06, "loss": 0.5851, "step": 12454 }, { "epoch": 0.8, "grad_norm": 1.5914280282657136, "learning_rate": 1.0401320322474456e-06, "loss": 0.6955, "step": 12455 }, { "epoch": 0.8, "grad_norm": 1.5776542803459148, "learning_rate": 1.0394992427971334e-06, "loss": 0.6801, "step": 12456 }, { "epoch": 0.8, "grad_norm": 1.7140881664991998, "learning_rate": 1.0388666235608219e-06, "loss": 0.6839, "step": 12457 }, { "epoch": 0.8, "grad_norm": 1.6580810418695024, "learning_rate": 1.0382341745656994e-06, "loss": 0.6995, "step": 12458 }, { "epoch": 0.8, "grad_norm": 1.5855920869489246, "learning_rate": 1.0376018958389472e-06, "loss": 0.7392, "step": 12459 }, { "epoch": 0.8, "grad_norm": 1.2646743907592473, "learning_rate": 1.0369697874077377e-06, "loss": 0.7876, "step": 12460 }, { "epoch": 0.8, "grad_norm": 1.0082508561777606, "learning_rate": 1.036337849299241e-06, "loss": 0.6722, "step": 12461 }, { "epoch": 0.8, "grad_norm": 1.8571235699713566, "learning_rate": 1.0357060815406146e-06, "loss": 0.6579, "step": 12462 }, { "epoch": 0.8, "grad_norm": 1.5780793384625282, "learning_rate": 1.0350744841590106e-06, "loss": 0.6511, "step": 12463 }, { "epoch": 0.8, "grad_norm": 1.5793767090554238, "learning_rate": 1.0344430571815734e-06, "loss": 0.6962, "step": 12464 }, { "epoch": 0.8, "grad_norm": 1.8834695060208344, "learning_rate": 1.0338118006354413e-06, "loss": 0.7081, "step": 12465 }, { "epoch": 0.8, "grad_norm": 1.7260761749835214, "learning_rate": 1.033180714547744e-06, "loss": 0.6677, "step": 12466 }, { "epoch": 0.8, "grad_norm": 1.796168353258607, "learning_rate": 1.032549798945604e-06, "loss": 0.6718, "step": 12467 }, { "epoch": 0.8, "grad_norm": 1.8112221213241433, "learning_rate": 1.0319190538561358e-06, "loss": 0.7744, "step": 12468 }, { "epoch": 0.8, "grad_norm": 1.5757703579239415, "learning_rate": 1.0312884793064493e-06, "loss": 0.7349, "step": 12469 }, { "epoch": 0.8, "grad_norm": 1.960491498822461, "learning_rate": 1.030658075323645e-06, "loss": 0.6678, "step": 12470 }, { "epoch": 0.8, "grad_norm": 1.5406973842515088, "learning_rate": 1.0300278419348158e-06, "loss": 0.721, "step": 12471 }, { "epoch": 0.8, "grad_norm": 1.601290882778262, "learning_rate": 1.029397779167048e-06, "loss": 0.791, "step": 12472 }, { "epoch": 0.8, "grad_norm": 1.2873317539631703, "learning_rate": 1.02876788704742e-06, "loss": 0.5541, "step": 12473 }, { "epoch": 0.8, "grad_norm": 1.134169182364924, "learning_rate": 1.0281381656030038e-06, "loss": 0.6381, "step": 12474 }, { "epoch": 0.8, "grad_norm": 1.7342673234766621, "learning_rate": 1.0275086148608614e-06, "loss": 0.6532, "step": 12475 }, { "epoch": 0.8, "grad_norm": 1.7276251663397553, "learning_rate": 1.026879234848055e-06, "loss": 0.7926, "step": 12476 }, { "epoch": 0.8, "grad_norm": 1.703610089672913, "learning_rate": 1.026250025591627e-06, "loss": 0.7983, "step": 12477 }, { "epoch": 0.8, "grad_norm": 1.9279876459989116, "learning_rate": 1.025620987118624e-06, "loss": 0.7866, "step": 12478 }, { "epoch": 0.8, "grad_norm": 1.523072911441053, "learning_rate": 1.0249921194560803e-06, "loss": 0.6771, "step": 12479 }, { "epoch": 0.8, "grad_norm": 1.879369632011674, "learning_rate": 1.0243634226310224e-06, "loss": 0.7155, "step": 12480 }, { "epoch": 0.8, "grad_norm": 1.9884476057691007, "learning_rate": 1.0237348966704708e-06, "loss": 0.7498, "step": 12481 }, { "epoch": 0.8, "grad_norm": 1.8917024020323034, "learning_rate": 1.0231065416014363e-06, "loss": 0.7131, "step": 12482 }, { "epoch": 0.8, "grad_norm": 1.8285454936815104, "learning_rate": 1.022478357450929e-06, "loss": 0.7657, "step": 12483 }, { "epoch": 0.8, "grad_norm": 1.7611024147806664, "learning_rate": 1.021850344245941e-06, "loss": 0.7702, "step": 12484 }, { "epoch": 0.8, "grad_norm": 2.4320670112849405, "learning_rate": 1.0212225020134693e-06, "loss": 0.7539, "step": 12485 }, { "epoch": 0.8, "grad_norm": 1.38906139364481, "learning_rate": 1.0205948307804904e-06, "loss": 0.6739, "step": 12486 }, { "epoch": 0.8, "grad_norm": 1.7321254541327824, "learning_rate": 1.0199673305739854e-06, "loss": 0.718, "step": 12487 }, { "epoch": 0.8, "grad_norm": 1.6540691767530424, "learning_rate": 1.0193400014209215e-06, "loss": 0.7801, "step": 12488 }, { "epoch": 0.8, "grad_norm": 3.150544666757469, "learning_rate": 1.0187128433482601e-06, "loss": 0.5795, "step": 12489 }, { "epoch": 0.8, "grad_norm": 1.6150151909550956, "learning_rate": 1.0180858563829544e-06, "loss": 0.556, "step": 12490 }, { "epoch": 0.8, "grad_norm": 1.5771815047765907, "learning_rate": 1.0174590405519502e-06, "loss": 0.7846, "step": 12491 }, { "epoch": 0.8, "grad_norm": 1.860448722213468, "learning_rate": 1.0168323958821908e-06, "loss": 0.6678, "step": 12492 }, { "epoch": 0.8, "grad_norm": 1.6960905052795978, "learning_rate": 1.0162059224006027e-06, "loss": 0.6267, "step": 12493 }, { "epoch": 0.8, "grad_norm": 1.9452306032810176, "learning_rate": 1.015579620134114e-06, "loss": 0.5892, "step": 12494 }, { "epoch": 0.8, "grad_norm": 1.1925345127546192, "learning_rate": 1.0149534891096408e-06, "loss": 0.7064, "step": 12495 }, { "epoch": 0.8, "grad_norm": 1.6700112162197205, "learning_rate": 1.0143275293540928e-06, "loss": 0.7428, "step": 12496 }, { "epoch": 0.8, "grad_norm": 1.6890756325890335, "learning_rate": 1.0137017408943729e-06, "loss": 0.693, "step": 12497 }, { "epoch": 0.8, "grad_norm": 1.5260785091088531, "learning_rate": 1.0130761237573739e-06, "loss": 0.6983, "step": 12498 }, { "epoch": 0.8, "grad_norm": 1.8856603040124729, "learning_rate": 1.0124506779699882e-06, "loss": 0.7444, "step": 12499 }, { "epoch": 0.8, "grad_norm": 1.628812821798814, "learning_rate": 1.0118254035590912e-06, "loss": 0.6222, "step": 12500 }, { "epoch": 0.8, "grad_norm": 1.848024773127787, "learning_rate": 1.0112003005515603e-06, "loss": 0.7722, "step": 12501 }, { "epoch": 0.8, "grad_norm": 2.860784921578034, "learning_rate": 1.0105753689742564e-06, "loss": 0.7198, "step": 12502 }, { "epoch": 0.8, "grad_norm": 1.8190618517908204, "learning_rate": 1.0099506088540418e-06, "loss": 0.7065, "step": 12503 }, { "epoch": 0.8, "grad_norm": 3.8075590002012087, "learning_rate": 1.0093260202177651e-06, "loss": 0.694, "step": 12504 }, { "epoch": 0.8, "grad_norm": 1.5208776785875395, "learning_rate": 1.0087016030922709e-06, "loss": 0.6029, "step": 12505 }, { "epoch": 0.8, "grad_norm": 1.0994711019886547, "learning_rate": 1.008077357504395e-06, "loss": 0.5961, "step": 12506 }, { "epoch": 0.8, "grad_norm": 1.6756278214179257, "learning_rate": 1.0074532834809647e-06, "loss": 0.674, "step": 12507 }, { "epoch": 0.8, "grad_norm": 1.87662423181317, "learning_rate": 1.006829381048805e-06, "loss": 0.6536, "step": 12508 }, { "epoch": 0.8, "grad_norm": 1.3201586117057578, "learning_rate": 1.0062056502347257e-06, "loss": 0.6926, "step": 12509 }, { "epoch": 0.8, "grad_norm": 1.6594968144680544, "learning_rate": 1.005582091065536e-06, "loss": 0.6648, "step": 12510 }, { "epoch": 0.8, "grad_norm": 1.1967277354257466, "learning_rate": 1.0049587035680353e-06, "loss": 0.5782, "step": 12511 }, { "epoch": 0.8, "grad_norm": 1.6341918751365376, "learning_rate": 1.0043354877690148e-06, "loss": 0.7673, "step": 12512 }, { "epoch": 0.8, "grad_norm": 1.6826557076066841, "learning_rate": 1.003712443695259e-06, "loss": 0.8163, "step": 12513 }, { "epoch": 0.8, "grad_norm": 1.5998245253428667, "learning_rate": 1.0030895713735444e-06, "loss": 0.6728, "step": 12514 }, { "epoch": 0.8, "grad_norm": 1.6880076093565575, "learning_rate": 1.0024668708306418e-06, "loss": 0.7109, "step": 12515 }, { "epoch": 0.8, "grad_norm": 1.939388628134984, "learning_rate": 1.0018443420933117e-06, "loss": 0.8507, "step": 12516 }, { "epoch": 0.8, "grad_norm": 1.2530120534462308, "learning_rate": 1.001221985188312e-06, "loss": 0.6957, "step": 12517 }, { "epoch": 0.8, "grad_norm": 2.0050965490619346, "learning_rate": 1.0005998001423883e-06, "loss": 0.6672, "step": 12518 }, { "epoch": 0.8, "grad_norm": 1.7063565866520887, "learning_rate": 9.99977786982282e-07, "loss": 0.6648, "step": 12519 }, { "epoch": 0.8, "grad_norm": 1.6607245204192966, "learning_rate": 9.993559457347245e-07, "loss": 0.6797, "step": 12520 }, { "epoch": 0.8, "grad_norm": 1.7693495561835946, "learning_rate": 9.987342764264424e-07, "loss": 0.6814, "step": 12521 }, { "epoch": 0.8, "grad_norm": 2.1710329353798135, "learning_rate": 9.981127790841526e-07, "loss": 0.5851, "step": 12522 }, { "epoch": 0.8, "grad_norm": 1.572540447161639, "learning_rate": 9.974914537345675e-07, "loss": 0.7036, "step": 12523 }, { "epoch": 0.8, "grad_norm": 1.525953795546218, "learning_rate": 9.968703004043873e-07, "loss": 0.6125, "step": 12524 }, { "epoch": 0.8, "grad_norm": 1.2744425292963755, "learning_rate": 9.96249319120311e-07, "loss": 0.6264, "step": 12525 }, { "epoch": 0.8, "grad_norm": 1.2012213622490016, "learning_rate": 9.956285099090262e-07, "loss": 0.6059, "step": 12526 }, { "epoch": 0.8, "grad_norm": 1.3154958898355928, "learning_rate": 9.95007872797214e-07, "loss": 0.6828, "step": 12527 }, { "epoch": 0.8, "grad_norm": 1.7590528334284938, "learning_rate": 9.943874078115473e-07, "loss": 0.8356, "step": 12528 }, { "epoch": 0.8, "grad_norm": 1.0661543046327966, "learning_rate": 9.937671149786933e-07, "loss": 0.6759, "step": 12529 }, { "epoch": 0.8, "grad_norm": 1.1234620868045193, "learning_rate": 9.931469943253103e-07, "loss": 0.6284, "step": 12530 }, { "epoch": 0.8, "grad_norm": 1.6455254440213987, "learning_rate": 9.925270458780496e-07, "loss": 0.9001, "step": 12531 }, { "epoch": 0.8, "grad_norm": 1.9532586268170682, "learning_rate": 9.919072696635563e-07, "loss": 0.8075, "step": 12532 }, { "epoch": 0.8, "grad_norm": 1.803925973415861, "learning_rate": 9.91287665708464e-07, "loss": 0.6561, "step": 12533 }, { "epoch": 0.8, "grad_norm": 1.5934899012656119, "learning_rate": 9.906682340394064e-07, "loss": 0.8358, "step": 12534 }, { "epoch": 0.8, "grad_norm": 1.397711974855258, "learning_rate": 9.900489746830034e-07, "loss": 0.6924, "step": 12535 }, { "epoch": 0.8, "grad_norm": 2.1192738071813424, "learning_rate": 9.894298876658692e-07, "loss": 0.6977, "step": 12536 }, { "epoch": 0.8, "grad_norm": 1.7703877062007345, "learning_rate": 9.888109730146112e-07, "loss": 0.8389, "step": 12537 }, { "epoch": 0.8, "grad_norm": 1.7546468433362075, "learning_rate": 9.88192230755829e-07, "loss": 0.7085, "step": 12538 }, { "epoch": 0.8, "grad_norm": 1.4981407415600134, "learning_rate": 9.87573660916114e-07, "loss": 0.726, "step": 12539 }, { "epoch": 0.8, "grad_norm": 1.1855830624656498, "learning_rate": 9.869552635220515e-07, "loss": 0.6209, "step": 12540 }, { "epoch": 0.8, "grad_norm": 2.4647085917639933, "learning_rate": 9.863370386002214e-07, "loss": 0.8925, "step": 12541 }, { "epoch": 0.8, "grad_norm": 1.531442720028783, "learning_rate": 9.857189861771887e-07, "loss": 0.7508, "step": 12542 }, { "epoch": 0.8, "grad_norm": 1.0066602497359345, "learning_rate": 9.851011062795201e-07, "loss": 0.5702, "step": 12543 }, { "epoch": 0.8, "grad_norm": 1.7664774945981958, "learning_rate": 9.8448339893377e-07, "loss": 0.8088, "step": 12544 }, { "epoch": 0.8, "grad_norm": 1.6192646779760258, "learning_rate": 9.83865864166485e-07, "loss": 0.6653, "step": 12545 }, { "epoch": 0.8, "grad_norm": 1.7981992377439906, "learning_rate": 9.832485020042065e-07, "loss": 0.7389, "step": 12546 }, { "epoch": 0.8, "grad_norm": 1.5697003282725446, "learning_rate": 9.826313124734654e-07, "loss": 0.8551, "step": 12547 }, { "epoch": 0.8, "grad_norm": 1.5445888204669491, "learning_rate": 9.820142956007917e-07, "loss": 0.58, "step": 12548 }, { "epoch": 0.8, "grad_norm": 1.6741641005626777, "learning_rate": 9.813974514126977e-07, "loss": 0.681, "step": 12549 }, { "epoch": 0.8, "grad_norm": 2.06538000558831, "learning_rate": 9.807807799357e-07, "loss": 0.7397, "step": 12550 }, { "epoch": 0.8, "grad_norm": 2.158580401921488, "learning_rate": 9.801642811962964e-07, "loss": 0.8107, "step": 12551 }, { "epoch": 0.8, "grad_norm": 1.789010263376866, "learning_rate": 9.795479552209857e-07, "loss": 0.774, "step": 12552 }, { "epoch": 0.8, "grad_norm": 1.8688058988220162, "learning_rate": 9.789318020362564e-07, "loss": 0.6694, "step": 12553 }, { "epoch": 0.8, "grad_norm": 1.868392738970459, "learning_rate": 9.783158216685874e-07, "loss": 0.7793, "step": 12554 }, { "epoch": 0.8, "grad_norm": 1.5424862570147242, "learning_rate": 9.777000141444564e-07, "loss": 0.673, "step": 12555 }, { "epoch": 0.8, "grad_norm": 1.7492761389401306, "learning_rate": 9.77084379490324e-07, "loss": 0.7241, "step": 12556 }, { "epoch": 0.8, "grad_norm": 2.2831004667465624, "learning_rate": 9.764689177326542e-07, "loss": 0.7369, "step": 12557 }, { "epoch": 0.8, "grad_norm": 1.7532067241673874, "learning_rate": 9.758536288978932e-07, "loss": 0.7223, "step": 12558 }, { "epoch": 0.8, "grad_norm": 1.5890633424625262, "learning_rate": 9.75238513012488e-07, "loss": 0.7615, "step": 12559 }, { "epoch": 0.8, "grad_norm": 1.6182709036197804, "learning_rate": 9.74623570102875e-07, "loss": 0.7869, "step": 12560 }, { "epoch": 0.8, "grad_norm": 2.0200194093591435, "learning_rate": 9.74008800195481e-07, "loss": 0.8136, "step": 12561 }, { "epoch": 0.8, "grad_norm": 1.8519614461499772, "learning_rate": 9.73394203316732e-07, "loss": 0.7321, "step": 12562 }, { "epoch": 0.8, "grad_norm": 1.6151752985349124, "learning_rate": 9.727797794930361e-07, "loss": 0.7441, "step": 12563 }, { "epoch": 0.8, "grad_norm": 1.2719227905529336, "learning_rate": 9.721655287508052e-07, "loss": 0.6193, "step": 12564 }, { "epoch": 0.8, "grad_norm": 1.9835959632537585, "learning_rate": 9.715514511164343e-07, "loss": 0.7075, "step": 12565 }, { "epoch": 0.8, "grad_norm": 1.7777351755090889, "learning_rate": 9.709375466163178e-07, "loss": 0.7196, "step": 12566 }, { "epoch": 0.8, "grad_norm": 1.367987964647289, "learning_rate": 9.703238152768402e-07, "loss": 0.5914, "step": 12567 }, { "epoch": 0.8, "grad_norm": 1.8909204014491652, "learning_rate": 9.697102571243767e-07, "loss": 0.6783, "step": 12568 }, { "epoch": 0.8, "grad_norm": 1.1179412222895584, "learning_rate": 9.690968721852978e-07, "loss": 0.6061, "step": 12569 }, { "epoch": 0.8, "grad_norm": 1.495416276809658, "learning_rate": 9.684836604859637e-07, "loss": 0.6691, "step": 12570 }, { "epoch": 0.8, "grad_norm": 1.8140139824414823, "learning_rate": 9.678706220527333e-07, "loss": 0.7957, "step": 12571 }, { "epoch": 0.8, "grad_norm": 1.5886454056315515, "learning_rate": 9.672577569119484e-07, "loss": 0.7486, "step": 12572 }, { "epoch": 0.8, "grad_norm": 1.1901333248339185, "learning_rate": 9.666450650899533e-07, "loss": 0.667, "step": 12573 }, { "epoch": 0.8, "grad_norm": 1.9624596840487323, "learning_rate": 9.66032546613076e-07, "loss": 0.7345, "step": 12574 }, { "epoch": 0.8, "grad_norm": 1.612041399866415, "learning_rate": 9.654202015076442e-07, "loss": 0.646, "step": 12575 }, { "epoch": 0.8, "grad_norm": 1.6994622422803822, "learning_rate": 9.648080297999746e-07, "loss": 0.679, "step": 12576 }, { "epoch": 0.81, "grad_norm": 1.6331655321098284, "learning_rate": 9.641960315163768e-07, "loss": 0.7968, "step": 12577 }, { "epoch": 0.81, "grad_norm": 1.499426893139726, "learning_rate": 9.63584206683153e-07, "loss": 0.6952, "step": 12578 }, { "epoch": 0.81, "grad_norm": 1.5732578705784641, "learning_rate": 9.629725553265983e-07, "loss": 0.7457, "step": 12579 }, { "epoch": 0.81, "grad_norm": 1.4518487009381997, "learning_rate": 9.623610774730002e-07, "loss": 0.6633, "step": 12580 }, { "epoch": 0.81, "grad_norm": 1.7989028983239639, "learning_rate": 9.617497731486374e-07, "loss": 0.7363, "step": 12581 }, { "epoch": 0.81, "grad_norm": 1.7005584728013887, "learning_rate": 9.611386423797852e-07, "loss": 0.782, "step": 12582 }, { "epoch": 0.81, "grad_norm": 1.0508799904100032, "learning_rate": 9.605276851927075e-07, "loss": 0.577, "step": 12583 }, { "epoch": 0.81, "grad_norm": 2.173179733588745, "learning_rate": 9.599169016136617e-07, "loss": 0.6831, "step": 12584 }, { "epoch": 0.81, "grad_norm": 1.980641784384448, "learning_rate": 9.593062916688982e-07, "loss": 0.9121, "step": 12585 }, { "epoch": 0.81, "grad_norm": 1.6210392964779794, "learning_rate": 9.586958553846592e-07, "loss": 0.7187, "step": 12586 }, { "epoch": 0.81, "grad_norm": 2.5072518328074787, "learning_rate": 9.580855927871808e-07, "loss": 0.7651, "step": 12587 }, { "epoch": 0.81, "grad_norm": 1.7357183493120962, "learning_rate": 9.574755039026901e-07, "loss": 0.8254, "step": 12588 }, { "epoch": 0.81, "grad_norm": 1.5733075109748882, "learning_rate": 9.56865588757407e-07, "loss": 0.8432, "step": 12589 }, { "epoch": 0.81, "grad_norm": 1.9387547462107768, "learning_rate": 9.562558473775458e-07, "loss": 0.7438, "step": 12590 }, { "epoch": 0.81, "grad_norm": 1.8871656550623819, "learning_rate": 9.556462797893113e-07, "loss": 0.8366, "step": 12591 }, { "epoch": 0.81, "grad_norm": 3.3675438597190306, "learning_rate": 9.550368860189013e-07, "loss": 0.6487, "step": 12592 }, { "epoch": 0.81, "grad_norm": 1.7080658049472586, "learning_rate": 9.544276660925067e-07, "loss": 0.7933, "step": 12593 }, { "epoch": 0.81, "grad_norm": 1.7500116600611995, "learning_rate": 9.538186200363098e-07, "loss": 0.7739, "step": 12594 }, { "epoch": 0.81, "grad_norm": 1.4787404193125628, "learning_rate": 9.532097478764862e-07, "loss": 0.6185, "step": 12595 }, { "epoch": 0.81, "grad_norm": 1.6808865686166088, "learning_rate": 9.526010496392029e-07, "loss": 0.6476, "step": 12596 }, { "epoch": 0.81, "grad_norm": 1.6415706988092658, "learning_rate": 9.519925253506246e-07, "loss": 0.7667, "step": 12597 }, { "epoch": 0.81, "grad_norm": 1.7685169685122322, "learning_rate": 9.513841750368991e-07, "loss": 0.697, "step": 12598 }, { "epoch": 0.81, "grad_norm": 2.18601912522299, "learning_rate": 9.507759987241755e-07, "loss": 0.6449, "step": 12599 }, { "epoch": 0.81, "grad_norm": 1.7863728022147638, "learning_rate": 9.501679964385907e-07, "loss": 0.6209, "step": 12600 }, { "epoch": 0.81, "grad_norm": 1.5361225281515145, "learning_rate": 9.495601682062755e-07, "loss": 0.8131, "step": 12601 }, { "epoch": 0.81, "grad_norm": 1.5778516597243346, "learning_rate": 9.489525140533534e-07, "loss": 0.7044, "step": 12602 }, { "epoch": 0.81, "grad_norm": 1.7375045447411426, "learning_rate": 9.483450340059386e-07, "loss": 0.7099, "step": 12603 }, { "epoch": 0.81, "grad_norm": 1.0675440795650857, "learning_rate": 9.477377280901428e-07, "loss": 0.6899, "step": 12604 }, { "epoch": 0.81, "grad_norm": 1.3375678567822635, "learning_rate": 9.47130596332062e-07, "loss": 0.6897, "step": 12605 }, { "epoch": 0.81, "grad_norm": 1.6720705749529108, "learning_rate": 9.465236387577947e-07, "loss": 0.6282, "step": 12606 }, { "epoch": 0.81, "grad_norm": 1.7514566669268774, "learning_rate": 9.459168553934211e-07, "loss": 0.7512, "step": 12607 }, { "epoch": 0.81, "grad_norm": 0.9329964839447559, "learning_rate": 9.453102462650232e-07, "loss": 0.5567, "step": 12608 }, { "epoch": 0.81, "grad_norm": 1.672029473374632, "learning_rate": 9.447038113986717e-07, "loss": 0.6514, "step": 12609 }, { "epoch": 0.81, "grad_norm": 1.4972385725060773, "learning_rate": 9.440975508204286e-07, "loss": 0.6948, "step": 12610 }, { "epoch": 0.81, "grad_norm": 1.1966246645479763, "learning_rate": 9.434914645563498e-07, "loss": 0.6956, "step": 12611 }, { "epoch": 0.81, "grad_norm": 1.7103074294805118, "learning_rate": 9.428855526324831e-07, "loss": 0.7484, "step": 12612 }, { "epoch": 0.81, "grad_norm": 1.6637522998365832, "learning_rate": 9.422798150748724e-07, "loss": 0.6041, "step": 12613 }, { "epoch": 0.81, "grad_norm": 1.8104646399445588, "learning_rate": 9.416742519095467e-07, "loss": 0.7567, "step": 12614 }, { "epoch": 0.81, "grad_norm": 1.5968516779107216, "learning_rate": 9.410688631625364e-07, "loss": 0.6745, "step": 12615 }, { "epoch": 0.81, "grad_norm": 1.7213448079540343, "learning_rate": 9.404636488598545e-07, "loss": 0.7104, "step": 12616 }, { "epoch": 0.81, "grad_norm": 1.5695992774237755, "learning_rate": 9.398586090275164e-07, "loss": 0.6993, "step": 12617 }, { "epoch": 0.81, "grad_norm": 1.5523185334322236, "learning_rate": 9.392537436915234e-07, "loss": 0.6207, "step": 12618 }, { "epoch": 0.81, "grad_norm": 1.807104386087071, "learning_rate": 9.386490528778702e-07, "loss": 0.7825, "step": 12619 }, { "epoch": 0.81, "grad_norm": 2.1184780576598605, "learning_rate": 9.380445366125496e-07, "loss": 0.7225, "step": 12620 }, { "epoch": 0.81, "grad_norm": 1.7162064226998222, "learning_rate": 9.374401949215367e-07, "loss": 0.656, "step": 12621 }, { "epoch": 0.81, "grad_norm": 1.78063896106702, "learning_rate": 9.368360278308103e-07, "loss": 0.6498, "step": 12622 }, { "epoch": 0.81, "grad_norm": 1.797047830768359, "learning_rate": 9.362320353663313e-07, "loss": 0.6904, "step": 12623 }, { "epoch": 0.81, "grad_norm": 1.5965071409963876, "learning_rate": 9.356282175540609e-07, "loss": 0.6761, "step": 12624 }, { "epoch": 0.81, "grad_norm": 1.7745628383447982, "learning_rate": 9.350245744199499e-07, "loss": 0.7231, "step": 12625 }, { "epoch": 0.81, "grad_norm": 1.6969955334786546, "learning_rate": 9.344211059899394e-07, "loss": 0.7542, "step": 12626 }, { "epoch": 0.81, "grad_norm": 1.6585790337572437, "learning_rate": 9.338178122899693e-07, "loss": 0.757, "step": 12627 }, { "epoch": 0.81, "grad_norm": 1.7484657149675593, "learning_rate": 9.332146933459629e-07, "loss": 0.7442, "step": 12628 }, { "epoch": 0.81, "grad_norm": 1.2532770206455792, "learning_rate": 9.32611749183846e-07, "loss": 0.7429, "step": 12629 }, { "epoch": 0.81, "grad_norm": 1.6707200049011206, "learning_rate": 9.320089798295268e-07, "loss": 0.6527, "step": 12630 }, { "epoch": 0.81, "grad_norm": 1.9477368035168523, "learning_rate": 9.31406385308915e-07, "loss": 0.8356, "step": 12631 }, { "epoch": 0.81, "grad_norm": 1.5412722926857116, "learning_rate": 9.308039656479073e-07, "loss": 0.8485, "step": 12632 }, { "epoch": 0.81, "grad_norm": 1.8432067730474107, "learning_rate": 9.302017208723951e-07, "loss": 0.7633, "step": 12633 }, { "epoch": 0.81, "grad_norm": 1.5319736042032264, "learning_rate": 9.295996510082605e-07, "loss": 0.7044, "step": 12634 }, { "epoch": 0.81, "grad_norm": 1.564239507804693, "learning_rate": 9.289977560813789e-07, "loss": 0.6537, "step": 12635 }, { "epoch": 0.81, "grad_norm": 1.8467286890886236, "learning_rate": 9.28396036117622e-07, "loss": 0.7529, "step": 12636 }, { "epoch": 0.81, "grad_norm": 1.7797163865586982, "learning_rate": 9.27794491142845e-07, "loss": 0.7747, "step": 12637 }, { "epoch": 0.81, "grad_norm": 1.6855829507366187, "learning_rate": 9.271931211829055e-07, "loss": 0.6772, "step": 12638 }, { "epoch": 0.81, "grad_norm": 1.5351229487360931, "learning_rate": 9.265919262636469e-07, "loss": 0.6214, "step": 12639 }, { "epoch": 0.81, "grad_norm": 1.7554469463878999, "learning_rate": 9.259909064109085e-07, "loss": 0.6672, "step": 12640 }, { "epoch": 0.81, "grad_norm": 1.7431372217822305, "learning_rate": 9.253900616505202e-07, "loss": 0.5906, "step": 12641 }, { "epoch": 0.81, "grad_norm": 1.8636764485389676, "learning_rate": 9.247893920083045e-07, "loss": 0.703, "step": 12642 }, { "epoch": 0.81, "grad_norm": 2.145966979415904, "learning_rate": 9.241888975100782e-07, "loss": 0.7004, "step": 12643 }, { "epoch": 0.81, "grad_norm": 1.8488724799129037, "learning_rate": 9.235885781816483e-07, "loss": 0.7004, "step": 12644 }, { "epoch": 0.81, "grad_norm": 1.6657762924767894, "learning_rate": 9.229884340488149e-07, "loss": 0.7606, "step": 12645 }, { "epoch": 0.81, "grad_norm": 1.624827502326925, "learning_rate": 9.223884651373722e-07, "loss": 0.617, "step": 12646 }, { "epoch": 0.81, "grad_norm": 1.6451563451990365, "learning_rate": 9.217886714731056e-07, "loss": 0.6892, "step": 12647 }, { "epoch": 0.81, "grad_norm": 1.9632294345166967, "learning_rate": 9.211890530817919e-07, "loss": 0.7685, "step": 12648 }, { "epoch": 0.81, "grad_norm": 1.5080729119296725, "learning_rate": 9.205896099892019e-07, "loss": 0.6851, "step": 12649 }, { "epoch": 0.81, "grad_norm": 1.8777334223609834, "learning_rate": 9.199903422210988e-07, "loss": 0.6657, "step": 12650 }, { "epoch": 0.81, "grad_norm": 1.966559635221957, "learning_rate": 9.193912498032376e-07, "loss": 0.8644, "step": 12651 }, { "epoch": 0.81, "grad_norm": 1.5440680683182724, "learning_rate": 9.187923327613651e-07, "loss": 0.7359, "step": 12652 }, { "epoch": 0.81, "grad_norm": 1.7621401020963703, "learning_rate": 9.181935911212231e-07, "loss": 0.7157, "step": 12653 }, { "epoch": 0.81, "grad_norm": 1.7465738127705905, "learning_rate": 9.175950249085424e-07, "loss": 0.7226, "step": 12654 }, { "epoch": 0.81, "grad_norm": 2.2003798606012026, "learning_rate": 9.169966341490499e-07, "loss": 0.6757, "step": 12655 }, { "epoch": 0.81, "grad_norm": 1.7458267138739487, "learning_rate": 9.163984188684627e-07, "loss": 0.6729, "step": 12656 }, { "epoch": 0.81, "grad_norm": 1.6842187480623656, "learning_rate": 9.158003790924908e-07, "loss": 0.8007, "step": 12657 }, { "epoch": 0.81, "grad_norm": 1.677059527870614, "learning_rate": 9.152025148468363e-07, "loss": 0.7755, "step": 12658 }, { "epoch": 0.81, "grad_norm": 1.3482103341475273, "learning_rate": 9.146048261571944e-07, "loss": 0.655, "step": 12659 }, { "epoch": 0.81, "grad_norm": 1.7393642064281665, "learning_rate": 9.140073130492528e-07, "loss": 0.748, "step": 12660 }, { "epoch": 0.81, "grad_norm": 1.1611984191638012, "learning_rate": 9.134099755486892e-07, "loss": 0.6464, "step": 12661 }, { "epoch": 0.81, "grad_norm": 1.9088980237746738, "learning_rate": 9.128128136811809e-07, "loss": 0.7969, "step": 12662 }, { "epoch": 0.81, "grad_norm": 1.7619523534541524, "learning_rate": 9.122158274723863e-07, "loss": 0.698, "step": 12663 }, { "epoch": 0.81, "grad_norm": 1.6597470488182426, "learning_rate": 9.116190169479678e-07, "loss": 0.7144, "step": 12664 }, { "epoch": 0.81, "grad_norm": 1.6934386517856475, "learning_rate": 9.110223821335723e-07, "loss": 0.7022, "step": 12665 }, { "epoch": 0.81, "grad_norm": 1.5476725639876334, "learning_rate": 9.104259230548435e-07, "loss": 0.7178, "step": 12666 }, { "epoch": 0.81, "grad_norm": 1.7308996871019806, "learning_rate": 9.098296397374146e-07, "loss": 0.641, "step": 12667 }, { "epoch": 0.81, "grad_norm": 1.7504142121489137, "learning_rate": 9.092335322069118e-07, "loss": 0.7197, "step": 12668 }, { "epoch": 0.81, "grad_norm": 2.172328835730562, "learning_rate": 9.086376004889591e-07, "loss": 0.7137, "step": 12669 }, { "epoch": 0.81, "grad_norm": 1.6485702787196814, "learning_rate": 9.080418446091622e-07, "loss": 0.6824, "step": 12670 }, { "epoch": 0.81, "grad_norm": 1.6879483882018835, "learning_rate": 9.074462645931309e-07, "loss": 0.8373, "step": 12671 }, { "epoch": 0.81, "grad_norm": 1.7601715213114357, "learning_rate": 9.068508604664572e-07, "loss": 0.8443, "step": 12672 }, { "epoch": 0.81, "grad_norm": 1.6905283334771506, "learning_rate": 9.062556322547333e-07, "loss": 0.7008, "step": 12673 }, { "epoch": 0.81, "grad_norm": 1.748191047706526, "learning_rate": 9.056605799835411e-07, "loss": 0.6817, "step": 12674 }, { "epoch": 0.81, "grad_norm": 1.966479999681733, "learning_rate": 9.050657036784516e-07, "loss": 0.7827, "step": 12675 }, { "epoch": 0.81, "grad_norm": 1.0188483497803174, "learning_rate": 9.044710033650367e-07, "loss": 0.555, "step": 12676 }, { "epoch": 0.81, "grad_norm": 2.2694822522155573, "learning_rate": 9.038764790688492e-07, "loss": 0.8383, "step": 12677 }, { "epoch": 0.81, "grad_norm": 1.6589859581274702, "learning_rate": 9.032821308154465e-07, "loss": 0.78, "step": 12678 }, { "epoch": 0.81, "grad_norm": 1.7754612236797707, "learning_rate": 9.026879586303666e-07, "loss": 0.774, "step": 12679 }, { "epoch": 0.81, "grad_norm": 1.8149463916737774, "learning_rate": 9.0209396253915e-07, "loss": 0.7044, "step": 12680 }, { "epoch": 0.81, "grad_norm": 1.9900702778581465, "learning_rate": 9.015001425673242e-07, "loss": 0.7319, "step": 12681 }, { "epoch": 0.81, "grad_norm": 1.151241608573109, "learning_rate": 9.009064987404098e-07, "loss": 0.6616, "step": 12682 }, { "epoch": 0.81, "grad_norm": 1.702438927281789, "learning_rate": 9.003130310839203e-07, "loss": 0.6895, "step": 12683 }, { "epoch": 0.81, "grad_norm": 1.5635132746436047, "learning_rate": 8.997197396233615e-07, "loss": 0.6841, "step": 12684 }, { "epoch": 0.81, "grad_norm": 1.062922871892595, "learning_rate": 8.99126624384235e-07, "loss": 0.6631, "step": 12685 }, { "epoch": 0.81, "grad_norm": 2.794941486228022, "learning_rate": 8.985336853920262e-07, "loss": 0.7401, "step": 12686 }, { "epoch": 0.81, "grad_norm": 1.9913356721804256, "learning_rate": 8.979409226722224e-07, "loss": 0.7142, "step": 12687 }, { "epoch": 0.81, "grad_norm": 1.4788572734892906, "learning_rate": 8.973483362502983e-07, "loss": 0.7232, "step": 12688 }, { "epoch": 0.81, "grad_norm": 1.8348915409537179, "learning_rate": 8.967559261517217e-07, "loss": 0.653, "step": 12689 }, { "epoch": 0.81, "grad_norm": 1.9151561023639923, "learning_rate": 8.961636924019534e-07, "loss": 0.7483, "step": 12690 }, { "epoch": 0.81, "grad_norm": 1.6133066233704014, "learning_rate": 8.955716350264454e-07, "loss": 0.6847, "step": 12691 }, { "epoch": 0.81, "grad_norm": 1.7084913155162562, "learning_rate": 8.94979754050646e-07, "loss": 0.8003, "step": 12692 }, { "epoch": 0.81, "grad_norm": 1.680105908426962, "learning_rate": 8.943880494999884e-07, "loss": 0.6432, "step": 12693 }, { "epoch": 0.81, "grad_norm": 1.4559692741463124, "learning_rate": 8.937965213999084e-07, "loss": 0.6548, "step": 12694 }, { "epoch": 0.81, "grad_norm": 1.0121472638618056, "learning_rate": 8.932051697758227e-07, "loss": 0.6247, "step": 12695 }, { "epoch": 0.81, "grad_norm": 1.5842711096930409, "learning_rate": 8.926139946531504e-07, "loss": 0.7017, "step": 12696 }, { "epoch": 0.81, "grad_norm": 1.6828279172624512, "learning_rate": 8.920229960572973e-07, "loss": 0.7081, "step": 12697 }, { "epoch": 0.81, "grad_norm": 1.8492592419430676, "learning_rate": 8.914321740136644e-07, "loss": 0.7399, "step": 12698 }, { "epoch": 0.81, "grad_norm": 1.5869969859376851, "learning_rate": 8.908415285476435e-07, "loss": 0.679, "step": 12699 }, { "epoch": 0.81, "grad_norm": 1.7573873503932538, "learning_rate": 8.902510596846176e-07, "loss": 0.696, "step": 12700 }, { "epoch": 0.81, "grad_norm": 1.6796446613109501, "learning_rate": 8.89660767449968e-07, "loss": 0.6988, "step": 12701 }, { "epoch": 0.81, "grad_norm": 1.4191115063274964, "learning_rate": 8.890706518690589e-07, "loss": 0.5614, "step": 12702 }, { "epoch": 0.81, "grad_norm": 2.0031816184802866, "learning_rate": 8.884807129672568e-07, "loss": 0.6593, "step": 12703 }, { "epoch": 0.81, "grad_norm": 2.041685567209842, "learning_rate": 8.878909507699135e-07, "loss": 0.8181, "step": 12704 }, { "epoch": 0.81, "grad_norm": 1.7288385939482287, "learning_rate": 8.873013653023765e-07, "loss": 0.6407, "step": 12705 }, { "epoch": 0.81, "grad_norm": 1.6481507233427477, "learning_rate": 8.867119565899851e-07, "loss": 0.594, "step": 12706 }, { "epoch": 0.81, "grad_norm": 2.411414458771708, "learning_rate": 8.861227246580706e-07, "loss": 0.6143, "step": 12707 }, { "epoch": 0.81, "grad_norm": 2.987821588789749, "learning_rate": 8.855336695319572e-07, "loss": 0.661, "step": 12708 }, { "epoch": 0.81, "grad_norm": 1.3945179159212227, "learning_rate": 8.849447912369591e-07, "loss": 0.5984, "step": 12709 }, { "epoch": 0.81, "grad_norm": 1.435878985243044, "learning_rate": 8.843560897983883e-07, "loss": 0.5841, "step": 12710 }, { "epoch": 0.81, "grad_norm": 1.5494775103828677, "learning_rate": 8.837675652415451e-07, "loss": 0.6749, "step": 12711 }, { "epoch": 0.81, "grad_norm": 1.9765141624945872, "learning_rate": 8.831792175917219e-07, "loss": 0.8284, "step": 12712 }, { "epoch": 0.81, "grad_norm": 1.263408487958535, "learning_rate": 8.82591046874206e-07, "loss": 0.6997, "step": 12713 }, { "epoch": 0.81, "grad_norm": 1.5648568325093608, "learning_rate": 8.820030531142748e-07, "loss": 0.7172, "step": 12714 }, { "epoch": 0.81, "grad_norm": 1.8334101691063058, "learning_rate": 8.814152363371992e-07, "loss": 0.7631, "step": 12715 }, { "epoch": 0.81, "grad_norm": 1.7712302212687479, "learning_rate": 8.808275965682423e-07, "loss": 0.8164, "step": 12716 }, { "epoch": 0.81, "grad_norm": 1.0241718600311152, "learning_rate": 8.802401338326582e-07, "loss": 0.7457, "step": 12717 }, { "epoch": 0.81, "grad_norm": 1.871973837789343, "learning_rate": 8.796528481556992e-07, "loss": 0.6089, "step": 12718 }, { "epoch": 0.81, "grad_norm": 1.196864197977791, "learning_rate": 8.790657395626001e-07, "loss": 0.601, "step": 12719 }, { "epoch": 0.81, "grad_norm": 1.6536919939789967, "learning_rate": 8.784788080785978e-07, "loss": 0.6731, "step": 12720 }, { "epoch": 0.81, "grad_norm": 1.1264148428593619, "learning_rate": 8.778920537289154e-07, "loss": 0.6001, "step": 12721 }, { "epoch": 0.81, "grad_norm": 1.6049837213053044, "learning_rate": 8.773054765387712e-07, "loss": 0.6538, "step": 12722 }, { "epoch": 0.81, "grad_norm": 1.7964803606522195, "learning_rate": 8.767190765333744e-07, "loss": 0.6342, "step": 12723 }, { "epoch": 0.81, "grad_norm": 1.8866520721338271, "learning_rate": 8.761328537379277e-07, "loss": 0.6302, "step": 12724 }, { "epoch": 0.81, "grad_norm": 1.0563236593784462, "learning_rate": 8.755468081776252e-07, "loss": 0.6541, "step": 12725 }, { "epoch": 0.81, "grad_norm": 1.6121371546689383, "learning_rate": 8.749609398776531e-07, "loss": 0.6542, "step": 12726 }, { "epoch": 0.81, "grad_norm": 1.853972347851395, "learning_rate": 8.743752488631946e-07, "loss": 0.715, "step": 12727 }, { "epoch": 0.81, "grad_norm": 0.9445766146181459, "learning_rate": 8.73789735159416e-07, "loss": 0.6321, "step": 12728 }, { "epoch": 0.81, "grad_norm": 1.0290679072505624, "learning_rate": 8.732043987914856e-07, "loss": 0.6648, "step": 12729 }, { "epoch": 0.81, "grad_norm": 1.19374921932603, "learning_rate": 8.726192397845585e-07, "loss": 0.6011, "step": 12730 }, { "epoch": 0.81, "grad_norm": 1.5289447104035072, "learning_rate": 8.720342581637836e-07, "loss": 0.7441, "step": 12731 }, { "epoch": 0.81, "grad_norm": 1.629755762120053, "learning_rate": 8.714494539543022e-07, "loss": 0.6718, "step": 12732 }, { "epoch": 0.81, "grad_norm": 2.2597748615470894, "learning_rate": 8.708648271812469e-07, "loss": 0.6939, "step": 12733 }, { "epoch": 0.82, "grad_norm": 1.2639636191844967, "learning_rate": 8.702803778697472e-07, "loss": 0.6278, "step": 12734 }, { "epoch": 0.82, "grad_norm": 2.044704073265826, "learning_rate": 8.696961060449166e-07, "loss": 0.813, "step": 12735 }, { "epoch": 0.82, "grad_norm": 2.254594300967826, "learning_rate": 8.691120117318708e-07, "loss": 0.7582, "step": 12736 }, { "epoch": 0.82, "grad_norm": 1.6526705456655855, "learning_rate": 8.685280949557084e-07, "loss": 0.6535, "step": 12737 }, { "epoch": 0.82, "grad_norm": 1.189137602779826, "learning_rate": 8.67944355741528e-07, "loss": 0.6581, "step": 12738 }, { "epoch": 0.82, "grad_norm": 1.6100692835412989, "learning_rate": 8.673607941144169e-07, "loss": 0.8147, "step": 12739 }, { "epoch": 0.82, "grad_norm": 1.785618731770551, "learning_rate": 8.667774100994536e-07, "loss": 0.8237, "step": 12740 }, { "epoch": 0.82, "grad_norm": 1.6043203313935763, "learning_rate": 8.661942037217141e-07, "loss": 0.782, "step": 12741 }, { "epoch": 0.82, "grad_norm": 1.930484487883073, "learning_rate": 8.656111750062596e-07, "loss": 0.7825, "step": 12742 }, { "epoch": 0.82, "grad_norm": 1.6462356403150908, "learning_rate": 8.650283239781515e-07, "loss": 0.7214, "step": 12743 }, { "epoch": 0.82, "grad_norm": 2.9526826164863125, "learning_rate": 8.644456506624343e-07, "loss": 0.6683, "step": 12744 }, { "epoch": 0.82, "grad_norm": 1.6099100168927805, "learning_rate": 8.638631550841553e-07, "loss": 0.7511, "step": 12745 }, { "epoch": 0.82, "grad_norm": 1.1249265392195127, "learning_rate": 8.63280837268346e-07, "loss": 0.5733, "step": 12746 }, { "epoch": 0.82, "grad_norm": 1.7195346798194706, "learning_rate": 8.626986972400326e-07, "loss": 0.6608, "step": 12747 }, { "epoch": 0.82, "grad_norm": 1.5529638663321381, "learning_rate": 8.621167350242382e-07, "loss": 0.682, "step": 12748 }, { "epoch": 0.82, "grad_norm": 1.7165551319113908, "learning_rate": 8.615349506459691e-07, "loss": 0.7271, "step": 12749 }, { "epoch": 0.82, "grad_norm": 1.0189502890007294, "learning_rate": 8.609533441302342e-07, "loss": 0.5783, "step": 12750 }, { "epoch": 0.82, "grad_norm": 1.9969663952995396, "learning_rate": 8.603719155020246e-07, "loss": 0.7727, "step": 12751 }, { "epoch": 0.82, "grad_norm": 2.225434669011978, "learning_rate": 8.59790664786333e-07, "loss": 0.6201, "step": 12752 }, { "epoch": 0.82, "grad_norm": 1.6408850852492565, "learning_rate": 8.592095920081383e-07, "loss": 0.757, "step": 12753 }, { "epoch": 0.82, "grad_norm": 1.0303848826498638, "learning_rate": 8.586286971924151e-07, "loss": 0.5907, "step": 12754 }, { "epoch": 0.82, "grad_norm": 1.104733150466534, "learning_rate": 8.580479803641279e-07, "loss": 0.6917, "step": 12755 }, { "epoch": 0.82, "grad_norm": 1.9372170152934771, "learning_rate": 8.574674415482337e-07, "loss": 0.8118, "step": 12756 }, { "epoch": 0.82, "grad_norm": 2.0848652234856013, "learning_rate": 8.568870807696872e-07, "loss": 0.6872, "step": 12757 }, { "epoch": 0.82, "grad_norm": 1.7180241341362186, "learning_rate": 8.56306898053425e-07, "loss": 0.7144, "step": 12758 }, { "epoch": 0.82, "grad_norm": 1.6930677618083796, "learning_rate": 8.557268934243868e-07, "loss": 0.7382, "step": 12759 }, { "epoch": 0.82, "grad_norm": 1.7366631176747631, "learning_rate": 8.551470669074985e-07, "loss": 0.8681, "step": 12760 }, { "epoch": 0.82, "grad_norm": 1.717503153637172, "learning_rate": 8.545674185276792e-07, "loss": 0.6112, "step": 12761 }, { "epoch": 0.82, "grad_norm": 1.2945565652573412, "learning_rate": 8.539879483098423e-07, "loss": 0.7067, "step": 12762 }, { "epoch": 0.82, "grad_norm": 2.1035102278468347, "learning_rate": 8.534086562788907e-07, "loss": 0.6314, "step": 12763 }, { "epoch": 0.82, "grad_norm": 2.0592811686911445, "learning_rate": 8.528295424597222e-07, "loss": 0.762, "step": 12764 }, { "epoch": 0.82, "grad_norm": 1.7573998934343245, "learning_rate": 8.52250606877224e-07, "loss": 0.7139, "step": 12765 }, { "epoch": 0.82, "grad_norm": 1.906743492638243, "learning_rate": 8.51671849556282e-07, "loss": 0.8864, "step": 12766 }, { "epoch": 0.82, "grad_norm": 1.8947108952429481, "learning_rate": 8.510932705217645e-07, "loss": 0.6576, "step": 12767 }, { "epoch": 0.82, "grad_norm": 2.223916971149485, "learning_rate": 8.50514869798541e-07, "loss": 0.7995, "step": 12768 }, { "epoch": 0.82, "grad_norm": 1.7642664198416813, "learning_rate": 8.499366474114695e-07, "loss": 0.7389, "step": 12769 }, { "epoch": 0.82, "grad_norm": 1.8618017804452887, "learning_rate": 8.493586033854007e-07, "loss": 0.719, "step": 12770 }, { "epoch": 0.82, "grad_norm": 1.799699784436487, "learning_rate": 8.487807377451767e-07, "loss": 0.6699, "step": 12771 }, { "epoch": 0.82, "grad_norm": 1.623849203580193, "learning_rate": 8.482030505156341e-07, "loss": 0.7689, "step": 12772 }, { "epoch": 0.82, "grad_norm": 1.97192429248777, "learning_rate": 8.476255417216007e-07, "loss": 0.7323, "step": 12773 }, { "epoch": 0.82, "grad_norm": 1.4668532048249672, "learning_rate": 8.470482113878942e-07, "loss": 0.6284, "step": 12774 }, { "epoch": 0.82, "grad_norm": 2.352860579009526, "learning_rate": 8.464710595393306e-07, "loss": 0.9158, "step": 12775 }, { "epoch": 0.82, "grad_norm": 1.6469790778254818, "learning_rate": 8.458940862007131e-07, "loss": 0.735, "step": 12776 }, { "epoch": 0.82, "grad_norm": 1.7851441833533035, "learning_rate": 8.453172913968382e-07, "loss": 0.7777, "step": 12777 }, { "epoch": 0.82, "grad_norm": 1.677271768492904, "learning_rate": 8.447406751524967e-07, "loss": 0.6634, "step": 12778 }, { "epoch": 0.82, "grad_norm": 1.0591419752897437, "learning_rate": 8.441642374924692e-07, "loss": 0.5746, "step": 12779 }, { "epoch": 0.82, "grad_norm": 2.0421705588425807, "learning_rate": 8.435879784415302e-07, "loss": 0.7024, "step": 12780 }, { "epoch": 0.82, "grad_norm": 1.745131599071794, "learning_rate": 8.430118980244462e-07, "loss": 0.7481, "step": 12781 }, { "epoch": 0.82, "grad_norm": 1.6505324446563159, "learning_rate": 8.424359962659745e-07, "loss": 0.6585, "step": 12782 }, { "epoch": 0.82, "grad_norm": 1.9952542078569433, "learning_rate": 8.418602731908687e-07, "loss": 0.8496, "step": 12783 }, { "epoch": 0.82, "grad_norm": 1.6287808834634536, "learning_rate": 8.412847288238712e-07, "loss": 0.7226, "step": 12784 }, { "epoch": 0.82, "grad_norm": 3.6392340701454495, "learning_rate": 8.407093631897168e-07, "loss": 0.6526, "step": 12785 }, { "epoch": 0.82, "grad_norm": 1.7463089134832557, "learning_rate": 8.401341763131343e-07, "loss": 0.6891, "step": 12786 }, { "epoch": 0.82, "grad_norm": 2.023436965340694, "learning_rate": 8.395591682188442e-07, "loss": 0.6195, "step": 12787 }, { "epoch": 0.82, "grad_norm": 2.015111898589365, "learning_rate": 8.389843389315582e-07, "loss": 0.7992, "step": 12788 }, { "epoch": 0.82, "grad_norm": 1.9678102289052883, "learning_rate": 8.384096884759807e-07, "loss": 0.7799, "step": 12789 }, { "epoch": 0.82, "grad_norm": 1.1284638377848764, "learning_rate": 8.378352168768128e-07, "loss": 0.6348, "step": 12790 }, { "epoch": 0.82, "grad_norm": 1.4006067204233879, "learning_rate": 8.372609241587387e-07, "loss": 0.7026, "step": 12791 }, { "epoch": 0.82, "grad_norm": 1.7763267777615954, "learning_rate": 8.366868103464453e-07, "loss": 0.8199, "step": 12792 }, { "epoch": 0.82, "grad_norm": 1.9457239265165531, "learning_rate": 8.361128754646025e-07, "loss": 0.7581, "step": 12793 }, { "epoch": 0.82, "grad_norm": 1.2578690688291072, "learning_rate": 8.355391195378798e-07, "loss": 0.6758, "step": 12794 }, { "epoch": 0.82, "grad_norm": 2.1967346781502766, "learning_rate": 8.349655425909348e-07, "loss": 0.9478, "step": 12795 }, { "epoch": 0.82, "grad_norm": 1.6010824077119699, "learning_rate": 8.343921446484177e-07, "loss": 0.6848, "step": 12796 }, { "epoch": 0.82, "grad_norm": 1.7375573801875823, "learning_rate": 8.338189257349755e-07, "loss": 0.7027, "step": 12797 }, { "epoch": 0.82, "grad_norm": 1.085608497856327, "learning_rate": 8.332458858752391e-07, "loss": 0.6591, "step": 12798 }, { "epoch": 0.82, "grad_norm": 1.7609708575798257, "learning_rate": 8.326730250938414e-07, "loss": 0.6787, "step": 12799 }, { "epoch": 0.82, "grad_norm": 1.8222495949124846, "learning_rate": 8.321003434153979e-07, "loss": 0.715, "step": 12800 }, { "epoch": 0.82, "grad_norm": 2.485684510112681, "learning_rate": 8.31527840864525e-07, "loss": 0.6817, "step": 12801 }, { "epoch": 0.82, "grad_norm": 1.3567563491380155, "learning_rate": 8.309555174658263e-07, "loss": 0.6662, "step": 12802 }, { "epoch": 0.82, "grad_norm": 2.0369163750031207, "learning_rate": 8.303833732438988e-07, "loss": 0.7768, "step": 12803 }, { "epoch": 0.82, "grad_norm": 1.6275203415836665, "learning_rate": 8.298114082233327e-07, "loss": 0.7773, "step": 12804 }, { "epoch": 0.82, "grad_norm": 1.086001212499691, "learning_rate": 8.29239622428708e-07, "loss": 0.6213, "step": 12805 }, { "epoch": 0.82, "grad_norm": 2.2627031010447767, "learning_rate": 8.286680158846028e-07, "loss": 0.6065, "step": 12806 }, { "epoch": 0.82, "grad_norm": 2.6241230258122683, "learning_rate": 8.280965886155789e-07, "loss": 0.9077, "step": 12807 }, { "epoch": 0.82, "grad_norm": 1.7504789131644243, "learning_rate": 8.275253406461997e-07, "loss": 0.761, "step": 12808 }, { "epoch": 0.82, "grad_norm": 1.6651963941932217, "learning_rate": 8.26954272001011e-07, "loss": 0.7233, "step": 12809 }, { "epoch": 0.82, "grad_norm": 1.7747949542136814, "learning_rate": 8.263833827045603e-07, "loss": 0.766, "step": 12810 }, { "epoch": 0.82, "grad_norm": 1.8268216046897505, "learning_rate": 8.25812672781382e-07, "loss": 0.6183, "step": 12811 }, { "epoch": 0.82, "grad_norm": 1.5835598756829115, "learning_rate": 8.252421422560025e-07, "loss": 0.8726, "step": 12812 }, { "epoch": 0.82, "grad_norm": 2.1562818398819372, "learning_rate": 8.246717911529456e-07, "loss": 0.818, "step": 12813 }, { "epoch": 0.82, "grad_norm": 1.622854832526292, "learning_rate": 8.241016194967194e-07, "loss": 0.8267, "step": 12814 }, { "epoch": 0.82, "grad_norm": 1.71815930165414, "learning_rate": 8.235316273118333e-07, "loss": 0.7232, "step": 12815 }, { "epoch": 0.82, "grad_norm": 2.205269286105133, "learning_rate": 8.229618146227791e-07, "loss": 0.7672, "step": 12816 }, { "epoch": 0.82, "grad_norm": 1.8237138094366059, "learning_rate": 8.223921814540503e-07, "loss": 0.7501, "step": 12817 }, { "epoch": 0.82, "grad_norm": 2.3391121551415477, "learning_rate": 8.218227278301277e-07, "loss": 0.7735, "step": 12818 }, { "epoch": 0.82, "grad_norm": 1.564635575719233, "learning_rate": 8.212534537754841e-07, "loss": 0.7501, "step": 12819 }, { "epoch": 0.82, "grad_norm": 1.5423986758408, "learning_rate": 8.206843593145864e-07, "loss": 0.7985, "step": 12820 }, { "epoch": 0.82, "grad_norm": 1.841043221839492, "learning_rate": 8.201154444718917e-07, "loss": 0.6038, "step": 12821 }, { "epoch": 0.82, "grad_norm": 1.8315248345713329, "learning_rate": 8.195467092718546e-07, "loss": 0.753, "step": 12822 }, { "epoch": 0.82, "grad_norm": 1.4043001965427697, "learning_rate": 8.189781537389135e-07, "loss": 0.6073, "step": 12823 }, { "epoch": 0.82, "grad_norm": 1.9516974821656503, "learning_rate": 8.184097778975064e-07, "loss": 0.7593, "step": 12824 }, { "epoch": 0.82, "grad_norm": 3.0918136105352465, "learning_rate": 8.178415817720609e-07, "loss": 0.7082, "step": 12825 }, { "epoch": 0.82, "grad_norm": 1.642909837170977, "learning_rate": 8.172735653869956e-07, "loss": 0.7577, "step": 12826 }, { "epoch": 0.82, "grad_norm": 1.6624118760582358, "learning_rate": 8.16705728766724e-07, "loss": 0.7742, "step": 12827 }, { "epoch": 0.82, "grad_norm": 1.5798560866986746, "learning_rate": 8.161380719356493e-07, "loss": 0.6092, "step": 12828 }, { "epoch": 0.82, "grad_norm": 1.7868803226680043, "learning_rate": 8.155705949181691e-07, "loss": 0.7132, "step": 12829 }, { "epoch": 0.82, "grad_norm": 1.7258161801987193, "learning_rate": 8.150032977386707e-07, "loss": 0.7014, "step": 12830 }, { "epoch": 0.82, "grad_norm": 1.8047394532658563, "learning_rate": 8.144361804215384e-07, "loss": 0.7669, "step": 12831 }, { "epoch": 0.82, "grad_norm": 1.6195755604049547, "learning_rate": 8.138692429911432e-07, "loss": 0.6112, "step": 12832 }, { "epoch": 0.82, "grad_norm": 1.7560020418786462, "learning_rate": 8.133024854718524e-07, "loss": 0.6694, "step": 12833 }, { "epoch": 0.82, "grad_norm": 1.5595998342739843, "learning_rate": 8.127359078880226e-07, "loss": 0.7789, "step": 12834 }, { "epoch": 0.82, "grad_norm": 1.8022817103819995, "learning_rate": 8.121695102640053e-07, "loss": 0.7308, "step": 12835 }, { "epoch": 0.82, "grad_norm": 1.7642863718270978, "learning_rate": 8.116032926241424e-07, "loss": 0.691, "step": 12836 }, { "epoch": 0.82, "grad_norm": 1.5422183697199674, "learning_rate": 8.110372549927692e-07, "loss": 0.7216, "step": 12837 }, { "epoch": 0.82, "grad_norm": 1.8115051482960034, "learning_rate": 8.104713973942107e-07, "loss": 0.8062, "step": 12838 }, { "epoch": 0.82, "grad_norm": 1.9279380997547089, "learning_rate": 8.099057198527899e-07, "loss": 0.7815, "step": 12839 }, { "epoch": 0.82, "grad_norm": 1.1818427520944095, "learning_rate": 8.093402223928165e-07, "loss": 0.7083, "step": 12840 }, { "epoch": 0.82, "grad_norm": 2.134749853980363, "learning_rate": 8.087749050385952e-07, "loss": 0.8036, "step": 12841 }, { "epoch": 0.82, "grad_norm": 1.7257755728192805, "learning_rate": 8.082097678144207e-07, "loss": 0.6824, "step": 12842 }, { "epoch": 0.82, "grad_norm": 1.5220275636895377, "learning_rate": 8.076448107445822e-07, "loss": 0.6989, "step": 12843 }, { "epoch": 0.82, "grad_norm": 1.7285409955671163, "learning_rate": 8.070800338533608e-07, "loss": 0.7313, "step": 12844 }, { "epoch": 0.82, "grad_norm": 1.263858394807396, "learning_rate": 8.065154371650286e-07, "loss": 0.635, "step": 12845 }, { "epoch": 0.82, "grad_norm": 1.6110040199381188, "learning_rate": 8.059510207038517e-07, "loss": 0.7254, "step": 12846 }, { "epoch": 0.82, "grad_norm": 1.8044753678409136, "learning_rate": 8.053867844940855e-07, "loss": 0.8407, "step": 12847 }, { "epoch": 0.82, "grad_norm": 1.6741844403989068, "learning_rate": 8.048227285599825e-07, "loss": 0.6449, "step": 12848 }, { "epoch": 0.82, "grad_norm": 2.057644971117411, "learning_rate": 8.042588529257828e-07, "loss": 0.6658, "step": 12849 }, { "epoch": 0.82, "grad_norm": 1.6260027778958968, "learning_rate": 8.03695157615722e-07, "loss": 0.7395, "step": 12850 }, { "epoch": 0.82, "grad_norm": 1.7712246657787896, "learning_rate": 8.031316426540254e-07, "loss": 0.7431, "step": 12851 }, { "epoch": 0.82, "grad_norm": 1.918622564808862, "learning_rate": 8.025683080649116e-07, "loss": 0.8195, "step": 12852 }, { "epoch": 0.82, "grad_norm": 1.254360187310438, "learning_rate": 8.02005153872592e-07, "loss": 0.6416, "step": 12853 }, { "epoch": 0.82, "grad_norm": 1.6213901294476338, "learning_rate": 8.014421801012684e-07, "loss": 0.7557, "step": 12854 }, { "epoch": 0.82, "grad_norm": 1.4665662533172472, "learning_rate": 8.008793867751402e-07, "loss": 0.66, "step": 12855 }, { "epoch": 0.82, "grad_norm": 1.5104973326049587, "learning_rate": 8.003167739183903e-07, "loss": 0.6558, "step": 12856 }, { "epoch": 0.82, "grad_norm": 1.6303799238097934, "learning_rate": 7.997543415552011e-07, "loss": 0.7351, "step": 12857 }, { "epoch": 0.82, "grad_norm": 1.632984011513334, "learning_rate": 7.991920897097449e-07, "loss": 0.7654, "step": 12858 }, { "epoch": 0.82, "grad_norm": 1.29562531291266, "learning_rate": 7.986300184061857e-07, "loss": 0.6368, "step": 12859 }, { "epoch": 0.82, "grad_norm": 2.486810235296072, "learning_rate": 7.980681276686797e-07, "loss": 0.8135, "step": 12860 }, { "epoch": 0.82, "grad_norm": 1.8550599242189534, "learning_rate": 7.975064175213748e-07, "loss": 0.6895, "step": 12861 }, { "epoch": 0.82, "grad_norm": 1.7097804140897288, "learning_rate": 7.969448879884162e-07, "loss": 0.6984, "step": 12862 }, { "epoch": 0.82, "grad_norm": 1.629059767594791, "learning_rate": 7.963835390939317e-07, "loss": 0.7082, "step": 12863 }, { "epoch": 0.82, "grad_norm": 2.0167396334123047, "learning_rate": 7.958223708620521e-07, "loss": 0.6637, "step": 12864 }, { "epoch": 0.82, "grad_norm": 0.9267954369347265, "learning_rate": 7.952613833168909e-07, "loss": 0.5714, "step": 12865 }, { "epoch": 0.82, "grad_norm": 1.387549382541733, "learning_rate": 7.947005764825611e-07, "loss": 0.7113, "step": 12866 }, { "epoch": 0.82, "grad_norm": 1.3108949834677688, "learning_rate": 7.941399503831637e-07, "loss": 0.5741, "step": 12867 }, { "epoch": 0.82, "grad_norm": 2.118949362474348, "learning_rate": 7.935795050427924e-07, "loss": 0.7192, "step": 12868 }, { "epoch": 0.82, "grad_norm": 1.529636862675222, "learning_rate": 7.930192404855375e-07, "loss": 0.6689, "step": 12869 }, { "epoch": 0.82, "grad_norm": 1.6651063257925336, "learning_rate": 7.924591567354728e-07, "loss": 0.7044, "step": 12870 }, { "epoch": 0.82, "grad_norm": 1.7437103245506476, "learning_rate": 7.918992538166753e-07, "loss": 0.7242, "step": 12871 }, { "epoch": 0.82, "grad_norm": 1.7224798794946894, "learning_rate": 7.913395317532024e-07, "loss": 0.6072, "step": 12872 }, { "epoch": 0.82, "grad_norm": 1.6523695538093248, "learning_rate": 7.907799905691144e-07, "loss": 0.6756, "step": 12873 }, { "epoch": 0.82, "grad_norm": 1.8466534274098683, "learning_rate": 7.90220630288458e-07, "loss": 0.7176, "step": 12874 }, { "epoch": 0.82, "grad_norm": 1.6797387459271818, "learning_rate": 7.896614509352724e-07, "loss": 0.7567, "step": 12875 }, { "epoch": 0.82, "grad_norm": 1.6803388822171326, "learning_rate": 7.891024525335905e-07, "loss": 0.6219, "step": 12876 }, { "epoch": 0.82, "grad_norm": 1.7524613382253662, "learning_rate": 7.885436351074355e-07, "loss": 0.7288, "step": 12877 }, { "epoch": 0.82, "grad_norm": 1.8400142861189805, "learning_rate": 7.879849986808286e-07, "loss": 0.7442, "step": 12878 }, { "epoch": 0.82, "grad_norm": 1.0555078023541444, "learning_rate": 7.874265432777728e-07, "loss": 0.7153, "step": 12879 }, { "epoch": 0.82, "grad_norm": 1.708618881860903, "learning_rate": 7.86868268922274e-07, "loss": 0.802, "step": 12880 }, { "epoch": 0.82, "grad_norm": 1.680126858594841, "learning_rate": 7.863101756383235e-07, "loss": 0.8628, "step": 12881 }, { "epoch": 0.82, "grad_norm": 2.1025109199510497, "learning_rate": 7.857522634499082e-07, "loss": 0.6567, "step": 12882 }, { "epoch": 0.82, "grad_norm": 1.0646066957658085, "learning_rate": 7.851945323810045e-07, "loss": 0.8059, "step": 12883 }, { "epoch": 0.82, "grad_norm": 1.6600009142030179, "learning_rate": 7.846369824555838e-07, "loss": 0.6352, "step": 12884 }, { "epoch": 0.82, "grad_norm": 0.9980757033910578, "learning_rate": 7.840796136976075e-07, "loss": 0.5709, "step": 12885 }, { "epoch": 0.82, "grad_norm": 1.4671563524105053, "learning_rate": 7.835224261310293e-07, "loss": 0.6329, "step": 12886 }, { "epoch": 0.82, "grad_norm": 2.4221128776177574, "learning_rate": 7.829654197797998e-07, "loss": 0.7197, "step": 12887 }, { "epoch": 0.82, "grad_norm": 1.108836212963635, "learning_rate": 7.824085946678534e-07, "loss": 0.7071, "step": 12888 }, { "epoch": 0.82, "grad_norm": 1.6334783212084183, "learning_rate": 7.818519508191236e-07, "loss": 0.6148, "step": 12889 }, { "epoch": 0.83, "grad_norm": 1.6061640799511612, "learning_rate": 7.812954882575341e-07, "loss": 0.6081, "step": 12890 }, { "epoch": 0.83, "grad_norm": 2.1396682012555175, "learning_rate": 7.807392070069992e-07, "loss": 0.6724, "step": 12891 }, { "epoch": 0.83, "grad_norm": 1.8245029723353268, "learning_rate": 7.801831070914279e-07, "loss": 0.7902, "step": 12892 }, { "epoch": 0.83, "grad_norm": 1.8174406639417882, "learning_rate": 7.796271885347189e-07, "loss": 0.8361, "step": 12893 }, { "epoch": 0.83, "grad_norm": 2.22314777199453, "learning_rate": 7.790714513607656e-07, "loss": 0.9423, "step": 12894 }, { "epoch": 0.83, "grad_norm": 1.8467354991897702, "learning_rate": 7.785158955934508e-07, "loss": 0.7319, "step": 12895 }, { "epoch": 0.83, "grad_norm": 1.5884379906079507, "learning_rate": 7.779605212566533e-07, "loss": 0.6323, "step": 12896 }, { "epoch": 0.83, "grad_norm": 1.8591270952368426, "learning_rate": 7.774053283742406e-07, "loss": 0.76, "step": 12897 }, { "epoch": 0.83, "grad_norm": 1.9334211896042626, "learning_rate": 7.768503169700742e-07, "loss": 0.7839, "step": 12898 }, { "epoch": 0.83, "grad_norm": 1.5943490442359896, "learning_rate": 7.762954870680067e-07, "loss": 0.5374, "step": 12899 }, { "epoch": 0.83, "grad_norm": 1.7632011675295731, "learning_rate": 7.757408386918846e-07, "loss": 0.6466, "step": 12900 }, { "epoch": 0.83, "grad_norm": 1.1070451118401192, "learning_rate": 7.751863718655444e-07, "loss": 0.6652, "step": 12901 }, { "epoch": 0.83, "grad_norm": 1.7987372396634564, "learning_rate": 7.746320866128171e-07, "loss": 0.7257, "step": 12902 }, { "epoch": 0.83, "grad_norm": 1.6433051149098974, "learning_rate": 7.740779829575218e-07, "loss": 0.7393, "step": 12903 }, { "epoch": 0.83, "grad_norm": 1.9719826198392612, "learning_rate": 7.735240609234767e-07, "loss": 0.7319, "step": 12904 }, { "epoch": 0.83, "grad_norm": 1.6162849276792026, "learning_rate": 7.729703205344863e-07, "loss": 0.7647, "step": 12905 }, { "epoch": 0.83, "grad_norm": 1.652212902504768, "learning_rate": 7.724167618143497e-07, "loss": 0.6422, "step": 12906 }, { "epoch": 0.83, "grad_norm": 1.2053813593747424, "learning_rate": 7.718633847868568e-07, "loss": 0.6571, "step": 12907 }, { "epoch": 0.83, "grad_norm": 1.7862538878998377, "learning_rate": 7.713101894757913e-07, "loss": 0.6565, "step": 12908 }, { "epoch": 0.83, "grad_norm": 1.6773791874382733, "learning_rate": 7.707571759049281e-07, "loss": 0.7482, "step": 12909 }, { "epoch": 0.83, "grad_norm": 1.5638747338665102, "learning_rate": 7.702043440980333e-07, "loss": 0.6852, "step": 12910 }, { "epoch": 0.83, "grad_norm": 1.8206910335268869, "learning_rate": 7.696516940788701e-07, "loss": 0.8204, "step": 12911 }, { "epoch": 0.83, "grad_norm": 1.975517255977622, "learning_rate": 7.690992258711855e-07, "loss": 0.8184, "step": 12912 }, { "epoch": 0.83, "grad_norm": 2.058230522937814, "learning_rate": 7.685469394987271e-07, "loss": 0.6966, "step": 12913 }, { "epoch": 0.83, "grad_norm": 1.0460574127791067, "learning_rate": 7.679948349852301e-07, "loss": 0.5363, "step": 12914 }, { "epoch": 0.83, "grad_norm": 1.3843776662064455, "learning_rate": 7.67442912354422e-07, "loss": 0.6361, "step": 12915 }, { "epoch": 0.83, "grad_norm": 1.1650318409580072, "learning_rate": 7.668911716300237e-07, "loss": 0.6717, "step": 12916 }, { "epoch": 0.83, "grad_norm": 1.5740295786485614, "learning_rate": 7.663396128357481e-07, "loss": 0.7868, "step": 12917 }, { "epoch": 0.83, "grad_norm": 1.6270446812570332, "learning_rate": 7.657882359952995e-07, "loss": 0.6694, "step": 12918 }, { "epoch": 0.83, "grad_norm": 1.5266753752297748, "learning_rate": 7.652370411323745e-07, "loss": 0.6205, "step": 12919 }, { "epoch": 0.83, "grad_norm": 1.61929622690154, "learning_rate": 7.646860282706652e-07, "loss": 0.6347, "step": 12920 }, { "epoch": 0.83, "grad_norm": 1.7705589578892726, "learning_rate": 7.641351974338478e-07, "loss": 0.7157, "step": 12921 }, { "epoch": 0.83, "grad_norm": 1.5441672085951046, "learning_rate": 7.635845486456006e-07, "loss": 0.6148, "step": 12922 }, { "epoch": 0.83, "grad_norm": 1.5775296521484745, "learning_rate": 7.630340819295879e-07, "loss": 0.6973, "step": 12923 }, { "epoch": 0.83, "grad_norm": 1.7743667664298255, "learning_rate": 7.624837973094668e-07, "loss": 0.652, "step": 12924 }, { "epoch": 0.83, "grad_norm": 1.6985309848144223, "learning_rate": 7.619336948088879e-07, "loss": 0.8134, "step": 12925 }, { "epoch": 0.83, "grad_norm": 2.2923259142980528, "learning_rate": 7.613837744514918e-07, "loss": 0.7854, "step": 12926 }, { "epoch": 0.83, "grad_norm": 1.7241428208643446, "learning_rate": 7.608340362609174e-07, "loss": 0.6688, "step": 12927 }, { "epoch": 0.83, "grad_norm": 1.6341242870995925, "learning_rate": 7.602844802607862e-07, "loss": 0.6878, "step": 12928 }, { "epoch": 0.83, "grad_norm": 1.7216715165321121, "learning_rate": 7.597351064747211e-07, "loss": 0.7636, "step": 12929 }, { "epoch": 0.83, "grad_norm": 1.531998849579141, "learning_rate": 7.591859149263287e-07, "loss": 0.6963, "step": 12930 }, { "epoch": 0.83, "grad_norm": 1.0117662527663958, "learning_rate": 7.586369056392162e-07, "loss": 0.5983, "step": 12931 }, { "epoch": 0.83, "grad_norm": 1.702889041927247, "learning_rate": 7.580880786369766e-07, "loss": 0.6803, "step": 12932 }, { "epoch": 0.83, "grad_norm": 1.62511193384784, "learning_rate": 7.575394339431969e-07, "loss": 0.6465, "step": 12933 }, { "epoch": 0.83, "grad_norm": 1.630325689047417, "learning_rate": 7.569909715814605e-07, "loss": 0.6278, "step": 12934 }, { "epoch": 0.83, "grad_norm": 1.7977291043860573, "learning_rate": 7.564426915753331e-07, "loss": 0.662, "step": 12935 }, { "epoch": 0.83, "grad_norm": 1.5845637007053732, "learning_rate": 7.558945939483847e-07, "loss": 0.6938, "step": 12936 }, { "epoch": 0.83, "grad_norm": 1.3375606148055024, "learning_rate": 7.553466787241665e-07, "loss": 0.6353, "step": 12937 }, { "epoch": 0.83, "grad_norm": 1.5826028388263302, "learning_rate": 7.547989459262295e-07, "loss": 0.7601, "step": 12938 }, { "epoch": 0.83, "grad_norm": 1.6132019580022774, "learning_rate": 7.542513955781139e-07, "loss": 0.6498, "step": 12939 }, { "epoch": 0.83, "grad_norm": 1.785929746217787, "learning_rate": 7.537040277033514e-07, "loss": 0.7283, "step": 12940 }, { "epoch": 0.83, "grad_norm": 1.1129597980524384, "learning_rate": 7.53156842325467e-07, "loss": 0.6235, "step": 12941 }, { "epoch": 0.83, "grad_norm": 1.8525996222853534, "learning_rate": 7.52609839467977e-07, "loss": 0.7726, "step": 12942 }, { "epoch": 0.83, "grad_norm": 1.1807091004965329, "learning_rate": 7.520630191543932e-07, "loss": 0.5724, "step": 12943 }, { "epoch": 0.83, "grad_norm": 1.603656865623011, "learning_rate": 7.515163814082121e-07, "loss": 0.7311, "step": 12944 }, { "epoch": 0.83, "grad_norm": 1.6925119635365737, "learning_rate": 7.509699262529308e-07, "loss": 0.6875, "step": 12945 }, { "epoch": 0.83, "grad_norm": 1.507048763792006, "learning_rate": 7.504236537120341e-07, "loss": 0.6855, "step": 12946 }, { "epoch": 0.83, "grad_norm": 2.0498652557215893, "learning_rate": 7.498775638089989e-07, "loss": 0.7707, "step": 12947 }, { "epoch": 0.83, "grad_norm": 1.6242676388279693, "learning_rate": 7.493316565672948e-07, "loss": 0.6809, "step": 12948 }, { "epoch": 0.83, "grad_norm": 2.9216781052342653, "learning_rate": 7.487859320103847e-07, "loss": 0.7013, "step": 12949 }, { "epoch": 0.83, "grad_norm": 1.7108593258477802, "learning_rate": 7.482403901617225e-07, "loss": 0.676, "step": 12950 }, { "epoch": 0.83, "grad_norm": 1.6826289470672522, "learning_rate": 7.476950310447523e-07, "loss": 0.6329, "step": 12951 }, { "epoch": 0.83, "grad_norm": 1.1428601181603648, "learning_rate": 7.471498546829159e-07, "loss": 0.5648, "step": 12952 }, { "epoch": 0.83, "grad_norm": 1.5057235397124873, "learning_rate": 7.466048610996423e-07, "loss": 0.7586, "step": 12953 }, { "epoch": 0.83, "grad_norm": 1.1882181198855908, "learning_rate": 7.46060050318354e-07, "loss": 0.7577, "step": 12954 }, { "epoch": 0.83, "grad_norm": 1.5895686034364307, "learning_rate": 7.455154223624661e-07, "loss": 0.6726, "step": 12955 }, { "epoch": 0.83, "grad_norm": 1.688719241644976, "learning_rate": 7.449709772553853e-07, "loss": 0.7012, "step": 12956 }, { "epoch": 0.83, "grad_norm": 1.651910933549988, "learning_rate": 7.444267150205108e-07, "loss": 0.6691, "step": 12957 }, { "epoch": 0.83, "grad_norm": 1.9879001369761278, "learning_rate": 7.438826356812345e-07, "loss": 0.8136, "step": 12958 }, { "epoch": 0.83, "grad_norm": 1.6956220174359338, "learning_rate": 7.433387392609387e-07, "loss": 0.764, "step": 12959 }, { "epoch": 0.83, "grad_norm": 1.3543683809617542, "learning_rate": 7.42795025782998e-07, "loss": 0.6969, "step": 12960 }, { "epoch": 0.83, "grad_norm": 1.8556544392983818, "learning_rate": 7.422514952707832e-07, "loss": 0.7165, "step": 12961 }, { "epoch": 0.83, "grad_norm": 1.6378807621904792, "learning_rate": 7.417081477476523e-07, "loss": 0.6381, "step": 12962 }, { "epoch": 0.83, "grad_norm": 1.776998331783049, "learning_rate": 7.411649832369566e-07, "loss": 0.798, "step": 12963 }, { "epoch": 0.83, "grad_norm": 1.5445249567963557, "learning_rate": 7.406220017620414e-07, "loss": 0.6148, "step": 12964 }, { "epoch": 0.83, "grad_norm": 1.2723219169538142, "learning_rate": 7.400792033462428e-07, "loss": 0.7375, "step": 12965 }, { "epoch": 0.83, "grad_norm": 2.0899662484456933, "learning_rate": 7.39536588012888e-07, "loss": 0.7275, "step": 12966 }, { "epoch": 0.83, "grad_norm": 1.7254176307949571, "learning_rate": 7.389941557852987e-07, "loss": 0.7574, "step": 12967 }, { "epoch": 0.83, "grad_norm": 1.5752041903686322, "learning_rate": 7.384519066867851e-07, "loss": 0.6396, "step": 12968 }, { "epoch": 0.83, "grad_norm": 1.5836737288512346, "learning_rate": 7.379098407406554e-07, "loss": 0.7422, "step": 12969 }, { "epoch": 0.83, "grad_norm": 1.7809013343860107, "learning_rate": 7.373679579702053e-07, "loss": 0.7278, "step": 12970 }, { "epoch": 0.83, "grad_norm": 1.6846564307928618, "learning_rate": 7.368262583987229e-07, "loss": 0.6926, "step": 12971 }, { "epoch": 0.83, "grad_norm": 1.8107394186819348, "learning_rate": 7.362847420494896e-07, "loss": 0.6822, "step": 12972 }, { "epoch": 0.83, "grad_norm": 1.8135685577553722, "learning_rate": 7.357434089457788e-07, "loss": 0.7813, "step": 12973 }, { "epoch": 0.83, "grad_norm": 1.8494532339290244, "learning_rate": 7.35202259110856e-07, "loss": 0.685, "step": 12974 }, { "epoch": 0.83, "grad_norm": 1.971135866611462, "learning_rate": 7.346612925679774e-07, "loss": 0.726, "step": 12975 }, { "epoch": 0.83, "grad_norm": 1.2506164790131984, "learning_rate": 7.341205093403963e-07, "loss": 0.6529, "step": 12976 }, { "epoch": 0.83, "grad_norm": 1.849255461329394, "learning_rate": 7.33579909451349e-07, "loss": 0.686, "step": 12977 }, { "epoch": 0.83, "grad_norm": 1.8262824965763595, "learning_rate": 7.330394929240736e-07, "loss": 0.6441, "step": 12978 }, { "epoch": 0.83, "grad_norm": 1.8217155268196157, "learning_rate": 7.324992597817948e-07, "loss": 0.7505, "step": 12979 }, { "epoch": 0.83, "grad_norm": 1.4145178490533756, "learning_rate": 7.319592100477307e-07, "loss": 0.6019, "step": 12980 }, { "epoch": 0.83, "grad_norm": 2.0044782395049725, "learning_rate": 7.314193437450911e-07, "loss": 0.7018, "step": 12981 }, { "epoch": 0.83, "grad_norm": 1.0468527567235968, "learning_rate": 7.308796608970775e-07, "loss": 0.5778, "step": 12982 }, { "epoch": 0.83, "grad_norm": 1.4478446987697724, "learning_rate": 7.30340161526888e-07, "loss": 0.6753, "step": 12983 }, { "epoch": 0.83, "grad_norm": 1.6761446953003725, "learning_rate": 7.298008456577038e-07, "loss": 0.7841, "step": 12984 }, { "epoch": 0.83, "grad_norm": 1.8070669451345063, "learning_rate": 7.292617133127083e-07, "loss": 0.8211, "step": 12985 }, { "epoch": 0.83, "grad_norm": 1.726018272942059, "learning_rate": 7.287227645150686e-07, "loss": 0.6285, "step": 12986 }, { "epoch": 0.83, "grad_norm": 1.8552198901413248, "learning_rate": 7.281839992879503e-07, "loss": 0.6875, "step": 12987 }, { "epoch": 0.83, "grad_norm": 1.5237874235365025, "learning_rate": 7.276454176545078e-07, "loss": 0.7619, "step": 12988 }, { "epoch": 0.83, "grad_norm": 1.6422419820031835, "learning_rate": 7.271070196378859e-07, "loss": 0.6624, "step": 12989 }, { "epoch": 0.83, "grad_norm": 1.9059024751996547, "learning_rate": 7.265688052612285e-07, "loss": 0.7575, "step": 12990 }, { "epoch": 0.83, "grad_norm": 1.417711528565118, "learning_rate": 7.260307745476619e-07, "loss": 0.6068, "step": 12991 }, { "epoch": 0.83, "grad_norm": 1.6872290038815325, "learning_rate": 7.254929275203138e-07, "loss": 0.6662, "step": 12992 }, { "epoch": 0.83, "grad_norm": 1.9910272730921281, "learning_rate": 7.249552642022956e-07, "loss": 0.7846, "step": 12993 }, { "epoch": 0.83, "grad_norm": 1.7160003227565008, "learning_rate": 7.244177846167177e-07, "loss": 0.6569, "step": 12994 }, { "epoch": 0.83, "grad_norm": 1.7929838446347575, "learning_rate": 7.238804887866796e-07, "loss": 0.7794, "step": 12995 }, { "epoch": 0.83, "grad_norm": 1.6132600844626876, "learning_rate": 7.233433767352727e-07, "loss": 0.7397, "step": 12996 }, { "epoch": 0.83, "grad_norm": 1.7766315899814706, "learning_rate": 7.228064484855807e-07, "loss": 0.6423, "step": 12997 }, { "epoch": 0.83, "grad_norm": 2.188967010960732, "learning_rate": 7.222697040606791e-07, "loss": 0.6279, "step": 12998 }, { "epoch": 0.83, "grad_norm": 1.7731296754814068, "learning_rate": 7.217331434836395e-07, "loss": 0.7289, "step": 12999 }, { "epoch": 0.83, "grad_norm": 1.6990307675150018, "learning_rate": 7.211967667775166e-07, "loss": 0.7515, "step": 13000 }, { "epoch": 0.83, "grad_norm": 2.113372585029323, "learning_rate": 7.206605739653683e-07, "loss": 0.7514, "step": 13001 }, { "epoch": 0.83, "grad_norm": 1.8763650693547058, "learning_rate": 7.201245650702338e-07, "loss": 0.6898, "step": 13002 }, { "epoch": 0.83, "grad_norm": 1.7299117022781465, "learning_rate": 7.195887401151536e-07, "loss": 0.7101, "step": 13003 }, { "epoch": 0.83, "grad_norm": 1.1663240061500593, "learning_rate": 7.190530991231548e-07, "loss": 0.65, "step": 13004 }, { "epoch": 0.83, "grad_norm": 1.9867474974833657, "learning_rate": 7.185176421172573e-07, "loss": 0.6492, "step": 13005 }, { "epoch": 0.83, "grad_norm": 1.805708806073611, "learning_rate": 7.179823691204768e-07, "loss": 0.8612, "step": 13006 }, { "epoch": 0.83, "grad_norm": 1.0398344964287949, "learning_rate": 7.174472801558147e-07, "loss": 0.649, "step": 13007 }, { "epoch": 0.83, "grad_norm": 1.6505146111552869, "learning_rate": 7.169123752462714e-07, "loss": 0.6811, "step": 13008 }, { "epoch": 0.83, "grad_norm": 1.4506858877832802, "learning_rate": 7.163776544148321e-07, "loss": 0.6204, "step": 13009 }, { "epoch": 0.83, "grad_norm": 1.0360701990911225, "learning_rate": 7.158431176844815e-07, "loss": 0.5584, "step": 13010 }, { "epoch": 0.83, "grad_norm": 1.853178447651399, "learning_rate": 7.153087650781909e-07, "loss": 0.6579, "step": 13011 }, { "epoch": 0.83, "grad_norm": 2.0062165973341575, "learning_rate": 7.147745966189267e-07, "loss": 0.7075, "step": 13012 }, { "epoch": 0.83, "grad_norm": 1.8181214581352148, "learning_rate": 7.142406123296452e-07, "loss": 0.784, "step": 13013 }, { "epoch": 0.83, "grad_norm": 2.0791904732112965, "learning_rate": 7.137068122332974e-07, "loss": 0.9044, "step": 13014 }, { "epoch": 0.83, "grad_norm": 1.5014022965769476, "learning_rate": 7.131731963528232e-07, "loss": 0.7494, "step": 13015 }, { "epoch": 0.83, "grad_norm": 2.0490566905926104, "learning_rate": 7.126397647111566e-07, "loss": 0.7988, "step": 13016 }, { "epoch": 0.83, "grad_norm": 1.6471602281249589, "learning_rate": 7.121065173312253e-07, "loss": 0.7459, "step": 13017 }, { "epoch": 0.83, "grad_norm": 1.9792321430533832, "learning_rate": 7.115734542359454e-07, "loss": 0.6474, "step": 13018 }, { "epoch": 0.83, "grad_norm": 1.1127179697007834, "learning_rate": 7.110405754482269e-07, "loss": 0.7001, "step": 13019 }, { "epoch": 0.83, "grad_norm": 1.8134603056483416, "learning_rate": 7.105078809909727e-07, "loss": 0.6719, "step": 13020 }, { "epoch": 0.83, "grad_norm": 1.996243489128329, "learning_rate": 7.09975370887076e-07, "loss": 0.735, "step": 13021 }, { "epoch": 0.83, "grad_norm": 1.8152747860911818, "learning_rate": 7.09443045159423e-07, "loss": 0.7837, "step": 13022 }, { "epoch": 0.83, "grad_norm": 1.5657559159870047, "learning_rate": 7.089109038308928e-07, "loss": 0.5868, "step": 13023 }, { "epoch": 0.83, "grad_norm": 0.9764698996948141, "learning_rate": 7.083789469243535e-07, "loss": 0.6232, "step": 13024 }, { "epoch": 0.83, "grad_norm": 1.3527817803359643, "learning_rate": 7.078471744626708e-07, "loss": 0.7399, "step": 13025 }, { "epoch": 0.83, "grad_norm": 2.101228317078598, "learning_rate": 7.07315586468697e-07, "loss": 0.7562, "step": 13026 }, { "epoch": 0.83, "grad_norm": 1.5510694955020867, "learning_rate": 7.067841829652794e-07, "loss": 0.617, "step": 13027 }, { "epoch": 0.83, "grad_norm": 1.6013827722503085, "learning_rate": 7.062529639752558e-07, "loss": 0.6374, "step": 13028 }, { "epoch": 0.83, "grad_norm": 1.6511589600873124, "learning_rate": 7.057219295214579e-07, "loss": 0.5797, "step": 13029 }, { "epoch": 0.83, "grad_norm": 1.6884932910532833, "learning_rate": 7.051910796267081e-07, "loss": 0.8009, "step": 13030 }, { "epoch": 0.83, "grad_norm": 1.7579127713642915, "learning_rate": 7.046604143138198e-07, "loss": 0.7354, "step": 13031 }, { "epoch": 0.83, "grad_norm": 1.9553451084907894, "learning_rate": 7.041299336056028e-07, "loss": 0.732, "step": 13032 }, { "epoch": 0.83, "grad_norm": 1.6436469999930028, "learning_rate": 7.035996375248527e-07, "loss": 0.7159, "step": 13033 }, { "epoch": 0.83, "grad_norm": 2.04863518401941, "learning_rate": 7.030695260943637e-07, "loss": 0.8588, "step": 13034 }, { "epoch": 0.83, "grad_norm": 1.8907457831676238, "learning_rate": 7.025395993369166e-07, "loss": 0.77, "step": 13035 }, { "epoch": 0.83, "grad_norm": 1.640126845616039, "learning_rate": 7.020098572752876e-07, "loss": 0.7505, "step": 13036 }, { "epoch": 0.83, "grad_norm": 1.681225550701551, "learning_rate": 7.01480299932244e-07, "loss": 0.5996, "step": 13037 }, { "epoch": 0.83, "grad_norm": 1.4871041490045258, "learning_rate": 7.009509273305442e-07, "loss": 0.8152, "step": 13038 }, { "epoch": 0.83, "grad_norm": 1.881847488619969, "learning_rate": 7.004217394929402e-07, "loss": 0.6605, "step": 13039 }, { "epoch": 0.83, "grad_norm": 1.5757542032283072, "learning_rate": 6.998927364421737e-07, "loss": 0.6507, "step": 13040 }, { "epoch": 0.83, "grad_norm": 1.6132489879019958, "learning_rate": 6.993639182009843e-07, "loss": 0.6614, "step": 13041 }, { "epoch": 0.83, "grad_norm": 1.6491781055025274, "learning_rate": 6.988352847920943e-07, "loss": 0.7259, "step": 13042 }, { "epoch": 0.83, "grad_norm": 1.736613324112776, "learning_rate": 6.983068362382272e-07, "loss": 0.6926, "step": 13043 }, { "epoch": 0.83, "grad_norm": 1.6708970652372146, "learning_rate": 6.977785725620928e-07, "loss": 0.7517, "step": 13044 }, { "epoch": 0.83, "grad_norm": 1.847884729498419, "learning_rate": 6.972504937863955e-07, "loss": 0.759, "step": 13045 }, { "epoch": 0.84, "grad_norm": 1.5607030624992555, "learning_rate": 6.967225999338306e-07, "loss": 0.7257, "step": 13046 }, { "epoch": 0.84, "grad_norm": 1.7792343316368247, "learning_rate": 6.961948910270844e-07, "loss": 0.6543, "step": 13047 }, { "epoch": 0.84, "grad_norm": 5.30430556695865, "learning_rate": 6.956673670888409e-07, "loss": 0.6816, "step": 13048 }, { "epoch": 0.84, "grad_norm": 1.5904782753785234, "learning_rate": 6.951400281417669e-07, "loss": 0.7372, "step": 13049 }, { "epoch": 0.84, "grad_norm": 1.7704179434419944, "learning_rate": 6.946128742085311e-07, "loss": 0.7386, "step": 13050 }, { "epoch": 0.84, "grad_norm": 2.075807985821692, "learning_rate": 6.940859053117843e-07, "loss": 0.6964, "step": 13051 }, { "epoch": 0.84, "grad_norm": 1.6778926542749657, "learning_rate": 6.935591214741794e-07, "loss": 0.7312, "step": 13052 }, { "epoch": 0.84, "grad_norm": 1.256693921906438, "learning_rate": 6.930325227183537e-07, "loss": 0.7417, "step": 13053 }, { "epoch": 0.84, "grad_norm": 4.300584487497602, "learning_rate": 6.925061090669389e-07, "loss": 0.85, "step": 13054 }, { "epoch": 0.84, "grad_norm": 2.038667245497396, "learning_rate": 6.919798805425626e-07, "loss": 0.6231, "step": 13055 }, { "epoch": 0.84, "grad_norm": 1.8622803310100775, "learning_rate": 6.914538371678364e-07, "loss": 0.745, "step": 13056 }, { "epoch": 0.84, "grad_norm": 1.7114831801828534, "learning_rate": 6.909279789653734e-07, "loss": 0.5963, "step": 13057 }, { "epoch": 0.84, "grad_norm": 1.1737655993012743, "learning_rate": 6.904023059577686e-07, "loss": 0.5992, "step": 13058 }, { "epoch": 0.84, "grad_norm": 1.7075784275150587, "learning_rate": 6.89876818167618e-07, "loss": 0.7091, "step": 13059 }, { "epoch": 0.84, "grad_norm": 1.908310164782779, "learning_rate": 6.893515156175051e-07, "loss": 0.824, "step": 13060 }, { "epoch": 0.84, "grad_norm": 1.0514425255697264, "learning_rate": 6.888263983300048e-07, "loss": 0.6332, "step": 13061 }, { "epoch": 0.84, "grad_norm": 2.347404073398883, "learning_rate": 6.883014663276894e-07, "loss": 0.8292, "step": 13062 }, { "epoch": 0.84, "grad_norm": 1.73118457136933, "learning_rate": 6.877767196331147e-07, "loss": 0.806, "step": 13063 }, { "epoch": 0.84, "grad_norm": 3.8418082682174104, "learning_rate": 6.872521582688374e-07, "loss": 0.803, "step": 13064 }, { "epoch": 0.84, "grad_norm": 1.6647190232118545, "learning_rate": 6.867277822573975e-07, "loss": 0.6087, "step": 13065 }, { "epoch": 0.84, "grad_norm": 1.4233862201436067, "learning_rate": 6.862035916213361e-07, "loss": 0.6136, "step": 13066 }, { "epoch": 0.84, "grad_norm": 1.7380653984958325, "learning_rate": 6.856795863831789e-07, "loss": 0.6723, "step": 13067 }, { "epoch": 0.84, "grad_norm": 1.7239229333184536, "learning_rate": 6.851557665654479e-07, "loss": 0.7762, "step": 13068 }, { "epoch": 0.84, "grad_norm": 1.8432089417144304, "learning_rate": 6.846321321906551e-07, "loss": 0.6081, "step": 13069 }, { "epoch": 0.84, "grad_norm": 2.1198477101450988, "learning_rate": 6.841086832813043e-07, "loss": 0.9238, "step": 13070 }, { "epoch": 0.84, "grad_norm": 1.692760889120454, "learning_rate": 6.835854198598957e-07, "loss": 0.7371, "step": 13071 }, { "epoch": 0.84, "grad_norm": 1.5636330627625215, "learning_rate": 6.830623419489135e-07, "loss": 0.8448, "step": 13072 }, { "epoch": 0.84, "grad_norm": 2.5851071894343445, "learning_rate": 6.825394495708415e-07, "loss": 0.6584, "step": 13073 }, { "epoch": 0.84, "grad_norm": 1.7079785971652708, "learning_rate": 6.820167427481522e-07, "loss": 0.7479, "step": 13074 }, { "epoch": 0.84, "grad_norm": 1.5621792000605415, "learning_rate": 6.814942215033099e-07, "loss": 0.7697, "step": 13075 }, { "epoch": 0.84, "grad_norm": 1.896310234285319, "learning_rate": 6.80971885858771e-07, "loss": 0.7069, "step": 13076 }, { "epoch": 0.84, "grad_norm": 1.7331702530011333, "learning_rate": 6.804497358369855e-07, "loss": 0.667, "step": 13077 }, { "epoch": 0.84, "grad_norm": 1.8498124146730603, "learning_rate": 6.799277714603935e-07, "loss": 0.7058, "step": 13078 }, { "epoch": 0.84, "grad_norm": 1.596462568295146, "learning_rate": 6.794059927514268e-07, "loss": 0.8015, "step": 13079 }, { "epoch": 0.84, "grad_norm": 1.095248160366146, "learning_rate": 6.788843997325145e-07, "loss": 0.5694, "step": 13080 }, { "epoch": 0.84, "grad_norm": 1.6154936909051762, "learning_rate": 6.783629924260682e-07, "loss": 0.7697, "step": 13081 }, { "epoch": 0.84, "grad_norm": 2.0494101941314105, "learning_rate": 6.778417708545004e-07, "loss": 0.6901, "step": 13082 }, { "epoch": 0.84, "grad_norm": 1.5846811903662457, "learning_rate": 6.773207350402117e-07, "loss": 0.8032, "step": 13083 }, { "epoch": 0.84, "grad_norm": 1.7917745885481364, "learning_rate": 6.767998850055946e-07, "loss": 0.6735, "step": 13084 }, { "epoch": 0.84, "grad_norm": 1.6677670030928062, "learning_rate": 6.762792207730334e-07, "loss": 0.6519, "step": 13085 }, { "epoch": 0.84, "grad_norm": 1.8292093081944187, "learning_rate": 6.757587423649065e-07, "loss": 0.7684, "step": 13086 }, { "epoch": 0.84, "grad_norm": 1.5647970349145544, "learning_rate": 6.752384498035824e-07, "loss": 0.7454, "step": 13087 }, { "epoch": 0.84, "grad_norm": 1.8363693732995447, "learning_rate": 6.747183431114218e-07, "loss": 0.8512, "step": 13088 }, { "epoch": 0.84, "grad_norm": 1.8919509979481763, "learning_rate": 6.741984223107773e-07, "loss": 0.8352, "step": 13089 }, { "epoch": 0.84, "grad_norm": 1.7540335007457024, "learning_rate": 6.736786874239959e-07, "loss": 0.8235, "step": 13090 }, { "epoch": 0.84, "grad_norm": 1.8877611285538725, "learning_rate": 6.731591384734138e-07, "loss": 0.6779, "step": 13091 }, { "epoch": 0.84, "grad_norm": 1.446865105050902, "learning_rate": 6.726397754813596e-07, "loss": 0.6358, "step": 13092 }, { "epoch": 0.84, "grad_norm": 1.8486574137753211, "learning_rate": 6.721205984701551e-07, "loss": 0.7446, "step": 13093 }, { "epoch": 0.84, "grad_norm": 2.1075249967748317, "learning_rate": 6.716016074621135e-07, "loss": 0.7401, "step": 13094 }, { "epoch": 0.84, "grad_norm": 1.5674806284875482, "learning_rate": 6.71082802479539e-07, "loss": 0.6043, "step": 13095 }, { "epoch": 0.84, "grad_norm": 1.966419886350256, "learning_rate": 6.705641835447286e-07, "loss": 0.6943, "step": 13096 }, { "epoch": 0.84, "grad_norm": 1.8178241425242925, "learning_rate": 6.70045750679974e-07, "loss": 0.8219, "step": 13097 }, { "epoch": 0.84, "grad_norm": 1.8281871638183491, "learning_rate": 6.695275039075527e-07, "loss": 0.811, "step": 13098 }, { "epoch": 0.84, "grad_norm": 1.9967757161680273, "learning_rate": 6.690094432497407e-07, "loss": 0.6964, "step": 13099 }, { "epoch": 0.84, "grad_norm": 1.592699670391183, "learning_rate": 6.684915687288023e-07, "loss": 0.8158, "step": 13100 }, { "epoch": 0.84, "grad_norm": 1.6222231690565032, "learning_rate": 6.679738803669944e-07, "loss": 0.7233, "step": 13101 }, { "epoch": 0.84, "grad_norm": 1.6698646723816297, "learning_rate": 6.674563781865662e-07, "loss": 0.6268, "step": 13102 }, { "epoch": 0.84, "grad_norm": 1.7872375935719118, "learning_rate": 6.669390622097577e-07, "loss": 0.6359, "step": 13103 }, { "epoch": 0.84, "grad_norm": 1.6758839033167858, "learning_rate": 6.664219324588056e-07, "loss": 0.6307, "step": 13104 }, { "epoch": 0.84, "grad_norm": 1.791500922561175, "learning_rate": 6.65904988955931e-07, "loss": 0.7755, "step": 13105 }, { "epoch": 0.84, "grad_norm": 1.6061844398049248, "learning_rate": 6.653882317233546e-07, "loss": 0.7087, "step": 13106 }, { "epoch": 0.84, "grad_norm": 1.9318749078316693, "learning_rate": 6.648716607832811e-07, "loss": 0.7843, "step": 13107 }, { "epoch": 0.84, "grad_norm": 2.1759869465032864, "learning_rate": 6.643552761579159e-07, "loss": 0.7532, "step": 13108 }, { "epoch": 0.84, "grad_norm": 1.8173009778727134, "learning_rate": 6.638390778694504e-07, "loss": 0.6871, "step": 13109 }, { "epoch": 0.84, "grad_norm": 1.0605387942750764, "learning_rate": 6.633230659400697e-07, "loss": 0.675, "step": 13110 }, { "epoch": 0.84, "grad_norm": 1.268070760815401, "learning_rate": 6.628072403919511e-07, "loss": 0.7181, "step": 13111 }, { "epoch": 0.84, "grad_norm": 2.300462781566512, "learning_rate": 6.62291601247263e-07, "loss": 0.832, "step": 13112 }, { "epoch": 0.84, "grad_norm": 1.9101072801100063, "learning_rate": 6.617761485281687e-07, "loss": 0.6558, "step": 13113 }, { "epoch": 0.84, "grad_norm": 1.9982599409437314, "learning_rate": 6.612608822568173e-07, "loss": 0.8027, "step": 13114 }, { "epoch": 0.84, "grad_norm": 1.943318635011882, "learning_rate": 6.607458024553576e-07, "loss": 0.7262, "step": 13115 }, { "epoch": 0.84, "grad_norm": 1.6420561229727058, "learning_rate": 6.602309091459253e-07, "loss": 0.7075, "step": 13116 }, { "epoch": 0.84, "grad_norm": 1.5186403195644664, "learning_rate": 6.597162023506492e-07, "loss": 0.6645, "step": 13117 }, { "epoch": 0.84, "grad_norm": 2.2565914191969827, "learning_rate": 6.592016820916508e-07, "loss": 0.7973, "step": 13118 }, { "epoch": 0.84, "grad_norm": 4.024308918113753, "learning_rate": 6.586873483910416e-07, "loss": 0.6893, "step": 13119 }, { "epoch": 0.84, "grad_norm": 1.9563069401850757, "learning_rate": 6.581732012709303e-07, "loss": 0.6987, "step": 13120 }, { "epoch": 0.84, "grad_norm": 1.1224364023430076, "learning_rate": 6.576592407534088e-07, "loss": 0.6852, "step": 13121 }, { "epoch": 0.84, "grad_norm": 1.7545718833735524, "learning_rate": 6.571454668605715e-07, "loss": 0.8102, "step": 13122 }, { "epoch": 0.84, "grad_norm": 1.2071041679095804, "learning_rate": 6.566318796144933e-07, "loss": 0.6989, "step": 13123 }, { "epoch": 0.84, "grad_norm": 1.6045041790361747, "learning_rate": 6.561184790372522e-07, "loss": 0.685, "step": 13124 }, { "epoch": 0.84, "grad_norm": 1.8024971055270298, "learning_rate": 6.556052651509104e-07, "loss": 0.7604, "step": 13125 }, { "epoch": 0.84, "grad_norm": 1.407530719669686, "learning_rate": 6.550922379775248e-07, "loss": 0.5973, "step": 13126 }, { "epoch": 0.84, "grad_norm": 1.0112099625557769, "learning_rate": 6.545793975391468e-07, "loss": 0.6055, "step": 13127 }, { "epoch": 0.84, "grad_norm": 1.6752469009825905, "learning_rate": 6.54066743857813e-07, "loss": 0.6931, "step": 13128 }, { "epoch": 0.84, "grad_norm": 1.6502278289344527, "learning_rate": 6.535542769555609e-07, "loss": 0.73, "step": 13129 }, { "epoch": 0.84, "grad_norm": 2.107400252155004, "learning_rate": 6.53041996854411e-07, "loss": 0.7343, "step": 13130 }, { "epoch": 0.84, "grad_norm": 3.0085282800827153, "learning_rate": 6.525299035763827e-07, "loss": 0.6929, "step": 13131 }, { "epoch": 0.84, "grad_norm": 1.819855479526111, "learning_rate": 6.520179971434837e-07, "loss": 0.6946, "step": 13132 }, { "epoch": 0.84, "grad_norm": 2.04783209188753, "learning_rate": 6.515062775777148e-07, "loss": 0.7198, "step": 13133 }, { "epoch": 0.84, "grad_norm": 1.5312673248944344, "learning_rate": 6.50994744901069e-07, "loss": 0.5968, "step": 13134 }, { "epoch": 0.84, "grad_norm": 2.2080404124310458, "learning_rate": 6.504833991355292e-07, "loss": 0.6445, "step": 13135 }, { "epoch": 0.84, "grad_norm": 1.0604862472294114, "learning_rate": 6.499722403030751e-07, "loss": 0.6574, "step": 13136 }, { "epoch": 0.84, "grad_norm": 2.5979186528580733, "learning_rate": 6.494612684256718e-07, "loss": 0.7699, "step": 13137 }, { "epoch": 0.84, "grad_norm": 1.7563833926269623, "learning_rate": 6.489504835252824e-07, "loss": 0.6269, "step": 13138 }, { "epoch": 0.84, "grad_norm": 1.6372582867817311, "learning_rate": 6.484398856238582e-07, "loss": 0.6065, "step": 13139 }, { "epoch": 0.84, "grad_norm": 1.235929660427442, "learning_rate": 6.47929474743344e-07, "loss": 0.6678, "step": 13140 }, { "epoch": 0.84, "grad_norm": 1.6365339391875058, "learning_rate": 6.474192509056759e-07, "loss": 0.8298, "step": 13141 }, { "epoch": 0.84, "grad_norm": 1.8689296875533887, "learning_rate": 6.469092141327827e-07, "loss": 0.7729, "step": 13142 }, { "epoch": 0.84, "grad_norm": 1.9154227497599454, "learning_rate": 6.463993644465843e-07, "loss": 0.7142, "step": 13143 }, { "epoch": 0.84, "grad_norm": 1.6946085200958672, "learning_rate": 6.458897018689919e-07, "loss": 0.7732, "step": 13144 }, { "epoch": 0.84, "grad_norm": 1.8161489273202513, "learning_rate": 6.453802264219117e-07, "loss": 0.721, "step": 13145 }, { "epoch": 0.84, "grad_norm": 1.026503711093854, "learning_rate": 6.448709381272395e-07, "loss": 0.5847, "step": 13146 }, { "epoch": 0.84, "grad_norm": 1.675652068381861, "learning_rate": 6.443618370068622e-07, "loss": 0.7655, "step": 13147 }, { "epoch": 0.84, "grad_norm": 1.709074273237026, "learning_rate": 6.438529230826612e-07, "loss": 0.7445, "step": 13148 }, { "epoch": 0.84, "grad_norm": 1.9276206755372152, "learning_rate": 6.43344196376508e-07, "loss": 0.7297, "step": 13149 }, { "epoch": 0.84, "grad_norm": 1.7317941581392942, "learning_rate": 6.428356569102667e-07, "loss": 0.7142, "step": 13150 }, { "epoch": 0.84, "grad_norm": 1.6811882375577176, "learning_rate": 6.423273047057932e-07, "loss": 0.6717, "step": 13151 }, { "epoch": 0.84, "grad_norm": 1.097036456680812, "learning_rate": 6.418191397849355e-07, "loss": 0.6648, "step": 13152 }, { "epoch": 0.84, "grad_norm": 1.6898070178712297, "learning_rate": 6.413111621695322e-07, "loss": 0.8022, "step": 13153 }, { "epoch": 0.84, "grad_norm": 1.6254616834667468, "learning_rate": 6.408033718814172e-07, "loss": 0.6927, "step": 13154 }, { "epoch": 0.84, "grad_norm": 1.2037972190529718, "learning_rate": 6.402957689424139e-07, "loss": 0.6603, "step": 13155 }, { "epoch": 0.84, "grad_norm": 1.668918244062492, "learning_rate": 6.397883533743371e-07, "loss": 0.6883, "step": 13156 }, { "epoch": 0.84, "grad_norm": 1.5063318857274581, "learning_rate": 6.392811251989944e-07, "loss": 0.6052, "step": 13157 }, { "epoch": 0.84, "grad_norm": 1.728407201525468, "learning_rate": 6.387740844381863e-07, "loss": 0.7388, "step": 13158 }, { "epoch": 0.84, "grad_norm": 1.0984312932606786, "learning_rate": 6.382672311137039e-07, "loss": 0.6014, "step": 13159 }, { "epoch": 0.84, "grad_norm": 1.103510127179047, "learning_rate": 6.377605652473301e-07, "loss": 0.6466, "step": 13160 }, { "epoch": 0.84, "grad_norm": 1.194844580499282, "learning_rate": 6.372540868608401e-07, "loss": 0.5336, "step": 13161 }, { "epoch": 0.84, "grad_norm": 2.168414740354496, "learning_rate": 6.367477959760043e-07, "loss": 0.7659, "step": 13162 }, { "epoch": 0.84, "grad_norm": 1.7606562599171216, "learning_rate": 6.362416926145775e-07, "loss": 0.8336, "step": 13163 }, { "epoch": 0.84, "grad_norm": 2.0792876476273148, "learning_rate": 6.357357767983147e-07, "loss": 0.6591, "step": 13164 }, { "epoch": 0.84, "grad_norm": 1.4574727563454817, "learning_rate": 6.352300485489571e-07, "loss": 0.7703, "step": 13165 }, { "epoch": 0.84, "grad_norm": 1.8152412210150761, "learning_rate": 6.347245078882408e-07, "loss": 0.8088, "step": 13166 }, { "epoch": 0.84, "grad_norm": 3.7500582532146822, "learning_rate": 6.342191548378923e-07, "loss": 0.7826, "step": 13167 }, { "epoch": 0.84, "grad_norm": 2.5905368978946384, "learning_rate": 6.337139894196292e-07, "loss": 0.7031, "step": 13168 }, { "epoch": 0.84, "grad_norm": 2.466029981569338, "learning_rate": 6.33209011655167e-07, "loss": 0.6993, "step": 13169 }, { "epoch": 0.84, "grad_norm": 1.7825682201454989, "learning_rate": 6.327042215662027e-07, "loss": 0.7014, "step": 13170 }, { "epoch": 0.84, "grad_norm": 1.5641509787496235, "learning_rate": 6.321996191744368e-07, "loss": 0.7993, "step": 13171 }, { "epoch": 0.84, "grad_norm": 1.3591635539788645, "learning_rate": 6.316952045015506e-07, "loss": 0.7446, "step": 13172 }, { "epoch": 0.84, "grad_norm": 1.59867803609775, "learning_rate": 6.311909775692265e-07, "loss": 0.7248, "step": 13173 }, { "epoch": 0.84, "grad_norm": 2.803495326902433, "learning_rate": 6.306869383991343e-07, "loss": 0.636, "step": 13174 }, { "epoch": 0.84, "grad_norm": 1.0091336510525561, "learning_rate": 6.301830870129349e-07, "loss": 0.6595, "step": 13175 }, { "epoch": 0.84, "grad_norm": 1.715514336724783, "learning_rate": 6.296794234322867e-07, "loss": 0.6603, "step": 13176 }, { "epoch": 0.84, "grad_norm": 1.3949587366403289, "learning_rate": 6.291759476788312e-07, "loss": 0.6866, "step": 13177 }, { "epoch": 0.84, "grad_norm": 1.7191825880897025, "learning_rate": 6.286726597742116e-07, "loss": 0.5919, "step": 13178 }, { "epoch": 0.84, "grad_norm": 1.6720456926709772, "learning_rate": 6.281695597400533e-07, "loss": 0.6855, "step": 13179 }, { "epoch": 0.84, "grad_norm": 1.9265408181564483, "learning_rate": 6.276666475979815e-07, "loss": 0.7322, "step": 13180 }, { "epoch": 0.84, "grad_norm": 1.1466710688210822, "learning_rate": 6.271639233696103e-07, "loss": 0.6566, "step": 13181 }, { "epoch": 0.84, "grad_norm": 1.750740099987304, "learning_rate": 6.266613870765437e-07, "loss": 0.7492, "step": 13182 }, { "epoch": 0.84, "grad_norm": 1.5912725145581619, "learning_rate": 6.261590387403832e-07, "loss": 0.775, "step": 13183 }, { "epoch": 0.84, "grad_norm": 1.2067791970974637, "learning_rate": 6.256568783827144e-07, "loss": 0.6616, "step": 13184 }, { "epoch": 0.84, "grad_norm": 2.635400209931452, "learning_rate": 6.251549060251233e-07, "loss": 0.6732, "step": 13185 }, { "epoch": 0.84, "grad_norm": 1.6707178214885856, "learning_rate": 6.246531216891794e-07, "loss": 0.8173, "step": 13186 }, { "epoch": 0.84, "grad_norm": 1.7447732141079886, "learning_rate": 6.241515253964515e-07, "loss": 0.8767, "step": 13187 }, { "epoch": 0.84, "grad_norm": 1.6805183934926777, "learning_rate": 6.236501171684961e-07, "loss": 0.6247, "step": 13188 }, { "epoch": 0.84, "grad_norm": 2.92215981517403, "learning_rate": 6.231488970268628e-07, "loss": 0.6692, "step": 13189 }, { "epoch": 0.84, "grad_norm": 1.2172011047751732, "learning_rate": 6.226478649930928e-07, "loss": 0.6522, "step": 13190 }, { "epoch": 0.84, "grad_norm": 1.7840278423287343, "learning_rate": 6.221470210887182e-07, "loss": 0.8346, "step": 13191 }, { "epoch": 0.84, "grad_norm": 2.4313634239072623, "learning_rate": 6.216463653352678e-07, "loss": 0.6616, "step": 13192 }, { "epoch": 0.84, "grad_norm": 1.6569915700157354, "learning_rate": 6.211458977542545e-07, "loss": 0.7638, "step": 13193 }, { "epoch": 0.84, "grad_norm": 1.70310207601016, "learning_rate": 6.20645618367191e-07, "loss": 0.7356, "step": 13194 }, { "epoch": 0.84, "grad_norm": 1.024234777315732, "learning_rate": 6.201455271955747e-07, "loss": 0.5794, "step": 13195 }, { "epoch": 0.84, "grad_norm": 1.6564023424405763, "learning_rate": 6.196456242609012e-07, "loss": 0.7348, "step": 13196 }, { "epoch": 0.84, "grad_norm": 1.9467435914561906, "learning_rate": 6.191459095846547e-07, "loss": 0.6417, "step": 13197 }, { "epoch": 0.84, "grad_norm": 1.7837588550968098, "learning_rate": 6.186463831883111e-07, "loss": 0.631, "step": 13198 }, { "epoch": 0.84, "grad_norm": 2.4908100641630893, "learning_rate": 6.181470450933397e-07, "loss": 0.8827, "step": 13199 }, { "epoch": 0.84, "grad_norm": 1.8104814665165538, "learning_rate": 6.176478953212001e-07, "loss": 0.7065, "step": 13200 }, { "epoch": 0.84, "grad_norm": 2.178285850504243, "learning_rate": 6.171489338933467e-07, "loss": 0.7185, "step": 13201 }, { "epoch": 0.85, "grad_norm": 1.7613190387443227, "learning_rate": 6.166501608312209e-07, "loss": 0.7177, "step": 13202 }, { "epoch": 0.85, "grad_norm": 1.7730395917819548, "learning_rate": 6.161515761562614e-07, "loss": 0.7686, "step": 13203 }, { "epoch": 0.85, "grad_norm": 1.6555601398059172, "learning_rate": 6.156531798898951e-07, "loss": 0.7365, "step": 13204 }, { "epoch": 0.85, "grad_norm": 1.9036712618970943, "learning_rate": 6.151549720535433e-07, "loss": 0.7525, "step": 13205 }, { "epoch": 0.85, "grad_norm": 1.072021909249018, "learning_rate": 6.146569526686158e-07, "loss": 0.6685, "step": 13206 }, { "epoch": 0.85, "grad_norm": 1.7234036454749466, "learning_rate": 6.141591217565185e-07, "loss": 0.5676, "step": 13207 }, { "epoch": 0.85, "grad_norm": 1.8159311626708696, "learning_rate": 6.136614793386459e-07, "loss": 0.6354, "step": 13208 }, { "epoch": 0.85, "grad_norm": 1.9325489721734532, "learning_rate": 6.131640254363847e-07, "loss": 0.6462, "step": 13209 }, { "epoch": 0.85, "grad_norm": 1.8283487067772826, "learning_rate": 6.126667600711167e-07, "loss": 0.7718, "step": 13210 }, { "epoch": 0.85, "grad_norm": 1.6049961872240237, "learning_rate": 6.121696832642126e-07, "loss": 0.6095, "step": 13211 }, { "epoch": 0.85, "grad_norm": 2.0374187825002554, "learning_rate": 6.116727950370355e-07, "loss": 0.7835, "step": 13212 }, { "epoch": 0.85, "grad_norm": 2.442312658465878, "learning_rate": 6.111760954109402e-07, "loss": 0.8984, "step": 13213 }, { "epoch": 0.85, "grad_norm": 1.6693559871699906, "learning_rate": 6.106795844072744e-07, "loss": 0.6263, "step": 13214 }, { "epoch": 0.85, "grad_norm": 1.9237920877144257, "learning_rate": 6.101832620473763e-07, "loss": 0.7087, "step": 13215 }, { "epoch": 0.85, "grad_norm": 3.0889004610060584, "learning_rate": 6.09687128352578e-07, "loss": 0.5469, "step": 13216 }, { "epoch": 0.85, "grad_norm": 1.6707480659466114, "learning_rate": 6.091911833441999e-07, "loss": 0.7231, "step": 13217 }, { "epoch": 0.85, "grad_norm": 1.7067552715030616, "learning_rate": 6.086954270435602e-07, "loss": 0.6735, "step": 13218 }, { "epoch": 0.85, "grad_norm": 1.6277490942203257, "learning_rate": 6.081998594719629e-07, "loss": 0.6465, "step": 13219 }, { "epoch": 0.85, "grad_norm": 1.8665768899124306, "learning_rate": 6.077044806507076e-07, "loss": 0.8038, "step": 13220 }, { "epoch": 0.85, "grad_norm": 1.8216300173560318, "learning_rate": 6.07209290601084e-07, "loss": 0.7981, "step": 13221 }, { "epoch": 0.85, "grad_norm": 1.6014885096806883, "learning_rate": 6.06714289344375e-07, "loss": 0.7653, "step": 13222 }, { "epoch": 0.85, "grad_norm": 2.3244677594316117, "learning_rate": 6.062194769018542e-07, "loss": 0.7815, "step": 13223 }, { "epoch": 0.85, "grad_norm": 1.6139329022059752, "learning_rate": 6.057248532947862e-07, "loss": 0.6585, "step": 13224 }, { "epoch": 0.85, "grad_norm": 1.824915028537193, "learning_rate": 6.05230418544433e-07, "loss": 0.766, "step": 13225 }, { "epoch": 0.85, "grad_norm": 2.0203254012051213, "learning_rate": 6.04736172672039e-07, "loss": 0.6945, "step": 13226 }, { "epoch": 0.85, "grad_norm": 1.781532736831914, "learning_rate": 6.042421156988498e-07, "loss": 0.7246, "step": 13227 }, { "epoch": 0.85, "grad_norm": 1.4598921667288989, "learning_rate": 6.037482476460981e-07, "loss": 0.5663, "step": 13228 }, { "epoch": 0.85, "grad_norm": 1.6749808689603944, "learning_rate": 6.032545685350088e-07, "loss": 0.6688, "step": 13229 }, { "epoch": 0.85, "grad_norm": 1.5364245189978385, "learning_rate": 6.027610783867993e-07, "loss": 0.7271, "step": 13230 }, { "epoch": 0.85, "grad_norm": 1.189944408810203, "learning_rate": 6.022677772226781e-07, "loss": 0.6374, "step": 13231 }, { "epoch": 0.85, "grad_norm": 1.9544891540518512, "learning_rate": 6.017746650638479e-07, "loss": 0.822, "step": 13232 }, { "epoch": 0.85, "grad_norm": 2.725123549170307, "learning_rate": 6.012817419314992e-07, "loss": 0.5809, "step": 13233 }, { "epoch": 0.85, "grad_norm": 1.1947774085674885, "learning_rate": 6.007890078468204e-07, "loss": 0.6863, "step": 13234 }, { "epoch": 0.85, "grad_norm": 1.8766589306799857, "learning_rate": 6.002964628309838e-07, "loss": 0.8004, "step": 13235 }, { "epoch": 0.85, "grad_norm": 0.9587089668483402, "learning_rate": 5.998041069051624e-07, "loss": 0.6205, "step": 13236 }, { "epoch": 0.85, "grad_norm": 1.5139087944432645, "learning_rate": 5.993119400905123e-07, "loss": 0.6154, "step": 13237 }, { "epoch": 0.85, "grad_norm": 1.1040278904512482, "learning_rate": 5.988199624081887e-07, "loss": 0.5992, "step": 13238 }, { "epoch": 0.85, "grad_norm": 1.574408171388052, "learning_rate": 5.983281738793351e-07, "loss": 0.7616, "step": 13239 }, { "epoch": 0.85, "grad_norm": 1.6702887010782435, "learning_rate": 5.978365745250863e-07, "loss": 0.6075, "step": 13240 }, { "epoch": 0.85, "grad_norm": 1.6963760901936074, "learning_rate": 5.973451643665734e-07, "loss": 0.7156, "step": 13241 }, { "epoch": 0.85, "grad_norm": 1.4926257149620465, "learning_rate": 5.968539434249121e-07, "loss": 0.578, "step": 13242 }, { "epoch": 0.85, "grad_norm": 1.129728169479998, "learning_rate": 5.963629117212183e-07, "loss": 0.6481, "step": 13243 }, { "epoch": 0.85, "grad_norm": 1.6057084584910704, "learning_rate": 5.958720692765913e-07, "loss": 0.7174, "step": 13244 }, { "epoch": 0.85, "grad_norm": 1.1875770124491436, "learning_rate": 5.95381416112129e-07, "loss": 0.6672, "step": 13245 }, { "epoch": 0.85, "grad_norm": 1.721739855420551, "learning_rate": 5.948909522489182e-07, "loss": 0.7794, "step": 13246 }, { "epoch": 0.85, "grad_norm": 1.5267972517061656, "learning_rate": 5.944006777080363e-07, "loss": 0.6276, "step": 13247 }, { "epoch": 0.85, "grad_norm": 1.757771597061928, "learning_rate": 5.939105925105587e-07, "loss": 0.6861, "step": 13248 }, { "epoch": 0.85, "grad_norm": 1.038446489015822, "learning_rate": 5.934206966775429e-07, "loss": 0.6494, "step": 13249 }, { "epoch": 0.85, "grad_norm": 1.7232803236238259, "learning_rate": 5.929309902300484e-07, "loss": 0.6133, "step": 13250 }, { "epoch": 0.85, "grad_norm": 1.6392496346023437, "learning_rate": 5.924414731891171e-07, "loss": 0.7499, "step": 13251 }, { "epoch": 0.85, "grad_norm": 1.5167980021773597, "learning_rate": 5.919521455757909e-07, "loss": 0.7655, "step": 13252 }, { "epoch": 0.85, "grad_norm": 1.743548069187954, "learning_rate": 5.914630074110989e-07, "loss": 0.7209, "step": 13253 }, { "epoch": 0.85, "grad_norm": 1.607460344417074, "learning_rate": 5.909740587160629e-07, "loss": 0.641, "step": 13254 }, { "epoch": 0.85, "grad_norm": 1.6580365334165803, "learning_rate": 5.904852995116977e-07, "loss": 0.816, "step": 13255 }, { "epoch": 0.85, "grad_norm": 1.5543237471399398, "learning_rate": 5.899967298190073e-07, "loss": 1.0033, "step": 13256 }, { "epoch": 0.85, "grad_norm": 1.646689328686619, "learning_rate": 5.895083496589932e-07, "loss": 0.6565, "step": 13257 }, { "epoch": 0.85, "grad_norm": 1.6442245605446932, "learning_rate": 5.890201590526401e-07, "loss": 0.7551, "step": 13258 }, { "epoch": 0.85, "grad_norm": 1.9622959461404164, "learning_rate": 5.885321580209324e-07, "loss": 0.5691, "step": 13259 }, { "epoch": 0.85, "grad_norm": 2.0931428251671433, "learning_rate": 5.880443465848435e-07, "loss": 0.8379, "step": 13260 }, { "epoch": 0.85, "grad_norm": 1.6667092406677877, "learning_rate": 5.875567247653374e-07, "loss": 0.7596, "step": 13261 }, { "epoch": 0.85, "grad_norm": 3.54095312059235, "learning_rate": 5.870692925833721e-07, "loss": 0.6886, "step": 13262 }, { "epoch": 0.85, "grad_norm": 1.64152471786692, "learning_rate": 5.865820500598951e-07, "loss": 0.6515, "step": 13263 }, { "epoch": 0.85, "grad_norm": 1.5284614460577433, "learning_rate": 5.860949972158481e-07, "loss": 0.7807, "step": 13264 }, { "epoch": 0.85, "grad_norm": 1.5730103513275189, "learning_rate": 5.85608134072162e-07, "loss": 0.6653, "step": 13265 }, { "epoch": 0.85, "grad_norm": 1.7837682673342798, "learning_rate": 5.851214606497635e-07, "loss": 0.7515, "step": 13266 }, { "epoch": 0.85, "grad_norm": 1.8550135861275683, "learning_rate": 5.846349769695675e-07, "loss": 0.8505, "step": 13267 }, { "epoch": 0.85, "grad_norm": 1.7995525892450552, "learning_rate": 5.841486830524823e-07, "loss": 0.655, "step": 13268 }, { "epoch": 0.85, "grad_norm": 1.5715543250676904, "learning_rate": 5.836625789194078e-07, "loss": 0.7053, "step": 13269 }, { "epoch": 0.85, "grad_norm": 1.1917220901911254, "learning_rate": 5.831766645912357e-07, "loss": 0.6324, "step": 13270 }, { "epoch": 0.85, "grad_norm": 1.7598188479360486, "learning_rate": 5.826909400888492e-07, "loss": 0.6738, "step": 13271 }, { "epoch": 0.85, "grad_norm": 1.1453301831879459, "learning_rate": 5.822054054331244e-07, "loss": 0.6357, "step": 13272 }, { "epoch": 0.85, "grad_norm": 1.5710247356850533, "learning_rate": 5.81720060644928e-07, "loss": 0.728, "step": 13273 }, { "epoch": 0.85, "grad_norm": 1.7720196046524728, "learning_rate": 5.812349057451183e-07, "loss": 0.6212, "step": 13274 }, { "epoch": 0.85, "grad_norm": 1.667249819049496, "learning_rate": 5.807499407545486e-07, "loss": 0.7762, "step": 13275 }, { "epoch": 0.85, "grad_norm": 2.058964300902622, "learning_rate": 5.8026516569406e-07, "loss": 0.6665, "step": 13276 }, { "epoch": 0.85, "grad_norm": 1.6052451072521539, "learning_rate": 5.797805805844875e-07, "loss": 0.6625, "step": 13277 }, { "epoch": 0.85, "grad_norm": 1.6582770255492496, "learning_rate": 5.792961854466572e-07, "loss": 0.7271, "step": 13278 }, { "epoch": 0.85, "grad_norm": 1.6906805632410276, "learning_rate": 5.788119803013881e-07, "loss": 0.6997, "step": 13279 }, { "epoch": 0.85, "grad_norm": 1.9144648362054135, "learning_rate": 5.783279651694895e-07, "loss": 0.7739, "step": 13280 }, { "epoch": 0.85, "grad_norm": 1.8051994319493077, "learning_rate": 5.778441400717644e-07, "loss": 0.7733, "step": 13281 }, { "epoch": 0.85, "grad_norm": 1.7662263505808504, "learning_rate": 5.773605050290043e-07, "loss": 0.7317, "step": 13282 }, { "epoch": 0.85, "grad_norm": 1.672493971811773, "learning_rate": 5.768770600619978e-07, "loss": 0.7386, "step": 13283 }, { "epoch": 0.85, "grad_norm": 1.7111331146506754, "learning_rate": 5.763938051915208e-07, "loss": 0.6583, "step": 13284 }, { "epoch": 0.85, "grad_norm": 1.9500775771663226, "learning_rate": 5.759107404383429e-07, "loss": 0.613, "step": 13285 }, { "epoch": 0.85, "grad_norm": 1.3091932764700889, "learning_rate": 5.754278658232249e-07, "loss": 0.594, "step": 13286 }, { "epoch": 0.85, "grad_norm": 1.2653950883432372, "learning_rate": 5.749451813669205e-07, "loss": 0.7198, "step": 13287 }, { "epoch": 0.85, "grad_norm": 2.182550961587129, "learning_rate": 5.744626870901731e-07, "loss": 0.6887, "step": 13288 }, { "epoch": 0.85, "grad_norm": 1.0880746789360423, "learning_rate": 5.739803830137192e-07, "loss": 0.538, "step": 13289 }, { "epoch": 0.85, "grad_norm": 0.9963397277738927, "learning_rate": 5.734982691582907e-07, "loss": 0.7014, "step": 13290 }, { "epoch": 0.85, "grad_norm": 1.7450642602242799, "learning_rate": 5.730163455446025e-07, "loss": 0.7875, "step": 13291 }, { "epoch": 0.85, "grad_norm": 1.684415352172803, "learning_rate": 5.725346121933712e-07, "loss": 0.7502, "step": 13292 }, { "epoch": 0.85, "grad_norm": 1.4862620431609157, "learning_rate": 5.720530691252979e-07, "loss": 0.6373, "step": 13293 }, { "epoch": 0.85, "grad_norm": 1.9206711290259557, "learning_rate": 5.715717163610801e-07, "loss": 0.6803, "step": 13294 }, { "epoch": 0.85, "grad_norm": 1.6980828146280087, "learning_rate": 5.71090553921404e-07, "loss": 0.6004, "step": 13295 }, { "epoch": 0.85, "grad_norm": 1.8642408089469311, "learning_rate": 5.706095818269485e-07, "loss": 0.6624, "step": 13296 }, { "epoch": 0.85, "grad_norm": 1.2628746791460712, "learning_rate": 5.701288000983884e-07, "loss": 0.6822, "step": 13297 }, { "epoch": 0.85, "grad_norm": 1.8350354275577292, "learning_rate": 5.696482087563814e-07, "loss": 0.7335, "step": 13298 }, { "epoch": 0.85, "grad_norm": 1.0920774978956236, "learning_rate": 5.691678078215873e-07, "loss": 0.6415, "step": 13299 }, { "epoch": 0.85, "grad_norm": 1.5096409734964775, "learning_rate": 5.686875973146477e-07, "loss": 0.6954, "step": 13300 }, { "epoch": 0.85, "grad_norm": 1.831429642817657, "learning_rate": 5.682075772562051e-07, "loss": 0.64, "step": 13301 }, { "epoch": 0.85, "grad_norm": 0.9470319549401546, "learning_rate": 5.677277476668886e-07, "loss": 0.4846, "step": 13302 }, { "epoch": 0.85, "grad_norm": 1.7582373609456965, "learning_rate": 5.672481085673199e-07, "loss": 0.7463, "step": 13303 }, { "epoch": 0.85, "grad_norm": 1.7956225377919715, "learning_rate": 5.66768659978113e-07, "loss": 0.7542, "step": 13304 }, { "epoch": 0.85, "grad_norm": 1.7163951392061065, "learning_rate": 5.662894019198722e-07, "loss": 0.9161, "step": 13305 }, { "epoch": 0.85, "grad_norm": 2.448305182783145, "learning_rate": 5.658103344131988e-07, "loss": 0.8097, "step": 13306 }, { "epoch": 0.85, "grad_norm": 1.9063074063215406, "learning_rate": 5.653314574786778e-07, "loss": 0.6816, "step": 13307 }, { "epoch": 0.85, "grad_norm": 1.6465063819348045, "learning_rate": 5.648527711368939e-07, "loss": 0.6804, "step": 13308 }, { "epoch": 0.85, "grad_norm": 1.9800586750242526, "learning_rate": 5.643742754084164e-07, "loss": 0.6796, "step": 13309 }, { "epoch": 0.85, "grad_norm": 1.5939310367082877, "learning_rate": 5.63895970313813e-07, "loss": 0.7799, "step": 13310 }, { "epoch": 0.85, "grad_norm": 1.76371171738654, "learning_rate": 5.634178558736397e-07, "loss": 0.7579, "step": 13311 }, { "epoch": 0.85, "grad_norm": 1.8477296941069783, "learning_rate": 5.629399321084428e-07, "loss": 0.7714, "step": 13312 }, { "epoch": 0.85, "grad_norm": 1.8540071405074479, "learning_rate": 5.624621990387669e-07, "loss": 0.709, "step": 13313 }, { "epoch": 0.85, "grad_norm": 1.782640422607806, "learning_rate": 5.619846566851384e-07, "loss": 0.738, "step": 13314 }, { "epoch": 0.85, "grad_norm": 1.7241585729118827, "learning_rate": 5.615073050680859e-07, "loss": 0.7448, "step": 13315 }, { "epoch": 0.85, "grad_norm": 1.7821238738992078, "learning_rate": 5.610301442081212e-07, "loss": 0.6901, "step": 13316 }, { "epoch": 0.85, "grad_norm": 1.2545784476899904, "learning_rate": 5.605531741257536e-07, "loss": 0.6856, "step": 13317 }, { "epoch": 0.85, "grad_norm": 1.7966980237699544, "learning_rate": 5.600763948414828e-07, "loss": 0.8077, "step": 13318 }, { "epoch": 0.85, "grad_norm": 2.0193534983889534, "learning_rate": 5.595998063757985e-07, "loss": 0.6859, "step": 13319 }, { "epoch": 0.85, "grad_norm": 1.9741405166053616, "learning_rate": 5.591234087491842e-07, "loss": 0.8133, "step": 13320 }, { "epoch": 0.85, "grad_norm": 1.6713210018888618, "learning_rate": 5.586472019821132e-07, "loss": 0.7042, "step": 13321 }, { "epoch": 0.85, "grad_norm": 1.7857962962126683, "learning_rate": 5.581711860950551e-07, "loss": 0.6658, "step": 13322 }, { "epoch": 0.85, "grad_norm": 1.577708699747818, "learning_rate": 5.576953611084635e-07, "loss": 0.7059, "step": 13323 }, { "epoch": 0.85, "grad_norm": 1.7014897628995027, "learning_rate": 5.57219727042792e-07, "loss": 0.7468, "step": 13324 }, { "epoch": 0.85, "grad_norm": 1.8717613404229987, "learning_rate": 5.567442839184805e-07, "loss": 0.7699, "step": 13325 }, { "epoch": 0.85, "grad_norm": 1.5711793673496872, "learning_rate": 5.562690317559639e-07, "loss": 0.7154, "step": 13326 }, { "epoch": 0.85, "grad_norm": 1.9251554016764316, "learning_rate": 5.557939705756665e-07, "loss": 0.851, "step": 13327 }, { "epoch": 0.85, "grad_norm": 1.6692482883029571, "learning_rate": 5.553191003980052e-07, "loss": 0.7649, "step": 13328 }, { "epoch": 0.85, "grad_norm": 1.6192856081213363, "learning_rate": 5.548444212433901e-07, "loss": 0.8101, "step": 13329 }, { "epoch": 0.85, "grad_norm": 1.9209674923894753, "learning_rate": 5.543699331322194e-07, "loss": 0.5997, "step": 13330 }, { "epoch": 0.85, "grad_norm": 1.5363546160573636, "learning_rate": 5.538956360848891e-07, "loss": 0.6654, "step": 13331 }, { "epoch": 0.85, "grad_norm": 1.6105967503469445, "learning_rate": 5.534215301217816e-07, "loss": 0.749, "step": 13332 }, { "epoch": 0.85, "grad_norm": 1.1601221821854362, "learning_rate": 5.529476152632735e-07, "loss": 0.6608, "step": 13333 }, { "epoch": 0.85, "grad_norm": 1.8580927458880596, "learning_rate": 5.524738915297317e-07, "loss": 0.7169, "step": 13334 }, { "epoch": 0.85, "grad_norm": 1.8115733181992058, "learning_rate": 5.520003589415168e-07, "loss": 0.7631, "step": 13335 }, { "epoch": 0.85, "grad_norm": 1.61901660990808, "learning_rate": 5.515270175189802e-07, "loss": 0.7697, "step": 13336 }, { "epoch": 0.85, "grad_norm": 1.8281808525998071, "learning_rate": 5.510538672824645e-07, "loss": 0.7274, "step": 13337 }, { "epoch": 0.85, "grad_norm": 1.5274172829624526, "learning_rate": 5.50580908252304e-07, "loss": 0.632, "step": 13338 }, { "epoch": 0.85, "grad_norm": 1.8710999674396624, "learning_rate": 5.501081404488279e-07, "loss": 0.761, "step": 13339 }, { "epoch": 0.85, "grad_norm": 1.6764113843765562, "learning_rate": 5.496355638923534e-07, "loss": 0.8236, "step": 13340 }, { "epoch": 0.85, "grad_norm": 1.0967163128220423, "learning_rate": 5.491631786031904e-07, "loss": 0.6092, "step": 13341 }, { "epoch": 0.85, "grad_norm": 1.6901731238956739, "learning_rate": 5.486909846016419e-07, "loss": 0.8604, "step": 13342 }, { "epoch": 0.85, "grad_norm": 1.7548921203203558, "learning_rate": 5.482189819080014e-07, "loss": 0.6507, "step": 13343 }, { "epoch": 0.85, "grad_norm": 2.563737507082717, "learning_rate": 5.477471705425541e-07, "loss": 0.8354, "step": 13344 }, { "epoch": 0.85, "grad_norm": 2.0300399672460547, "learning_rate": 5.472755505255783e-07, "loss": 0.7943, "step": 13345 }, { "epoch": 0.85, "grad_norm": 1.837353516913563, "learning_rate": 5.468041218773429e-07, "loss": 0.7072, "step": 13346 }, { "epoch": 0.85, "grad_norm": 2.58914703815385, "learning_rate": 5.463328846181081e-07, "loss": 0.7787, "step": 13347 }, { "epoch": 0.85, "grad_norm": 1.80984472509604, "learning_rate": 5.45861838768128e-07, "loss": 0.6338, "step": 13348 }, { "epoch": 0.85, "grad_norm": 1.8543909555016302, "learning_rate": 5.453909843476468e-07, "loss": 0.7072, "step": 13349 }, { "epoch": 0.85, "grad_norm": 1.8521066895009721, "learning_rate": 5.449203213769011e-07, "loss": 0.6774, "step": 13350 }, { "epoch": 0.85, "grad_norm": 2.4901457537884135, "learning_rate": 5.444498498761181e-07, "loss": 0.7824, "step": 13351 }, { "epoch": 0.85, "grad_norm": 1.0790801886863708, "learning_rate": 5.439795698655182e-07, "loss": 0.5689, "step": 13352 }, { "epoch": 0.85, "grad_norm": 1.909287267875661, "learning_rate": 5.43509481365313e-07, "loss": 0.7765, "step": 13353 }, { "epoch": 0.85, "grad_norm": 1.7771962498057206, "learning_rate": 5.430395843957054e-07, "loss": 0.834, "step": 13354 }, { "epoch": 0.85, "grad_norm": 1.7143233610255308, "learning_rate": 5.425698789768924e-07, "loss": 0.7894, "step": 13355 }, { "epoch": 0.85, "grad_norm": 1.9631907752434943, "learning_rate": 5.42100365129058e-07, "loss": 0.6941, "step": 13356 }, { "epoch": 0.85, "grad_norm": 1.6129064033998175, "learning_rate": 5.416310428723837e-07, "loss": 0.8295, "step": 13357 }, { "epoch": 0.85, "grad_norm": 1.7512853685254817, "learning_rate": 5.411619122270384e-07, "loss": 0.632, "step": 13358 }, { "epoch": 0.86, "grad_norm": 1.6921485002229817, "learning_rate": 5.40692973213185e-07, "loss": 0.5749, "step": 13359 }, { "epoch": 0.86, "grad_norm": 1.689946525631189, "learning_rate": 5.402242258509777e-07, "loss": 0.6731, "step": 13360 }, { "epoch": 0.86, "grad_norm": 1.7657118842439719, "learning_rate": 5.397556701605605e-07, "loss": 0.6516, "step": 13361 }, { "epoch": 0.86, "grad_norm": 1.6135949504359481, "learning_rate": 5.39287306162074e-07, "loss": 0.6821, "step": 13362 }, { "epoch": 0.86, "grad_norm": 1.5957002985028395, "learning_rate": 5.388191338756443e-07, "loss": 0.6629, "step": 13363 }, { "epoch": 0.86, "grad_norm": 1.6996301959231104, "learning_rate": 5.383511533213959e-07, "loss": 0.7527, "step": 13364 }, { "epoch": 0.86, "grad_norm": 1.5845550351802848, "learning_rate": 5.378833645194375e-07, "loss": 0.582, "step": 13365 }, { "epoch": 0.86, "grad_norm": 1.6611067469494505, "learning_rate": 5.374157674898772e-07, "loss": 0.5438, "step": 13366 }, { "epoch": 0.86, "grad_norm": 1.8475553806725111, "learning_rate": 5.369483622528104e-07, "loss": 0.7804, "step": 13367 }, { "epoch": 0.86, "grad_norm": 1.6111199437700208, "learning_rate": 5.364811488283233e-07, "loss": 0.7615, "step": 13368 }, { "epoch": 0.86, "grad_norm": 1.8366836635821862, "learning_rate": 5.360141272364994e-07, "loss": 0.693, "step": 13369 }, { "epoch": 0.86, "grad_norm": 1.6843325228739474, "learning_rate": 5.355472974974057e-07, "loss": 0.6757, "step": 13370 }, { "epoch": 0.86, "grad_norm": 1.2227504415816197, "learning_rate": 5.350806596311109e-07, "loss": 0.565, "step": 13371 }, { "epoch": 0.86, "grad_norm": 1.7173541419819056, "learning_rate": 5.346142136576649e-07, "loss": 0.8195, "step": 13372 }, { "epoch": 0.86, "grad_norm": 1.2113910474257428, "learning_rate": 5.341479595971177e-07, "loss": 0.7096, "step": 13373 }, { "epoch": 0.86, "grad_norm": 1.8829317808637487, "learning_rate": 5.336818974695073e-07, "loss": 0.7078, "step": 13374 }, { "epoch": 0.86, "grad_norm": 1.8615142762186478, "learning_rate": 5.332160272948628e-07, "loss": 0.7473, "step": 13375 }, { "epoch": 0.86, "grad_norm": 1.591727274588126, "learning_rate": 5.327503490932095e-07, "loss": 0.6056, "step": 13376 }, { "epoch": 0.86, "grad_norm": 1.9107721995915858, "learning_rate": 5.322848628845578e-07, "loss": 0.7095, "step": 13377 }, { "epoch": 0.86, "grad_norm": 1.6707999213971627, "learning_rate": 5.31819568688916e-07, "loss": 0.833, "step": 13378 }, { "epoch": 0.86, "grad_norm": 1.7026192470287487, "learning_rate": 5.313544665262782e-07, "loss": 0.6622, "step": 13379 }, { "epoch": 0.86, "grad_norm": 1.9319554037914748, "learning_rate": 5.308895564166372e-07, "loss": 0.7362, "step": 13380 }, { "epoch": 0.86, "grad_norm": 1.773387878305894, "learning_rate": 5.304248383799715e-07, "loss": 0.7268, "step": 13381 }, { "epoch": 0.86, "grad_norm": 1.619473447169837, "learning_rate": 5.299603124362546e-07, "loss": 0.7129, "step": 13382 }, { "epoch": 0.86, "grad_norm": 1.6215145213362463, "learning_rate": 5.294959786054505e-07, "loss": 0.6487, "step": 13383 }, { "epoch": 0.86, "grad_norm": 1.2228919221663341, "learning_rate": 5.290318369075153e-07, "loss": 0.6335, "step": 13384 }, { "epoch": 0.86, "grad_norm": 2.1619728353237466, "learning_rate": 5.285678873623973e-07, "loss": 0.7384, "step": 13385 }, { "epoch": 0.86, "grad_norm": 1.9583675694382314, "learning_rate": 5.281041299900347e-07, "loss": 0.875, "step": 13386 }, { "epoch": 0.86, "grad_norm": 1.2337761696961858, "learning_rate": 5.276405648103616e-07, "loss": 0.7056, "step": 13387 }, { "epoch": 0.86, "grad_norm": 2.2652836053133805, "learning_rate": 5.271771918432977e-07, "loss": 0.718, "step": 13388 }, { "epoch": 0.86, "grad_norm": 1.6636844966751287, "learning_rate": 5.267140111087604e-07, "loss": 0.7601, "step": 13389 }, { "epoch": 0.86, "grad_norm": 1.6918590986798674, "learning_rate": 5.262510226266548e-07, "loss": 0.6502, "step": 13390 }, { "epoch": 0.86, "grad_norm": 1.788783628551296, "learning_rate": 5.257882264168795e-07, "loss": 0.683, "step": 13391 }, { "epoch": 0.86, "grad_norm": 2.3082138137388157, "learning_rate": 5.253256224993252e-07, "loss": 0.7728, "step": 13392 }, { "epoch": 0.86, "grad_norm": 1.1554539292683688, "learning_rate": 5.248632108938728e-07, "loss": 0.6479, "step": 13393 }, { "epoch": 0.86, "grad_norm": 1.7413258136488858, "learning_rate": 5.244009916203957e-07, "loss": 0.6904, "step": 13394 }, { "epoch": 0.86, "grad_norm": 1.6162744203787305, "learning_rate": 5.239389646987592e-07, "loss": 0.7398, "step": 13395 }, { "epoch": 0.86, "grad_norm": 1.9202525779421378, "learning_rate": 5.234771301488206e-07, "loss": 0.7618, "step": 13396 }, { "epoch": 0.86, "grad_norm": 1.529163209915134, "learning_rate": 5.230154879904292e-07, "loss": 0.5983, "step": 13397 }, { "epoch": 0.86, "grad_norm": 2.0458856035563278, "learning_rate": 5.22554038243424e-07, "loss": 0.7008, "step": 13398 }, { "epoch": 0.86, "grad_norm": 1.6917726165487665, "learning_rate": 5.220927809276383e-07, "loss": 0.7499, "step": 13399 }, { "epoch": 0.86, "grad_norm": 1.7541816885804755, "learning_rate": 5.216317160628959e-07, "loss": 0.741, "step": 13400 }, { "epoch": 0.86, "grad_norm": 4.136265593339138, "learning_rate": 5.211708436690111e-07, "loss": 0.6683, "step": 13401 }, { "epoch": 0.86, "grad_norm": 2.1265781528281877, "learning_rate": 5.207101637657929e-07, "loss": 0.7828, "step": 13402 }, { "epoch": 0.86, "grad_norm": 2.311579733235478, "learning_rate": 5.20249676373038e-07, "loss": 0.7562, "step": 13403 }, { "epoch": 0.86, "grad_norm": 2.3592758505597993, "learning_rate": 5.197893815105398e-07, "loss": 0.7194, "step": 13404 }, { "epoch": 0.86, "grad_norm": 1.9381491051334814, "learning_rate": 5.193292791980803e-07, "loss": 0.6797, "step": 13405 }, { "epoch": 0.86, "grad_norm": 1.1850599336268073, "learning_rate": 5.188693694554325e-07, "loss": 0.6084, "step": 13406 }, { "epoch": 0.86, "grad_norm": 1.5399289222275792, "learning_rate": 5.184096523023635e-07, "loss": 0.7116, "step": 13407 }, { "epoch": 0.86, "grad_norm": 1.4887758070978572, "learning_rate": 5.179501277586308e-07, "loss": 0.6172, "step": 13408 }, { "epoch": 0.86, "grad_norm": 1.782828541107458, "learning_rate": 5.174907958439829e-07, "loss": 0.6277, "step": 13409 }, { "epoch": 0.86, "grad_norm": 1.026895750517334, "learning_rate": 5.170316565781608e-07, "loss": 0.7015, "step": 13410 }, { "epoch": 0.86, "grad_norm": 1.7193737325445508, "learning_rate": 5.165727099808998e-07, "loss": 0.7345, "step": 13411 }, { "epoch": 0.86, "grad_norm": 1.7243119724514138, "learning_rate": 5.161139560719214e-07, "loss": 0.6473, "step": 13412 }, { "epoch": 0.86, "grad_norm": 1.1573947923653782, "learning_rate": 5.156553948709436e-07, "loss": 0.6219, "step": 13413 }, { "epoch": 0.86, "grad_norm": 1.5191683548712394, "learning_rate": 5.151970263976741e-07, "loss": 0.6186, "step": 13414 }, { "epoch": 0.86, "grad_norm": 1.232658701482092, "learning_rate": 5.147388506718127e-07, "loss": 0.5731, "step": 13415 }, { "epoch": 0.86, "grad_norm": 1.561379693097109, "learning_rate": 5.142808677130506e-07, "loss": 0.6004, "step": 13416 }, { "epoch": 0.86, "grad_norm": 1.8396670769290768, "learning_rate": 5.138230775410713e-07, "loss": 0.7009, "step": 13417 }, { "epoch": 0.86, "grad_norm": 1.8241939524219293, "learning_rate": 5.133654801755489e-07, "loss": 0.7115, "step": 13418 }, { "epoch": 0.86, "grad_norm": 1.7880860627293682, "learning_rate": 5.129080756361499e-07, "loss": 0.7778, "step": 13419 }, { "epoch": 0.86, "grad_norm": 1.7971980399774756, "learning_rate": 5.124508639425352e-07, "loss": 0.7432, "step": 13420 }, { "epoch": 0.86, "grad_norm": 1.6188686945164334, "learning_rate": 5.119938451143502e-07, "loss": 0.6533, "step": 13421 }, { "epoch": 0.86, "grad_norm": 1.8354725097001463, "learning_rate": 5.115370191712404e-07, "loss": 0.675, "step": 13422 }, { "epoch": 0.86, "grad_norm": 1.0890942403492394, "learning_rate": 5.110803861328378e-07, "loss": 0.5635, "step": 13423 }, { "epoch": 0.86, "grad_norm": 1.4945701795756654, "learning_rate": 5.106239460187679e-07, "loss": 0.6572, "step": 13424 }, { "epoch": 0.86, "grad_norm": 1.6663175667978845, "learning_rate": 5.10167698848647e-07, "loss": 0.7476, "step": 13425 }, { "epoch": 0.86, "grad_norm": 1.6854076408407943, "learning_rate": 5.097116446420835e-07, "loss": 0.6956, "step": 13426 }, { "epoch": 0.86, "grad_norm": 1.8329824691072703, "learning_rate": 5.092557834186795e-07, "loss": 0.796, "step": 13427 }, { "epoch": 0.86, "grad_norm": 1.142119300245456, "learning_rate": 5.088001151980243e-07, "loss": 0.6411, "step": 13428 }, { "epoch": 0.86, "grad_norm": 1.6188707242003726, "learning_rate": 5.083446399997044e-07, "loss": 0.8286, "step": 13429 }, { "epoch": 0.86, "grad_norm": 1.6419800229047556, "learning_rate": 5.078893578432914e-07, "loss": 0.6416, "step": 13430 }, { "epoch": 0.86, "grad_norm": 1.1759014868059463, "learning_rate": 5.074342687483557e-07, "loss": 0.6846, "step": 13431 }, { "epoch": 0.86, "grad_norm": 2.2916526481339723, "learning_rate": 5.069793727344552e-07, "loss": 0.6933, "step": 13432 }, { "epoch": 0.86, "grad_norm": 1.0922770451923778, "learning_rate": 5.06524669821139e-07, "loss": 0.6693, "step": 13433 }, { "epoch": 0.86, "grad_norm": 1.8026585680893759, "learning_rate": 5.060701600279527e-07, "loss": 0.637, "step": 13434 }, { "epoch": 0.86, "grad_norm": 1.8922554407544807, "learning_rate": 5.056158433744251e-07, "loss": 0.7497, "step": 13435 }, { "epoch": 0.86, "grad_norm": 1.7100294670319502, "learning_rate": 5.051617198800873e-07, "loss": 0.7689, "step": 13436 }, { "epoch": 0.86, "grad_norm": 1.968379815542316, "learning_rate": 5.047077895644514e-07, "loss": 0.6772, "step": 13437 }, { "epoch": 0.86, "grad_norm": 1.756292575404494, "learning_rate": 5.042540524470302e-07, "loss": 0.697, "step": 13438 }, { "epoch": 0.86, "grad_norm": 3.790847690993426, "learning_rate": 5.038005085473224e-07, "loss": 0.6731, "step": 13439 }, { "epoch": 0.86, "grad_norm": 1.8526995934965094, "learning_rate": 5.033471578848203e-07, "loss": 0.6997, "step": 13440 }, { "epoch": 0.86, "grad_norm": 1.9334355113221264, "learning_rate": 5.028940004790106e-07, "loss": 0.8293, "step": 13441 }, { "epoch": 0.86, "grad_norm": 1.817955477386206, "learning_rate": 5.024410363493648e-07, "loss": 0.8878, "step": 13442 }, { "epoch": 0.86, "grad_norm": 1.8381082449937347, "learning_rate": 5.019882655153547e-07, "loss": 0.6799, "step": 13443 }, { "epoch": 0.86, "grad_norm": 1.7400532544407423, "learning_rate": 5.015356879964356e-07, "loss": 0.7136, "step": 13444 }, { "epoch": 0.86, "grad_norm": 2.4362256065564694, "learning_rate": 5.010833038120605e-07, "loss": 0.6289, "step": 13445 }, { "epoch": 0.86, "grad_norm": 1.6383385469575118, "learning_rate": 5.006311129816721e-07, "loss": 0.7256, "step": 13446 }, { "epoch": 0.86, "grad_norm": 1.8597605879983543, "learning_rate": 5.001791155247032e-07, "loss": 0.7211, "step": 13447 }, { "epoch": 0.86, "grad_norm": 1.9540782715460607, "learning_rate": 4.997273114605811e-07, "loss": 0.7225, "step": 13448 }, { "epoch": 0.86, "grad_norm": 1.9047770115169727, "learning_rate": 4.992757008087218e-07, "loss": 0.7515, "step": 13449 }, { "epoch": 0.86, "grad_norm": 1.8441835145332794, "learning_rate": 4.988242835885371e-07, "loss": 0.6811, "step": 13450 }, { "epoch": 0.86, "grad_norm": 1.7816922178725432, "learning_rate": 4.983730598194247e-07, "loss": 0.7545, "step": 13451 }, { "epoch": 0.86, "grad_norm": 2.4907917318157224, "learning_rate": 4.979220295207804e-07, "loss": 0.674, "step": 13452 }, { "epoch": 0.86, "grad_norm": 1.1315949630829596, "learning_rate": 4.974711927119863e-07, "loss": 0.5203, "step": 13453 }, { "epoch": 0.86, "grad_norm": 1.8132847375835226, "learning_rate": 4.970205494124198e-07, "loss": 0.737, "step": 13454 }, { "epoch": 0.86, "grad_norm": 1.5663756742588428, "learning_rate": 4.96570099641448e-07, "loss": 0.6606, "step": 13455 }, { "epoch": 0.86, "grad_norm": 1.7173301284549896, "learning_rate": 4.9611984341843e-07, "loss": 0.8303, "step": 13456 }, { "epoch": 0.86, "grad_norm": 1.9627623124385345, "learning_rate": 4.95669780762717e-07, "loss": 0.7403, "step": 13457 }, { "epoch": 0.86, "grad_norm": 1.5932812916292056, "learning_rate": 4.952199116936524e-07, "loss": 0.6313, "step": 13458 }, { "epoch": 0.86, "grad_norm": 1.2481421582319376, "learning_rate": 4.9477023623057e-07, "loss": 0.5334, "step": 13459 }, { "epoch": 0.86, "grad_norm": 1.8796127774077842, "learning_rate": 4.943207543927947e-07, "loss": 0.7378, "step": 13460 }, { "epoch": 0.86, "grad_norm": 1.2036982777851182, "learning_rate": 4.938714661996469e-07, "loss": 0.5744, "step": 13461 }, { "epoch": 0.86, "grad_norm": 1.8173284190934553, "learning_rate": 4.93422371670435e-07, "loss": 0.7895, "step": 13462 }, { "epoch": 0.86, "grad_norm": 1.2102502123053276, "learning_rate": 4.929734708244599e-07, "loss": 0.6802, "step": 13463 }, { "epoch": 0.86, "grad_norm": 1.899062352863077, "learning_rate": 4.925247636810144e-07, "loss": 0.6452, "step": 13464 }, { "epoch": 0.86, "grad_norm": 1.6520731677213811, "learning_rate": 4.920762502593829e-07, "loss": 0.6262, "step": 13465 }, { "epoch": 0.86, "grad_norm": 1.6229029010480838, "learning_rate": 4.916279305788419e-07, "loss": 0.6845, "step": 13466 }, { "epoch": 0.86, "grad_norm": 1.7475105765742247, "learning_rate": 4.911798046586591e-07, "loss": 0.7266, "step": 13467 }, { "epoch": 0.86, "grad_norm": 1.9411901361995478, "learning_rate": 4.907318725180926e-07, "loss": 0.8089, "step": 13468 }, { "epoch": 0.86, "grad_norm": 1.7339090477134351, "learning_rate": 4.902841341763964e-07, "loss": 0.7116, "step": 13469 }, { "epoch": 0.86, "grad_norm": 1.2409492339701027, "learning_rate": 4.898365896528113e-07, "loss": 0.6259, "step": 13470 }, { "epoch": 0.86, "grad_norm": 1.0258086734840373, "learning_rate": 4.893892389665728e-07, "loss": 0.6885, "step": 13471 }, { "epoch": 0.86, "grad_norm": 1.6072531272003934, "learning_rate": 4.889420821369067e-07, "loss": 0.7411, "step": 13472 }, { "epoch": 0.86, "grad_norm": 1.8445725990530741, "learning_rate": 4.884951191830312e-07, "loss": 0.6654, "step": 13473 }, { "epoch": 0.86, "grad_norm": 1.7860198679829613, "learning_rate": 4.880483501241551e-07, "loss": 0.8713, "step": 13474 }, { "epoch": 0.86, "grad_norm": 1.7452080884082617, "learning_rate": 4.876017749794787e-07, "loss": 0.7979, "step": 13475 }, { "epoch": 0.86, "grad_norm": 1.2621158965755914, "learning_rate": 4.871553937681989e-07, "loss": 0.5003, "step": 13476 }, { "epoch": 0.86, "grad_norm": 1.2151601477982998, "learning_rate": 4.867092065094947e-07, "loss": 0.6557, "step": 13477 }, { "epoch": 0.86, "grad_norm": 1.6193430137938647, "learning_rate": 4.862632132225464e-07, "loss": 0.7552, "step": 13478 }, { "epoch": 0.86, "grad_norm": 1.6934071767510928, "learning_rate": 4.858174139265209e-07, "loss": 0.689, "step": 13479 }, { "epoch": 0.86, "grad_norm": 1.61711578269984, "learning_rate": 4.853718086405768e-07, "loss": 0.7268, "step": 13480 }, { "epoch": 0.86, "grad_norm": 1.9631604389312287, "learning_rate": 4.849263973838664e-07, "loss": 0.7751, "step": 13481 }, { "epoch": 0.86, "grad_norm": 1.7983673725052287, "learning_rate": 4.844811801755306e-07, "loss": 0.8121, "step": 13482 }, { "epoch": 0.86, "grad_norm": 1.5381541763104751, "learning_rate": 4.840361570347069e-07, "loss": 0.6432, "step": 13483 }, { "epoch": 0.86, "grad_norm": 1.7301396544246164, "learning_rate": 4.835913279805182e-07, "loss": 0.6896, "step": 13484 }, { "epoch": 0.86, "grad_norm": 1.8673599635898703, "learning_rate": 4.831466930320861e-07, "loss": 0.716, "step": 13485 }, { "epoch": 0.86, "grad_norm": 1.7334842372702468, "learning_rate": 4.827022522085157e-07, "loss": 0.6704, "step": 13486 }, { "epoch": 0.86, "grad_norm": 1.6289347858915382, "learning_rate": 4.822580055289117e-07, "loss": 0.7315, "step": 13487 }, { "epoch": 0.86, "grad_norm": 1.8074401977924284, "learning_rate": 4.81813953012365e-07, "loss": 0.6422, "step": 13488 }, { "epoch": 0.86, "grad_norm": 1.9694907283929008, "learning_rate": 4.813700946779598e-07, "loss": 0.9629, "step": 13489 }, { "epoch": 0.86, "grad_norm": 1.8828528112545155, "learning_rate": 4.809264305447752e-07, "loss": 0.7351, "step": 13490 }, { "epoch": 0.86, "grad_norm": 1.8078180257042462, "learning_rate": 4.804829606318745e-07, "loss": 0.7344, "step": 13491 }, { "epoch": 0.86, "grad_norm": 1.746647883299634, "learning_rate": 4.800396849583211e-07, "loss": 0.7493, "step": 13492 }, { "epoch": 0.86, "grad_norm": 1.7422821631887262, "learning_rate": 4.795966035431626e-07, "loss": 0.7643, "step": 13493 }, { "epoch": 0.86, "grad_norm": 3.1653721135587682, "learning_rate": 4.791537164054444e-07, "loss": 0.6483, "step": 13494 }, { "epoch": 0.86, "grad_norm": 2.173291481735077, "learning_rate": 4.787110235642001e-07, "loss": 0.7857, "step": 13495 }, { "epoch": 0.86, "grad_norm": 1.6557742559568873, "learning_rate": 4.78268525038455e-07, "loss": 0.7303, "step": 13496 }, { "epoch": 0.86, "grad_norm": 1.6404096130998949, "learning_rate": 4.778262208472273e-07, "loss": 0.8046, "step": 13497 }, { "epoch": 0.86, "grad_norm": 0.9833106717145093, "learning_rate": 4.773841110095257e-07, "loss": 0.5998, "step": 13498 }, { "epoch": 0.86, "grad_norm": 1.8823135149375576, "learning_rate": 4.769421955443531e-07, "loss": 0.9282, "step": 13499 }, { "epoch": 0.86, "grad_norm": 1.8871978370212477, "learning_rate": 4.7650047447069916e-07, "loss": 0.6201, "step": 13500 }, { "epoch": 0.86, "grad_norm": 1.6724422150779696, "learning_rate": 4.7605894780755113e-07, "loss": 0.7408, "step": 13501 }, { "epoch": 0.86, "grad_norm": 1.2475702940596332, "learning_rate": 4.7561761557388176e-07, "loss": 0.5634, "step": 13502 }, { "epoch": 0.86, "grad_norm": 1.5451378277683472, "learning_rate": 4.751764777886614e-07, "loss": 0.7379, "step": 13503 }, { "epoch": 0.86, "grad_norm": 1.564430940969267, "learning_rate": 4.7473553447084764e-07, "loss": 0.6675, "step": 13504 }, { "epoch": 0.86, "grad_norm": 1.7520834356645436, "learning_rate": 4.742947856393909e-07, "loss": 0.7018, "step": 13505 }, { "epoch": 0.86, "grad_norm": 1.886615262055189, "learning_rate": 4.738542313132366e-07, "loss": 0.8406, "step": 13506 }, { "epoch": 0.86, "grad_norm": 1.6377023621440046, "learning_rate": 4.734138715113146e-07, "loss": 0.7438, "step": 13507 }, { "epoch": 0.86, "grad_norm": 1.7872768039939488, "learning_rate": 4.7297370625255466e-07, "loss": 0.7148, "step": 13508 }, { "epoch": 0.86, "grad_norm": 1.4954373742669327, "learning_rate": 4.7253373555587014e-07, "loss": 0.6707, "step": 13509 }, { "epoch": 0.86, "grad_norm": 1.5884304497039519, "learning_rate": 4.7209395944017354e-07, "loss": 0.6298, "step": 13510 }, { "epoch": 0.86, "grad_norm": 1.6482603480819085, "learning_rate": 4.7165437792436373e-07, "loss": 0.6773, "step": 13511 }, { "epoch": 0.86, "grad_norm": 1.7530126101982555, "learning_rate": 4.712149910273334e-07, "loss": 0.8307, "step": 13512 }, { "epoch": 0.86, "grad_norm": 2.2225735862836524, "learning_rate": 4.7077579876796675e-07, "loss": 0.7442, "step": 13513 }, { "epoch": 0.86, "grad_norm": 1.5055821649051857, "learning_rate": 4.7033680116513824e-07, "loss": 0.646, "step": 13514 }, { "epoch": 0.87, "grad_norm": 1.4660393730168322, "learning_rate": 4.698979982377172e-07, "loss": 0.631, "step": 13515 }, { "epoch": 0.87, "grad_norm": 1.8211106456741586, "learning_rate": 4.6945939000455966e-07, "loss": 0.7322, "step": 13516 }, { "epoch": 0.87, "grad_norm": 1.8384321088368243, "learning_rate": 4.690209764845183e-07, "loss": 0.7082, "step": 13517 }, { "epoch": 0.87, "grad_norm": 2.3135620962723356, "learning_rate": 4.685827576964347e-07, "loss": 0.7987, "step": 13518 }, { "epoch": 0.87, "grad_norm": 1.0630138828400646, "learning_rate": 4.6814473365914217e-07, "loss": 0.6939, "step": 13519 }, { "epoch": 0.87, "grad_norm": 1.6907519019911357, "learning_rate": 4.6770690439146617e-07, "loss": 0.6855, "step": 13520 }, { "epoch": 0.87, "grad_norm": 1.8105402716683994, "learning_rate": 4.6726926991222386e-07, "loss": 0.6967, "step": 13521 }, { "epoch": 0.87, "grad_norm": 2.1155779276280438, "learning_rate": 4.668318302402236e-07, "loss": 0.5882, "step": 13522 }, { "epoch": 0.87, "grad_norm": 1.6041365072913252, "learning_rate": 4.663945853942653e-07, "loss": 0.6875, "step": 13523 }, { "epoch": 0.87, "grad_norm": 1.5961656063440688, "learning_rate": 4.659575353931417e-07, "loss": 0.6787, "step": 13524 }, { "epoch": 0.87, "grad_norm": 1.7204173946981194, "learning_rate": 4.655206802556361e-07, "loss": 0.6195, "step": 13525 }, { "epoch": 0.87, "grad_norm": 1.5971198076208903, "learning_rate": 4.650840200005236e-07, "loss": 0.6698, "step": 13526 }, { "epoch": 0.87, "grad_norm": 1.469620377969315, "learning_rate": 4.646475546465706e-07, "loss": 0.7499, "step": 13527 }, { "epoch": 0.87, "grad_norm": 2.5570876380189165, "learning_rate": 4.642112842125357e-07, "loss": 0.7836, "step": 13528 }, { "epoch": 0.87, "grad_norm": 1.8195211859604037, "learning_rate": 4.6377520871716874e-07, "loss": 0.6179, "step": 13529 }, { "epoch": 0.87, "grad_norm": 1.8056751077053126, "learning_rate": 4.633393281792109e-07, "loss": 0.7043, "step": 13530 }, { "epoch": 0.87, "grad_norm": 1.6911826676789792, "learning_rate": 4.629036426173955e-07, "loss": 0.678, "step": 13531 }, { "epoch": 0.87, "grad_norm": 1.5655235860421521, "learning_rate": 4.624681520504498e-07, "loss": 0.6684, "step": 13532 }, { "epoch": 0.87, "grad_norm": 1.9710200905626019, "learning_rate": 4.6203285649708605e-07, "loss": 0.7279, "step": 13533 }, { "epoch": 0.87, "grad_norm": 1.8490174910839297, "learning_rate": 4.6159775597601543e-07, "loss": 0.7553, "step": 13534 }, { "epoch": 0.87, "grad_norm": 1.125399609752951, "learning_rate": 4.6116285050593733e-07, "loss": 0.7008, "step": 13535 }, { "epoch": 0.87, "grad_norm": 1.7336649990359747, "learning_rate": 4.607281401055419e-07, "loss": 0.6407, "step": 13536 }, { "epoch": 0.87, "grad_norm": 1.909491340984431, "learning_rate": 4.6029362479351303e-07, "loss": 0.7853, "step": 13537 }, { "epoch": 0.87, "grad_norm": 1.938269184446357, "learning_rate": 4.598593045885247e-07, "loss": 0.7938, "step": 13538 }, { "epoch": 0.87, "grad_norm": 2.0513916523815587, "learning_rate": 4.5942517950924305e-07, "loss": 0.6125, "step": 13539 }, { "epoch": 0.87, "grad_norm": 1.6118913697563841, "learning_rate": 4.5899124957432597e-07, "loss": 0.6809, "step": 13540 }, { "epoch": 0.87, "grad_norm": 1.8609974228668158, "learning_rate": 4.5855751480242404e-07, "loss": 0.6517, "step": 13541 }, { "epoch": 0.87, "grad_norm": 1.1556970516530216, "learning_rate": 4.5812397521217577e-07, "loss": 0.6771, "step": 13542 }, { "epoch": 0.87, "grad_norm": 1.588711530134054, "learning_rate": 4.5769063082221623e-07, "loss": 0.6671, "step": 13543 }, { "epoch": 0.87, "grad_norm": 1.4728878417468871, "learning_rate": 4.572574816511688e-07, "loss": 0.6808, "step": 13544 }, { "epoch": 0.87, "grad_norm": 1.8653838399389353, "learning_rate": 4.568245277176486e-07, "loss": 0.8556, "step": 13545 }, { "epoch": 0.87, "grad_norm": 1.760055659055204, "learning_rate": 4.5639176904026417e-07, "loss": 0.723, "step": 13546 }, { "epoch": 0.87, "grad_norm": 1.760905078921242, "learning_rate": 4.559592056376133e-07, "loss": 0.7504, "step": 13547 }, { "epoch": 0.87, "grad_norm": 1.6353038530877175, "learning_rate": 4.5552683752828896e-07, "loss": 0.6987, "step": 13548 }, { "epoch": 0.87, "grad_norm": 1.639375784959853, "learning_rate": 4.5509466473087017e-07, "loss": 0.6848, "step": 13549 }, { "epoch": 0.87, "grad_norm": 1.6424764758995207, "learning_rate": 4.546626872639343e-07, "loss": 0.7432, "step": 13550 }, { "epoch": 0.87, "grad_norm": 1.8389934484454429, "learning_rate": 4.5423090514604305e-07, "loss": 0.8262, "step": 13551 }, { "epoch": 0.87, "grad_norm": 1.5981256538754143, "learning_rate": 4.5379931839575673e-07, "loss": 0.6303, "step": 13552 }, { "epoch": 0.87, "grad_norm": 1.6199757199181153, "learning_rate": 4.533679270316227e-07, "loss": 0.7636, "step": 13553 }, { "epoch": 0.87, "grad_norm": 1.9236326665555723, "learning_rate": 4.529367310721805e-07, "loss": 0.7051, "step": 13554 }, { "epoch": 0.87, "grad_norm": 1.0382971920727084, "learning_rate": 4.525057305359648e-07, "loss": 0.5581, "step": 13555 }, { "epoch": 0.87, "grad_norm": 1.148258867448388, "learning_rate": 4.5207492544149525e-07, "loss": 0.7204, "step": 13556 }, { "epoch": 0.87, "grad_norm": 2.5374556334277663, "learning_rate": 4.5164431580729087e-07, "loss": 0.8152, "step": 13557 }, { "epoch": 0.87, "grad_norm": 1.83957982286845, "learning_rate": 4.512139016518541e-07, "loss": 0.7763, "step": 13558 }, { "epoch": 0.87, "grad_norm": 2.2496802650728105, "learning_rate": 4.507836829936868e-07, "loss": 0.692, "step": 13559 }, { "epoch": 0.87, "grad_norm": 1.7420523259783278, "learning_rate": 4.503536598512775e-07, "loss": 0.7154, "step": 13560 }, { "epoch": 0.87, "grad_norm": 1.7543971676122108, "learning_rate": 4.499238322431071e-07, "loss": 0.8181, "step": 13561 }, { "epoch": 0.87, "grad_norm": 1.5349947582886583, "learning_rate": 4.4949420018765065e-07, "loss": 0.7746, "step": 13562 }, { "epoch": 0.87, "grad_norm": 1.7577547436099241, "learning_rate": 4.4906476370337017e-07, "loss": 0.7064, "step": 13563 }, { "epoch": 0.87, "grad_norm": 2.8773526842461647, "learning_rate": 4.4863552280872523e-07, "loss": 0.8549, "step": 13564 }, { "epoch": 0.87, "grad_norm": 2.3829566109469065, "learning_rate": 4.482064775221595e-07, "loss": 0.8419, "step": 13565 }, { "epoch": 0.87, "grad_norm": 1.8427139593373756, "learning_rate": 4.47777627862116e-07, "loss": 0.6581, "step": 13566 }, { "epoch": 0.87, "grad_norm": 1.1852939735510883, "learning_rate": 4.4734897384702434e-07, "loss": 0.6576, "step": 13567 }, { "epoch": 0.87, "grad_norm": 1.80890580820833, "learning_rate": 4.469205154953077e-07, "loss": 0.65, "step": 13568 }, { "epoch": 0.87, "grad_norm": 1.748307623431149, "learning_rate": 4.4649225282537954e-07, "loss": 0.7642, "step": 13569 }, { "epoch": 0.87, "grad_norm": 1.8113891949895535, "learning_rate": 4.4606418585564526e-07, "loss": 0.8159, "step": 13570 }, { "epoch": 0.87, "grad_norm": 1.7140188135451377, "learning_rate": 4.4563631460450506e-07, "loss": 0.6345, "step": 13571 }, { "epoch": 0.87, "grad_norm": 1.532908346053996, "learning_rate": 4.452086390903443e-07, "loss": 0.7036, "step": 13572 }, { "epoch": 0.87, "grad_norm": 1.823684477636561, "learning_rate": 4.4478115933154597e-07, "loss": 0.7295, "step": 13573 }, { "epoch": 0.87, "grad_norm": 1.7241504133100205, "learning_rate": 4.443538753464821e-07, "loss": 0.7883, "step": 13574 }, { "epoch": 0.87, "grad_norm": 1.5767032600364093, "learning_rate": 4.439267871535152e-07, "loss": 0.7624, "step": 13575 }, { "epoch": 0.87, "grad_norm": 1.7996289311556575, "learning_rate": 4.4349989477100174e-07, "loss": 0.6705, "step": 13576 }, { "epoch": 0.87, "grad_norm": 1.6589911556826629, "learning_rate": 4.430731982172887e-07, "loss": 0.7281, "step": 13577 }, { "epoch": 0.87, "grad_norm": 1.6492581621275986, "learning_rate": 4.4264669751071353e-07, "loss": 0.7926, "step": 13578 }, { "epoch": 0.87, "grad_norm": 1.2272100411740208, "learning_rate": 4.4222039266960616e-07, "loss": 0.6592, "step": 13579 }, { "epoch": 0.87, "grad_norm": 2.1394577723054504, "learning_rate": 4.4179428371229125e-07, "loss": 0.7269, "step": 13580 }, { "epoch": 0.87, "grad_norm": 1.6740236453144777, "learning_rate": 4.413683706570776e-07, "loss": 0.7525, "step": 13581 }, { "epoch": 0.87, "grad_norm": 1.0651150909151694, "learning_rate": 4.409426535222738e-07, "loss": 0.6143, "step": 13582 }, { "epoch": 0.87, "grad_norm": 1.555240002436929, "learning_rate": 4.4051713232617423e-07, "loss": 0.6685, "step": 13583 }, { "epoch": 0.87, "grad_norm": 1.5884805997500855, "learning_rate": 4.40091807087068e-07, "loss": 0.7221, "step": 13584 }, { "epoch": 0.87, "grad_norm": 1.2587346744263332, "learning_rate": 4.396666778232345e-07, "loss": 0.5497, "step": 13585 }, { "epoch": 0.87, "grad_norm": 2.086709584235772, "learning_rate": 4.39241744552944e-07, "loss": 0.8486, "step": 13586 }, { "epoch": 0.87, "grad_norm": 1.7253522955191176, "learning_rate": 4.388170072944603e-07, "loss": 0.6984, "step": 13587 }, { "epoch": 0.87, "grad_norm": 1.6860066060300962, "learning_rate": 4.38392466066036e-07, "loss": 0.792, "step": 13588 }, { "epoch": 0.87, "grad_norm": 1.5429888205200935, "learning_rate": 4.3796812088591934e-07, "loss": 0.7509, "step": 13589 }, { "epoch": 0.87, "grad_norm": 1.11921350940035, "learning_rate": 4.375439717723462e-07, "loss": 0.6098, "step": 13590 }, { "epoch": 0.87, "grad_norm": 1.6746433076379432, "learning_rate": 4.37120018743547e-07, "loss": 0.6555, "step": 13591 }, { "epoch": 0.87, "grad_norm": 1.3338162200204249, "learning_rate": 4.366962618177406e-07, "loss": 0.6491, "step": 13592 }, { "epoch": 0.87, "grad_norm": 1.7766817925249143, "learning_rate": 4.362727010131407e-07, "loss": 0.8011, "step": 13593 }, { "epoch": 0.87, "grad_norm": 1.68485485925249, "learning_rate": 4.358493363479499e-07, "loss": 0.901, "step": 13594 }, { "epoch": 0.87, "grad_norm": 1.076927464284373, "learning_rate": 4.354261678403643e-07, "loss": 0.7156, "step": 13595 }, { "epoch": 0.87, "grad_norm": 1.7736047413431597, "learning_rate": 4.350031955085698e-07, "loss": 0.5987, "step": 13596 }, { "epoch": 0.87, "grad_norm": 1.2918102324632375, "learning_rate": 4.345804193707459e-07, "loss": 0.5804, "step": 13597 }, { "epoch": 0.87, "grad_norm": 1.7664443299336239, "learning_rate": 4.3415783944506294e-07, "loss": 0.6472, "step": 13598 }, { "epoch": 0.87, "grad_norm": 1.1812872378112484, "learning_rate": 4.3373545574968143e-07, "loss": 0.6941, "step": 13599 }, { "epoch": 0.87, "grad_norm": 2.073908335471389, "learning_rate": 4.333132683027552e-07, "loss": 0.597, "step": 13600 }, { "epoch": 0.87, "grad_norm": 1.1196892814225943, "learning_rate": 4.3289127712242864e-07, "loss": 0.7051, "step": 13601 }, { "epoch": 0.87, "grad_norm": 1.5634212498126403, "learning_rate": 4.3246948222683826e-07, "loss": 0.7443, "step": 13602 }, { "epoch": 0.87, "grad_norm": 1.7525998006580141, "learning_rate": 4.320478836341113e-07, "loss": 0.7149, "step": 13603 }, { "epoch": 0.87, "grad_norm": 1.7304377826207207, "learning_rate": 4.3162648136236885e-07, "loss": 0.7125, "step": 13604 }, { "epoch": 0.87, "grad_norm": 1.8129150060837207, "learning_rate": 4.312052754297197e-07, "loss": 0.8489, "step": 13605 }, { "epoch": 0.87, "grad_norm": 1.1246824891654643, "learning_rate": 4.3078426585426826e-07, "loss": 0.6678, "step": 13606 }, { "epoch": 0.87, "grad_norm": 2.010767309764286, "learning_rate": 4.303634526541067e-07, "loss": 0.6935, "step": 13607 }, { "epoch": 0.87, "grad_norm": 2.153047510739881, "learning_rate": 4.2994283584732286e-07, "loss": 0.67, "step": 13608 }, { "epoch": 0.87, "grad_norm": 1.485042719729281, "learning_rate": 4.2952241545199226e-07, "loss": 0.6972, "step": 13609 }, { "epoch": 0.87, "grad_norm": 2.817166712192903, "learning_rate": 4.291021914861848e-07, "loss": 0.6172, "step": 13610 }, { "epoch": 0.87, "grad_norm": 2.057104728378375, "learning_rate": 4.2868216396796057e-07, "loss": 0.7167, "step": 13611 }, { "epoch": 0.87, "grad_norm": 1.1312439470385214, "learning_rate": 4.282623329153701e-07, "loss": 0.6855, "step": 13612 }, { "epoch": 0.87, "grad_norm": 1.8521982756091513, "learning_rate": 4.2784269834645955e-07, "loss": 0.7843, "step": 13613 }, { "epoch": 0.87, "grad_norm": 1.112552760804299, "learning_rate": 4.2742326027926106e-07, "loss": 0.7273, "step": 13614 }, { "epoch": 0.87, "grad_norm": 1.5668870792450338, "learning_rate": 4.270040187318031e-07, "loss": 0.6478, "step": 13615 }, { "epoch": 0.87, "grad_norm": 1.1721258805893056, "learning_rate": 4.265849737221034e-07, "loss": 0.6387, "step": 13616 }, { "epoch": 0.87, "grad_norm": 1.9855412017106768, "learning_rate": 4.2616612526817146e-07, "loss": 0.9128, "step": 13617 }, { "epoch": 0.87, "grad_norm": 1.6855247782643785, "learning_rate": 4.257474733880085e-07, "loss": 0.5663, "step": 13618 }, { "epoch": 0.87, "grad_norm": 1.5586658434926248, "learning_rate": 4.253290180996067e-07, "loss": 0.6298, "step": 13619 }, { "epoch": 0.87, "grad_norm": 1.8594948980267392, "learning_rate": 4.249107594209523e-07, "loss": 0.804, "step": 13620 }, { "epoch": 0.87, "grad_norm": 1.4725676448009135, "learning_rate": 4.244926973700181e-07, "loss": 0.6602, "step": 13621 }, { "epoch": 0.87, "grad_norm": 2.2039739457571073, "learning_rate": 4.240748319647753e-07, "loss": 0.6587, "step": 13622 }, { "epoch": 0.87, "grad_norm": 1.9208414453121465, "learning_rate": 4.2365716322317894e-07, "loss": 0.6441, "step": 13623 }, { "epoch": 0.87, "grad_norm": 1.7618254565727647, "learning_rate": 4.2323969116318255e-07, "loss": 0.8004, "step": 13624 }, { "epoch": 0.87, "grad_norm": 1.5754016831592252, "learning_rate": 4.228224158027272e-07, "loss": 0.7053, "step": 13625 }, { "epoch": 0.87, "grad_norm": 1.8303625991564965, "learning_rate": 4.224053371597453e-07, "loss": 0.7911, "step": 13626 }, { "epoch": 0.87, "grad_norm": 1.9295572018527232, "learning_rate": 4.2198845525216524e-07, "loss": 0.7195, "step": 13627 }, { "epoch": 0.87, "grad_norm": 1.7316797310855339, "learning_rate": 4.215717700978994e-07, "loss": 0.7791, "step": 13628 }, { "epoch": 0.87, "grad_norm": 2.0516516157569678, "learning_rate": 4.2115528171486063e-07, "loss": 0.7385, "step": 13629 }, { "epoch": 0.87, "grad_norm": 1.4772906346718802, "learning_rate": 4.207389901209441e-07, "loss": 0.6202, "step": 13630 }, { "epoch": 0.87, "grad_norm": 1.7676560565473898, "learning_rate": 4.2032289533404434e-07, "loss": 0.8926, "step": 13631 }, { "epoch": 0.87, "grad_norm": 1.696196602385412, "learning_rate": 4.199069973720438e-07, "loss": 0.6587, "step": 13632 }, { "epoch": 0.87, "grad_norm": 1.8030816838435735, "learning_rate": 4.194912962528158e-07, "loss": 0.6264, "step": 13633 }, { "epoch": 0.87, "grad_norm": 1.105640517325318, "learning_rate": 4.190757919942273e-07, "loss": 0.6036, "step": 13634 }, { "epoch": 0.87, "grad_norm": 5.502986519679809, "learning_rate": 4.1866048461413454e-07, "loss": 0.6266, "step": 13635 }, { "epoch": 0.87, "grad_norm": 1.4876053268010687, "learning_rate": 4.1824537413038937e-07, "loss": 0.6195, "step": 13636 }, { "epoch": 0.87, "grad_norm": 1.0347717540226125, "learning_rate": 4.17830460560828e-07, "loss": 0.6077, "step": 13637 }, { "epoch": 0.87, "grad_norm": 1.8621926585174267, "learning_rate": 4.174157439232862e-07, "loss": 0.7925, "step": 13638 }, { "epoch": 0.87, "grad_norm": 1.1653253428843762, "learning_rate": 4.170012242355864e-07, "loss": 0.6368, "step": 13639 }, { "epoch": 0.87, "grad_norm": 1.2226884118910286, "learning_rate": 4.1658690151554383e-07, "loss": 0.7479, "step": 13640 }, { "epoch": 0.87, "grad_norm": 1.3765619503533992, "learning_rate": 4.161727757809653e-07, "loss": 0.7366, "step": 13641 }, { "epoch": 0.87, "grad_norm": 1.2065241661707113, "learning_rate": 4.1575884704964874e-07, "loss": 0.5441, "step": 13642 }, { "epoch": 0.87, "grad_norm": 1.6600340608144375, "learning_rate": 4.153451153393839e-07, "loss": 0.8316, "step": 13643 }, { "epoch": 0.87, "grad_norm": 1.5612678929407313, "learning_rate": 4.149315806679516e-07, "loss": 0.6531, "step": 13644 }, { "epoch": 0.87, "grad_norm": 1.6672721225464808, "learning_rate": 4.145182430531264e-07, "loss": 0.6682, "step": 13645 }, { "epoch": 0.87, "grad_norm": 0.9706049119193914, "learning_rate": 4.141051025126719e-07, "loss": 0.6702, "step": 13646 }, { "epoch": 0.87, "grad_norm": 1.8951398199996947, "learning_rate": 4.1369215906434337e-07, "loss": 0.7927, "step": 13647 }, { "epoch": 0.87, "grad_norm": 1.6063967913281418, "learning_rate": 4.132794127258888e-07, "loss": 0.6558, "step": 13648 }, { "epoch": 0.87, "grad_norm": 1.5393702254009103, "learning_rate": 4.1286686351504735e-07, "loss": 0.6715, "step": 13649 }, { "epoch": 0.87, "grad_norm": 1.6081848366146094, "learning_rate": 4.1245451144954873e-07, "loss": 0.7658, "step": 13650 }, { "epoch": 0.87, "grad_norm": 1.6924932295890371, "learning_rate": 4.120423565471154e-07, "loss": 0.7563, "step": 13651 }, { "epoch": 0.87, "grad_norm": 2.076608181871696, "learning_rate": 4.116303988254616e-07, "loss": 0.6797, "step": 13652 }, { "epoch": 0.87, "grad_norm": 1.6289316470753172, "learning_rate": 4.1121863830229037e-07, "loss": 0.7783, "step": 13653 }, { "epoch": 0.87, "grad_norm": 1.6569661107246632, "learning_rate": 4.108070749953008e-07, "loss": 0.6873, "step": 13654 }, { "epoch": 0.87, "grad_norm": 1.5995814593671995, "learning_rate": 4.1039570892217993e-07, "loss": 0.7495, "step": 13655 }, { "epoch": 0.87, "grad_norm": 1.6347105907189157, "learning_rate": 4.0998454010060695e-07, "loss": 0.6755, "step": 13656 }, { "epoch": 0.87, "grad_norm": 1.6354406331312246, "learning_rate": 4.0957356854825436e-07, "loss": 0.6879, "step": 13657 }, { "epoch": 0.87, "grad_norm": 1.7811422705683086, "learning_rate": 4.09162794282783e-07, "loss": 0.8718, "step": 13658 }, { "epoch": 0.87, "grad_norm": 1.4673857017450709, "learning_rate": 4.087522173218489e-07, "loss": 0.6954, "step": 13659 }, { "epoch": 0.87, "grad_norm": 1.854662251166407, "learning_rate": 4.0834183768309666e-07, "loss": 0.641, "step": 13660 }, { "epoch": 0.87, "grad_norm": 1.2077601898744421, "learning_rate": 4.079316553841628e-07, "loss": 0.6202, "step": 13661 }, { "epoch": 0.87, "grad_norm": 1.654772077969519, "learning_rate": 4.075216704426782e-07, "loss": 0.7515, "step": 13662 }, { "epoch": 0.87, "grad_norm": 2.150406602468845, "learning_rate": 4.0711188287626156e-07, "loss": 0.8749, "step": 13663 }, { "epoch": 0.87, "grad_norm": 1.8487819457324612, "learning_rate": 4.06702292702526e-07, "loss": 0.8292, "step": 13664 }, { "epoch": 0.87, "grad_norm": 2.2574243461030115, "learning_rate": 4.062928999390736e-07, "loss": 0.7562, "step": 13665 }, { "epoch": 0.87, "grad_norm": 1.5953823981338904, "learning_rate": 4.0588370460349967e-07, "loss": 0.663, "step": 13666 }, { "epoch": 0.87, "grad_norm": 0.9749449420317036, "learning_rate": 4.0547470671339075e-07, "loss": 0.6821, "step": 13667 }, { "epoch": 0.87, "grad_norm": 1.9905296567563928, "learning_rate": 4.050659062863233e-07, "loss": 0.8502, "step": 13668 }, { "epoch": 0.87, "grad_norm": 1.7034701706837114, "learning_rate": 4.0465730333986983e-07, "loss": 0.6289, "step": 13669 }, { "epoch": 0.87, "grad_norm": 2.0222166254103646, "learning_rate": 4.0424889789158763e-07, "loss": 0.8773, "step": 13670 }, { "epoch": 0.88, "grad_norm": 1.8313683348714256, "learning_rate": 4.0384068995903136e-07, "loss": 0.8447, "step": 13671 }, { "epoch": 0.88, "grad_norm": 1.7969195210356619, "learning_rate": 4.0343267955974487e-07, "loss": 0.7232, "step": 13672 }, { "epoch": 0.88, "grad_norm": 1.7447902746580353, "learning_rate": 4.03024866711263e-07, "loss": 0.6768, "step": 13673 }, { "epoch": 0.88, "grad_norm": 1.8270305048111557, "learning_rate": 4.026172514311122e-07, "loss": 0.7474, "step": 13674 }, { "epoch": 0.88, "grad_norm": 1.0679391338074815, "learning_rate": 4.0220983373681133e-07, "loss": 0.6616, "step": 13675 }, { "epoch": 0.88, "grad_norm": 1.7557203402301305, "learning_rate": 4.018026136458719e-07, "loss": 0.7034, "step": 13676 }, { "epoch": 0.88, "grad_norm": 1.1016139843540838, "learning_rate": 4.013955911757922e-07, "loss": 0.6263, "step": 13677 }, { "epoch": 0.88, "grad_norm": 1.6827922830588748, "learning_rate": 4.0098876634406925e-07, "loss": 0.6967, "step": 13678 }, { "epoch": 0.88, "grad_norm": 2.3449408146360295, "learning_rate": 4.00582139168183e-07, "loss": 0.675, "step": 13679 }, { "epoch": 0.88, "grad_norm": 1.4362879765103627, "learning_rate": 4.001757096656128e-07, "loss": 0.7029, "step": 13680 }, { "epoch": 0.88, "grad_norm": 1.493788518122469, "learning_rate": 3.997694778538247e-07, "loss": 0.662, "step": 13681 }, { "epoch": 0.88, "grad_norm": 1.8230243845080063, "learning_rate": 3.993634437502775e-07, "loss": 0.719, "step": 13682 }, { "epoch": 0.88, "grad_norm": 1.7270159696784733, "learning_rate": 3.9895760737242384e-07, "loss": 0.7126, "step": 13683 }, { "epoch": 0.88, "grad_norm": 2.9327505189664502, "learning_rate": 3.985519687377026e-07, "loss": 0.7522, "step": 13684 }, { "epoch": 0.88, "grad_norm": 1.8185769927563225, "learning_rate": 3.9814652786355046e-07, "loss": 0.7515, "step": 13685 }, { "epoch": 0.88, "grad_norm": 1.540229501896279, "learning_rate": 3.9774128476738836e-07, "loss": 0.6496, "step": 13686 }, { "epoch": 0.88, "grad_norm": 1.7081056703184765, "learning_rate": 3.973362394666369e-07, "loss": 0.7533, "step": 13687 }, { "epoch": 0.88, "grad_norm": 2.960255247941249, "learning_rate": 3.969313919787016e-07, "loss": 0.7132, "step": 13688 }, { "epoch": 0.88, "grad_norm": 1.7472679996140554, "learning_rate": 3.965267423209834e-07, "loss": 0.8116, "step": 13689 }, { "epoch": 0.88, "grad_norm": 1.7842834474669391, "learning_rate": 3.961222905108719e-07, "loss": 0.7127, "step": 13690 }, { "epoch": 0.88, "grad_norm": 1.4598731024501816, "learning_rate": 3.957180365657498e-07, "loss": 0.6803, "step": 13691 }, { "epoch": 0.88, "grad_norm": 1.2680225516573294, "learning_rate": 3.953139805029932e-07, "loss": 0.6439, "step": 13692 }, { "epoch": 0.88, "grad_norm": 1.5688220773814106, "learning_rate": 3.949101223399643e-07, "loss": 0.8129, "step": 13693 }, { "epoch": 0.88, "grad_norm": 1.6288567787844348, "learning_rate": 3.945064620940231e-07, "loss": 0.6625, "step": 13694 }, { "epoch": 0.88, "grad_norm": 1.8782281671640666, "learning_rate": 3.9410299978251523e-07, "loss": 0.9482, "step": 13695 }, { "epoch": 0.88, "grad_norm": 1.95266272587614, "learning_rate": 3.936997354227834e-07, "loss": 0.6936, "step": 13696 }, { "epoch": 0.88, "grad_norm": 1.5109956873855863, "learning_rate": 3.932966690321571e-07, "loss": 0.8017, "step": 13697 }, { "epoch": 0.88, "grad_norm": 1.8630750880413927, "learning_rate": 3.9289380062796036e-07, "loss": 0.7465, "step": 13698 }, { "epoch": 0.88, "grad_norm": 1.6928296941524301, "learning_rate": 3.92491130227507e-07, "loss": 0.7019, "step": 13699 }, { "epoch": 0.88, "grad_norm": 1.7353367145117495, "learning_rate": 3.92088657848102e-07, "loss": 0.765, "step": 13700 }, { "epoch": 0.88, "grad_norm": 1.5632818026277133, "learning_rate": 3.9168638350704614e-07, "loss": 0.7152, "step": 13701 }, { "epoch": 0.88, "grad_norm": 1.5577273960704332, "learning_rate": 3.9128430722162437e-07, "loss": 0.7688, "step": 13702 }, { "epoch": 0.88, "grad_norm": 1.8280585072434123, "learning_rate": 3.9088242900911957e-07, "loss": 0.7406, "step": 13703 }, { "epoch": 0.88, "grad_norm": 1.8041631572416241, "learning_rate": 3.904807488868029e-07, "loss": 0.8401, "step": 13704 }, { "epoch": 0.88, "grad_norm": 1.6519571152451888, "learning_rate": 3.900792668719372e-07, "loss": 0.7714, "step": 13705 }, { "epoch": 0.88, "grad_norm": 1.6703343683780245, "learning_rate": 3.896779829817787e-07, "loss": 0.6554, "step": 13706 }, { "epoch": 0.88, "grad_norm": 2.2128728436181624, "learning_rate": 3.8927689723357253e-07, "loss": 0.7625, "step": 13707 }, { "epoch": 0.88, "grad_norm": 2.067322387126691, "learning_rate": 3.888760096445565e-07, "loss": 0.8, "step": 13708 }, { "epoch": 0.88, "grad_norm": 1.6027211822758338, "learning_rate": 3.8847532023195967e-07, "loss": 0.668, "step": 13709 }, { "epoch": 0.88, "grad_norm": 1.8185079953435173, "learning_rate": 3.880748290130043e-07, "loss": 0.9541, "step": 13710 }, { "epoch": 0.88, "grad_norm": 2.353719076131986, "learning_rate": 3.876745360049017e-07, "loss": 0.6752, "step": 13711 }, { "epoch": 0.88, "grad_norm": 2.0673723094273835, "learning_rate": 3.872744412248558e-07, "loss": 0.6866, "step": 13712 }, { "epoch": 0.88, "grad_norm": 1.963122224842255, "learning_rate": 3.868745446900618e-07, "loss": 0.7015, "step": 13713 }, { "epoch": 0.88, "grad_norm": 1.5793046814246985, "learning_rate": 3.864748464177065e-07, "loss": 0.6782, "step": 13714 }, { "epoch": 0.88, "grad_norm": 1.5637741113973727, "learning_rate": 3.860753464249678e-07, "loss": 0.6467, "step": 13715 }, { "epoch": 0.88, "grad_norm": 1.5547535362006029, "learning_rate": 3.856760447290153e-07, "loss": 0.704, "step": 13716 }, { "epoch": 0.88, "grad_norm": 1.0126633317867042, "learning_rate": 3.8527694134700975e-07, "loss": 0.5583, "step": 13717 }, { "epoch": 0.88, "grad_norm": 1.5549348328500365, "learning_rate": 3.848780362961052e-07, "loss": 0.6663, "step": 13718 }, { "epoch": 0.88, "grad_norm": 1.148420225291058, "learning_rate": 3.844793295934451e-07, "loss": 0.6114, "step": 13719 }, { "epoch": 0.88, "grad_norm": 1.560311336333597, "learning_rate": 3.8408082125616473e-07, "loss": 0.6168, "step": 13720 }, { "epoch": 0.88, "grad_norm": 2.0923587327514173, "learning_rate": 3.836825113013909e-07, "loss": 0.6892, "step": 13721 }, { "epoch": 0.88, "grad_norm": 1.9090104030066055, "learning_rate": 3.832843997462432e-07, "loss": 0.7777, "step": 13722 }, { "epoch": 0.88, "grad_norm": 1.5048618872943853, "learning_rate": 3.8288648660783023e-07, "loss": 0.666, "step": 13723 }, { "epoch": 0.88, "grad_norm": 1.5958056397759202, "learning_rate": 3.8248877190325383e-07, "loss": 0.7066, "step": 13724 }, { "epoch": 0.88, "grad_norm": 1.9785228851769963, "learning_rate": 3.820912556496087e-07, "loss": 0.7122, "step": 13725 }, { "epoch": 0.88, "grad_norm": 1.5785694772234353, "learning_rate": 3.816939378639767e-07, "loss": 0.7569, "step": 13726 }, { "epoch": 0.88, "grad_norm": 1.0572426682377487, "learning_rate": 3.8129681856343526e-07, "loss": 0.6818, "step": 13727 }, { "epoch": 0.88, "grad_norm": 1.8327934177098344, "learning_rate": 3.808998977650513e-07, "loss": 0.835, "step": 13728 }, { "epoch": 0.88, "grad_norm": 1.477987401807503, "learning_rate": 3.8050317548588343e-07, "loss": 0.6557, "step": 13729 }, { "epoch": 0.88, "grad_norm": 1.572382282179137, "learning_rate": 3.801066517429824e-07, "loss": 0.841, "step": 13730 }, { "epoch": 0.88, "grad_norm": 1.798156555387991, "learning_rate": 3.7971032655339026e-07, "loss": 0.594, "step": 13731 }, { "epoch": 0.88, "grad_norm": 3.0399034544819115, "learning_rate": 3.7931419993413934e-07, "loss": 0.6595, "step": 13732 }, { "epoch": 0.88, "grad_norm": 2.3177778036673677, "learning_rate": 3.789182719022538e-07, "loss": 0.7597, "step": 13733 }, { "epoch": 0.88, "grad_norm": 2.258006547973527, "learning_rate": 3.7852254247475237e-07, "loss": 0.7036, "step": 13734 }, { "epoch": 0.88, "grad_norm": 1.7102572573509065, "learning_rate": 3.781270116686392e-07, "loss": 0.6909, "step": 13735 }, { "epoch": 0.88, "grad_norm": 1.5935130071433015, "learning_rate": 3.7773167950091616e-07, "loss": 0.7061, "step": 13736 }, { "epoch": 0.88, "grad_norm": 2.055252716541732, "learning_rate": 3.7733654598857303e-07, "loss": 0.7479, "step": 13737 }, { "epoch": 0.88, "grad_norm": 1.5098143267273043, "learning_rate": 3.769416111485913e-07, "loss": 0.7984, "step": 13738 }, { "epoch": 0.88, "grad_norm": 2.234819759873049, "learning_rate": 3.7654687499794453e-07, "loss": 0.7695, "step": 13739 }, { "epoch": 0.88, "grad_norm": 1.7695691143101135, "learning_rate": 3.761523375535975e-07, "loss": 0.6412, "step": 13740 }, { "epoch": 0.88, "grad_norm": 1.5866346938157305, "learning_rate": 3.757579988325083e-07, "loss": 0.6388, "step": 13741 }, { "epoch": 0.88, "grad_norm": 1.5922515791966934, "learning_rate": 3.7536385885162176e-07, "loss": 0.7772, "step": 13742 }, { "epoch": 0.88, "grad_norm": 1.8076593144197477, "learning_rate": 3.7496991762788095e-07, "loss": 0.717, "step": 13743 }, { "epoch": 0.88, "grad_norm": 1.5810568199640915, "learning_rate": 3.745761751782123e-07, "loss": 0.737, "step": 13744 }, { "epoch": 0.88, "grad_norm": 2.262803476937543, "learning_rate": 3.7418263151954184e-07, "loss": 0.7567, "step": 13745 }, { "epoch": 0.88, "grad_norm": 1.8319041879746663, "learning_rate": 3.737892866687809e-07, "loss": 0.6438, "step": 13746 }, { "epoch": 0.88, "grad_norm": 1.9772641989485298, "learning_rate": 3.7339614064283545e-07, "loss": 0.6484, "step": 13747 }, { "epoch": 0.88, "grad_norm": 1.615042682837498, "learning_rate": 3.730031934586031e-07, "loss": 0.6807, "step": 13748 }, { "epoch": 0.88, "grad_norm": 1.9652357801031755, "learning_rate": 3.7261044513296927e-07, "loss": 0.6229, "step": 13749 }, { "epoch": 0.88, "grad_norm": 2.0895956891200234, "learning_rate": 3.722178956828165e-07, "loss": 0.8912, "step": 13750 }, { "epoch": 0.88, "grad_norm": 1.9821259683905383, "learning_rate": 3.71825545125013e-07, "loss": 0.7225, "step": 13751 }, { "epoch": 0.88, "grad_norm": 1.0911255493908303, "learning_rate": 3.7143339347642306e-07, "loss": 0.6227, "step": 13752 }, { "epoch": 0.88, "grad_norm": 1.8934745401812323, "learning_rate": 3.710414407538998e-07, "loss": 0.9465, "step": 13753 }, { "epoch": 0.88, "grad_norm": 1.574569523638406, "learning_rate": 3.706496869742887e-07, "loss": 0.6806, "step": 13754 }, { "epoch": 0.88, "grad_norm": 3.7053111206640628, "learning_rate": 3.7025813215442685e-07, "loss": 0.5802, "step": 13755 }, { "epoch": 0.88, "grad_norm": 1.7779338095751678, "learning_rate": 3.6986677631114074e-07, "loss": 0.8081, "step": 13756 }, { "epoch": 0.88, "grad_norm": 1.726274921278718, "learning_rate": 3.694756194612531e-07, "loss": 0.6541, "step": 13757 }, { "epoch": 0.88, "grad_norm": 1.7754580297294762, "learning_rate": 3.690846616215715e-07, "loss": 0.7392, "step": 13758 }, { "epoch": 0.88, "grad_norm": 1.469340199848128, "learning_rate": 3.686939028089015e-07, "loss": 0.6237, "step": 13759 }, { "epoch": 0.88, "grad_norm": 1.1648129510850747, "learning_rate": 3.683033430400357e-07, "loss": 0.6344, "step": 13760 }, { "epoch": 0.88, "grad_norm": 2.1008005701255104, "learning_rate": 3.6791298233175955e-07, "loss": 0.6024, "step": 13761 }, { "epoch": 0.88, "grad_norm": 1.766886854078398, "learning_rate": 3.675228207008497e-07, "loss": 0.8343, "step": 13762 }, { "epoch": 0.88, "grad_norm": 1.6780980115271087, "learning_rate": 3.671328581640754e-07, "loss": 0.7284, "step": 13763 }, { "epoch": 0.88, "grad_norm": 1.7636281302189394, "learning_rate": 3.667430947381956e-07, "loss": 0.7553, "step": 13764 }, { "epoch": 0.88, "grad_norm": 1.3130354246213642, "learning_rate": 3.663535304399607e-07, "loss": 0.6574, "step": 13765 }, { "epoch": 0.88, "grad_norm": 1.679840503917995, "learning_rate": 3.6596416528611566e-07, "loss": 0.7638, "step": 13766 }, { "epoch": 0.88, "grad_norm": 1.5679181810842404, "learning_rate": 3.6557499929339325e-07, "loss": 0.7056, "step": 13767 }, { "epoch": 0.88, "grad_norm": 1.6475252952273391, "learning_rate": 3.6518603247851947e-07, "loss": 0.7188, "step": 13768 }, { "epoch": 0.88, "grad_norm": 1.5789504037457758, "learning_rate": 3.647972648582104e-07, "loss": 0.721, "step": 13769 }, { "epoch": 0.88, "grad_norm": 0.9733260605933549, "learning_rate": 3.64408696449175e-07, "loss": 0.6874, "step": 13770 }, { "epoch": 0.88, "grad_norm": 1.5715959509308726, "learning_rate": 3.640203272681131e-07, "loss": 0.7188, "step": 13771 }, { "epoch": 0.88, "grad_norm": 1.9058910313157693, "learning_rate": 3.6363215733171644e-07, "loss": 0.777, "step": 13772 }, { "epoch": 0.88, "grad_norm": 1.8427578774829443, "learning_rate": 3.6324418665666717e-07, "loss": 0.8064, "step": 13773 }, { "epoch": 0.88, "grad_norm": 1.6139334330281951, "learning_rate": 3.628564152596381e-07, "loss": 0.7193, "step": 13774 }, { "epoch": 0.88, "grad_norm": 1.8182002569909197, "learning_rate": 3.624688431572981e-07, "loss": 0.8072, "step": 13775 }, { "epoch": 0.88, "grad_norm": 1.9287605396947918, "learning_rate": 3.620814703663017e-07, "loss": 0.6487, "step": 13776 }, { "epoch": 0.88, "grad_norm": 1.7227168470175345, "learning_rate": 3.6169429690329825e-07, "loss": 0.6981, "step": 13777 }, { "epoch": 0.88, "grad_norm": 1.700971348908247, "learning_rate": 3.613073227849279e-07, "loss": 0.6695, "step": 13778 }, { "epoch": 0.88, "grad_norm": 1.6522045498611035, "learning_rate": 3.6092054802782107e-07, "loss": 0.7422, "step": 13779 }, { "epoch": 0.88, "grad_norm": 1.889730819346919, "learning_rate": 3.6053397264860126e-07, "loss": 0.7159, "step": 13780 }, { "epoch": 0.88, "grad_norm": 2.02543617694855, "learning_rate": 3.601475966638829e-07, "loss": 0.7682, "step": 13781 }, { "epoch": 0.88, "grad_norm": 2.058506039688467, "learning_rate": 3.5976142009026936e-07, "loss": 0.8001, "step": 13782 }, { "epoch": 0.88, "grad_norm": 1.5382893103013071, "learning_rate": 3.5937544294436134e-07, "loss": 0.7467, "step": 13783 }, { "epoch": 0.88, "grad_norm": 1.7007792371447343, "learning_rate": 3.5898966524274494e-07, "loss": 0.7031, "step": 13784 }, { "epoch": 0.88, "grad_norm": 1.5068775507801258, "learning_rate": 3.586040870020008e-07, "loss": 0.688, "step": 13785 }, { "epoch": 0.88, "grad_norm": 2.0756170987282396, "learning_rate": 3.582187082386995e-07, "loss": 0.7285, "step": 13786 }, { "epoch": 0.88, "grad_norm": 2.190710457191838, "learning_rate": 3.5783352896940513e-07, "loss": 0.7446, "step": 13787 }, { "epoch": 0.88, "grad_norm": 1.5302865614467154, "learning_rate": 3.5744854921067053e-07, "loss": 0.7509, "step": 13788 }, { "epoch": 0.88, "grad_norm": 1.6293690939690835, "learning_rate": 3.570637689790418e-07, "loss": 0.7218, "step": 13789 }, { "epoch": 0.88, "grad_norm": 1.6494239043384336, "learning_rate": 3.566791882910575e-07, "loss": 0.6647, "step": 13790 }, { "epoch": 0.88, "grad_norm": 1.7048459954227904, "learning_rate": 3.562948071632427e-07, "loss": 0.7859, "step": 13791 }, { "epoch": 0.88, "grad_norm": 1.090835441040688, "learning_rate": 3.5591062561212086e-07, "loss": 0.5658, "step": 13792 }, { "epoch": 0.88, "grad_norm": 1.582950472874273, "learning_rate": 3.55526643654201e-07, "loss": 0.7987, "step": 13793 }, { "epoch": 0.88, "grad_norm": 1.206106475005256, "learning_rate": 3.551428613059871e-07, "loss": 0.6946, "step": 13794 }, { "epoch": 0.88, "grad_norm": 1.6862819707668983, "learning_rate": 3.5475927858397263e-07, "loss": 0.5862, "step": 13795 }, { "epoch": 0.88, "grad_norm": 1.673217837125256, "learning_rate": 3.543758955046428e-07, "loss": 0.6282, "step": 13796 }, { "epoch": 0.88, "grad_norm": 2.378064553061693, "learning_rate": 3.539927120844766e-07, "loss": 0.6936, "step": 13797 }, { "epoch": 0.88, "grad_norm": 1.1010080058374392, "learning_rate": 3.536097283399392e-07, "loss": 0.5942, "step": 13798 }, { "epoch": 0.88, "grad_norm": 1.2021337214134422, "learning_rate": 3.532269442874942e-07, "loss": 0.6359, "step": 13799 }, { "epoch": 0.88, "grad_norm": 1.5185364184667687, "learning_rate": 3.5284435994358937e-07, "loss": 0.7493, "step": 13800 }, { "epoch": 0.88, "grad_norm": 1.9150560881782996, "learning_rate": 3.524619753246694e-07, "loss": 0.6709, "step": 13801 }, { "epoch": 0.88, "grad_norm": 2.195752733364305, "learning_rate": 3.520797904471679e-07, "loss": 0.6923, "step": 13802 }, { "epoch": 0.88, "grad_norm": 2.022940657159668, "learning_rate": 3.516978053275111e-07, "loss": 0.6541, "step": 13803 }, { "epoch": 0.88, "grad_norm": 1.7575186261834168, "learning_rate": 3.5131601998211417e-07, "loss": 0.8006, "step": 13804 }, { "epoch": 0.88, "grad_norm": 1.201152975030067, "learning_rate": 3.5093443442738627e-07, "loss": 0.7422, "step": 13805 }, { "epoch": 0.88, "grad_norm": 1.3462772243749033, "learning_rate": 3.505530486797287e-07, "loss": 0.673, "step": 13806 }, { "epoch": 0.88, "grad_norm": 1.640000860790489, "learning_rate": 3.5017186275553004e-07, "loss": 0.7369, "step": 13807 }, { "epoch": 0.88, "grad_norm": 1.9925708546361192, "learning_rate": 3.4979087667117494e-07, "loss": 0.79, "step": 13808 }, { "epoch": 0.88, "grad_norm": 1.0943175130196203, "learning_rate": 3.494100904430359e-07, "loss": 0.6723, "step": 13809 }, { "epoch": 0.88, "grad_norm": 1.5306029947740183, "learning_rate": 3.490295040874792e-07, "loss": 0.6649, "step": 13810 }, { "epoch": 0.88, "grad_norm": 1.5152180350177444, "learning_rate": 3.486491176208617e-07, "loss": 0.6914, "step": 13811 }, { "epoch": 0.88, "grad_norm": 1.7227987298909344, "learning_rate": 3.4826893105952997e-07, "loss": 0.7232, "step": 13812 }, { "epoch": 0.88, "grad_norm": 1.2396554026451523, "learning_rate": 3.478889444198269e-07, "loss": 0.6826, "step": 13813 }, { "epoch": 0.88, "grad_norm": 1.7747838900203057, "learning_rate": 3.4750915771808005e-07, "loss": 0.7082, "step": 13814 }, { "epoch": 0.88, "grad_norm": 1.631402116144084, "learning_rate": 3.471295709706146e-07, "loss": 0.7413, "step": 13815 }, { "epoch": 0.88, "grad_norm": 2.5428943727779494, "learning_rate": 3.4675018419374204e-07, "loss": 0.6763, "step": 13816 }, { "epoch": 0.88, "grad_norm": 1.734607700532, "learning_rate": 3.4637099740376934e-07, "loss": 0.7119, "step": 13817 }, { "epoch": 0.88, "grad_norm": 1.8377440691810738, "learning_rate": 3.459920106169923e-07, "loss": 0.6589, "step": 13818 }, { "epoch": 0.88, "grad_norm": 1.5428317122838346, "learning_rate": 3.4561322384969843e-07, "loss": 0.681, "step": 13819 }, { "epoch": 0.88, "grad_norm": 1.7060112716672784, "learning_rate": 3.4523463711816974e-07, "loss": 0.7842, "step": 13820 }, { "epoch": 0.88, "grad_norm": 1.720818995825867, "learning_rate": 3.448562504386738e-07, "loss": 0.7993, "step": 13821 }, { "epoch": 0.88, "grad_norm": 1.5974333731035393, "learning_rate": 3.4447806382747584e-07, "loss": 0.6814, "step": 13822 }, { "epoch": 0.88, "grad_norm": 1.7524592368723264, "learning_rate": 3.4410007730082685e-07, "loss": 0.8577, "step": 13823 }, { "epoch": 0.88, "grad_norm": 1.4766430750914983, "learning_rate": 3.4372229087497376e-07, "loss": 0.7001, "step": 13824 }, { "epoch": 0.88, "grad_norm": 1.504610067963993, "learning_rate": 3.4334470456615255e-07, "loss": 0.6854, "step": 13825 }, { "epoch": 0.88, "grad_norm": 1.795637344002707, "learning_rate": 3.4296731839059073e-07, "loss": 0.7716, "step": 13826 }, { "epoch": 0.89, "grad_norm": 1.254049839803166, "learning_rate": 3.4259013236450755e-07, "loss": 0.6862, "step": 13827 }, { "epoch": 0.89, "grad_norm": 1.6290351652512196, "learning_rate": 3.4221314650411454e-07, "loss": 0.8172, "step": 13828 }, { "epoch": 0.89, "grad_norm": 1.551348785764512, "learning_rate": 3.4183636082561257e-07, "loss": 0.7686, "step": 13829 }, { "epoch": 0.89, "grad_norm": 1.6942021172293071, "learning_rate": 3.414597753451954e-07, "loss": 0.7245, "step": 13830 }, { "epoch": 0.89, "grad_norm": 1.8420292754580407, "learning_rate": 3.4108339007904834e-07, "loss": 0.7388, "step": 13831 }, { "epoch": 0.89, "grad_norm": 1.893749591371699, "learning_rate": 3.40707205043348e-07, "loss": 0.6768, "step": 13832 }, { "epoch": 0.89, "grad_norm": 1.0224173563912753, "learning_rate": 3.4033122025426077e-07, "loss": 0.618, "step": 13833 }, { "epoch": 0.89, "grad_norm": 1.572983138777518, "learning_rate": 3.399554357279472e-07, "loss": 0.6841, "step": 13834 }, { "epoch": 0.89, "grad_norm": 1.867201735124102, "learning_rate": 3.395798514805565e-07, "loss": 0.8107, "step": 13835 }, { "epoch": 0.89, "grad_norm": 2.331054422506513, "learning_rate": 3.392044675282308e-07, "loss": 0.9066, "step": 13836 }, { "epoch": 0.89, "grad_norm": 1.0558206058381623, "learning_rate": 3.3882928388710376e-07, "loss": 0.6227, "step": 13837 }, { "epoch": 0.89, "grad_norm": 1.0116677130763032, "learning_rate": 3.384543005732982e-07, "loss": 0.5796, "step": 13838 }, { "epoch": 0.89, "grad_norm": 1.6234378225634105, "learning_rate": 3.380795176029328e-07, "loss": 0.5998, "step": 13839 }, { "epoch": 0.89, "grad_norm": 1.5670599908752485, "learning_rate": 3.3770493499211356e-07, "loss": 0.8035, "step": 13840 }, { "epoch": 0.89, "grad_norm": 1.5236556132450103, "learning_rate": 3.3733055275693983e-07, "loss": 0.578, "step": 13841 }, { "epoch": 0.89, "grad_norm": 1.5421947167597638, "learning_rate": 3.369563709135004e-07, "loss": 0.7643, "step": 13842 }, { "epoch": 0.89, "grad_norm": 1.0730768353489224, "learning_rate": 3.3658238947787857e-07, "loss": 0.503, "step": 13843 }, { "epoch": 0.89, "grad_norm": 1.6230887530446492, "learning_rate": 3.362086084661459e-07, "loss": 0.7716, "step": 13844 }, { "epoch": 0.89, "grad_norm": 1.4119320305951364, "learning_rate": 3.3583502789436783e-07, "loss": 0.614, "step": 13845 }, { "epoch": 0.89, "grad_norm": 3.8391691332515214, "learning_rate": 3.3546164777859936e-07, "loss": 0.6806, "step": 13846 }, { "epoch": 0.89, "grad_norm": 1.5426504089712056, "learning_rate": 3.3508846813488647e-07, "loss": 0.6626, "step": 13847 }, { "epoch": 0.89, "grad_norm": 4.33269493904859, "learning_rate": 3.3471548897926973e-07, "loss": 0.6278, "step": 13848 }, { "epoch": 0.89, "grad_norm": 1.761424703821369, "learning_rate": 3.3434271032777856e-07, "loss": 0.7139, "step": 13849 }, { "epoch": 0.89, "grad_norm": 1.6577418453735207, "learning_rate": 3.3397013219643395e-07, "loss": 0.8766, "step": 13850 }, { "epoch": 0.89, "grad_norm": 1.7680764161514695, "learning_rate": 3.335977546012481e-07, "loss": 0.8117, "step": 13851 }, { "epoch": 0.89, "grad_norm": 1.5283424171534605, "learning_rate": 3.332255775582249e-07, "loss": 0.7288, "step": 13852 }, { "epoch": 0.89, "grad_norm": 1.1819721145069375, "learning_rate": 3.328536010833605e-07, "loss": 0.6312, "step": 13853 }, { "epoch": 0.89, "grad_norm": 1.6604278496009495, "learning_rate": 3.3248182519264036e-07, "loss": 0.6742, "step": 13854 }, { "epoch": 0.89, "grad_norm": 1.9354844290155055, "learning_rate": 3.3211024990204565e-07, "loss": 0.8469, "step": 13855 }, { "epoch": 0.89, "grad_norm": 2.4805911370626634, "learning_rate": 3.317388752275419e-07, "loss": 0.6899, "step": 13856 }, { "epoch": 0.89, "grad_norm": 1.7943719421341173, "learning_rate": 3.3136770118509243e-07, "loss": 0.6417, "step": 13857 }, { "epoch": 0.89, "grad_norm": 1.0800673493383366, "learning_rate": 3.309967277906495e-07, "loss": 0.6313, "step": 13858 }, { "epoch": 0.89, "grad_norm": 1.8014706603696966, "learning_rate": 3.3062595506015594e-07, "loss": 0.732, "step": 13859 }, { "epoch": 0.89, "grad_norm": 1.7289667101332395, "learning_rate": 3.302553830095473e-07, "loss": 0.6482, "step": 13860 }, { "epoch": 0.89, "grad_norm": 1.103654542446907, "learning_rate": 3.2988501165474864e-07, "loss": 0.5726, "step": 13861 }, { "epoch": 0.89, "grad_norm": 1.2984445152326634, "learning_rate": 3.295148410116811e-07, "loss": 0.6827, "step": 13862 }, { "epoch": 0.89, "grad_norm": 1.0819793261763062, "learning_rate": 3.291448710962497e-07, "loss": 0.6254, "step": 13863 }, { "epoch": 0.89, "grad_norm": 1.702821628070619, "learning_rate": 3.2877510192435845e-07, "loss": 0.7179, "step": 13864 }, { "epoch": 0.89, "grad_norm": 2.093448799151411, "learning_rate": 3.284055335118963e-07, "loss": 0.8131, "step": 13865 }, { "epoch": 0.89, "grad_norm": 1.7438189748264274, "learning_rate": 3.2803616587474885e-07, "loss": 0.7774, "step": 13866 }, { "epoch": 0.89, "grad_norm": 1.6280462645120553, "learning_rate": 3.276669990287895e-07, "loss": 0.6659, "step": 13867 }, { "epoch": 0.89, "grad_norm": 1.1595460689788089, "learning_rate": 3.272980329898845e-07, "loss": 0.6569, "step": 13868 }, { "epoch": 0.89, "grad_norm": 1.6600891394411585, "learning_rate": 3.269292677738922e-07, "loss": 0.6657, "step": 13869 }, { "epoch": 0.89, "grad_norm": 1.647065179963658, "learning_rate": 3.265607033966595e-07, "loss": 0.7222, "step": 13870 }, { "epoch": 0.89, "grad_norm": 1.7953179162866797, "learning_rate": 3.261923398740285e-07, "loss": 0.6666, "step": 13871 }, { "epoch": 0.89, "grad_norm": 1.6017033492417658, "learning_rate": 3.2582417722182845e-07, "loss": 0.7228, "step": 13872 }, { "epoch": 0.89, "grad_norm": 1.916597048838165, "learning_rate": 3.2545621545588434e-07, "loss": 0.6767, "step": 13873 }, { "epoch": 0.89, "grad_norm": 1.799746054941027, "learning_rate": 3.250884545920091e-07, "loss": 0.7244, "step": 13874 }, { "epoch": 0.89, "grad_norm": 1.8019061065437487, "learning_rate": 3.2472089464600844e-07, "loss": 0.6827, "step": 13875 }, { "epoch": 0.89, "grad_norm": 1.1857347641816471, "learning_rate": 3.243535356336808e-07, "loss": 0.7172, "step": 13876 }, { "epoch": 0.89, "grad_norm": 1.6307681651697876, "learning_rate": 3.2398637757081187e-07, "loss": 0.6886, "step": 13877 }, { "epoch": 0.89, "grad_norm": 1.5017919729121056, "learning_rate": 3.2361942047318463e-07, "loss": 0.8012, "step": 13878 }, { "epoch": 0.89, "grad_norm": 1.7116164769625515, "learning_rate": 3.232526643565664e-07, "loss": 0.6772, "step": 13879 }, { "epoch": 0.89, "grad_norm": 1.7175626676445095, "learning_rate": 3.228861092367225e-07, "loss": 0.6814, "step": 13880 }, { "epoch": 0.89, "grad_norm": 1.1492203934930312, "learning_rate": 3.2251975512940516e-07, "loss": 0.7425, "step": 13881 }, { "epoch": 0.89, "grad_norm": 1.763393061041172, "learning_rate": 3.221536020503602e-07, "loss": 0.7496, "step": 13882 }, { "epoch": 0.89, "grad_norm": 2.013480204866176, "learning_rate": 3.2178765001532386e-07, "loss": 0.7308, "step": 13883 }, { "epoch": 0.89, "grad_norm": 1.590118599199808, "learning_rate": 3.214218990400236e-07, "loss": 0.8687, "step": 13884 }, { "epoch": 0.89, "grad_norm": 1.623266725872914, "learning_rate": 3.210563491401808e-07, "loss": 0.7326, "step": 13885 }, { "epoch": 0.89, "grad_norm": 1.521513030030085, "learning_rate": 3.2069100033150225e-07, "loss": 0.6301, "step": 13886 }, { "epoch": 0.89, "grad_norm": 1.574512474015634, "learning_rate": 3.203258526296937e-07, "loss": 0.611, "step": 13887 }, { "epoch": 0.89, "grad_norm": 1.568836034805406, "learning_rate": 3.1996090605044496e-07, "loss": 0.7631, "step": 13888 }, { "epoch": 0.89, "grad_norm": 1.810410088614325, "learning_rate": 3.195961606094433e-07, "loss": 0.7372, "step": 13889 }, { "epoch": 0.89, "grad_norm": 1.1183406153842017, "learning_rate": 3.1923161632236355e-07, "loss": 0.6002, "step": 13890 }, { "epoch": 0.89, "grad_norm": 1.252850270084987, "learning_rate": 3.188672732048731e-07, "loss": 0.6813, "step": 13891 }, { "epoch": 0.89, "grad_norm": 1.4767854184510436, "learning_rate": 3.185031312726311e-07, "loss": 0.6678, "step": 13892 }, { "epoch": 0.89, "grad_norm": 1.6741292315218184, "learning_rate": 3.181391905412867e-07, "loss": 0.68, "step": 13893 }, { "epoch": 0.89, "grad_norm": 1.7586327534034656, "learning_rate": 3.1777545102648354e-07, "loss": 0.7104, "step": 13894 }, { "epoch": 0.89, "grad_norm": 1.6756709755189692, "learning_rate": 3.1741191274385076e-07, "loss": 0.7918, "step": 13895 }, { "epoch": 0.89, "grad_norm": 1.127511942757693, "learning_rate": 3.170485757090158e-07, "loss": 0.6778, "step": 13896 }, { "epoch": 0.89, "grad_norm": 1.033860964424901, "learning_rate": 3.1668543993759293e-07, "loss": 0.6678, "step": 13897 }, { "epoch": 0.89, "grad_norm": 1.5790698723005023, "learning_rate": 3.163225054451885e-07, "loss": 0.5971, "step": 13898 }, { "epoch": 0.89, "grad_norm": 1.8116301899917404, "learning_rate": 3.159597722474006e-07, "loss": 0.6979, "step": 13899 }, { "epoch": 0.89, "grad_norm": 1.622756431042148, "learning_rate": 3.155972403598201e-07, "loss": 0.7176, "step": 13900 }, { "epoch": 0.89, "grad_norm": 1.9168364534280566, "learning_rate": 3.152349097980262e-07, "loss": 0.7438, "step": 13901 }, { "epoch": 0.89, "grad_norm": 1.5536075068316593, "learning_rate": 3.1487278057759196e-07, "loss": 0.7202, "step": 13902 }, { "epoch": 0.89, "grad_norm": 1.631064826636568, "learning_rate": 3.1451085271408053e-07, "loss": 0.6652, "step": 13903 }, { "epoch": 0.89, "grad_norm": 1.1010326026051565, "learning_rate": 3.141491262230473e-07, "loss": 0.6451, "step": 13904 }, { "epoch": 0.89, "grad_norm": 1.2564831550916602, "learning_rate": 3.137876011200386e-07, "loss": 0.6352, "step": 13905 }, { "epoch": 0.89, "grad_norm": 1.7716264032613895, "learning_rate": 3.134262774205915e-07, "loss": 0.7681, "step": 13906 }, { "epoch": 0.89, "grad_norm": 1.1813557736666591, "learning_rate": 3.130651551402353e-07, "loss": 0.578, "step": 13907 }, { "epoch": 0.89, "grad_norm": 1.6989319522293036, "learning_rate": 3.127042342944897e-07, "loss": 0.6689, "step": 13908 }, { "epoch": 0.89, "grad_norm": 1.7027482428742264, "learning_rate": 3.123435148988674e-07, "loss": 0.5659, "step": 13909 }, { "epoch": 0.89, "grad_norm": 1.619353553847792, "learning_rate": 3.1198299696886925e-07, "loss": 0.7335, "step": 13910 }, { "epoch": 0.89, "grad_norm": 1.623882371906214, "learning_rate": 3.116226805199929e-07, "loss": 0.7042, "step": 13911 }, { "epoch": 0.89, "grad_norm": 1.726162865789492, "learning_rate": 3.1126256556772096e-07, "loss": 0.6267, "step": 13912 }, { "epoch": 0.89, "grad_norm": 2.064460981546274, "learning_rate": 3.1090265212753214e-07, "loss": 0.741, "step": 13913 }, { "epoch": 0.89, "grad_norm": 1.6326592121532428, "learning_rate": 3.1054294021489353e-07, "loss": 0.6868, "step": 13914 }, { "epoch": 0.89, "grad_norm": 1.8519875054042594, "learning_rate": 3.101834298452661e-07, "loss": 0.7421, "step": 13915 }, { "epoch": 0.89, "grad_norm": 1.8856509551537735, "learning_rate": 3.098241210341002e-07, "loss": 0.7756, "step": 13916 }, { "epoch": 0.89, "grad_norm": 1.6807676061345445, "learning_rate": 3.094650137968369e-07, "loss": 0.6726, "step": 13917 }, { "epoch": 0.89, "grad_norm": 1.857273138443481, "learning_rate": 3.0910610814891327e-07, "loss": 0.7894, "step": 13918 }, { "epoch": 0.89, "grad_norm": 1.6685720245611495, "learning_rate": 3.0874740410575033e-07, "loss": 0.7209, "step": 13919 }, { "epoch": 0.89, "grad_norm": 2.0871506859346276, "learning_rate": 3.083889016827679e-07, "loss": 0.7345, "step": 13920 }, { "epoch": 0.89, "grad_norm": 1.7290410774824247, "learning_rate": 3.080306008953704e-07, "loss": 0.7545, "step": 13921 }, { "epoch": 0.89, "grad_norm": 1.7715100929212926, "learning_rate": 3.0767250175895933e-07, "loss": 0.6922, "step": 13922 }, { "epoch": 0.89, "grad_norm": 1.6430842917901438, "learning_rate": 3.0731460428892414e-07, "loss": 0.7269, "step": 13923 }, { "epoch": 0.89, "grad_norm": 1.181526414971625, "learning_rate": 3.069569085006463e-07, "loss": 0.7106, "step": 13924 }, { "epoch": 0.89, "grad_norm": 1.9077034262260426, "learning_rate": 3.065994144094997e-07, "loss": 0.8444, "step": 13925 }, { "epoch": 0.89, "grad_norm": 2.1005359767993426, "learning_rate": 3.06242122030847e-07, "loss": 0.8427, "step": 13926 }, { "epoch": 0.89, "grad_norm": 1.9201456847560894, "learning_rate": 3.0588503138004597e-07, "loss": 0.6233, "step": 13927 }, { "epoch": 0.89, "grad_norm": 1.7656645952881704, "learning_rate": 3.055281424724421e-07, "loss": 0.7543, "step": 13928 }, { "epoch": 0.89, "grad_norm": 1.1457450849170794, "learning_rate": 3.051714553233748e-07, "loss": 0.6467, "step": 13929 }, { "epoch": 0.89, "grad_norm": 1.9283854069559658, "learning_rate": 3.048149699481723e-07, "loss": 0.7782, "step": 13930 }, { "epoch": 0.89, "grad_norm": 1.0975422000584418, "learning_rate": 3.044586863621568e-07, "loss": 0.6104, "step": 13931 }, { "epoch": 0.89, "grad_norm": 1.6602382975309278, "learning_rate": 3.0410260458064056e-07, "loss": 0.8384, "step": 13932 }, { "epoch": 0.89, "grad_norm": 1.9949316003300694, "learning_rate": 3.0374672461892574e-07, "loss": 0.6846, "step": 13933 }, { "epoch": 0.89, "grad_norm": 1.7084210915677287, "learning_rate": 3.0339104649231064e-07, "loss": 0.7227, "step": 13934 }, { "epoch": 0.89, "grad_norm": 1.6583376095422182, "learning_rate": 3.0303557021607754e-07, "loss": 0.6785, "step": 13935 }, { "epoch": 0.89, "grad_norm": 1.2559268779190877, "learning_rate": 3.026802958055075e-07, "loss": 0.6611, "step": 13936 }, { "epoch": 0.89, "grad_norm": 1.109796950961231, "learning_rate": 3.023252232758667e-07, "loss": 0.8039, "step": 13937 }, { "epoch": 0.89, "grad_norm": 1.6848909057164654, "learning_rate": 3.019703526424167e-07, "loss": 0.7474, "step": 13938 }, { "epoch": 0.89, "grad_norm": 1.1364665546266928, "learning_rate": 3.0161568392040986e-07, "loss": 0.7024, "step": 13939 }, { "epoch": 0.89, "grad_norm": 1.6185646478356097, "learning_rate": 3.012612171250867e-07, "loss": 0.6677, "step": 13940 }, { "epoch": 0.89, "grad_norm": 1.9147254816031698, "learning_rate": 3.009069522716851e-07, "loss": 0.7239, "step": 13941 }, { "epoch": 0.89, "grad_norm": 1.316766014237793, "learning_rate": 3.005528893754267e-07, "loss": 0.6953, "step": 13942 }, { "epoch": 0.89, "grad_norm": 1.9483252378289588, "learning_rate": 3.0019902845153216e-07, "loss": 0.656, "step": 13943 }, { "epoch": 0.89, "grad_norm": 1.913710478239342, "learning_rate": 2.9984536951520595e-07, "loss": 0.6722, "step": 13944 }, { "epoch": 0.89, "grad_norm": 1.8496795372618644, "learning_rate": 2.994919125816498e-07, "loss": 0.792, "step": 13945 }, { "epoch": 0.89, "grad_norm": 1.743647695283776, "learning_rate": 2.991386576660543e-07, "loss": 0.7462, "step": 13946 }, { "epoch": 0.89, "grad_norm": 1.8652569748197423, "learning_rate": 2.9878560478360186e-07, "loss": 0.6583, "step": 13947 }, { "epoch": 0.89, "grad_norm": 1.7072752641746516, "learning_rate": 2.9843275394946526e-07, "loss": 0.6687, "step": 13948 }, { "epoch": 0.89, "grad_norm": 1.1986534471057069, "learning_rate": 2.980801051788085e-07, "loss": 0.5761, "step": 13949 }, { "epoch": 0.89, "grad_norm": 1.5664400871857418, "learning_rate": 2.977276584867905e-07, "loss": 0.6489, "step": 13950 }, { "epoch": 0.89, "grad_norm": 1.610944676983281, "learning_rate": 2.973754138885554e-07, "loss": 0.6061, "step": 13951 }, { "epoch": 0.89, "grad_norm": 1.5905782882531814, "learning_rate": 2.970233713992443e-07, "loss": 0.6941, "step": 13952 }, { "epoch": 0.89, "grad_norm": 1.7917547782213845, "learning_rate": 2.9667153103398573e-07, "loss": 0.8809, "step": 13953 }, { "epoch": 0.89, "grad_norm": 1.6177179069319962, "learning_rate": 2.9631989280790254e-07, "loss": 0.6927, "step": 13954 }, { "epoch": 0.89, "grad_norm": 1.1850531266502882, "learning_rate": 2.9596845673610597e-07, "loss": 0.6562, "step": 13955 }, { "epoch": 0.89, "grad_norm": 1.5337672300248923, "learning_rate": 2.956172228337012e-07, "loss": 0.5691, "step": 13956 }, { "epoch": 0.89, "grad_norm": 1.8380295366491435, "learning_rate": 2.9526619111578223e-07, "loss": 0.6091, "step": 13957 }, { "epoch": 0.89, "grad_norm": 1.6973877154801003, "learning_rate": 2.9491536159743595e-07, "loss": 0.7859, "step": 13958 }, { "epoch": 0.89, "grad_norm": 1.5504198944084793, "learning_rate": 2.945647342937413e-07, "loss": 0.5965, "step": 13959 }, { "epoch": 0.89, "grad_norm": 1.565835689874354, "learning_rate": 2.9421430921976746e-07, "loss": 0.7474, "step": 13960 }, { "epoch": 0.89, "grad_norm": 1.5553582615803843, "learning_rate": 2.9386408639057394e-07, "loss": 0.7042, "step": 13961 }, { "epoch": 0.89, "grad_norm": 1.820782396913275, "learning_rate": 2.9351406582121264e-07, "loss": 0.7872, "step": 13962 }, { "epoch": 0.89, "grad_norm": 1.7135121061261978, "learning_rate": 2.9316424752672766e-07, "loss": 0.6644, "step": 13963 }, { "epoch": 0.89, "grad_norm": 1.7421878221634797, "learning_rate": 2.9281463152215304e-07, "loss": 0.6931, "step": 13964 }, { "epoch": 0.89, "grad_norm": 1.670202833386591, "learning_rate": 2.9246521782251403e-07, "loss": 0.7125, "step": 13965 }, { "epoch": 0.89, "grad_norm": 1.2580486216788862, "learning_rate": 2.92116006442828e-07, "loss": 0.6049, "step": 13966 }, { "epoch": 0.89, "grad_norm": 2.1557498830104107, "learning_rate": 2.91766997398103e-07, "loss": 0.8164, "step": 13967 }, { "epoch": 0.89, "grad_norm": 1.6786989628360254, "learning_rate": 2.914181907033392e-07, "loss": 0.8193, "step": 13968 }, { "epoch": 0.89, "grad_norm": 1.726813513536484, "learning_rate": 2.91069586373528e-07, "loss": 0.7468, "step": 13969 }, { "epoch": 0.89, "grad_norm": 1.6994570336282304, "learning_rate": 2.9072118442365126e-07, "loss": 0.8003, "step": 13970 }, { "epoch": 0.89, "grad_norm": 2.000710295902283, "learning_rate": 2.9037298486868205e-07, "loss": 0.6818, "step": 13971 }, { "epoch": 0.89, "grad_norm": 1.8267446803830014, "learning_rate": 2.9002498772358556e-07, "loss": 0.7213, "step": 13972 }, { "epoch": 0.89, "grad_norm": 2.0402166430559525, "learning_rate": 2.8967719300331875e-07, "loss": 0.7065, "step": 13973 }, { "epoch": 0.89, "grad_norm": 1.9612582357492996, "learning_rate": 2.89329600722828e-07, "loss": 0.6873, "step": 13974 }, { "epoch": 0.89, "grad_norm": 1.9865538856081166, "learning_rate": 2.8898221089705194e-07, "loss": 0.7075, "step": 13975 }, { "epoch": 0.89, "grad_norm": 1.9008229068682436, "learning_rate": 2.886350235409224e-07, "loss": 0.9151, "step": 13976 }, { "epoch": 0.89, "grad_norm": 1.940738739908087, "learning_rate": 2.882880386693582e-07, "loss": 0.7636, "step": 13977 }, { "epoch": 0.89, "grad_norm": 1.9489479561254255, "learning_rate": 2.8794125629727444e-07, "loss": 0.8884, "step": 13978 }, { "epoch": 0.89, "grad_norm": 1.8527624457682634, "learning_rate": 2.8759467643957375e-07, "loss": 0.6349, "step": 13979 }, { "epoch": 0.89, "grad_norm": 1.684590842907097, "learning_rate": 2.872482991111519e-07, "loss": 0.7829, "step": 13980 }, { "epoch": 0.89, "grad_norm": 1.8157134080271657, "learning_rate": 2.8690212432689546e-07, "loss": 0.6753, "step": 13981 }, { "epoch": 0.89, "grad_norm": 1.522649759650179, "learning_rate": 2.865561521016813e-07, "loss": 0.6936, "step": 13982 }, { "epoch": 0.89, "grad_norm": 3.1806153458047697, "learning_rate": 2.862103824503809e-07, "loss": 0.7142, "step": 13983 }, { "epoch": 0.9, "grad_norm": 0.9985916919531228, "learning_rate": 2.858648153878518e-07, "loss": 0.5843, "step": 13984 }, { "epoch": 0.9, "grad_norm": 1.8912220273111469, "learning_rate": 2.855194509289483e-07, "loss": 0.6166, "step": 13985 }, { "epoch": 0.9, "grad_norm": 1.8242595438471485, "learning_rate": 2.851742890885112e-07, "loss": 0.6852, "step": 13986 }, { "epoch": 0.9, "grad_norm": 1.7014238201601295, "learning_rate": 2.8482932988137647e-07, "loss": 0.7099, "step": 13987 }, { "epoch": 0.9, "grad_norm": 1.5352198424507402, "learning_rate": 2.8448457332236945e-07, "loss": 0.6364, "step": 13988 }, { "epoch": 0.9, "grad_norm": 1.6230105355202413, "learning_rate": 2.8414001942630556e-07, "loss": 0.778, "step": 13989 }, { "epoch": 0.9, "grad_norm": 1.675904312613883, "learning_rate": 2.837956682079962e-07, "loss": 0.6849, "step": 13990 }, { "epoch": 0.9, "grad_norm": 1.634521890380867, "learning_rate": 2.834515196822374e-07, "loss": 0.6647, "step": 13991 }, { "epoch": 0.9, "grad_norm": 2.0897654713052907, "learning_rate": 2.831075738638228e-07, "loss": 0.7514, "step": 13992 }, { "epoch": 0.9, "grad_norm": 1.803601168452557, "learning_rate": 2.8276383076753175e-07, "loss": 0.687, "step": 13993 }, { "epoch": 0.9, "grad_norm": 1.650970570880122, "learning_rate": 2.8242029040813965e-07, "loss": 0.7949, "step": 13994 }, { "epoch": 0.9, "grad_norm": 1.6683732238076867, "learning_rate": 2.8207695280041025e-07, "loss": 0.716, "step": 13995 }, { "epoch": 0.9, "grad_norm": 1.7684840403297746, "learning_rate": 2.8173381795910006e-07, "loss": 0.7112, "step": 13996 }, { "epoch": 0.9, "grad_norm": 1.4754634643760491, "learning_rate": 2.813908858989556e-07, "loss": 0.7231, "step": 13997 }, { "epoch": 0.9, "grad_norm": 1.9115119278838857, "learning_rate": 2.8104815663471506e-07, "loss": 0.6882, "step": 13998 }, { "epoch": 0.9, "grad_norm": 1.9712613493000897, "learning_rate": 2.8070563018111063e-07, "loss": 0.7727, "step": 13999 }, { "epoch": 0.9, "grad_norm": 1.7959620541413173, "learning_rate": 2.803633065528599e-07, "loss": 0.865, "step": 14000 }, { "epoch": 0.9, "grad_norm": 1.8649143297417008, "learning_rate": 2.8002118576467784e-07, "loss": 0.6668, "step": 14001 }, { "epoch": 0.9, "grad_norm": 1.6978411919479128, "learning_rate": 2.796792678312671e-07, "loss": 0.7746, "step": 14002 }, { "epoch": 0.9, "grad_norm": 3.7056736714195773, "learning_rate": 2.7933755276732257e-07, "loss": 0.7631, "step": 14003 }, { "epoch": 0.9, "grad_norm": 1.630754400356331, "learning_rate": 2.7899604058753025e-07, "loss": 0.7985, "step": 14004 }, { "epoch": 0.9, "grad_norm": 1.8028645471918325, "learning_rate": 2.7865473130656794e-07, "loss": 0.8657, "step": 14005 }, { "epoch": 0.9, "grad_norm": 1.7487095513266853, "learning_rate": 2.78313624939105e-07, "loss": 0.7369, "step": 14006 }, { "epoch": 0.9, "grad_norm": 2.0528123234138773, "learning_rate": 2.779727214997996e-07, "loss": 0.6738, "step": 14007 }, { "epoch": 0.9, "grad_norm": 1.61494203018925, "learning_rate": 2.7763202100330624e-07, "loss": 0.7127, "step": 14008 }, { "epoch": 0.9, "grad_norm": 1.0716431054280828, "learning_rate": 2.7729152346426366e-07, "loss": 0.6201, "step": 14009 }, { "epoch": 0.9, "grad_norm": 1.6440885299774957, "learning_rate": 2.7695122889730865e-07, "loss": 0.6366, "step": 14010 }, { "epoch": 0.9, "grad_norm": 1.049333325551237, "learning_rate": 2.766111373170649e-07, "loss": 0.6207, "step": 14011 }, { "epoch": 0.9, "grad_norm": 1.8171258658105367, "learning_rate": 2.762712487381497e-07, "loss": 0.7633, "step": 14012 }, { "epoch": 0.9, "grad_norm": 2.059592370104581, "learning_rate": 2.7593156317516966e-07, "loss": 0.7353, "step": 14013 }, { "epoch": 0.9, "grad_norm": 1.2531451517335048, "learning_rate": 2.7559208064272423e-07, "loss": 0.6295, "step": 14014 }, { "epoch": 0.9, "grad_norm": 1.9502920749514654, "learning_rate": 2.752528011554051e-07, "loss": 0.624, "step": 14015 }, { "epoch": 0.9, "grad_norm": 1.7605493803608987, "learning_rate": 2.749137247277911e-07, "loss": 0.7378, "step": 14016 }, { "epoch": 0.9, "grad_norm": 2.0858575280627796, "learning_rate": 2.7457485137445725e-07, "loss": 0.7527, "step": 14017 }, { "epoch": 0.9, "grad_norm": 2.0305199411546577, "learning_rate": 2.7423618110996697e-07, "loss": 0.9211, "step": 14018 }, { "epoch": 0.9, "grad_norm": 1.5479377536391314, "learning_rate": 2.738977139488752e-07, "loss": 0.7064, "step": 14019 }, { "epoch": 0.9, "grad_norm": 1.5868616443582353, "learning_rate": 2.735594499057287e-07, "loss": 0.7549, "step": 14020 }, { "epoch": 0.9, "grad_norm": 1.8578641702572187, "learning_rate": 2.732213889950652e-07, "loss": 0.7394, "step": 14021 }, { "epoch": 0.9, "grad_norm": 1.9446978233090975, "learning_rate": 2.728835312314143e-07, "loss": 0.6666, "step": 14022 }, { "epoch": 0.9, "grad_norm": 1.8486842629878593, "learning_rate": 2.725458766292954e-07, "loss": 0.8292, "step": 14023 }, { "epoch": 0.9, "grad_norm": 0.9333747957928333, "learning_rate": 2.72208425203222e-07, "loss": 0.5924, "step": 14024 }, { "epoch": 0.9, "grad_norm": 1.500987233824952, "learning_rate": 2.718711769676957e-07, "loss": 0.6925, "step": 14025 }, { "epoch": 0.9, "grad_norm": 1.6620992873995764, "learning_rate": 2.715341319372117e-07, "loss": 0.7516, "step": 14026 }, { "epoch": 0.9, "grad_norm": 1.5878635798269638, "learning_rate": 2.711972901262538e-07, "loss": 0.811, "step": 14027 }, { "epoch": 0.9, "grad_norm": 1.6796744747239356, "learning_rate": 2.708606515493006e-07, "loss": 0.8042, "step": 14028 }, { "epoch": 0.9, "grad_norm": 2.470961398122992, "learning_rate": 2.705242162208188e-07, "loss": 0.6064, "step": 14029 }, { "epoch": 0.9, "grad_norm": 1.947563551728397, "learning_rate": 2.701879841552685e-07, "loss": 0.7174, "step": 14030 }, { "epoch": 0.9, "grad_norm": 1.1190347701490624, "learning_rate": 2.698519553670992e-07, "loss": 0.5996, "step": 14031 }, { "epoch": 0.9, "grad_norm": 1.0702750090494124, "learning_rate": 2.695161298707538e-07, "loss": 0.677, "step": 14032 }, { "epoch": 0.9, "grad_norm": 1.699660439277586, "learning_rate": 2.6918050768066527e-07, "loss": 0.7078, "step": 14033 }, { "epoch": 0.9, "grad_norm": 1.0660777706802964, "learning_rate": 2.6884508881125814e-07, "loss": 0.6696, "step": 14034 }, { "epoch": 0.9, "grad_norm": 1.6729000940190948, "learning_rate": 2.68509873276947e-07, "loss": 0.7154, "step": 14035 }, { "epoch": 0.9, "grad_norm": 1.685138715447692, "learning_rate": 2.681748610921392e-07, "loss": 0.7687, "step": 14036 }, { "epoch": 0.9, "grad_norm": 2.8858242076637066, "learning_rate": 2.678400522712332e-07, "loss": 0.7862, "step": 14037 }, { "epoch": 0.9, "grad_norm": 1.4673103956487448, "learning_rate": 2.675054468286181e-07, "loss": 0.6372, "step": 14038 }, { "epoch": 0.9, "grad_norm": 1.985692703071633, "learning_rate": 2.6717104477867464e-07, "loss": 0.7136, "step": 14039 }, { "epoch": 0.9, "grad_norm": 1.3553603326574555, "learning_rate": 2.66836846135774e-07, "loss": 0.6545, "step": 14040 }, { "epoch": 0.9, "grad_norm": 1.9251862401199455, "learning_rate": 2.665028509142803e-07, "loss": 0.6811, "step": 14041 }, { "epoch": 0.9, "grad_norm": 1.875793670515512, "learning_rate": 2.661690591285482e-07, "loss": 0.7786, "step": 14042 }, { "epoch": 0.9, "grad_norm": 2.1496802416226273, "learning_rate": 2.6583547079292224e-07, "loss": 0.7353, "step": 14043 }, { "epoch": 0.9, "grad_norm": 2.035736178166111, "learning_rate": 2.6550208592173996e-07, "loss": 0.784, "step": 14044 }, { "epoch": 0.9, "grad_norm": 4.084209752796762, "learning_rate": 2.651689045293293e-07, "loss": 0.8397, "step": 14045 }, { "epoch": 0.9, "grad_norm": 1.9324502346600911, "learning_rate": 2.648359266300105e-07, "loss": 0.7965, "step": 14046 }, { "epoch": 0.9, "grad_norm": 1.943109011649701, "learning_rate": 2.645031522380925e-07, "loss": 0.8099, "step": 14047 }, { "epoch": 0.9, "grad_norm": 1.6399046172311307, "learning_rate": 2.6417058136787965e-07, "loss": 0.7761, "step": 14048 }, { "epoch": 0.9, "grad_norm": 1.7267732388748167, "learning_rate": 2.638382140336626e-07, "loss": 0.5578, "step": 14049 }, { "epoch": 0.9, "grad_norm": 1.6761845375381914, "learning_rate": 2.6350605024972884e-07, "loss": 0.7618, "step": 14050 }, { "epoch": 0.9, "grad_norm": 1.191718995473926, "learning_rate": 2.631740900303503e-07, "loss": 0.7805, "step": 14051 }, { "epoch": 0.9, "grad_norm": 1.2462233586456064, "learning_rate": 2.6284233338979724e-07, "loss": 0.7092, "step": 14052 }, { "epoch": 0.9, "grad_norm": 1.2742827987415792, "learning_rate": 2.6251078034232605e-07, "loss": 0.5475, "step": 14053 }, { "epoch": 0.9, "grad_norm": 1.1585666588670693, "learning_rate": 2.621794309021863e-07, "loss": 0.6158, "step": 14054 }, { "epoch": 0.9, "grad_norm": 2.31962109800193, "learning_rate": 2.6184828508362016e-07, "loss": 0.6402, "step": 14055 }, { "epoch": 0.9, "grad_norm": 1.1581656526247712, "learning_rate": 2.6151734290085715e-07, "loss": 0.7157, "step": 14056 }, { "epoch": 0.9, "grad_norm": 1.7845747838693136, "learning_rate": 2.6118660436812326e-07, "loss": 0.8084, "step": 14057 }, { "epoch": 0.9, "grad_norm": 1.5104319631109224, "learning_rate": 2.608560694996304e-07, "loss": 0.6253, "step": 14058 }, { "epoch": 0.9, "grad_norm": 1.8567761750233416, "learning_rate": 2.605257383095855e-07, "loss": 0.7199, "step": 14059 }, { "epoch": 0.9, "grad_norm": 1.4444085977264158, "learning_rate": 2.6019561081218614e-07, "loss": 0.6841, "step": 14060 }, { "epoch": 0.9, "grad_norm": 1.7427387234417122, "learning_rate": 2.5986568702161817e-07, "loss": 0.6975, "step": 14061 }, { "epoch": 0.9, "grad_norm": 1.9526272792209658, "learning_rate": 2.5953596695206475e-07, "loss": 0.7192, "step": 14062 }, { "epoch": 0.9, "grad_norm": 2.3303194742131725, "learning_rate": 2.5920645061769225e-07, "loss": 0.7312, "step": 14063 }, { "epoch": 0.9, "grad_norm": 1.6523494568761625, "learning_rate": 2.5887713803266656e-07, "loss": 0.787, "step": 14064 }, { "epoch": 0.9, "grad_norm": 1.836435712666897, "learning_rate": 2.585480292111375e-07, "loss": 0.6863, "step": 14065 }, { "epoch": 0.9, "grad_norm": 1.7001706439405333, "learning_rate": 2.5821912416725157e-07, "loss": 0.6814, "step": 14066 }, { "epoch": 0.9, "grad_norm": 1.5754871376642539, "learning_rate": 2.578904229151441e-07, "loss": 0.8007, "step": 14067 }, { "epoch": 0.9, "grad_norm": 1.2377964024626371, "learning_rate": 2.5756192546894156e-07, "loss": 0.609, "step": 14068 }, { "epoch": 0.9, "grad_norm": 1.6530154591892032, "learning_rate": 2.5723363184276207e-07, "loss": 0.6697, "step": 14069 }, { "epoch": 0.9, "grad_norm": 1.5790150044146094, "learning_rate": 2.5690554205071495e-07, "loss": 0.6374, "step": 14070 }, { "epoch": 0.9, "grad_norm": 2.1128438903666926, "learning_rate": 2.5657765610690223e-07, "loss": 0.7621, "step": 14071 }, { "epoch": 0.9, "grad_norm": 1.9943217653998169, "learning_rate": 2.562499740254126e-07, "loss": 0.7463, "step": 14072 }, { "epoch": 0.9, "grad_norm": 1.7055637939767454, "learning_rate": 2.559224958203321e-07, "loss": 0.74, "step": 14073 }, { "epoch": 0.9, "grad_norm": 1.7348600508628842, "learning_rate": 2.555952215057345e-07, "loss": 0.715, "step": 14074 }, { "epoch": 0.9, "grad_norm": 1.1553985769722268, "learning_rate": 2.552681510956845e-07, "loss": 0.5827, "step": 14075 }, { "epoch": 0.9, "grad_norm": 2.4447340577441654, "learning_rate": 2.549412846042393e-07, "loss": 0.7345, "step": 14076 }, { "epoch": 0.9, "grad_norm": 1.0603426844803254, "learning_rate": 2.546146220454471e-07, "loss": 0.6346, "step": 14077 }, { "epoch": 0.9, "grad_norm": 1.6645192815783523, "learning_rate": 2.542881634333472e-07, "loss": 0.7077, "step": 14078 }, { "epoch": 0.9, "grad_norm": 1.963772531608807, "learning_rate": 2.539619087819689e-07, "loss": 0.654, "step": 14079 }, { "epoch": 0.9, "grad_norm": 1.6487390802205322, "learning_rate": 2.5363585810533606e-07, "loss": 0.6743, "step": 14080 }, { "epoch": 0.9, "grad_norm": 2.527084593594654, "learning_rate": 2.533100114174597e-07, "loss": 0.7015, "step": 14081 }, { "epoch": 0.9, "grad_norm": 1.5549141855667956, "learning_rate": 2.529843687323452e-07, "loss": 0.7665, "step": 14082 }, { "epoch": 0.9, "grad_norm": 2.0696719400283095, "learning_rate": 2.526589300639881e-07, "loss": 0.6806, "step": 14083 }, { "epoch": 0.9, "grad_norm": 1.612348865478224, "learning_rate": 2.523336954263744e-07, "loss": 0.7761, "step": 14084 }, { "epoch": 0.9, "grad_norm": 1.7320245531642084, "learning_rate": 2.520086648334824e-07, "loss": 0.7228, "step": 14085 }, { "epoch": 0.9, "grad_norm": 1.0337471613163811, "learning_rate": 2.5168383829928087e-07, "loss": 0.5915, "step": 14086 }, { "epoch": 0.9, "grad_norm": 2.211655745683724, "learning_rate": 2.5135921583773036e-07, "loss": 0.7416, "step": 14087 }, { "epoch": 0.9, "grad_norm": 1.854666093445153, "learning_rate": 2.5103479746278193e-07, "loss": 0.6679, "step": 14088 }, { "epoch": 0.9, "grad_norm": 1.5432073043676686, "learning_rate": 2.507105831883794e-07, "loss": 0.7426, "step": 14089 }, { "epoch": 0.9, "grad_norm": 1.5837720237001, "learning_rate": 2.503865730284566e-07, "loss": 0.6092, "step": 14090 }, { "epoch": 0.9, "grad_norm": 1.563490939537356, "learning_rate": 2.5006276699693854e-07, "loss": 0.6808, "step": 14091 }, { "epoch": 0.9, "grad_norm": 1.0362995785133826, "learning_rate": 2.497391651077419e-07, "loss": 0.6039, "step": 14092 }, { "epoch": 0.9, "grad_norm": 1.7935668670393208, "learning_rate": 2.4941576737477435e-07, "loss": 0.7502, "step": 14093 }, { "epoch": 0.9, "grad_norm": 3.3889928803728586, "learning_rate": 2.490925738119343e-07, "loss": 0.7735, "step": 14094 }, { "epoch": 0.9, "grad_norm": 1.6190704066706973, "learning_rate": 2.487695844331128e-07, "loss": 0.6867, "step": 14095 }, { "epoch": 0.9, "grad_norm": 1.7475842006887548, "learning_rate": 2.4844679925218994e-07, "loss": 0.6467, "step": 14096 }, { "epoch": 0.9, "grad_norm": 1.8309782270027206, "learning_rate": 2.481242182830401e-07, "loss": 0.8053, "step": 14097 }, { "epoch": 0.9, "grad_norm": 1.7484301890665028, "learning_rate": 2.4780184153952615e-07, "loss": 0.6757, "step": 14098 }, { "epoch": 0.9, "grad_norm": 1.0236118596488266, "learning_rate": 2.4747966903550355e-07, "loss": 0.6734, "step": 14099 }, { "epoch": 0.9, "grad_norm": 2.1876789842700495, "learning_rate": 2.47157700784818e-07, "loss": 0.683, "step": 14100 }, { "epoch": 0.9, "grad_norm": 1.6224354210446932, "learning_rate": 2.4683593680130734e-07, "loss": 0.7007, "step": 14101 }, { "epoch": 0.9, "grad_norm": 2.4682964778230887, "learning_rate": 2.46514377098801e-07, "loss": 0.7737, "step": 14102 }, { "epoch": 0.9, "grad_norm": 1.024664448554201, "learning_rate": 2.461930216911168e-07, "loss": 0.5446, "step": 14103 }, { "epoch": 0.9, "grad_norm": 1.0266632770820303, "learning_rate": 2.458718705920693e-07, "loss": 0.6399, "step": 14104 }, { "epoch": 0.9, "grad_norm": 1.7294873009753942, "learning_rate": 2.455509238154574e-07, "loss": 0.7379, "step": 14105 }, { "epoch": 0.9, "grad_norm": 1.980237083933574, "learning_rate": 2.4523018137507736e-07, "loss": 0.7279, "step": 14106 }, { "epoch": 0.9, "grad_norm": 1.5443021973028186, "learning_rate": 2.4490964328471257e-07, "loss": 0.6995, "step": 14107 }, { "epoch": 0.9, "grad_norm": 1.6026176740263527, "learning_rate": 2.445893095581392e-07, "loss": 0.6872, "step": 14108 }, { "epoch": 0.9, "grad_norm": 1.6080732007398486, "learning_rate": 2.442691802091257e-07, "loss": 0.6793, "step": 14109 }, { "epoch": 0.9, "grad_norm": 1.582153908964028, "learning_rate": 2.4394925525142834e-07, "loss": 0.735, "step": 14110 }, { "epoch": 0.9, "grad_norm": 1.771362364217272, "learning_rate": 2.4362953469879934e-07, "loss": 0.7455, "step": 14111 }, { "epoch": 0.9, "grad_norm": 1.634119558767143, "learning_rate": 2.4331001856497784e-07, "loss": 0.5683, "step": 14112 }, { "epoch": 0.9, "grad_norm": 2.5566561420499387, "learning_rate": 2.429907068636972e-07, "loss": 0.6051, "step": 14113 }, { "epoch": 0.9, "grad_norm": 1.7616236432790515, "learning_rate": 2.4267159960867927e-07, "loss": 0.7971, "step": 14114 }, { "epoch": 0.9, "grad_norm": 2.3128022432818827, "learning_rate": 2.423526968136397e-07, "loss": 0.8756, "step": 14115 }, { "epoch": 0.9, "grad_norm": 1.7921751412415752, "learning_rate": 2.420339984922843e-07, "loss": 0.7233, "step": 14116 }, { "epoch": 0.9, "grad_norm": 1.84057478358065, "learning_rate": 2.4171550465830974e-07, "loss": 0.6481, "step": 14117 }, { "epoch": 0.9, "grad_norm": 1.9730398219076897, "learning_rate": 2.4139721532540405e-07, "loss": 0.715, "step": 14118 }, { "epoch": 0.9, "grad_norm": 1.078413114952818, "learning_rate": 2.4107913050724627e-07, "loss": 0.6282, "step": 14119 }, { "epoch": 0.9, "grad_norm": 0.9665517165511384, "learning_rate": 2.407612502175094e-07, "loss": 0.6914, "step": 14120 }, { "epoch": 0.9, "grad_norm": 6.036216128514503, "learning_rate": 2.4044357446985134e-07, "loss": 0.8251, "step": 14121 }, { "epoch": 0.9, "grad_norm": 1.0783100209488208, "learning_rate": 2.4012610327792895e-07, "loss": 0.7201, "step": 14122 }, { "epoch": 0.9, "grad_norm": 2.1248329297205912, "learning_rate": 2.398088366553836e-07, "loss": 0.7506, "step": 14123 }, { "epoch": 0.9, "grad_norm": 2.1702904578865785, "learning_rate": 2.3949177461585263e-07, "loss": 0.6903, "step": 14124 }, { "epoch": 0.9, "grad_norm": 1.616363025399802, "learning_rate": 2.3917491717296184e-07, "loss": 0.6446, "step": 14125 }, { "epoch": 0.9, "grad_norm": 1.8214182747921903, "learning_rate": 2.388582643403281e-07, "loss": 0.7789, "step": 14126 }, { "epoch": 0.9, "grad_norm": 1.1878914798245115, "learning_rate": 2.385418161315639e-07, "loss": 0.7179, "step": 14127 }, { "epoch": 0.9, "grad_norm": 1.610757841292635, "learning_rate": 2.38225572560265e-07, "loss": 0.6633, "step": 14128 }, { "epoch": 0.9, "grad_norm": 1.5047962296169133, "learning_rate": 2.3790953364002722e-07, "loss": 0.6416, "step": 14129 }, { "epoch": 0.9, "grad_norm": 1.7108033989738773, "learning_rate": 2.375936993844291e-07, "loss": 0.8119, "step": 14130 }, { "epoch": 0.9, "grad_norm": 1.782678939827931, "learning_rate": 2.372780698070476e-07, "loss": 0.748, "step": 14131 }, { "epoch": 0.9, "grad_norm": 1.9005674817318052, "learning_rate": 2.3696264492144684e-07, "loss": 0.7537, "step": 14132 }, { "epoch": 0.9, "grad_norm": 1.2496235396065762, "learning_rate": 2.3664742474118317e-07, "loss": 0.5911, "step": 14133 }, { "epoch": 0.9, "grad_norm": 1.6744943877129495, "learning_rate": 2.363324092798036e-07, "loss": 0.6483, "step": 14134 }, { "epoch": 0.9, "grad_norm": 1.0840558330643382, "learning_rate": 2.3601759855084672e-07, "loss": 0.6408, "step": 14135 }, { "epoch": 0.9, "grad_norm": 1.7228827022160609, "learning_rate": 2.3570299256784446e-07, "loss": 0.7869, "step": 14136 }, { "epoch": 0.9, "grad_norm": 2.0175886331224917, "learning_rate": 2.3538859134431547e-07, "loss": 0.7288, "step": 14137 }, { "epoch": 0.9, "grad_norm": 2.1213045466744953, "learning_rate": 2.350743948937728e-07, "loss": 0.7593, "step": 14138 }, { "epoch": 0.9, "grad_norm": 1.194445571848384, "learning_rate": 2.347604032297207e-07, "loss": 0.729, "step": 14139 }, { "epoch": 0.91, "grad_norm": 2.7615522559341623, "learning_rate": 2.3444661636565337e-07, "loss": 0.8176, "step": 14140 }, { "epoch": 0.91, "grad_norm": 1.5571285820881828, "learning_rate": 2.3413303431505606e-07, "loss": 0.6992, "step": 14141 }, { "epoch": 0.91, "grad_norm": 1.816774845585205, "learning_rate": 2.3381965709140696e-07, "loss": 0.764, "step": 14142 }, { "epoch": 0.91, "grad_norm": 1.0332640541672244, "learning_rate": 2.3350648470817416e-07, "loss": 0.6743, "step": 14143 }, { "epoch": 0.91, "grad_norm": 1.5280203027379473, "learning_rate": 2.331935171788158e-07, "loss": 0.6741, "step": 14144 }, { "epoch": 0.91, "grad_norm": 1.1027859039873587, "learning_rate": 2.3288075451678381e-07, "loss": 0.6374, "step": 14145 }, { "epoch": 0.91, "grad_norm": 1.1858433772630406, "learning_rate": 2.325681967355209e-07, "loss": 0.6428, "step": 14146 }, { "epoch": 0.91, "grad_norm": 1.7712106258182592, "learning_rate": 2.3225584384845845e-07, "loss": 0.7387, "step": 14147 }, { "epoch": 0.91, "grad_norm": 1.7617119225097029, "learning_rate": 2.3194369586902132e-07, "loss": 0.7741, "step": 14148 }, { "epoch": 0.91, "grad_norm": 1.6580136362254252, "learning_rate": 2.3163175281062545e-07, "loss": 0.6788, "step": 14149 }, { "epoch": 0.91, "grad_norm": 1.7883096063528994, "learning_rate": 2.313200146866773e-07, "loss": 0.6302, "step": 14150 }, { "epoch": 0.91, "grad_norm": 1.5121973533911126, "learning_rate": 2.31008481510574e-07, "loss": 0.6086, "step": 14151 }, { "epoch": 0.91, "grad_norm": 1.5497056387551893, "learning_rate": 2.3069715329570475e-07, "loss": 0.6639, "step": 14152 }, { "epoch": 0.91, "grad_norm": 1.0141912237029949, "learning_rate": 2.3038603005545113e-07, "loss": 0.6879, "step": 14153 }, { "epoch": 0.91, "grad_norm": 1.1851264110351307, "learning_rate": 2.3007511180318298e-07, "loss": 0.5889, "step": 14154 }, { "epoch": 0.91, "grad_norm": 1.5648147066342166, "learning_rate": 2.2976439855226406e-07, "loss": 0.757, "step": 14155 }, { "epoch": 0.91, "grad_norm": 1.6671730294266676, "learning_rate": 2.294538903160476e-07, "loss": 0.7753, "step": 14156 }, { "epoch": 0.91, "grad_norm": 2.7439211599267006, "learning_rate": 2.2914358710787842e-07, "loss": 0.6168, "step": 14157 }, { "epoch": 0.91, "grad_norm": 1.7256043407412778, "learning_rate": 2.2883348894109259e-07, "loss": 0.71, "step": 14158 }, { "epoch": 0.91, "grad_norm": 1.653945828905847, "learning_rate": 2.2852359582901828e-07, "loss": 0.7343, "step": 14159 }, { "epoch": 0.91, "grad_norm": 2.1220288552368736, "learning_rate": 2.2821390778497377e-07, "loss": 0.6736, "step": 14160 }, { "epoch": 0.91, "grad_norm": 1.6379326031845844, "learning_rate": 2.2790442482226727e-07, "loss": 0.751, "step": 14161 }, { "epoch": 0.91, "grad_norm": 1.5687723329173098, "learning_rate": 2.2759514695420204e-07, "loss": 0.739, "step": 14162 }, { "epoch": 0.91, "grad_norm": 1.7585471583577155, "learning_rate": 2.2728607419406967e-07, "loss": 0.7089, "step": 14163 }, { "epoch": 0.91, "grad_norm": 1.8477577724242407, "learning_rate": 2.2697720655515232e-07, "loss": 0.5899, "step": 14164 }, { "epoch": 0.91, "grad_norm": 1.0745776935061697, "learning_rate": 2.2666854405072546e-07, "loss": 0.6153, "step": 14165 }, { "epoch": 0.91, "grad_norm": 1.7962119745236185, "learning_rate": 2.2636008669405408e-07, "loss": 0.7537, "step": 14166 }, { "epoch": 0.91, "grad_norm": 1.769811323522843, "learning_rate": 2.2605183449839585e-07, "loss": 0.8086, "step": 14167 }, { "epoch": 0.91, "grad_norm": 2.503619306241382, "learning_rate": 2.2574378747699743e-07, "loss": 0.7376, "step": 14168 }, { "epoch": 0.91, "grad_norm": 2.4034510021617153, "learning_rate": 2.2543594564309989e-07, "loss": 0.6408, "step": 14169 }, { "epoch": 0.91, "grad_norm": 1.8093205745233991, "learning_rate": 2.2512830900993155e-07, "loss": 0.6956, "step": 14170 }, { "epoch": 0.91, "grad_norm": 1.9160845852779658, "learning_rate": 2.2482087759071625e-07, "loss": 0.7719, "step": 14171 }, { "epoch": 0.91, "grad_norm": 1.765851374624212, "learning_rate": 2.245136513986651e-07, "loss": 0.6401, "step": 14172 }, { "epoch": 0.91, "grad_norm": 1.565410512294702, "learning_rate": 2.2420663044698254e-07, "loss": 0.5738, "step": 14173 }, { "epoch": 0.91, "grad_norm": 1.2896279915446056, "learning_rate": 2.2389981474886413e-07, "loss": 0.72, "step": 14174 }, { "epoch": 0.91, "grad_norm": 1.5715766844540693, "learning_rate": 2.2359320431749432e-07, "loss": 0.7365, "step": 14175 }, { "epoch": 0.91, "grad_norm": 1.633150614191314, "learning_rate": 2.2328679916605368e-07, "loss": 0.681, "step": 14176 }, { "epoch": 0.91, "grad_norm": 2.7538125509100353, "learning_rate": 2.2298059930770833e-07, "loss": 0.6982, "step": 14177 }, { "epoch": 0.91, "grad_norm": 1.5514978443526448, "learning_rate": 2.2267460475561942e-07, "loss": 0.6174, "step": 14178 }, { "epoch": 0.91, "grad_norm": 1.1892901701277023, "learning_rate": 2.2236881552293642e-07, "loss": 0.686, "step": 14179 }, { "epoch": 0.91, "grad_norm": 1.6666968072948776, "learning_rate": 2.220632316228033e-07, "loss": 0.6002, "step": 14180 }, { "epoch": 0.91, "grad_norm": 1.891849696766758, "learning_rate": 2.2175785306835285e-07, "loss": 0.8096, "step": 14181 }, { "epoch": 0.91, "grad_norm": 1.7775150885711113, "learning_rate": 2.2145267987270847e-07, "loss": 0.7304, "step": 14182 }, { "epoch": 0.91, "grad_norm": 1.7019919450317602, "learning_rate": 2.21147712048988e-07, "loss": 0.7973, "step": 14183 }, { "epoch": 0.91, "grad_norm": 1.6450646762721086, "learning_rate": 2.2084294961029596e-07, "loss": 0.6748, "step": 14184 }, { "epoch": 0.91, "grad_norm": 1.5155807082205524, "learning_rate": 2.2053839256973297e-07, "loss": 0.6736, "step": 14185 }, { "epoch": 0.91, "grad_norm": 1.6624294457558801, "learning_rate": 2.2023404094038524e-07, "loss": 0.6946, "step": 14186 }, { "epoch": 0.91, "grad_norm": 1.9880418193824174, "learning_rate": 2.1992989473533566e-07, "loss": 0.7544, "step": 14187 }, { "epoch": 0.91, "grad_norm": 0.9940686071182969, "learning_rate": 2.1962595396765486e-07, "loss": 0.7267, "step": 14188 }, { "epoch": 0.91, "grad_norm": 1.738938588614021, "learning_rate": 2.1932221865040572e-07, "loss": 0.7229, "step": 14189 }, { "epoch": 0.91, "grad_norm": 1.4842896591885648, "learning_rate": 2.1901868879664168e-07, "loss": 0.6092, "step": 14190 }, { "epoch": 0.91, "grad_norm": 1.5757719921599478, "learning_rate": 2.187153644194079e-07, "loss": 0.7679, "step": 14191 }, { "epoch": 0.91, "grad_norm": 1.6922192358787458, "learning_rate": 2.1841224553174222e-07, "loss": 0.7613, "step": 14192 }, { "epoch": 0.91, "grad_norm": 1.8663632578457885, "learning_rate": 2.1810933214666928e-07, "loss": 0.6878, "step": 14193 }, { "epoch": 0.91, "grad_norm": 1.4732563922055617, "learning_rate": 2.178066242772092e-07, "loss": 0.6967, "step": 14194 }, { "epoch": 0.91, "grad_norm": 1.7079964207940652, "learning_rate": 2.1750412193637216e-07, "loss": 0.6826, "step": 14195 }, { "epoch": 0.91, "grad_norm": 1.963657140457414, "learning_rate": 2.1720182513715882e-07, "loss": 0.7731, "step": 14196 }, { "epoch": 0.91, "grad_norm": 2.5507201598324136, "learning_rate": 2.1689973389256047e-07, "loss": 0.6927, "step": 14197 }, { "epoch": 0.91, "grad_norm": 1.9845001362570263, "learning_rate": 2.1659784821556117e-07, "loss": 0.7771, "step": 14198 }, { "epoch": 0.91, "grad_norm": 1.659502162151792, "learning_rate": 2.1629616811913502e-07, "loss": 0.7235, "step": 14199 }, { "epoch": 0.91, "grad_norm": 1.5466287434712171, "learning_rate": 2.1599469361624714e-07, "loss": 0.7592, "step": 14200 }, { "epoch": 0.91, "grad_norm": 1.6919331187019302, "learning_rate": 2.1569342471985556e-07, "loss": 0.6885, "step": 14201 }, { "epoch": 0.91, "grad_norm": 1.751929520503751, "learning_rate": 2.1539236144290653e-07, "loss": 0.641, "step": 14202 }, { "epoch": 0.91, "grad_norm": 1.909190673125012, "learning_rate": 2.150915037983403e-07, "loss": 0.7065, "step": 14203 }, { "epoch": 0.91, "grad_norm": 1.4161273360907503, "learning_rate": 2.147908517990871e-07, "loss": 0.6877, "step": 14204 }, { "epoch": 0.91, "grad_norm": 1.5617261498476056, "learning_rate": 2.1449040545806766e-07, "loss": 0.6706, "step": 14205 }, { "epoch": 0.91, "grad_norm": 1.8090392297857938, "learning_rate": 2.14190164788195e-07, "loss": 0.7274, "step": 14206 }, { "epoch": 0.91, "grad_norm": 2.7901956174941476, "learning_rate": 2.1389012980237267e-07, "loss": 0.6876, "step": 14207 }, { "epoch": 0.91, "grad_norm": 1.8008528266036743, "learning_rate": 2.1359030051349538e-07, "loss": 0.8191, "step": 14208 }, { "epoch": 0.91, "grad_norm": 1.6887698465426424, "learning_rate": 2.1329067693444893e-07, "loss": 0.5244, "step": 14209 }, { "epoch": 0.91, "grad_norm": 1.491451467015401, "learning_rate": 2.1299125907811136e-07, "loss": 0.6649, "step": 14210 }, { "epoch": 0.91, "grad_norm": 1.7914349528422613, "learning_rate": 2.126920469573507e-07, "loss": 0.759, "step": 14211 }, { "epoch": 0.91, "grad_norm": 1.5153807086187554, "learning_rate": 2.1239304058502663e-07, "loss": 0.745, "step": 14212 }, { "epoch": 0.91, "grad_norm": 1.7021787207701933, "learning_rate": 2.1209423997398893e-07, "loss": 0.6668, "step": 14213 }, { "epoch": 0.91, "grad_norm": 1.6911403095411175, "learning_rate": 2.1179564513708062e-07, "loss": 0.754, "step": 14214 }, { "epoch": 0.91, "grad_norm": 1.750132217067929, "learning_rate": 2.1149725608713368e-07, "loss": 0.7106, "step": 14215 }, { "epoch": 0.91, "grad_norm": 2.018917366585863, "learning_rate": 2.1119907283697282e-07, "loss": 0.6886, "step": 14216 }, { "epoch": 0.91, "grad_norm": 1.7599246445800878, "learning_rate": 2.109010953994123e-07, "loss": 0.7338, "step": 14217 }, { "epoch": 0.91, "grad_norm": 1.7717149117512885, "learning_rate": 2.106033237872601e-07, "loss": 0.7137, "step": 14218 }, { "epoch": 0.91, "grad_norm": 1.2662226201260351, "learning_rate": 2.1030575801331332e-07, "loss": 0.5927, "step": 14219 }, { "epoch": 0.91, "grad_norm": 1.729056490922079, "learning_rate": 2.1000839809036055e-07, "loss": 0.8019, "step": 14220 }, { "epoch": 0.91, "grad_norm": 1.8051262712987424, "learning_rate": 2.097112440311816e-07, "loss": 0.7517, "step": 14221 }, { "epoch": 0.91, "grad_norm": 1.1713206819601962, "learning_rate": 2.0941429584854788e-07, "loss": 0.6115, "step": 14222 }, { "epoch": 0.91, "grad_norm": 1.5097974343761362, "learning_rate": 2.0911755355522089e-07, "loss": 0.7879, "step": 14223 }, { "epoch": 0.91, "grad_norm": 1.7555408558882224, "learning_rate": 2.0882101716395376e-07, "loss": 0.7856, "step": 14224 }, { "epoch": 0.91, "grad_norm": 1.740270836327751, "learning_rate": 2.0852468668749294e-07, "loss": 0.7904, "step": 14225 }, { "epoch": 0.91, "grad_norm": 1.0716899928029753, "learning_rate": 2.0822856213857158e-07, "loss": 0.6723, "step": 14226 }, { "epoch": 0.91, "grad_norm": 1.6783196550547916, "learning_rate": 2.0793264352991894e-07, "loss": 0.7151, "step": 14227 }, { "epoch": 0.91, "grad_norm": 1.1378036529863904, "learning_rate": 2.0763693087425095e-07, "loss": 0.6864, "step": 14228 }, { "epoch": 0.91, "grad_norm": 1.786302623227801, "learning_rate": 2.0734142418427806e-07, "loss": 0.6874, "step": 14229 }, { "epoch": 0.91, "grad_norm": 1.8107388018886255, "learning_rate": 2.0704612347269948e-07, "loss": 0.8044, "step": 14230 }, { "epoch": 0.91, "grad_norm": 1.8663070573635243, "learning_rate": 2.067510287522073e-07, "loss": 0.7875, "step": 14231 }, { "epoch": 0.91, "grad_norm": 1.7919403888397298, "learning_rate": 2.064561400354842e-07, "loss": 0.7489, "step": 14232 }, { "epoch": 0.91, "grad_norm": 2.0341978317566776, "learning_rate": 2.0616145733520276e-07, "loss": 0.7195, "step": 14233 }, { "epoch": 0.91, "grad_norm": 1.7795710400620093, "learning_rate": 2.058669806640301e-07, "loss": 0.7023, "step": 14234 }, { "epoch": 0.91, "grad_norm": 1.5210970037756573, "learning_rate": 2.0557271003461942e-07, "loss": 0.827, "step": 14235 }, { "epoch": 0.91, "grad_norm": 1.81818626902686, "learning_rate": 2.052786454596195e-07, "loss": 0.604, "step": 14236 }, { "epoch": 0.91, "grad_norm": 1.9935158494788099, "learning_rate": 2.0498478695166857e-07, "loss": 0.7567, "step": 14237 }, { "epoch": 0.91, "grad_norm": 1.7826307211518653, "learning_rate": 2.046911345233954e-07, "loss": 0.7221, "step": 14238 }, { "epoch": 0.91, "grad_norm": 2.1044417271020235, "learning_rate": 2.0439768818742156e-07, "loss": 0.6588, "step": 14239 }, { "epoch": 0.91, "grad_norm": 1.6233590662790411, "learning_rate": 2.0410444795635697e-07, "loss": 0.6701, "step": 14240 }, { "epoch": 0.91, "grad_norm": 1.6155518535380418, "learning_rate": 2.0381141384280711e-07, "loss": 0.6998, "step": 14241 }, { "epoch": 0.91, "grad_norm": 1.8813877137775443, "learning_rate": 2.0351858585936356e-07, "loss": 0.7005, "step": 14242 }, { "epoch": 0.91, "grad_norm": 1.7808742057807863, "learning_rate": 2.0322596401861294e-07, "loss": 0.7831, "step": 14243 }, { "epoch": 0.91, "grad_norm": 1.0728583644722192, "learning_rate": 2.0293354833313018e-07, "loss": 0.6807, "step": 14244 }, { "epoch": 0.91, "grad_norm": 1.805354746994695, "learning_rate": 2.026413388154841e-07, "loss": 0.7373, "step": 14245 }, { "epoch": 0.91, "grad_norm": 1.6252912574561311, "learning_rate": 2.0234933547823242e-07, "loss": 0.7489, "step": 14246 }, { "epoch": 0.91, "grad_norm": 1.794309862741875, "learning_rate": 2.020575383339246e-07, "loss": 0.8189, "step": 14247 }, { "epoch": 0.91, "grad_norm": 1.5061980938272304, "learning_rate": 2.0176594739510336e-07, "loss": 0.8489, "step": 14248 }, { "epoch": 0.91, "grad_norm": 1.9006319791247965, "learning_rate": 2.0147456267429754e-07, "loss": 0.695, "step": 14249 }, { "epoch": 0.91, "grad_norm": 1.7335555824164974, "learning_rate": 2.0118338418403382e-07, "loss": 0.7272, "step": 14250 }, { "epoch": 0.91, "grad_norm": 1.6359147338685758, "learning_rate": 2.0089241193682273e-07, "loss": 0.7405, "step": 14251 }, { "epoch": 0.91, "grad_norm": 1.6911809796153805, "learning_rate": 2.0060164594517206e-07, "loss": 0.7512, "step": 14252 }, { "epoch": 0.91, "grad_norm": 1.5751857040895654, "learning_rate": 2.003110862215779e-07, "loss": 0.638, "step": 14253 }, { "epoch": 0.91, "grad_norm": 2.361996525828096, "learning_rate": 2.000207327785275e-07, "loss": 0.7159, "step": 14254 }, { "epoch": 0.91, "grad_norm": 1.6132766782142585, "learning_rate": 1.9973058562850033e-07, "loss": 0.6612, "step": 14255 }, { "epoch": 0.91, "grad_norm": 1.9958484062932818, "learning_rate": 1.994406447839653e-07, "loss": 0.6319, "step": 14256 }, { "epoch": 0.91, "grad_norm": 2.0053282169709896, "learning_rate": 1.9915091025738464e-07, "loss": 0.8467, "step": 14257 }, { "epoch": 0.91, "grad_norm": 1.7201517271220312, "learning_rate": 1.9886138206120896e-07, "loss": 0.7749, "step": 14258 }, { "epoch": 0.91, "grad_norm": 1.3301378898384766, "learning_rate": 1.985720602078828e-07, "loss": 0.6577, "step": 14259 }, { "epoch": 0.91, "grad_norm": 1.7436731689729421, "learning_rate": 1.9828294470984054e-07, "loss": 0.7224, "step": 14260 }, { "epoch": 0.91, "grad_norm": 1.6438522610148778, "learning_rate": 1.9799403557950793e-07, "loss": 0.7655, "step": 14261 }, { "epoch": 0.91, "grad_norm": 1.5024539053926589, "learning_rate": 1.977053328293005e-07, "loss": 0.6727, "step": 14262 }, { "epoch": 0.91, "grad_norm": 2.0773696429480566, "learning_rate": 1.9741683647162724e-07, "loss": 0.6518, "step": 14263 }, { "epoch": 0.91, "grad_norm": 1.659959731562535, "learning_rate": 1.9712854651888713e-07, "loss": 0.7598, "step": 14264 }, { "epoch": 0.91, "grad_norm": 1.0269249292625218, "learning_rate": 1.9684046298346858e-07, "loss": 0.5588, "step": 14265 }, { "epoch": 0.91, "grad_norm": 1.630441557104251, "learning_rate": 1.9655258587775505e-07, "loss": 0.7471, "step": 14266 }, { "epoch": 0.91, "grad_norm": 1.7417176536758607, "learning_rate": 1.9626491521411773e-07, "loss": 0.713, "step": 14267 }, { "epoch": 0.91, "grad_norm": 3.1380132072024667, "learning_rate": 1.959774510049206e-07, "loss": 0.6583, "step": 14268 }, { "epoch": 0.91, "grad_norm": 1.3117833369673588, "learning_rate": 1.956901932625177e-07, "loss": 0.5984, "step": 14269 }, { "epoch": 0.91, "grad_norm": 1.7462170712048222, "learning_rate": 1.9540314199925525e-07, "loss": 0.5969, "step": 14270 }, { "epoch": 0.91, "grad_norm": 1.798615918242361, "learning_rate": 1.9511629722747004e-07, "loss": 0.76, "step": 14271 }, { "epoch": 0.91, "grad_norm": 1.775132931737496, "learning_rate": 1.9482965895948947e-07, "loss": 0.7221, "step": 14272 }, { "epoch": 0.91, "grad_norm": 1.5785957473055148, "learning_rate": 1.9454322720763364e-07, "loss": 0.7555, "step": 14273 }, { "epoch": 0.91, "grad_norm": 1.5997103569980087, "learning_rate": 1.9425700198421104e-07, "loss": 0.7087, "step": 14274 }, { "epoch": 0.91, "grad_norm": 2.0210615725431524, "learning_rate": 1.939709833015252e-07, "loss": 0.8217, "step": 14275 }, { "epoch": 0.91, "grad_norm": 1.9900164492809447, "learning_rate": 1.9368517117186737e-07, "loss": 0.8402, "step": 14276 }, { "epoch": 0.91, "grad_norm": 1.7020611575687052, "learning_rate": 1.9339956560752216e-07, "loss": 0.7708, "step": 14277 }, { "epoch": 0.91, "grad_norm": 1.8129022961391097, "learning_rate": 1.9311416662076253e-07, "loss": 0.7975, "step": 14278 }, { "epoch": 0.91, "grad_norm": 1.6242684868663217, "learning_rate": 1.9282897422385593e-07, "loss": 0.6801, "step": 14279 }, { "epoch": 0.91, "grad_norm": 1.8169898503667574, "learning_rate": 1.9254398842905918e-07, "loss": 0.7583, "step": 14280 }, { "epoch": 0.91, "grad_norm": 1.89166479493689, "learning_rate": 1.9225920924861917e-07, "loss": 0.6953, "step": 14281 }, { "epoch": 0.91, "grad_norm": 1.1762106808116473, "learning_rate": 1.9197463669477557e-07, "loss": 0.5639, "step": 14282 }, { "epoch": 0.91, "grad_norm": 2.0095255496264643, "learning_rate": 1.9169027077975965e-07, "loss": 0.7521, "step": 14283 }, { "epoch": 0.91, "grad_norm": 1.542717910920117, "learning_rate": 1.9140611151579224e-07, "loss": 0.7872, "step": 14284 }, { "epoch": 0.91, "grad_norm": 1.6469009837727326, "learning_rate": 1.9112215891508635e-07, "loss": 0.6677, "step": 14285 }, { "epoch": 0.91, "grad_norm": 1.8457549744225477, "learning_rate": 1.908384129898444e-07, "loss": 0.6994, "step": 14286 }, { "epoch": 0.91, "grad_norm": 2.0845849855914556, "learning_rate": 1.905548737522628e-07, "loss": 0.784, "step": 14287 }, { "epoch": 0.91, "grad_norm": 1.5404195653438426, "learning_rate": 1.9027154121452618e-07, "loss": 0.6457, "step": 14288 }, { "epoch": 0.91, "grad_norm": 1.672289897871524, "learning_rate": 1.899884153888115e-07, "loss": 0.8009, "step": 14289 }, { "epoch": 0.91, "grad_norm": 1.74240542931777, "learning_rate": 1.8970549628728908e-07, "loss": 0.6891, "step": 14290 }, { "epoch": 0.91, "grad_norm": 1.4254714698257704, "learning_rate": 1.8942278392211466e-07, "loss": 0.7049, "step": 14291 }, { "epoch": 0.91, "grad_norm": 1.6479658585352155, "learning_rate": 1.891402783054419e-07, "loss": 0.7731, "step": 14292 }, { "epoch": 0.91, "grad_norm": 1.7760767742462324, "learning_rate": 1.8885797944941052e-07, "loss": 0.8108, "step": 14293 }, { "epoch": 0.91, "grad_norm": 1.9752979407540519, "learning_rate": 1.8857588736615418e-07, "loss": 0.7416, "step": 14294 }, { "epoch": 0.91, "grad_norm": 1.574834112201395, "learning_rate": 1.8829400206779536e-07, "loss": 0.7881, "step": 14295 }, { "epoch": 0.92, "grad_norm": 1.7491175702462083, "learning_rate": 1.8801232356644938e-07, "loss": 0.7254, "step": 14296 }, { "epoch": 0.92, "grad_norm": 1.5802385018355578, "learning_rate": 1.8773085187422325e-07, "loss": 0.6888, "step": 14297 }, { "epoch": 0.92, "grad_norm": 2.1160639940905828, "learning_rate": 1.8744958700321225e-07, "loss": 0.7231, "step": 14298 }, { "epoch": 0.92, "grad_norm": 1.6810690139708162, "learning_rate": 1.8716852896550618e-07, "loss": 0.7575, "step": 14299 }, { "epoch": 0.92, "grad_norm": 1.4832611117328773, "learning_rate": 1.8688767777318262e-07, "loss": 0.6229, "step": 14300 }, { "epoch": 0.92, "grad_norm": 1.5507966308584247, "learning_rate": 1.8660703343831354e-07, "loss": 0.729, "step": 14301 }, { "epoch": 0.92, "grad_norm": 1.8893065452397726, "learning_rate": 1.863265959729599e-07, "loss": 0.6914, "step": 14302 }, { "epoch": 0.92, "grad_norm": 1.8476327200126097, "learning_rate": 1.8604636538917365e-07, "loss": 0.6954, "step": 14303 }, { "epoch": 0.92, "grad_norm": 2.2198544687529855, "learning_rate": 1.8576634169900022e-07, "loss": 0.6293, "step": 14304 }, { "epoch": 0.92, "grad_norm": 1.6958139006312376, "learning_rate": 1.8548652491447217e-07, "loss": 0.6898, "step": 14305 }, { "epoch": 0.92, "grad_norm": 1.812578921324164, "learning_rate": 1.8520691504761769e-07, "loss": 0.7783, "step": 14306 }, { "epoch": 0.92, "grad_norm": 1.8408695289620927, "learning_rate": 1.8492751211045156e-07, "loss": 0.7629, "step": 14307 }, { "epoch": 0.92, "grad_norm": 2.1127521582639175, "learning_rate": 1.8464831611498367e-07, "loss": 0.6592, "step": 14308 }, { "epoch": 0.92, "grad_norm": 1.6578872971264782, "learning_rate": 1.8436932707321276e-07, "loss": 0.683, "step": 14309 }, { "epoch": 0.92, "grad_norm": 1.7784917520180963, "learning_rate": 1.840905449971292e-07, "loss": 0.6966, "step": 14310 }, { "epoch": 0.92, "grad_norm": 1.0495174231700906, "learning_rate": 1.8381196989871453e-07, "loss": 0.7576, "step": 14311 }, { "epoch": 0.92, "grad_norm": 1.7692155582389497, "learning_rate": 1.835336017899403e-07, "loss": 0.74, "step": 14312 }, { "epoch": 0.92, "grad_norm": 3.503955623015971, "learning_rate": 1.8325544068277244e-07, "loss": 0.6342, "step": 14313 }, { "epoch": 0.92, "grad_norm": 1.8242601572721187, "learning_rate": 1.8297748658916314e-07, "loss": 0.7893, "step": 14314 }, { "epoch": 0.92, "grad_norm": 1.5865591697558414, "learning_rate": 1.8269973952106057e-07, "loss": 0.7131, "step": 14315 }, { "epoch": 0.92, "grad_norm": 1.4660710727584287, "learning_rate": 1.8242219949039962e-07, "loss": 0.8887, "step": 14316 }, { "epoch": 0.92, "grad_norm": 1.7208819311720513, "learning_rate": 1.8214486650911022e-07, "loss": 0.6983, "step": 14317 }, { "epoch": 0.92, "grad_norm": 1.5814194731209958, "learning_rate": 1.8186774058911005e-07, "loss": 0.7209, "step": 14318 }, { "epoch": 0.92, "grad_norm": 2.012029731197048, "learning_rate": 1.8159082174231012e-07, "loss": 0.7305, "step": 14319 }, { "epoch": 0.92, "grad_norm": 1.9836572702982134, "learning_rate": 1.8131410998061261e-07, "loss": 0.698, "step": 14320 }, { "epoch": 0.92, "grad_norm": 1.5073531139395435, "learning_rate": 1.8103760531590851e-07, "loss": 0.7194, "step": 14321 }, { "epoch": 0.92, "grad_norm": 1.738574437522957, "learning_rate": 1.8076130776008283e-07, "loss": 0.6544, "step": 14322 }, { "epoch": 0.92, "grad_norm": 0.9604521005649149, "learning_rate": 1.8048521732500878e-07, "loss": 0.6072, "step": 14323 }, { "epoch": 0.92, "grad_norm": 1.7467465188281692, "learning_rate": 1.8020933402255304e-07, "loss": 0.7704, "step": 14324 }, { "epoch": 0.92, "grad_norm": 1.4672010444700465, "learning_rate": 1.7993365786457217e-07, "loss": 0.7719, "step": 14325 }, { "epoch": 0.92, "grad_norm": 2.0174431590927475, "learning_rate": 1.7965818886291508e-07, "loss": 0.709, "step": 14326 }, { "epoch": 0.92, "grad_norm": 1.4475878066842114, "learning_rate": 1.793829270294195e-07, "loss": 0.7889, "step": 14327 }, { "epoch": 0.92, "grad_norm": 1.843808121074727, "learning_rate": 1.7910787237591598e-07, "loss": 0.7301, "step": 14328 }, { "epoch": 0.92, "grad_norm": 2.7742543747458557, "learning_rate": 1.7883302491422673e-07, "loss": 0.7692, "step": 14329 }, { "epoch": 0.92, "grad_norm": 1.6049405357160837, "learning_rate": 1.7855838465616283e-07, "loss": 0.6824, "step": 14330 }, { "epoch": 0.92, "grad_norm": 1.6200155938394605, "learning_rate": 1.782839516135282e-07, "loss": 0.5794, "step": 14331 }, { "epoch": 0.92, "grad_norm": 1.6238929730826102, "learning_rate": 1.7800972579811783e-07, "loss": 0.7151, "step": 14332 }, { "epoch": 0.92, "grad_norm": 1.7420096695682872, "learning_rate": 1.777357072217173e-07, "loss": 0.7244, "step": 14333 }, { "epoch": 0.92, "grad_norm": 1.6588314258013566, "learning_rate": 1.7746189589610275e-07, "loss": 0.7706, "step": 14334 }, { "epoch": 0.92, "grad_norm": 1.308282784620058, "learning_rate": 1.7718829183304254e-07, "loss": 0.6283, "step": 14335 }, { "epoch": 0.92, "grad_norm": 1.6728483310963402, "learning_rate": 1.769148950442956e-07, "loss": 0.8076, "step": 14336 }, { "epoch": 0.92, "grad_norm": 1.1360447210975744, "learning_rate": 1.766417055416114e-07, "loss": 0.63, "step": 14337 }, { "epoch": 0.92, "grad_norm": 1.6357380297396968, "learning_rate": 1.7636872333673116e-07, "loss": 0.7128, "step": 14338 }, { "epoch": 0.92, "grad_norm": 2.5125855860761876, "learning_rate": 1.7609594844138767e-07, "loss": 0.7123, "step": 14339 }, { "epoch": 0.92, "grad_norm": 1.8060171174904718, "learning_rate": 1.7582338086730377e-07, "loss": 0.7445, "step": 14340 }, { "epoch": 0.92, "grad_norm": 1.6613740554473695, "learning_rate": 1.7555102062619454e-07, "loss": 0.7344, "step": 14341 }, { "epoch": 0.92, "grad_norm": 1.566808327754577, "learning_rate": 1.7527886772976456e-07, "loss": 0.6842, "step": 14342 }, { "epoch": 0.92, "grad_norm": 1.6123498725494796, "learning_rate": 1.7500692218971048e-07, "loss": 0.5607, "step": 14343 }, { "epoch": 0.92, "grad_norm": 1.7645256939477303, "learning_rate": 1.7473518401772026e-07, "loss": 0.6729, "step": 14344 }, { "epoch": 0.92, "grad_norm": 1.6681938862340726, "learning_rate": 1.7446365322547231e-07, "loss": 0.7532, "step": 14345 }, { "epoch": 0.92, "grad_norm": 1.8049739404557, "learning_rate": 1.7419232982463785e-07, "loss": 0.7388, "step": 14346 }, { "epoch": 0.92, "grad_norm": 1.5296168630443756, "learning_rate": 1.7392121382687533e-07, "loss": 0.6477, "step": 14347 }, { "epoch": 0.92, "grad_norm": 6.888439844423762, "learning_rate": 1.736503052438382e-07, "loss": 0.6379, "step": 14348 }, { "epoch": 0.92, "grad_norm": 1.6310673826510043, "learning_rate": 1.733796040871699e-07, "loss": 0.5947, "step": 14349 }, { "epoch": 0.92, "grad_norm": 2.001341269258956, "learning_rate": 1.7310911036850398e-07, "loss": 0.8471, "step": 14350 }, { "epoch": 0.92, "grad_norm": 2.029252088778375, "learning_rate": 1.7283882409946552e-07, "loss": 0.6942, "step": 14351 }, { "epoch": 0.92, "grad_norm": 2.0881939170242863, "learning_rate": 1.7256874529167134e-07, "loss": 0.7611, "step": 14352 }, { "epoch": 0.92, "grad_norm": 1.560169919699116, "learning_rate": 1.7229887395672884e-07, "loss": 0.6599, "step": 14353 }, { "epoch": 0.92, "grad_norm": 1.4897943234781434, "learning_rate": 1.7202921010623596e-07, "loss": 0.574, "step": 14354 }, { "epoch": 0.92, "grad_norm": 1.2892926197589922, "learning_rate": 1.7175975375178343e-07, "loss": 0.7081, "step": 14355 }, { "epoch": 0.92, "grad_norm": 1.8466118912202765, "learning_rate": 1.714905049049498e-07, "loss": 0.956, "step": 14356 }, { "epoch": 0.92, "grad_norm": 1.7237618864607989, "learning_rate": 1.7122146357730908e-07, "loss": 0.6719, "step": 14357 }, { "epoch": 0.92, "grad_norm": 1.960805930407823, "learning_rate": 1.7095262978042316e-07, "loss": 0.6646, "step": 14358 }, { "epoch": 0.92, "grad_norm": 2.0896342047930836, "learning_rate": 1.7068400352584613e-07, "loss": 0.6909, "step": 14359 }, { "epoch": 0.92, "grad_norm": 1.7370802394606857, "learning_rate": 1.7041558482512265e-07, "loss": 0.7433, "step": 14360 }, { "epoch": 0.92, "grad_norm": 1.8207313474987308, "learning_rate": 1.7014737368978795e-07, "loss": 0.7784, "step": 14361 }, { "epoch": 0.92, "grad_norm": 1.3490093419665028, "learning_rate": 1.6987937013137224e-07, "loss": 0.6835, "step": 14362 }, { "epoch": 0.92, "grad_norm": 1.2262203765933066, "learning_rate": 1.6961157416139018e-07, "loss": 0.6652, "step": 14363 }, { "epoch": 0.92, "grad_norm": 1.6612604034868839, "learning_rate": 1.693439857913537e-07, "loss": 0.6849, "step": 14364 }, { "epoch": 0.92, "grad_norm": 1.1629036253409775, "learning_rate": 1.690766050327608e-07, "loss": 0.6406, "step": 14365 }, { "epoch": 0.92, "grad_norm": 2.0702169227780858, "learning_rate": 1.6880943189710508e-07, "loss": 0.7785, "step": 14366 }, { "epoch": 0.92, "grad_norm": 2.0666548285885695, "learning_rate": 1.685424663958679e-07, "loss": 0.6826, "step": 14367 }, { "epoch": 0.92, "grad_norm": 1.7513516788582364, "learning_rate": 1.6827570854052345e-07, "loss": 0.6202, "step": 14368 }, { "epoch": 0.92, "grad_norm": 1.2296712797964275, "learning_rate": 1.680091583425364e-07, "loss": 0.622, "step": 14369 }, { "epoch": 0.92, "grad_norm": 1.6826133286378375, "learning_rate": 1.677428158133615e-07, "loss": 0.7475, "step": 14370 }, { "epoch": 0.92, "grad_norm": 2.0053334111879026, "learning_rate": 1.674766809644479e-07, "loss": 0.75, "step": 14371 }, { "epoch": 0.92, "grad_norm": 1.6040288507844052, "learning_rate": 1.672107538072304e-07, "loss": 0.7196, "step": 14372 }, { "epoch": 0.92, "grad_norm": 1.9363078535907217, "learning_rate": 1.6694503435314035e-07, "loss": 0.8146, "step": 14373 }, { "epoch": 0.92, "grad_norm": 1.8439556151715617, "learning_rate": 1.6667952261359754e-07, "loss": 0.7908, "step": 14374 }, { "epoch": 0.92, "grad_norm": 1.7329997702346092, "learning_rate": 1.6641421860001172e-07, "loss": 0.7547, "step": 14375 }, { "epoch": 0.92, "grad_norm": 1.6189388384143077, "learning_rate": 1.6614912232378656e-07, "loss": 0.7383, "step": 14376 }, { "epoch": 0.92, "grad_norm": 1.6009879742961448, "learning_rate": 1.6588423379631458e-07, "loss": 0.6498, "step": 14377 }, { "epoch": 0.92, "grad_norm": 2.630748947448914, "learning_rate": 1.6561955302898114e-07, "loss": 0.6682, "step": 14378 }, { "epoch": 0.92, "grad_norm": 1.1668243170448178, "learning_rate": 1.6535508003315937e-07, "loss": 0.6695, "step": 14379 }, { "epoch": 0.92, "grad_norm": 1.0601444418688544, "learning_rate": 1.650908148202185e-07, "loss": 0.6819, "step": 14380 }, { "epoch": 0.92, "grad_norm": 1.804528027821485, "learning_rate": 1.6482675740151444e-07, "loss": 0.6196, "step": 14381 }, { "epoch": 0.92, "grad_norm": 1.6530647798713582, "learning_rate": 1.6456290778839645e-07, "loss": 0.7496, "step": 14382 }, { "epoch": 0.92, "grad_norm": 1.1349462805023696, "learning_rate": 1.642992659922038e-07, "loss": 0.6404, "step": 14383 }, { "epoch": 0.92, "grad_norm": 1.754396702463151, "learning_rate": 1.6403583202426689e-07, "loss": 0.8532, "step": 14384 }, { "epoch": 0.92, "grad_norm": 1.8792668853268526, "learning_rate": 1.6377260589590939e-07, "loss": 0.7699, "step": 14385 }, { "epoch": 0.92, "grad_norm": 2.390738515346633, "learning_rate": 1.6350958761844226e-07, "loss": 0.7689, "step": 14386 }, { "epoch": 0.92, "grad_norm": 1.958889993995743, "learning_rate": 1.632467772031704e-07, "loss": 0.6346, "step": 14387 }, { "epoch": 0.92, "grad_norm": 1.6321747556192294, "learning_rate": 1.629841746613886e-07, "loss": 0.7821, "step": 14388 }, { "epoch": 0.92, "grad_norm": 1.580673663851511, "learning_rate": 1.6272178000438288e-07, "loss": 0.6804, "step": 14389 }, { "epoch": 0.92, "grad_norm": 1.7115097166260662, "learning_rate": 1.6245959324343086e-07, "loss": 0.7645, "step": 14390 }, { "epoch": 0.92, "grad_norm": 1.0807012271983532, "learning_rate": 1.621976143898002e-07, "loss": 0.5678, "step": 14391 }, { "epoch": 0.92, "grad_norm": 1.677497673767448, "learning_rate": 1.6193584345475078e-07, "loss": 0.7663, "step": 14392 }, { "epoch": 0.92, "grad_norm": 1.5684773207722753, "learning_rate": 1.6167428044953138e-07, "loss": 0.8355, "step": 14393 }, { "epoch": 0.92, "grad_norm": 1.4749478386120543, "learning_rate": 1.6141292538538634e-07, "loss": 0.7783, "step": 14394 }, { "epoch": 0.92, "grad_norm": 1.576925701400309, "learning_rate": 1.6115177827354556e-07, "loss": 0.6038, "step": 14395 }, { "epoch": 0.92, "grad_norm": 4.432582339504007, "learning_rate": 1.6089083912523396e-07, "loss": 0.7653, "step": 14396 }, { "epoch": 0.92, "grad_norm": 1.5567551033943186, "learning_rate": 1.6063010795166533e-07, "loss": 0.6502, "step": 14397 }, { "epoch": 0.92, "grad_norm": 2.009660657492017, "learning_rate": 1.6036958476404574e-07, "loss": 0.6863, "step": 14398 }, { "epoch": 0.92, "grad_norm": 1.1382856438028197, "learning_rate": 1.6010926957357232e-07, "loss": 0.6819, "step": 14399 }, { "epoch": 0.92, "grad_norm": 1.662082814765922, "learning_rate": 1.598491623914322e-07, "loss": 0.6921, "step": 14400 }, { "epoch": 0.92, "grad_norm": 2.1406914581158687, "learning_rate": 1.5958926322880487e-07, "loss": 0.6428, "step": 14401 }, { "epoch": 0.92, "grad_norm": 1.1333796830406149, "learning_rate": 1.5932957209685906e-07, "loss": 0.5894, "step": 14402 }, { "epoch": 0.92, "grad_norm": 1.8785214414565856, "learning_rate": 1.590700890067576e-07, "loss": 0.7351, "step": 14403 }, { "epoch": 0.92, "grad_norm": 1.9755892995762236, "learning_rate": 1.5881081396965093e-07, "loss": 0.8523, "step": 14404 }, { "epoch": 0.92, "grad_norm": 1.4312527322519055, "learning_rate": 1.5855174699668298e-07, "loss": 0.662, "step": 14405 }, { "epoch": 0.92, "grad_norm": 1.6094832445006975, "learning_rate": 1.5829288809898757e-07, "loss": 0.6281, "step": 14406 }, { "epoch": 0.92, "grad_norm": 2.527474029422201, "learning_rate": 1.580342372876903e-07, "loss": 0.7768, "step": 14407 }, { "epoch": 0.92, "grad_norm": 1.5653691042467441, "learning_rate": 1.577757945739067e-07, "loss": 0.6495, "step": 14408 }, { "epoch": 0.92, "grad_norm": 1.8576954841952467, "learning_rate": 1.5751755996874452e-07, "loss": 0.7305, "step": 14409 }, { "epoch": 0.92, "grad_norm": 1.6585359675075082, "learning_rate": 1.5725953348330214e-07, "loss": 0.8181, "step": 14410 }, { "epoch": 0.92, "grad_norm": 1.818876788859819, "learning_rate": 1.5700171512866956e-07, "loss": 0.613, "step": 14411 }, { "epoch": 0.92, "grad_norm": 1.7290564756194182, "learning_rate": 1.5674410491592573e-07, "loss": 0.9293, "step": 14412 }, { "epoch": 0.92, "grad_norm": 1.6066356074986847, "learning_rate": 1.5648670285614397e-07, "loss": 0.723, "step": 14413 }, { "epoch": 0.92, "grad_norm": 1.6127686213084529, "learning_rate": 1.5622950896038603e-07, "loss": 0.6319, "step": 14414 }, { "epoch": 0.92, "grad_norm": 1.6803599044168909, "learning_rate": 1.559725232397058e-07, "loss": 0.7603, "step": 14415 }, { "epoch": 0.92, "grad_norm": 1.876639934424519, "learning_rate": 1.557157457051478e-07, "loss": 0.8205, "step": 14416 }, { "epoch": 0.92, "grad_norm": 1.508690751789262, "learning_rate": 1.5545917636774655e-07, "loss": 0.7068, "step": 14417 }, { "epoch": 0.92, "grad_norm": 1.7654748485923146, "learning_rate": 1.5520281523853208e-07, "loss": 0.6802, "step": 14418 }, { "epoch": 0.92, "grad_norm": 1.43744703453143, "learning_rate": 1.5494666232851896e-07, "loss": 0.5774, "step": 14419 }, { "epoch": 0.92, "grad_norm": 1.8279139069368704, "learning_rate": 1.5469071764871834e-07, "loss": 0.6341, "step": 14420 }, { "epoch": 0.92, "grad_norm": 1.59615271165471, "learning_rate": 1.5443498121012813e-07, "loss": 0.6509, "step": 14421 }, { "epoch": 0.92, "grad_norm": 1.8016230061291458, "learning_rate": 1.5417945302374116e-07, "loss": 0.6501, "step": 14422 }, { "epoch": 0.92, "grad_norm": 1.9841414772420294, "learning_rate": 1.5392413310053866e-07, "loss": 0.7613, "step": 14423 }, { "epoch": 0.92, "grad_norm": 1.130861222666504, "learning_rate": 1.536690214514941e-07, "loss": 0.7011, "step": 14424 }, { "epoch": 0.92, "grad_norm": 1.897769060513809, "learning_rate": 1.5341411808757146e-07, "loss": 0.7607, "step": 14425 }, { "epoch": 0.92, "grad_norm": 1.5727706510668078, "learning_rate": 1.531594230197253e-07, "loss": 0.7471, "step": 14426 }, { "epoch": 0.92, "grad_norm": 1.1457131021506874, "learning_rate": 1.5290493625890413e-07, "loss": 0.6747, "step": 14427 }, { "epoch": 0.92, "grad_norm": 1.8099008999549397, "learning_rate": 1.5265065781604193e-07, "loss": 0.8099, "step": 14428 }, { "epoch": 0.92, "grad_norm": 1.6903773967666316, "learning_rate": 1.5239658770206945e-07, "loss": 0.7003, "step": 14429 }, { "epoch": 0.92, "grad_norm": 1.9006627008061567, "learning_rate": 1.521427259279057e-07, "loss": 0.716, "step": 14430 }, { "epoch": 0.92, "grad_norm": 0.9530325327168978, "learning_rate": 1.5188907250446028e-07, "loss": 0.6472, "step": 14431 }, { "epoch": 0.92, "grad_norm": 1.9163196373308315, "learning_rate": 1.5163562744263561e-07, "loss": 0.6394, "step": 14432 }, { "epoch": 0.92, "grad_norm": 1.6374675642392522, "learning_rate": 1.513823907533235e-07, "loss": 0.7327, "step": 14433 }, { "epoch": 0.92, "grad_norm": 1.8142549201542169, "learning_rate": 1.5112936244740862e-07, "loss": 0.6302, "step": 14434 }, { "epoch": 0.92, "grad_norm": 1.9066467999723777, "learning_rate": 1.508765425357639e-07, "loss": 0.6664, "step": 14435 }, { "epoch": 0.92, "grad_norm": 1.594079045949626, "learning_rate": 1.5062393102925676e-07, "loss": 0.6233, "step": 14436 }, { "epoch": 0.92, "grad_norm": 1.5488723096097956, "learning_rate": 1.5037152793874244e-07, "loss": 0.6883, "step": 14437 }, { "epoch": 0.92, "grad_norm": 1.1570256352981847, "learning_rate": 1.5011933327507e-07, "loss": 0.6029, "step": 14438 }, { "epoch": 0.92, "grad_norm": 1.7405678283934647, "learning_rate": 1.4986734704907745e-07, "loss": 0.6385, "step": 14439 }, { "epoch": 0.92, "grad_norm": 1.4834372417022956, "learning_rate": 1.4961556927159392e-07, "loss": 0.7404, "step": 14440 }, { "epoch": 0.92, "grad_norm": 1.5248724261065836, "learning_rate": 1.4936399995344298e-07, "loss": 0.6582, "step": 14441 }, { "epoch": 0.92, "grad_norm": 1.7199575454973577, "learning_rate": 1.4911263910543316e-07, "loss": 0.7398, "step": 14442 }, { "epoch": 0.92, "grad_norm": 2.2248646901913225, "learning_rate": 1.4886148673836975e-07, "loss": 0.701, "step": 14443 }, { "epoch": 0.92, "grad_norm": 1.836516592254982, "learning_rate": 1.4861054286304522e-07, "loss": 0.7289, "step": 14444 }, { "epoch": 0.92, "grad_norm": 1.8022231031736824, "learning_rate": 1.4835980749024592e-07, "loss": 0.6687, "step": 14445 }, { "epoch": 0.92, "grad_norm": 1.8701041284978117, "learning_rate": 1.481092806307477e-07, "loss": 0.7337, "step": 14446 }, { "epoch": 0.92, "grad_norm": 1.7543735469048574, "learning_rate": 1.4785896229531692e-07, "loss": 0.8659, "step": 14447 }, { "epoch": 0.92, "grad_norm": 1.9070972357263687, "learning_rate": 1.476088524947128e-07, "loss": 0.7301, "step": 14448 }, { "epoch": 0.92, "grad_norm": 1.0709870679109703, "learning_rate": 1.473589512396828e-07, "loss": 0.5335, "step": 14449 }, { "epoch": 0.92, "grad_norm": 2.3642123595829103, "learning_rate": 1.4710925854096946e-07, "loss": 0.8704, "step": 14450 }, { "epoch": 0.92, "grad_norm": 1.9829834404843578, "learning_rate": 1.46859774409302e-07, "loss": 0.5619, "step": 14451 }, { "epoch": 0.93, "grad_norm": 1.8369314796555005, "learning_rate": 1.46610498855404e-07, "loss": 0.7134, "step": 14452 }, { "epoch": 0.93, "grad_norm": 1.5641088323204704, "learning_rate": 1.4636143188998808e-07, "loss": 0.6212, "step": 14453 }, { "epoch": 0.93, "grad_norm": 1.1333230541963135, "learning_rate": 1.461125735237595e-07, "loss": 0.7618, "step": 14454 }, { "epoch": 0.93, "grad_norm": 1.637884627276145, "learning_rate": 1.4586392376741254e-07, "loss": 0.7136, "step": 14455 }, { "epoch": 0.93, "grad_norm": 1.7235910936970245, "learning_rate": 1.4561548263163472e-07, "loss": 0.6611, "step": 14456 }, { "epoch": 0.93, "grad_norm": 1.8948703493918724, "learning_rate": 1.4536725012710252e-07, "loss": 0.7414, "step": 14457 }, { "epoch": 0.93, "grad_norm": 2.33507184888084, "learning_rate": 1.451192262644846e-07, "loss": 0.6627, "step": 14458 }, { "epoch": 0.93, "grad_norm": 2.058640991538411, "learning_rate": 1.4487141105444136e-07, "loss": 0.8818, "step": 14459 }, { "epoch": 0.93, "grad_norm": 1.6308405287262469, "learning_rate": 1.446238045076226e-07, "loss": 0.7375, "step": 14460 }, { "epoch": 0.93, "grad_norm": 4.181464596146808, "learning_rate": 1.4437640663467034e-07, "loss": 0.7654, "step": 14461 }, { "epoch": 0.93, "grad_norm": 1.7352172109341062, "learning_rate": 1.4412921744621722e-07, "loss": 0.8524, "step": 14462 }, { "epoch": 0.93, "grad_norm": 1.085436274904227, "learning_rate": 1.4388223695288695e-07, "loss": 0.5771, "step": 14463 }, { "epoch": 0.93, "grad_norm": 1.5475255462200403, "learning_rate": 1.4363546516529326e-07, "loss": 0.6796, "step": 14464 }, { "epoch": 0.93, "grad_norm": 1.0757453691860512, "learning_rate": 1.433889020940432e-07, "loss": 0.7096, "step": 14465 }, { "epoch": 0.93, "grad_norm": 1.7978366308925857, "learning_rate": 1.431425477497328e-07, "loss": 0.7892, "step": 14466 }, { "epoch": 0.93, "grad_norm": 1.6810830979854507, "learning_rate": 1.4289640214294963e-07, "loss": 0.7615, "step": 14467 }, { "epoch": 0.93, "grad_norm": 1.558853385086932, "learning_rate": 1.4265046528427362e-07, "loss": 0.8005, "step": 14468 }, { "epoch": 0.93, "grad_norm": 1.7425123629951396, "learning_rate": 1.424047371842735e-07, "loss": 0.7713, "step": 14469 }, { "epoch": 0.93, "grad_norm": 1.511997250326767, "learning_rate": 1.4215921785351083e-07, "loss": 0.6636, "step": 14470 }, { "epoch": 0.93, "grad_norm": 1.8000458722705035, "learning_rate": 1.4191390730253718e-07, "loss": 0.7124, "step": 14471 }, { "epoch": 0.93, "grad_norm": 1.8113080466031823, "learning_rate": 1.416688055418952e-07, "loss": 0.8272, "step": 14472 }, { "epoch": 0.93, "grad_norm": 1.0460632463435968, "learning_rate": 1.4142391258211985e-07, "loss": 0.5953, "step": 14473 }, { "epoch": 0.93, "grad_norm": 1.1510735897976896, "learning_rate": 1.4117922843373487e-07, "loss": 0.7138, "step": 14474 }, { "epoch": 0.93, "grad_norm": 1.8130242844565103, "learning_rate": 1.409347531072569e-07, "loss": 0.7468, "step": 14475 }, { "epoch": 0.93, "grad_norm": 1.696506175326586, "learning_rate": 1.4069048661319308e-07, "loss": 0.7428, "step": 14476 }, { "epoch": 0.93, "grad_norm": 1.7306709567658525, "learning_rate": 1.4044642896204107e-07, "loss": 0.831, "step": 14477 }, { "epoch": 0.93, "grad_norm": 3.297313622852656, "learning_rate": 1.402025801642909e-07, "loss": 0.6858, "step": 14478 }, { "epoch": 0.93, "grad_norm": 2.124842164932003, "learning_rate": 1.3995894023042135e-07, "loss": 0.8175, "step": 14479 }, { "epoch": 0.93, "grad_norm": 1.873247219151724, "learning_rate": 1.397155091709046e-07, "loss": 0.7737, "step": 14480 }, { "epoch": 0.93, "grad_norm": 1.2605129838316962, "learning_rate": 1.394722869962023e-07, "loss": 0.7183, "step": 14481 }, { "epoch": 0.93, "grad_norm": 0.9955109033230803, "learning_rate": 1.392292737167672e-07, "loss": 0.6083, "step": 14482 }, { "epoch": 0.93, "grad_norm": 1.7636335427688057, "learning_rate": 1.3898646934304538e-07, "loss": 0.6829, "step": 14483 }, { "epoch": 0.93, "grad_norm": 1.708553005275189, "learning_rate": 1.3874387388546906e-07, "loss": 0.734, "step": 14484 }, { "epoch": 0.93, "grad_norm": 1.6419515523187493, "learning_rate": 1.3850148735446767e-07, "loss": 0.7769, "step": 14485 }, { "epoch": 0.93, "grad_norm": 2.4862062497483572, "learning_rate": 1.3825930976045565e-07, "loss": 0.7483, "step": 14486 }, { "epoch": 0.93, "grad_norm": 1.722220386783902, "learning_rate": 1.38017341113843e-07, "loss": 0.6234, "step": 14487 }, { "epoch": 0.93, "grad_norm": 1.5939338772026443, "learning_rate": 1.3777558142502868e-07, "loss": 0.6873, "step": 14488 }, { "epoch": 0.93, "grad_norm": 1.7190200273680465, "learning_rate": 1.3753403070440263e-07, "loss": 0.6346, "step": 14489 }, { "epoch": 0.93, "grad_norm": 1.6576213957645465, "learning_rate": 1.3729268896234716e-07, "loss": 0.6559, "step": 14490 }, { "epoch": 0.93, "grad_norm": 1.6307254165464307, "learning_rate": 1.3705155620923337e-07, "loss": 0.6592, "step": 14491 }, { "epoch": 0.93, "grad_norm": 0.9827327346798813, "learning_rate": 1.368106324554258e-07, "loss": 0.6396, "step": 14492 }, { "epoch": 0.93, "grad_norm": 0.9834796631352807, "learning_rate": 1.3656991771127781e-07, "loss": 0.6069, "step": 14493 }, { "epoch": 0.93, "grad_norm": 1.7880872832915646, "learning_rate": 1.3632941198713557e-07, "loss": 0.6211, "step": 14494 }, { "epoch": 0.93, "grad_norm": 1.6676387622566569, "learning_rate": 1.3608911529333467e-07, "loss": 0.6924, "step": 14495 }, { "epoch": 0.93, "grad_norm": 2.0610296392891114, "learning_rate": 1.3584902764020302e-07, "loss": 0.9075, "step": 14496 }, { "epoch": 0.93, "grad_norm": 1.5799115772339076, "learning_rate": 1.3560914903806065e-07, "loss": 0.6947, "step": 14497 }, { "epoch": 0.93, "grad_norm": 2.072005678655714, "learning_rate": 1.353694794972138e-07, "loss": 0.7507, "step": 14498 }, { "epoch": 0.93, "grad_norm": 1.7433143206346566, "learning_rate": 1.3513001902796642e-07, "loss": 0.7275, "step": 14499 }, { "epoch": 0.93, "grad_norm": 2.0721486153750472, "learning_rate": 1.3489076764060693e-07, "loss": 0.7557, "step": 14500 }, { "epoch": 0.93, "grad_norm": 1.4585279963555244, "learning_rate": 1.3465172534541936e-07, "loss": 0.7437, "step": 14501 }, { "epoch": 0.93, "grad_norm": 1.7080012351837015, "learning_rate": 1.3441289215267772e-07, "loss": 0.6777, "step": 14502 }, { "epoch": 0.93, "grad_norm": 1.0518459986219324, "learning_rate": 1.34174268072646e-07, "loss": 0.6446, "step": 14503 }, { "epoch": 0.93, "grad_norm": 1.554192186078539, "learning_rate": 1.3393585311557933e-07, "loss": 0.6646, "step": 14504 }, { "epoch": 0.93, "grad_norm": 2.3966099821413778, "learning_rate": 1.3369764729172453e-07, "loss": 0.8905, "step": 14505 }, { "epoch": 0.93, "grad_norm": 2.1445215182404636, "learning_rate": 1.3345965061132004e-07, "loss": 0.6329, "step": 14506 }, { "epoch": 0.93, "grad_norm": 1.9929445785865911, "learning_rate": 1.3322186308459274e-07, "loss": 0.8266, "step": 14507 }, { "epoch": 0.93, "grad_norm": 1.8840306856773688, "learning_rate": 1.329842847217644e-07, "loss": 0.6216, "step": 14508 }, { "epoch": 0.93, "grad_norm": 1.6290062961512868, "learning_rate": 1.3274691553304352e-07, "loss": 0.62, "step": 14509 }, { "epoch": 0.93, "grad_norm": 1.7544118313893182, "learning_rate": 1.325097555286331e-07, "loss": 0.7166, "step": 14510 }, { "epoch": 0.93, "grad_norm": 1.1847562003528918, "learning_rate": 1.322728047187255e-07, "loss": 0.6976, "step": 14511 }, { "epoch": 0.93, "grad_norm": 1.8618818341239014, "learning_rate": 1.3203606311350426e-07, "loss": 0.8431, "step": 14512 }, { "epoch": 0.93, "grad_norm": 1.0342333553819816, "learning_rate": 1.31799530723144e-07, "loss": 0.621, "step": 14513 }, { "epoch": 0.93, "grad_norm": 1.8929386414630252, "learning_rate": 1.3156320755780993e-07, "loss": 0.7323, "step": 14514 }, { "epoch": 0.93, "grad_norm": 1.623436245209751, "learning_rate": 1.3132709362766006e-07, "loss": 0.7781, "step": 14515 }, { "epoch": 0.93, "grad_norm": 1.8651497538500024, "learning_rate": 1.3109118894284012e-07, "loss": 0.6364, "step": 14516 }, { "epoch": 0.93, "grad_norm": 1.8309496271413672, "learning_rate": 1.308554935134909e-07, "loss": 0.6576, "step": 14517 }, { "epoch": 0.93, "grad_norm": 1.552699287692185, "learning_rate": 1.3062000734974045e-07, "loss": 0.7189, "step": 14518 }, { "epoch": 0.93, "grad_norm": 1.8355654130330201, "learning_rate": 1.3038473046171063e-07, "loss": 0.6322, "step": 14519 }, { "epoch": 0.93, "grad_norm": 1.1231563282893962, "learning_rate": 1.3014966285951226e-07, "loss": 0.6514, "step": 14520 }, { "epoch": 0.93, "grad_norm": 1.642440645970708, "learning_rate": 1.299148045532489e-07, "loss": 0.6691, "step": 14521 }, { "epoch": 0.93, "grad_norm": 1.716099559368423, "learning_rate": 1.2968015555301305e-07, "loss": 0.619, "step": 14522 }, { "epoch": 0.93, "grad_norm": 1.5721220297644192, "learning_rate": 1.2944571586888998e-07, "loss": 0.6387, "step": 14523 }, { "epoch": 0.93, "grad_norm": 2.0108906187045186, "learning_rate": 1.2921148551095663e-07, "loss": 0.69, "step": 14524 }, { "epoch": 0.93, "grad_norm": 1.6057089837272356, "learning_rate": 1.2897746448927828e-07, "loss": 0.6706, "step": 14525 }, { "epoch": 0.93, "grad_norm": 1.0140559949626986, "learning_rate": 1.28743652813913e-07, "loss": 0.6367, "step": 14526 }, { "epoch": 0.93, "grad_norm": 1.1500717402670488, "learning_rate": 1.2851005049490939e-07, "loss": 0.6517, "step": 14527 }, { "epoch": 0.93, "grad_norm": 1.8588861691101333, "learning_rate": 1.282766575423078e-07, "loss": 0.7928, "step": 14528 }, { "epoch": 0.93, "grad_norm": 1.622561418082252, "learning_rate": 1.2804347396613848e-07, "loss": 0.6717, "step": 14529 }, { "epoch": 0.93, "grad_norm": 3.624047856064659, "learning_rate": 1.278104997764229e-07, "loss": 0.5535, "step": 14530 }, { "epoch": 0.93, "grad_norm": 1.6008279390077047, "learning_rate": 1.2757773498317416e-07, "loss": 0.6943, "step": 14531 }, { "epoch": 0.93, "grad_norm": 1.7308983148718515, "learning_rate": 1.2734517959639647e-07, "loss": 0.7551, "step": 14532 }, { "epoch": 0.93, "grad_norm": 1.7340937687035267, "learning_rate": 1.2711283362608351e-07, "loss": 0.6866, "step": 14533 }, { "epoch": 0.93, "grad_norm": 1.5423585881430641, "learning_rate": 1.2688069708222228e-07, "loss": 0.5625, "step": 14534 }, { "epoch": 0.93, "grad_norm": 3.7903598605239965, "learning_rate": 1.266487699747887e-07, "loss": 0.7242, "step": 14535 }, { "epoch": 0.93, "grad_norm": 1.1275560482760274, "learning_rate": 1.2641705231375034e-07, "loss": 0.6674, "step": 14536 }, { "epoch": 0.93, "grad_norm": 1.6643953823904198, "learning_rate": 1.2618554410906648e-07, "loss": 0.6341, "step": 14537 }, { "epoch": 0.93, "grad_norm": 1.8164463991420778, "learning_rate": 1.2595424537068635e-07, "loss": 0.727, "step": 14538 }, { "epoch": 0.93, "grad_norm": 1.6439732158041185, "learning_rate": 1.2572315610855201e-07, "loss": 0.7318, "step": 14539 }, { "epoch": 0.93, "grad_norm": 1.638216683098442, "learning_rate": 1.2549227633259275e-07, "loss": 0.7555, "step": 14540 }, { "epoch": 0.93, "grad_norm": 1.6912325120266696, "learning_rate": 1.252616060527334e-07, "loss": 0.7613, "step": 14541 }, { "epoch": 0.93, "grad_norm": 1.4268289222064277, "learning_rate": 1.250311452788866e-07, "loss": 0.7379, "step": 14542 }, { "epoch": 0.93, "grad_norm": 1.8305418652043493, "learning_rate": 1.248008940209583e-07, "loss": 0.6082, "step": 14543 }, { "epoch": 0.93, "grad_norm": 1.660440259143159, "learning_rate": 1.245708522888428e-07, "loss": 0.7167, "step": 14544 }, { "epoch": 0.93, "grad_norm": 1.6810177385150005, "learning_rate": 1.243410200924272e-07, "loss": 0.9174, "step": 14545 }, { "epoch": 0.93, "grad_norm": 1.6004448970220686, "learning_rate": 1.2411139744158972e-07, "loss": 0.6439, "step": 14546 }, { "epoch": 0.93, "grad_norm": 1.741916787372303, "learning_rate": 1.2388198434619803e-07, "loss": 0.8104, "step": 14547 }, { "epoch": 0.93, "grad_norm": 1.543666910802948, "learning_rate": 1.2365278081611365e-07, "loss": 0.6455, "step": 14548 }, { "epoch": 0.93, "grad_norm": 2.267312564572525, "learning_rate": 1.2342378686118538e-07, "loss": 0.6483, "step": 14549 }, { "epoch": 0.93, "grad_norm": 1.7283653788539899, "learning_rate": 1.2319500249125594e-07, "loss": 0.7104, "step": 14550 }, { "epoch": 0.93, "grad_norm": 1.5969234089424693, "learning_rate": 1.2296642771615741e-07, "loss": 0.6719, "step": 14551 }, { "epoch": 0.93, "grad_norm": 1.953910831513736, "learning_rate": 1.227380625457142e-07, "loss": 0.7366, "step": 14552 }, { "epoch": 0.93, "grad_norm": 1.6816594399351303, "learning_rate": 1.2250990698974009e-07, "loss": 0.666, "step": 14553 }, { "epoch": 0.93, "grad_norm": 1.7026658995969328, "learning_rate": 1.2228196105804113e-07, "loss": 0.7562, "step": 14554 }, { "epoch": 0.93, "grad_norm": 1.6334028740369793, "learning_rate": 1.2205422476041452e-07, "loss": 0.7324, "step": 14555 }, { "epoch": 0.93, "grad_norm": 1.5491262148138554, "learning_rate": 1.2182669810664683e-07, "loss": 0.6719, "step": 14556 }, { "epoch": 0.93, "grad_norm": 0.9489725044482415, "learning_rate": 1.2159938110651803e-07, "loss": 0.6243, "step": 14557 }, { "epoch": 0.93, "grad_norm": 1.8566077118730615, "learning_rate": 1.2137227376979587e-07, "loss": 0.7981, "step": 14558 }, { "epoch": 0.93, "grad_norm": 1.7557050534647585, "learning_rate": 1.2114537610624255e-07, "loss": 0.6974, "step": 14559 }, { "epoch": 0.93, "grad_norm": 1.4326222564963353, "learning_rate": 1.2091868812560859e-07, "loss": 0.7678, "step": 14560 }, { "epoch": 0.93, "grad_norm": 1.2812468744701182, "learning_rate": 1.206922098376373e-07, "loss": 0.6409, "step": 14561 }, { "epoch": 0.93, "grad_norm": 1.185741897503129, "learning_rate": 1.2046594125206257e-07, "loss": 0.6491, "step": 14562 }, { "epoch": 0.93, "grad_norm": 1.770793363706675, "learning_rate": 1.2023988237860718e-07, "loss": 0.6332, "step": 14563 }, { "epoch": 0.93, "grad_norm": 1.660547690822146, "learning_rate": 1.2001403322698947e-07, "loss": 0.9098, "step": 14564 }, { "epoch": 0.93, "grad_norm": 1.9559672195373268, "learning_rate": 1.1978839380691277e-07, "loss": 0.7066, "step": 14565 }, { "epoch": 0.93, "grad_norm": 1.9360517954867384, "learning_rate": 1.195629641280771e-07, "loss": 0.6196, "step": 14566 }, { "epoch": 0.93, "grad_norm": 1.498183602905588, "learning_rate": 1.1933774420016974e-07, "loss": 0.7457, "step": 14567 }, { "epoch": 0.93, "grad_norm": 1.6093740092015445, "learning_rate": 1.1911273403287016e-07, "loss": 0.716, "step": 14568 }, { "epoch": 0.93, "grad_norm": 1.6254756209908552, "learning_rate": 1.188879336358495e-07, "loss": 0.621, "step": 14569 }, { "epoch": 0.93, "grad_norm": 1.5559087508139937, "learning_rate": 1.1866334301876837e-07, "loss": 0.6819, "step": 14570 }, { "epoch": 0.93, "grad_norm": 1.9641679818020175, "learning_rate": 1.184389621912807e-07, "loss": 0.8184, "step": 14571 }, { "epoch": 0.93, "grad_norm": 1.5553587626178673, "learning_rate": 1.182147911630277e-07, "loss": 0.6017, "step": 14572 }, { "epoch": 0.93, "grad_norm": 1.6096309423524917, "learning_rate": 1.1799082994364553e-07, "loss": 0.7181, "step": 14573 }, { "epoch": 0.93, "grad_norm": 1.6087151566570113, "learning_rate": 1.177670785427587e-07, "loss": 0.6968, "step": 14574 }, { "epoch": 0.93, "grad_norm": 1.6384367237196475, "learning_rate": 1.17543536969984e-07, "loss": 0.7653, "step": 14575 }, { "epoch": 0.93, "grad_norm": 1.5665916893116003, "learning_rate": 1.1732020523492871e-07, "loss": 0.7324, "step": 14576 }, { "epoch": 0.93, "grad_norm": 1.5999986043842265, "learning_rate": 1.1709708334719128e-07, "loss": 0.6178, "step": 14577 }, { "epoch": 0.93, "grad_norm": 2.281625529051604, "learning_rate": 1.1687417131636014e-07, "loss": 0.8568, "step": 14578 }, { "epoch": 0.93, "grad_norm": 1.8359468323398669, "learning_rate": 1.1665146915201652e-07, "loss": 0.7383, "step": 14579 }, { "epoch": 0.93, "grad_norm": 2.047956793852425, "learning_rate": 1.1642897686373167e-07, "loss": 0.7119, "step": 14580 }, { "epoch": 0.93, "grad_norm": 1.6164992741066373, "learning_rate": 1.1620669446106735e-07, "loss": 0.6945, "step": 14581 }, { "epoch": 0.93, "grad_norm": 1.604562982695168, "learning_rate": 1.1598462195357707e-07, "loss": 0.6831, "step": 14582 }, { "epoch": 0.93, "grad_norm": 1.6611864388485216, "learning_rate": 1.157627593508054e-07, "loss": 0.7718, "step": 14583 }, { "epoch": 0.93, "grad_norm": 1.9104733052208451, "learning_rate": 1.1554110666228691e-07, "loss": 0.6704, "step": 14584 }, { "epoch": 0.93, "grad_norm": 0.9883285671664923, "learning_rate": 1.153196638975479e-07, "loss": 0.5658, "step": 14585 }, { "epoch": 0.93, "grad_norm": 1.6409367158267072, "learning_rate": 1.1509843106610574e-07, "loss": 0.7237, "step": 14586 }, { "epoch": 0.93, "grad_norm": 1.190375813321708, "learning_rate": 1.148774081774684e-07, "loss": 0.631, "step": 14587 }, { "epoch": 0.93, "grad_norm": 1.7884688529824384, "learning_rate": 1.1465659524113438e-07, "loss": 0.7562, "step": 14588 }, { "epoch": 0.93, "grad_norm": 1.6759644713026665, "learning_rate": 1.1443599226659497e-07, "loss": 0.7329, "step": 14589 }, { "epoch": 0.93, "grad_norm": 1.2578677095720694, "learning_rate": 1.1421559926333092e-07, "loss": 0.7834, "step": 14590 }, { "epoch": 0.93, "grad_norm": 1.5933948091798766, "learning_rate": 1.1399541624081357e-07, "loss": 0.7731, "step": 14591 }, { "epoch": 0.93, "grad_norm": 1.3784663995534676, "learning_rate": 1.1377544320850641e-07, "loss": 0.5114, "step": 14592 }, { "epoch": 0.93, "grad_norm": 1.8835084384272762, "learning_rate": 1.1355568017586305e-07, "loss": 0.7752, "step": 14593 }, { "epoch": 0.93, "grad_norm": 1.7817863525405822, "learning_rate": 1.1333612715232923e-07, "loss": 0.7462, "step": 14594 }, { "epoch": 0.93, "grad_norm": 1.712862823158133, "learning_rate": 1.1311678414734018e-07, "loss": 0.7766, "step": 14595 }, { "epoch": 0.93, "grad_norm": 1.9720269995954287, "learning_rate": 1.1289765117032226e-07, "loss": 0.7373, "step": 14596 }, { "epoch": 0.93, "grad_norm": 1.557650574442114, "learning_rate": 1.1267872823069459e-07, "loss": 0.6753, "step": 14597 }, { "epoch": 0.93, "grad_norm": 1.6210318708116085, "learning_rate": 1.1246001533786576e-07, "loss": 0.7127, "step": 14598 }, { "epoch": 0.93, "grad_norm": 4.093860315049281, "learning_rate": 1.1224151250123549e-07, "loss": 0.7653, "step": 14599 }, { "epoch": 0.93, "grad_norm": 1.5907993512121328, "learning_rate": 1.1202321973019403e-07, "loss": 0.7307, "step": 14600 }, { "epoch": 0.93, "grad_norm": 1.9775303163459235, "learning_rate": 1.1180513703412388e-07, "loss": 0.6645, "step": 14601 }, { "epoch": 0.93, "grad_norm": 2.160262179848374, "learning_rate": 1.1158726442239698e-07, "loss": 0.7683, "step": 14602 }, { "epoch": 0.93, "grad_norm": 1.4618428057971617, "learning_rate": 1.1136960190437751e-07, "loss": 0.5917, "step": 14603 }, { "epoch": 0.93, "grad_norm": 1.8319675295806226, "learning_rate": 1.1115214948942077e-07, "loss": 0.7191, "step": 14604 }, { "epoch": 0.93, "grad_norm": 1.9710931003940906, "learning_rate": 1.1093490718687094e-07, "loss": 0.8101, "step": 14605 }, { "epoch": 0.93, "grad_norm": 1.1579503438534708, "learning_rate": 1.1071787500606557e-07, "loss": 0.7021, "step": 14606 }, { "epoch": 0.93, "grad_norm": 1.7057427810093286, "learning_rate": 1.1050105295633274e-07, "loss": 0.7488, "step": 14607 }, { "epoch": 0.94, "grad_norm": 1.721383513102351, "learning_rate": 1.1028444104698998e-07, "loss": 0.7714, "step": 14608 }, { "epoch": 0.94, "grad_norm": 1.838562307694638, "learning_rate": 1.1006803928734711e-07, "loss": 0.6744, "step": 14609 }, { "epoch": 0.94, "grad_norm": 1.8813770585192762, "learning_rate": 1.0985184768670443e-07, "loss": 0.6935, "step": 14610 }, { "epoch": 0.94, "grad_norm": 1.4991335640849426, "learning_rate": 1.0963586625435507e-07, "loss": 0.6127, "step": 14611 }, { "epoch": 0.94, "grad_norm": 1.7344194665975072, "learning_rate": 1.0942009499957884e-07, "loss": 0.6088, "step": 14612 }, { "epoch": 0.94, "grad_norm": 1.9386584503006856, "learning_rate": 1.0920453393165109e-07, "loss": 0.7368, "step": 14613 }, { "epoch": 0.94, "grad_norm": 1.8863068812695294, "learning_rate": 1.0898918305983496e-07, "loss": 0.7887, "step": 14614 }, { "epoch": 0.94, "grad_norm": 1.2221046770301625, "learning_rate": 1.087740423933864e-07, "loss": 0.6951, "step": 14615 }, { "epoch": 0.94, "grad_norm": 1.7039571285899866, "learning_rate": 1.0855911194155189e-07, "loss": 0.6828, "step": 14616 }, { "epoch": 0.94, "grad_norm": 1.7821609865253893, "learning_rate": 1.0834439171356848e-07, "loss": 0.7271, "step": 14617 }, { "epoch": 0.94, "grad_norm": 3.5348899673570617, "learning_rate": 1.0812988171866434e-07, "loss": 0.6631, "step": 14618 }, { "epoch": 0.94, "grad_norm": 1.5197295037057859, "learning_rate": 1.0791558196605823e-07, "loss": 0.7024, "step": 14619 }, { "epoch": 0.94, "grad_norm": 1.6287913942211238, "learning_rate": 1.0770149246496109e-07, "loss": 0.7684, "step": 14620 }, { "epoch": 0.94, "grad_norm": 1.5341673486154728, "learning_rate": 1.0748761322457334e-07, "loss": 0.7603, "step": 14621 }, { "epoch": 0.94, "grad_norm": 1.732328133208338, "learning_rate": 1.0727394425408766e-07, "loss": 0.7623, "step": 14622 }, { "epoch": 0.94, "grad_norm": 1.7634944857810375, "learning_rate": 1.0706048556268667e-07, "loss": 0.7298, "step": 14623 }, { "epoch": 0.94, "grad_norm": 2.001723278795277, "learning_rate": 1.0684723715954471e-07, "loss": 0.7892, "step": 14624 }, { "epoch": 0.94, "grad_norm": 1.6304208969818583, "learning_rate": 1.0663419905382666e-07, "loss": 0.6325, "step": 14625 }, { "epoch": 0.94, "grad_norm": 1.6981925460421987, "learning_rate": 1.0642137125468743e-07, "loss": 0.6518, "step": 14626 }, { "epoch": 0.94, "grad_norm": 1.802609929092684, "learning_rate": 1.0620875377127637e-07, "loss": 0.6927, "step": 14627 }, { "epoch": 0.94, "grad_norm": 1.7152777642100037, "learning_rate": 1.0599634661272839e-07, "loss": 0.7086, "step": 14628 }, { "epoch": 0.94, "grad_norm": 1.7162125623949562, "learning_rate": 1.0578414978817508e-07, "loss": 0.6624, "step": 14629 }, { "epoch": 0.94, "grad_norm": 1.81119391258728, "learning_rate": 1.0557216330673359e-07, "loss": 0.841, "step": 14630 }, { "epoch": 0.94, "grad_norm": 1.82097908696099, "learning_rate": 1.0536038717751607e-07, "loss": 0.7206, "step": 14631 }, { "epoch": 0.94, "grad_norm": 1.642732125195272, "learning_rate": 1.0514882140962468e-07, "loss": 0.6219, "step": 14632 }, { "epoch": 0.94, "grad_norm": 2.3617484038938312, "learning_rate": 1.0493746601215105e-07, "loss": 0.7216, "step": 14633 }, { "epoch": 0.94, "grad_norm": 1.7032677852331637, "learning_rate": 1.0472632099417957e-07, "loss": 0.856, "step": 14634 }, { "epoch": 0.94, "grad_norm": 1.8446134842711668, "learning_rate": 1.0451538636478353e-07, "loss": 0.7107, "step": 14635 }, { "epoch": 0.94, "grad_norm": 1.698435546890438, "learning_rate": 1.043046621330307e-07, "loss": 0.6241, "step": 14636 }, { "epoch": 0.94, "grad_norm": 1.1400359541019573, "learning_rate": 1.0409414830797493e-07, "loss": 0.6244, "step": 14637 }, { "epoch": 0.94, "grad_norm": 2.008059765009846, "learning_rate": 1.0388384489866565e-07, "loss": 0.7206, "step": 14638 }, { "epoch": 0.94, "grad_norm": 1.7961543598877963, "learning_rate": 1.0367375191414064e-07, "loss": 0.7418, "step": 14639 }, { "epoch": 0.94, "grad_norm": 1.6473160339796078, "learning_rate": 1.0346386936342878e-07, "loss": 0.7049, "step": 14640 }, { "epoch": 0.94, "grad_norm": 1.1139770923390127, "learning_rate": 1.032541972555512e-07, "loss": 0.6511, "step": 14641 }, { "epoch": 0.94, "grad_norm": 1.5899685190237047, "learning_rate": 1.0304473559951844e-07, "loss": 0.6753, "step": 14642 }, { "epoch": 0.94, "grad_norm": 1.7287323849897611, "learning_rate": 1.0283548440433332e-07, "loss": 0.7292, "step": 14643 }, { "epoch": 0.94, "grad_norm": 2.0558742994124426, "learning_rate": 1.0262644367898811e-07, "loss": 0.8695, "step": 14644 }, { "epoch": 0.94, "grad_norm": 1.483214147034064, "learning_rate": 1.0241761343246781e-07, "loss": 0.538, "step": 14645 }, { "epoch": 0.94, "grad_norm": 1.8091925920226657, "learning_rate": 1.0220899367374748e-07, "loss": 0.7406, "step": 14646 }, { "epoch": 0.94, "grad_norm": 1.9012760962451898, "learning_rate": 1.0200058441179272e-07, "loss": 0.7563, "step": 14647 }, { "epoch": 0.94, "grad_norm": 1.5230290441542735, "learning_rate": 1.0179238565556081e-07, "loss": 0.7098, "step": 14648 }, { "epoch": 0.94, "grad_norm": 1.773725104599076, "learning_rate": 1.0158439741399961e-07, "loss": 0.7675, "step": 14649 }, { "epoch": 0.94, "grad_norm": 1.6410752426544066, "learning_rate": 1.0137661969604806e-07, "loss": 0.5719, "step": 14650 }, { "epoch": 0.94, "grad_norm": 1.582855168863724, "learning_rate": 1.0116905251063625e-07, "loss": 0.6494, "step": 14651 }, { "epoch": 0.94, "grad_norm": 1.377797921725547, "learning_rate": 1.009616958666837e-07, "loss": 0.6487, "step": 14652 }, { "epoch": 0.94, "grad_norm": 1.681270888156135, "learning_rate": 1.0075454977310384e-07, "loss": 0.7712, "step": 14653 }, { "epoch": 0.94, "grad_norm": 1.5225936648283918, "learning_rate": 1.00547614238799e-07, "loss": 0.4924, "step": 14654 }, { "epoch": 0.94, "grad_norm": 2.0037916386985666, "learning_rate": 1.0034088927266206e-07, "loss": 0.6683, "step": 14655 }, { "epoch": 0.94, "grad_norm": 1.8141679740403789, "learning_rate": 1.0013437488357814e-07, "loss": 0.7791, "step": 14656 }, { "epoch": 0.94, "grad_norm": 1.4256658048050712, "learning_rate": 9.99280710804229e-08, "loss": 0.6557, "step": 14657 }, { "epoch": 0.94, "grad_norm": 1.981867488903053, "learning_rate": 9.972197787206317e-08, "loss": 0.6407, "step": 14658 }, { "epoch": 0.94, "grad_norm": 1.524549583110174, "learning_rate": 9.951609526735517e-08, "loss": 0.5831, "step": 14659 }, { "epoch": 0.94, "grad_norm": 1.4792477366372414, "learning_rate": 9.931042327514851e-08, "loss": 0.6618, "step": 14660 }, { "epoch": 0.94, "grad_norm": 1.761033426787753, "learning_rate": 9.910496190428164e-08, "loss": 0.7081, "step": 14661 }, { "epoch": 0.94, "grad_norm": 2.9920052661529026, "learning_rate": 9.889971116358532e-08, "loss": 0.7051, "step": 14662 }, { "epoch": 0.94, "grad_norm": 1.0170331726053594, "learning_rate": 9.869467106188135e-08, "loss": 0.6324, "step": 14663 }, { "epoch": 0.94, "grad_norm": 1.804218331268831, "learning_rate": 9.848984160798103e-08, "loss": 0.7054, "step": 14664 }, { "epoch": 0.94, "grad_norm": 1.7543198528680064, "learning_rate": 9.828522281068787e-08, "loss": 0.7435, "step": 14665 }, { "epoch": 0.94, "grad_norm": 1.7844245076988896, "learning_rate": 9.808081467879593e-08, "loss": 0.7237, "step": 14666 }, { "epoch": 0.94, "grad_norm": 1.6382400863639373, "learning_rate": 9.787661722108988e-08, "loss": 0.6828, "step": 14667 }, { "epoch": 0.94, "grad_norm": 2.0529173650563695, "learning_rate": 9.767263044634601e-08, "loss": 0.8629, "step": 14668 }, { "epoch": 0.94, "grad_norm": 2.386547772889506, "learning_rate": 9.74688543633323e-08, "loss": 0.7812, "step": 14669 }, { "epoch": 0.94, "grad_norm": 1.567352051358928, "learning_rate": 9.726528898080456e-08, "loss": 0.6718, "step": 14670 }, { "epoch": 0.94, "grad_norm": 1.6205959324652346, "learning_rate": 9.706193430751298e-08, "loss": 0.73, "step": 14671 }, { "epoch": 0.94, "grad_norm": 1.718217702768279, "learning_rate": 9.68587903521967e-08, "loss": 0.7223, "step": 14672 }, { "epoch": 0.94, "grad_norm": 1.9379387513590118, "learning_rate": 9.665585712358704e-08, "loss": 0.8225, "step": 14673 }, { "epoch": 0.94, "grad_norm": 1.761062594952452, "learning_rate": 9.64531346304054e-08, "loss": 0.7214, "step": 14674 }, { "epoch": 0.94, "grad_norm": 1.8067925485725544, "learning_rate": 9.625062288136367e-08, "loss": 0.7467, "step": 14675 }, { "epoch": 0.94, "grad_norm": 1.289918420263585, "learning_rate": 9.604832188516711e-08, "loss": 0.7096, "step": 14676 }, { "epoch": 0.94, "grad_norm": 1.7605649396133831, "learning_rate": 9.584623165050766e-08, "loss": 0.5845, "step": 14677 }, { "epoch": 0.94, "grad_norm": 1.7527888014885107, "learning_rate": 9.564435218607338e-08, "loss": 0.7209, "step": 14678 }, { "epoch": 0.94, "grad_norm": 1.8417906662706396, "learning_rate": 9.544268350053843e-08, "loss": 0.7929, "step": 14679 }, { "epoch": 0.94, "grad_norm": 1.7661763382548126, "learning_rate": 9.524122560257142e-08, "loss": 0.7501, "step": 14680 }, { "epoch": 0.94, "grad_norm": 1.1088889531257577, "learning_rate": 9.50399785008299e-08, "loss": 0.644, "step": 14681 }, { "epoch": 0.94, "grad_norm": 1.8358283098427373, "learning_rate": 9.483894220396361e-08, "loss": 0.9139, "step": 14682 }, { "epoch": 0.94, "grad_norm": 1.1219157068468828, "learning_rate": 9.463811672061284e-08, "loss": 0.5906, "step": 14683 }, { "epoch": 0.94, "grad_norm": 1.7956460981669342, "learning_rate": 9.443750205940738e-08, "loss": 0.7402, "step": 14684 }, { "epoch": 0.94, "grad_norm": 1.5607109450589713, "learning_rate": 9.423709822897087e-08, "loss": 0.6609, "step": 14685 }, { "epoch": 0.94, "grad_norm": 1.982310145482047, "learning_rate": 9.403690523791476e-08, "loss": 0.8235, "step": 14686 }, { "epoch": 0.94, "grad_norm": 1.4945341918551183, "learning_rate": 9.383692309484382e-08, "loss": 0.7185, "step": 14687 }, { "epoch": 0.94, "grad_norm": 1.1499638025285281, "learning_rate": 9.363715180835287e-08, "loss": 0.6526, "step": 14688 }, { "epoch": 0.94, "grad_norm": 1.8394624445247612, "learning_rate": 9.343759138702724e-08, "loss": 0.7211, "step": 14689 }, { "epoch": 0.94, "grad_norm": 1.5258255837755799, "learning_rate": 9.323824183944452e-08, "loss": 0.7572, "step": 14690 }, { "epoch": 0.94, "grad_norm": 1.6170779776583235, "learning_rate": 9.303910317417064e-08, "loss": 0.7102, "step": 14691 }, { "epoch": 0.94, "grad_norm": 1.7350345920166255, "learning_rate": 9.284017539976598e-08, "loss": 0.778, "step": 14692 }, { "epoch": 0.94, "grad_norm": 1.7043064196779565, "learning_rate": 9.264145852477868e-08, "loss": 0.6668, "step": 14693 }, { "epoch": 0.94, "grad_norm": 1.0481208145955299, "learning_rate": 9.244295255774972e-08, "loss": 0.6178, "step": 14694 }, { "epoch": 0.94, "grad_norm": 2.1762247493686777, "learning_rate": 9.224465750721057e-08, "loss": 0.7134, "step": 14695 }, { "epoch": 0.94, "grad_norm": 1.6185314406994316, "learning_rate": 9.204657338168388e-08, "loss": 0.633, "step": 14696 }, { "epoch": 0.94, "grad_norm": 1.0611077405506364, "learning_rate": 9.184870018968173e-08, "loss": 0.6914, "step": 14697 }, { "epoch": 0.94, "grad_norm": 1.7181386202105255, "learning_rate": 9.165103793970897e-08, "loss": 0.6028, "step": 14698 }, { "epoch": 0.94, "grad_norm": 1.6958214898279322, "learning_rate": 9.14535866402616e-08, "loss": 0.7091, "step": 14699 }, { "epoch": 0.94, "grad_norm": 1.7376030208559468, "learning_rate": 9.125634629982394e-08, "loss": 0.7512, "step": 14700 }, { "epoch": 0.94, "grad_norm": 1.0456867875696765, "learning_rate": 9.10593169268742e-08, "loss": 0.6407, "step": 14701 }, { "epoch": 0.94, "grad_norm": 1.184971800425892, "learning_rate": 9.086249852987949e-08, "loss": 0.7022, "step": 14702 }, { "epoch": 0.94, "grad_norm": 2.2777552153407696, "learning_rate": 9.066589111729973e-08, "loss": 0.7896, "step": 14703 }, { "epoch": 0.94, "grad_norm": 2.249725578070259, "learning_rate": 9.04694946975837e-08, "loss": 0.7503, "step": 14704 }, { "epoch": 0.94, "grad_norm": 1.7441223297993134, "learning_rate": 9.027330927917244e-08, "loss": 0.7389, "step": 14705 }, { "epoch": 0.94, "grad_norm": 1.6788433818282666, "learning_rate": 9.007733487049808e-08, "loss": 0.6604, "step": 14706 }, { "epoch": 0.94, "grad_norm": 1.4426477361203909, "learning_rate": 8.988157147998222e-08, "loss": 0.6263, "step": 14707 }, { "epoch": 0.94, "grad_norm": 1.8553239230075471, "learning_rate": 8.968601911603869e-08, "loss": 0.7921, "step": 14708 }, { "epoch": 0.94, "grad_norm": 1.4143554358595605, "learning_rate": 8.949067778707188e-08, "loss": 0.6191, "step": 14709 }, { "epoch": 0.94, "grad_norm": 1.6500021294746121, "learning_rate": 8.929554750147784e-08, "loss": 0.7708, "step": 14710 }, { "epoch": 0.94, "grad_norm": 1.6987471425095255, "learning_rate": 8.91006282676421e-08, "loss": 0.8292, "step": 14711 }, { "epoch": 0.94, "grad_norm": 1.5931203693728035, "learning_rate": 8.890592009394239e-08, "loss": 0.7061, "step": 14712 }, { "epoch": 0.94, "grad_norm": 1.552587468445054, "learning_rate": 8.871142298874647e-08, "loss": 0.6565, "step": 14713 }, { "epoch": 0.94, "grad_norm": 1.7156316507960059, "learning_rate": 8.851713696041375e-08, "loss": 0.7813, "step": 14714 }, { "epoch": 0.94, "grad_norm": 1.6878085895066093, "learning_rate": 8.832306201729368e-08, "loss": 0.6053, "step": 14715 }, { "epoch": 0.94, "grad_norm": 1.6305775981981836, "learning_rate": 8.812919816772791e-08, "loss": 0.7762, "step": 14716 }, { "epoch": 0.94, "grad_norm": 2.5166093428334197, "learning_rate": 8.793554542004756e-08, "loss": 0.7316, "step": 14717 }, { "epoch": 0.94, "grad_norm": 2.1913750312808116, "learning_rate": 8.774210378257597e-08, "loss": 0.6672, "step": 14718 }, { "epoch": 0.94, "grad_norm": 2.0387699545211997, "learning_rate": 8.754887326362649e-08, "loss": 0.6902, "step": 14719 }, { "epoch": 0.94, "grad_norm": 1.7423363857174128, "learning_rate": 8.735585387150414e-08, "loss": 0.7024, "step": 14720 }, { "epoch": 0.94, "grad_norm": 1.059144777877147, "learning_rate": 8.71630456145045e-08, "loss": 0.6062, "step": 14721 }, { "epoch": 0.94, "grad_norm": 1.4533167055769276, "learning_rate": 8.697044850091374e-08, "loss": 0.5844, "step": 14722 }, { "epoch": 0.94, "grad_norm": 1.864760324293096, "learning_rate": 8.677806253900967e-08, "loss": 0.7095, "step": 14723 }, { "epoch": 0.94, "grad_norm": 1.1332044440841536, "learning_rate": 8.658588773705956e-08, "loss": 0.6156, "step": 14724 }, { "epoch": 0.94, "grad_norm": 1.047097098512851, "learning_rate": 8.639392410332403e-08, "loss": 0.5775, "step": 14725 }, { "epoch": 0.94, "grad_norm": 1.8811932344482123, "learning_rate": 8.62021716460526e-08, "loss": 0.7727, "step": 14726 }, { "epoch": 0.94, "grad_norm": 1.5429451383296446, "learning_rate": 8.601063037348644e-08, "loss": 0.6974, "step": 14727 }, { "epoch": 0.94, "grad_norm": 1.64271538188282, "learning_rate": 8.581930029385788e-08, "loss": 0.6907, "step": 14728 }, { "epoch": 0.94, "grad_norm": 1.0572945945332388, "learning_rate": 8.562818141538976e-08, "loss": 0.65, "step": 14729 }, { "epoch": 0.94, "grad_norm": 2.0936987882213525, "learning_rate": 8.543727374629607e-08, "loss": 0.7323, "step": 14730 }, { "epoch": 0.94, "grad_norm": 1.7472346237471041, "learning_rate": 8.52465772947808e-08, "loss": 0.7605, "step": 14731 }, { "epoch": 0.94, "grad_norm": 1.129378579628741, "learning_rate": 8.505609206904075e-08, "loss": 0.6794, "step": 14732 }, { "epoch": 0.94, "grad_norm": 1.1706586332066031, "learning_rate": 8.486581807726157e-08, "loss": 0.6838, "step": 14733 }, { "epoch": 0.94, "grad_norm": 1.8921854420164568, "learning_rate": 8.46757553276223e-08, "loss": 0.8708, "step": 14734 }, { "epoch": 0.94, "grad_norm": 1.7903009303028223, "learning_rate": 8.448590382829025e-08, "loss": 0.8168, "step": 14735 }, { "epoch": 0.94, "grad_norm": 1.5544654168618868, "learning_rate": 8.429626358742504e-08, "loss": 0.8505, "step": 14736 }, { "epoch": 0.94, "grad_norm": 1.5365889560684403, "learning_rate": 8.410683461317682e-08, "loss": 0.5715, "step": 14737 }, { "epoch": 0.94, "grad_norm": 2.0299154372620207, "learning_rate": 8.391761691368738e-08, "loss": 0.6753, "step": 14738 }, { "epoch": 0.94, "grad_norm": 1.826888633152753, "learning_rate": 8.372861049708859e-08, "loss": 0.7539, "step": 14739 }, { "epoch": 0.94, "grad_norm": 1.2084548031572548, "learning_rate": 8.353981537150335e-08, "loss": 0.6291, "step": 14740 }, { "epoch": 0.94, "grad_norm": 1.5886218734536595, "learning_rate": 8.335123154504688e-08, "loss": 0.871, "step": 14741 }, { "epoch": 0.94, "grad_norm": 1.2602649895258498, "learning_rate": 8.316285902582211e-08, "loss": 0.7509, "step": 14742 }, { "epoch": 0.94, "grad_norm": 2.5459567854851137, "learning_rate": 8.297469782192702e-08, "loss": 0.802, "step": 14743 }, { "epoch": 0.94, "grad_norm": 1.671995545724701, "learning_rate": 8.278674794144625e-08, "loss": 0.6759, "step": 14744 }, { "epoch": 0.94, "grad_norm": 1.6263131983691792, "learning_rate": 8.25990093924589e-08, "loss": 0.6668, "step": 14745 }, { "epoch": 0.94, "grad_norm": 1.8900128289574154, "learning_rate": 8.241148218303352e-08, "loss": 0.6709, "step": 14746 }, { "epoch": 0.94, "grad_norm": 1.5709412238346052, "learning_rate": 8.222416632122864e-08, "loss": 0.7027, "step": 14747 }, { "epoch": 0.94, "grad_norm": 2.1975536194801952, "learning_rate": 8.203706181509674e-08, "loss": 0.7655, "step": 14748 }, { "epoch": 0.94, "grad_norm": 1.3783559489598127, "learning_rate": 8.185016867267693e-08, "loss": 0.5885, "step": 14749 }, { "epoch": 0.94, "grad_norm": 2.617175855573377, "learning_rate": 8.166348690200276e-08, "loss": 0.8226, "step": 14750 }, { "epoch": 0.94, "grad_norm": 1.1642711685422633, "learning_rate": 8.147701651109674e-08, "loss": 0.665, "step": 14751 }, { "epoch": 0.94, "grad_norm": 1.0501789173354774, "learning_rate": 8.129075750797355e-08, "loss": 0.7008, "step": 14752 }, { "epoch": 0.94, "grad_norm": 1.6606655704830524, "learning_rate": 8.11047099006379e-08, "loss": 0.7888, "step": 14753 }, { "epoch": 0.94, "grad_norm": 1.0380429493177916, "learning_rate": 8.091887369708506e-08, "loss": 0.654, "step": 14754 }, { "epoch": 0.94, "grad_norm": 2.3131090265004026, "learning_rate": 8.073324890530421e-08, "loss": 0.6527, "step": 14755 }, { "epoch": 0.94, "grad_norm": 1.7586023377805338, "learning_rate": 8.054783553327006e-08, "loss": 0.7166, "step": 14756 }, { "epoch": 0.94, "grad_norm": 1.4637973775292645, "learning_rate": 8.036263358895402e-08, "loss": 0.6491, "step": 14757 }, { "epoch": 0.94, "grad_norm": 1.6482921225165534, "learning_rate": 8.017764308031306e-08, "loss": 0.6678, "step": 14758 }, { "epoch": 0.94, "grad_norm": 1.831821870847018, "learning_rate": 7.999286401529971e-08, "loss": 0.6071, "step": 14759 }, { "epoch": 0.94, "grad_norm": 1.7266706245866967, "learning_rate": 7.980829640185483e-08, "loss": 0.6496, "step": 14760 }, { "epoch": 0.94, "grad_norm": 1.8380439959596901, "learning_rate": 7.962394024791043e-08, "loss": 0.8432, "step": 14761 }, { "epoch": 0.94, "grad_norm": 1.2590891610058121, "learning_rate": 7.943979556139014e-08, "loss": 0.6363, "step": 14762 }, { "epoch": 0.94, "grad_norm": 1.8629281339789652, "learning_rate": 7.925586235020766e-08, "loss": 0.7084, "step": 14763 }, { "epoch": 0.94, "grad_norm": 1.779245589230767, "learning_rate": 7.907214062226886e-08, "loss": 0.7559, "step": 14764 }, { "epoch": 0.95, "grad_norm": 1.7565890197900877, "learning_rate": 7.888863038546801e-08, "loss": 0.6257, "step": 14765 }, { "epoch": 0.95, "grad_norm": 1.5923072082273033, "learning_rate": 7.870533164769379e-08, "loss": 0.7983, "step": 14766 }, { "epoch": 0.95, "grad_norm": 1.9936219465777834, "learning_rate": 7.852224441682377e-08, "loss": 0.724, "step": 14767 }, { "epoch": 0.95, "grad_norm": 2.0008375302065957, "learning_rate": 7.833936870072612e-08, "loss": 0.7872, "step": 14768 }, { "epoch": 0.95, "grad_norm": 2.386340900188581, "learning_rate": 7.81567045072601e-08, "loss": 0.7973, "step": 14769 }, { "epoch": 0.95, "grad_norm": 1.7045220361837423, "learning_rate": 7.797425184427721e-08, "loss": 0.6565, "step": 14770 }, { "epoch": 0.95, "grad_norm": 0.8989277279241957, "learning_rate": 7.779201071961784e-08, "loss": 0.5516, "step": 14771 }, { "epoch": 0.95, "grad_norm": 1.225099493648422, "learning_rate": 7.760998114111462e-08, "loss": 0.5963, "step": 14772 }, { "epoch": 0.95, "grad_norm": 1.9852792411453193, "learning_rate": 7.742816311659185e-08, "loss": 0.6366, "step": 14773 }, { "epoch": 0.95, "grad_norm": 1.675230378699755, "learning_rate": 7.724655665386271e-08, "loss": 0.7903, "step": 14774 }, { "epoch": 0.95, "grad_norm": 1.650106104436668, "learning_rate": 7.706516176073209e-08, "loss": 0.6816, "step": 14775 }, { "epoch": 0.95, "grad_norm": 1.6649244360153463, "learning_rate": 7.688397844499652e-08, "loss": 0.6162, "step": 14776 }, { "epoch": 0.95, "grad_norm": 1.189414057756815, "learning_rate": 7.670300671444309e-08, "loss": 0.7919, "step": 14777 }, { "epoch": 0.95, "grad_norm": 2.5841010807706475, "learning_rate": 7.652224657684837e-08, "loss": 0.8191, "step": 14778 }, { "epoch": 0.95, "grad_norm": 1.7246642835064956, "learning_rate": 7.634169803998226e-08, "loss": 0.7253, "step": 14779 }, { "epoch": 0.95, "grad_norm": 1.7252226749416575, "learning_rate": 7.616136111160411e-08, "loss": 0.7036, "step": 14780 }, { "epoch": 0.95, "grad_norm": 1.7000093153513596, "learning_rate": 7.598123579946382e-08, "loss": 0.7667, "step": 14781 }, { "epoch": 0.95, "grad_norm": 1.749799846374508, "learning_rate": 7.580132211130354e-08, "loss": 0.6083, "step": 14782 }, { "epoch": 0.95, "grad_norm": 1.6390951073213558, "learning_rate": 7.562162005485484e-08, "loss": 0.7984, "step": 14783 }, { "epoch": 0.95, "grad_norm": 1.5205527248371, "learning_rate": 7.544212963784159e-08, "loss": 0.6554, "step": 14784 }, { "epoch": 0.95, "grad_norm": 1.5489316123237382, "learning_rate": 7.526285086797813e-08, "loss": 0.7561, "step": 14785 }, { "epoch": 0.95, "grad_norm": 1.5613766838300278, "learning_rate": 7.508378375296887e-08, "loss": 0.6204, "step": 14786 }, { "epoch": 0.95, "grad_norm": 1.7007194180145937, "learning_rate": 7.490492830050933e-08, "loss": 0.6396, "step": 14787 }, { "epoch": 0.95, "grad_norm": 1.6322906739321001, "learning_rate": 7.472628451828779e-08, "loss": 0.6662, "step": 14788 }, { "epoch": 0.95, "grad_norm": 1.1328817693831594, "learning_rate": 7.454785241398033e-08, "loss": 0.7365, "step": 14789 }, { "epoch": 0.95, "grad_norm": 1.1913997682723887, "learning_rate": 7.436963199525693e-08, "loss": 0.5089, "step": 14790 }, { "epoch": 0.95, "grad_norm": 1.2613078684456827, "learning_rate": 7.419162326977592e-08, "loss": 0.6717, "step": 14791 }, { "epoch": 0.95, "grad_norm": 1.8686181761048857, "learning_rate": 7.401382624518894e-08, "loss": 0.7048, "step": 14792 }, { "epoch": 0.95, "grad_norm": 1.9951775004305308, "learning_rate": 7.383624092913655e-08, "loss": 0.6271, "step": 14793 }, { "epoch": 0.95, "grad_norm": 1.6253685138978353, "learning_rate": 7.365886732925153e-08, "loss": 0.7676, "step": 14794 }, { "epoch": 0.95, "grad_norm": 1.0918786434413312, "learning_rate": 7.348170545315614e-08, "loss": 0.6966, "step": 14795 }, { "epoch": 0.95, "grad_norm": 1.7277850306420168, "learning_rate": 7.330475530846537e-08, "loss": 0.6825, "step": 14796 }, { "epoch": 0.95, "grad_norm": 2.1402952529688815, "learning_rate": 7.312801690278426e-08, "loss": 0.854, "step": 14797 }, { "epoch": 0.95, "grad_norm": 1.201446524615825, "learning_rate": 7.29514902437073e-08, "loss": 0.7398, "step": 14798 }, { "epoch": 0.95, "grad_norm": 1.6536574380491287, "learning_rate": 7.277517533882283e-08, "loss": 0.7168, "step": 14799 }, { "epoch": 0.95, "grad_norm": 1.744588040910729, "learning_rate": 7.259907219570761e-08, "loss": 0.6954, "step": 14800 }, { "epoch": 0.95, "grad_norm": 1.9692338598184738, "learning_rate": 7.242318082193e-08, "loss": 0.6631, "step": 14801 }, { "epoch": 0.95, "grad_norm": 1.7023733181872376, "learning_rate": 7.224750122505009e-08, "loss": 0.7912, "step": 14802 }, { "epoch": 0.95, "grad_norm": 1.8745558192833782, "learning_rate": 7.207203341261792e-08, "loss": 0.6768, "step": 14803 }, { "epoch": 0.95, "grad_norm": 1.710489570782995, "learning_rate": 7.189677739217526e-08, "loss": 0.7279, "step": 14804 }, { "epoch": 0.95, "grad_norm": 3.81096357529737, "learning_rate": 7.172173317125275e-08, "loss": 0.6826, "step": 14805 }, { "epoch": 0.95, "grad_norm": 1.491250238085024, "learning_rate": 7.154690075737547e-08, "loss": 0.6187, "step": 14806 }, { "epoch": 0.95, "grad_norm": 2.0318393785553766, "learning_rate": 7.137228015805519e-08, "loss": 0.6636, "step": 14807 }, { "epoch": 0.95, "grad_norm": 12.514703120366256, "learning_rate": 7.11978713807987e-08, "loss": 0.7673, "step": 14808 }, { "epoch": 0.95, "grad_norm": 1.6328929648591903, "learning_rate": 7.102367443310054e-08, "loss": 0.6565, "step": 14809 }, { "epoch": 0.95, "grad_norm": 1.946002058150351, "learning_rate": 7.084968932244751e-08, "loss": 0.7647, "step": 14810 }, { "epoch": 0.95, "grad_norm": 2.1343223379259664, "learning_rate": 7.067591605631752e-08, "loss": 0.7864, "step": 14811 }, { "epoch": 0.95, "grad_norm": 1.154713452272097, "learning_rate": 7.050235464217847e-08, "loss": 0.633, "step": 14812 }, { "epoch": 0.95, "grad_norm": 1.715323293886416, "learning_rate": 7.032900508749052e-08, "loss": 0.8854, "step": 14813 }, { "epoch": 0.95, "grad_norm": 1.81967809587642, "learning_rate": 7.01558673997027e-08, "loss": 0.7867, "step": 14814 }, { "epoch": 0.95, "grad_norm": 1.8374163590788466, "learning_rate": 6.998294158625684e-08, "loss": 0.8047, "step": 14815 }, { "epoch": 0.95, "grad_norm": 1.8182078726229085, "learning_rate": 6.981022765458423e-08, "loss": 0.708, "step": 14816 }, { "epoch": 0.95, "grad_norm": 1.607955738663547, "learning_rate": 6.963772561210891e-08, "loss": 0.6881, "step": 14817 }, { "epoch": 0.95, "grad_norm": 1.543253366296089, "learning_rate": 6.946543546624384e-08, "loss": 0.593, "step": 14818 }, { "epoch": 0.95, "grad_norm": 1.6769276135763864, "learning_rate": 6.929335722439367e-08, "loss": 0.6393, "step": 14819 }, { "epoch": 0.95, "grad_norm": 1.705355964835826, "learning_rate": 6.91214908939547e-08, "loss": 0.6523, "step": 14820 }, { "epoch": 0.95, "grad_norm": 1.131056214385814, "learning_rate": 6.894983648231213e-08, "loss": 0.5324, "step": 14821 }, { "epoch": 0.95, "grad_norm": 1.23622427447983, "learning_rate": 6.877839399684505e-08, "loss": 0.6799, "step": 14822 }, { "epoch": 0.95, "grad_norm": 1.8553724640996467, "learning_rate": 6.86071634449198e-08, "loss": 0.8445, "step": 14823 }, { "epoch": 0.95, "grad_norm": 1.6479740176090554, "learning_rate": 6.84361448338966e-08, "loss": 0.7107, "step": 14824 }, { "epoch": 0.95, "grad_norm": 1.6174150609104225, "learning_rate": 6.826533817112513e-08, "loss": 0.6469, "step": 14825 }, { "epoch": 0.95, "grad_norm": 2.028792046318289, "learning_rate": 6.809474346394673e-08, "loss": 0.6651, "step": 14826 }, { "epoch": 0.95, "grad_norm": 1.7519582950580102, "learning_rate": 6.792436071969277e-08, "loss": 0.7333, "step": 14827 }, { "epoch": 0.95, "grad_norm": 1.8162319929754238, "learning_rate": 6.775418994568572e-08, "loss": 0.7626, "step": 14828 }, { "epoch": 0.95, "grad_norm": 2.4171348852748378, "learning_rate": 6.758423114924029e-08, "loss": 0.6394, "step": 14829 }, { "epoch": 0.95, "grad_norm": 1.7704783895036087, "learning_rate": 6.741448433765951e-08, "loss": 0.6514, "step": 14830 }, { "epoch": 0.95, "grad_norm": 1.5877086933988078, "learning_rate": 6.724494951823979e-08, "loss": 0.759, "step": 14831 }, { "epoch": 0.95, "grad_norm": 1.6394188355028316, "learning_rate": 6.707562669826695e-08, "loss": 0.7454, "step": 14832 }, { "epoch": 0.95, "grad_norm": 1.818231265170723, "learning_rate": 6.690651588501795e-08, "loss": 0.7651, "step": 14833 }, { "epoch": 0.95, "grad_norm": 1.1655332617198715, "learning_rate": 6.673761708576088e-08, "loss": 0.6901, "step": 14834 }, { "epoch": 0.95, "grad_norm": 1.79186747817034, "learning_rate": 6.656893030775546e-08, "loss": 0.8572, "step": 14835 }, { "epoch": 0.95, "grad_norm": 1.884981890324345, "learning_rate": 6.640045555825036e-08, "loss": 0.7715, "step": 14836 }, { "epoch": 0.95, "grad_norm": 2.0337066617488393, "learning_rate": 6.623219284448645e-08, "loss": 0.6364, "step": 14837 }, { "epoch": 0.95, "grad_norm": 1.616562412871421, "learning_rate": 6.606414217369628e-08, "loss": 0.7355, "step": 14838 }, { "epoch": 0.95, "grad_norm": 1.7480776496444894, "learning_rate": 6.589630355310128e-08, "loss": 0.8156, "step": 14839 }, { "epoch": 0.95, "grad_norm": 1.4508884094267556, "learning_rate": 6.572867698991515e-08, "loss": 0.657, "step": 14840 }, { "epoch": 0.95, "grad_norm": 1.5456408862906685, "learning_rate": 6.556126249134209e-08, "loss": 0.6664, "step": 14841 }, { "epoch": 0.95, "grad_norm": 1.6258486395293683, "learning_rate": 6.539406006457749e-08, "loss": 0.6816, "step": 14842 }, { "epoch": 0.95, "grad_norm": 1.7116002402546315, "learning_rate": 6.522706971680726e-08, "loss": 0.7645, "step": 14843 }, { "epoch": 0.95, "grad_norm": 1.4541807515086675, "learning_rate": 6.506029145520842e-08, "loss": 0.5892, "step": 14844 }, { "epoch": 0.95, "grad_norm": 1.7698138353206032, "learning_rate": 6.489372528694748e-08, "loss": 0.777, "step": 14845 }, { "epoch": 0.95, "grad_norm": 1.5583302316524899, "learning_rate": 6.472737121918483e-08, "loss": 0.6514, "step": 14846 }, { "epoch": 0.95, "grad_norm": 1.659949052634261, "learning_rate": 6.456122925906971e-08, "loss": 0.6547, "step": 14847 }, { "epoch": 0.95, "grad_norm": 1.0178310718467471, "learning_rate": 6.439529941374145e-08, "loss": 0.6959, "step": 14848 }, { "epoch": 0.95, "grad_norm": 1.1420794438311945, "learning_rate": 6.422958169033266e-08, "loss": 0.5895, "step": 14849 }, { "epoch": 0.95, "grad_norm": 2.9904520472002316, "learning_rate": 6.406407609596488e-08, "loss": 0.7922, "step": 14850 }, { "epoch": 0.95, "grad_norm": 1.1825473688943033, "learning_rate": 6.389878263775129e-08, "loss": 0.6301, "step": 14851 }, { "epoch": 0.95, "grad_norm": 1.7791680383216844, "learning_rate": 6.373370132279566e-08, "loss": 0.6937, "step": 14852 }, { "epoch": 0.95, "grad_norm": 1.9335043940324375, "learning_rate": 6.356883215819287e-08, "loss": 0.7856, "step": 14853 }, { "epoch": 0.95, "grad_norm": 2.290968426664965, "learning_rate": 6.340417515102893e-08, "loss": 0.6914, "step": 14854 }, { "epoch": 0.95, "grad_norm": 1.805138016068173, "learning_rate": 6.323973030838037e-08, "loss": 0.7255, "step": 14855 }, { "epoch": 0.95, "grad_norm": 1.5964415717625144, "learning_rate": 6.307549763731436e-08, "loss": 0.661, "step": 14856 }, { "epoch": 0.95, "grad_norm": 1.6320229598200178, "learning_rate": 6.291147714488965e-08, "loss": 0.6329, "step": 14857 }, { "epoch": 0.95, "grad_norm": 1.6059672270974588, "learning_rate": 6.274766883815565e-08, "loss": 0.6448, "step": 14858 }, { "epoch": 0.95, "grad_norm": 1.2359642740487782, "learning_rate": 6.258407272415223e-08, "loss": 0.5945, "step": 14859 }, { "epoch": 0.95, "grad_norm": 1.5971649298621335, "learning_rate": 6.242068880991048e-08, "loss": 0.5976, "step": 14860 }, { "epoch": 0.95, "grad_norm": 1.9431306772467467, "learning_rate": 6.225751710245198e-08, "loss": 0.8322, "step": 14861 }, { "epoch": 0.95, "grad_norm": 1.8394462083252703, "learning_rate": 6.209455760879002e-08, "loss": 0.8244, "step": 14862 }, { "epoch": 0.95, "grad_norm": 1.8322405072708947, "learning_rate": 6.193181033592788e-08, "loss": 0.6981, "step": 14863 }, { "epoch": 0.95, "grad_norm": 1.7533060048796423, "learning_rate": 6.176927529086052e-08, "loss": 0.7127, "step": 14864 }, { "epoch": 0.95, "grad_norm": 1.4759793651318247, "learning_rate": 6.160695248057236e-08, "loss": 0.6398, "step": 14865 }, { "epoch": 0.95, "grad_norm": 1.567825593202175, "learning_rate": 6.144484191204115e-08, "loss": 0.7669, "step": 14866 }, { "epoch": 0.95, "grad_norm": 1.5979636855208332, "learning_rate": 6.128294359223297e-08, "loss": 0.6383, "step": 14867 }, { "epoch": 0.95, "grad_norm": 1.6665906770165755, "learning_rate": 6.11212575281056e-08, "loss": 0.7341, "step": 14868 }, { "epoch": 0.95, "grad_norm": 1.0516507894615235, "learning_rate": 6.09597837266096e-08, "loss": 0.6479, "step": 14869 }, { "epoch": 0.95, "grad_norm": 4.043812510654524, "learning_rate": 6.07985221946833e-08, "loss": 0.7107, "step": 14870 }, { "epoch": 0.95, "grad_norm": 1.691566096096723, "learning_rate": 6.063747293925781e-08, "loss": 0.7465, "step": 14871 }, { "epoch": 0.95, "grad_norm": 1.641230846673012, "learning_rate": 6.047663596725428e-08, "loss": 0.6969, "step": 14872 }, { "epoch": 0.95, "grad_norm": 1.6220015348825958, "learning_rate": 6.031601128558606e-08, "loss": 0.7563, "step": 14873 }, { "epoch": 0.95, "grad_norm": 1.7626356704927675, "learning_rate": 6.015559890115597e-08, "loss": 0.6969, "step": 14874 }, { "epoch": 0.95, "grad_norm": 1.6966196056790561, "learning_rate": 5.999539882085793e-08, "loss": 0.6262, "step": 14875 }, { "epoch": 0.95, "grad_norm": 1.6590815503247205, "learning_rate": 5.983541105157809e-08, "loss": 0.6815, "step": 14876 }, { "epoch": 0.95, "grad_norm": 1.6207979743655727, "learning_rate": 5.96756356001904e-08, "loss": 0.6345, "step": 14877 }, { "epoch": 0.95, "grad_norm": 1.836362436856528, "learning_rate": 5.951607247356384e-08, "loss": 0.623, "step": 14878 }, { "epoch": 0.95, "grad_norm": 1.8639341251941943, "learning_rate": 5.9356721678554554e-08, "loss": 0.7016, "step": 14879 }, { "epoch": 0.95, "grad_norm": 1.5986850664566508, "learning_rate": 5.9197583222011525e-08, "loss": 0.7298, "step": 14880 }, { "epoch": 0.95, "grad_norm": 1.7105667038978163, "learning_rate": 5.903865711077483e-08, "loss": 0.6328, "step": 14881 }, { "epoch": 0.95, "grad_norm": 1.5909082410174384, "learning_rate": 5.887994335167346e-08, "loss": 0.6823, "step": 14882 }, { "epoch": 0.95, "grad_norm": 1.7220622961956666, "learning_rate": 5.872144195153029e-08, "loss": 0.7378, "step": 14883 }, { "epoch": 0.95, "grad_norm": 1.6602649923457913, "learning_rate": 5.8563152917155975e-08, "loss": 0.7839, "step": 14884 }, { "epoch": 0.95, "grad_norm": 2.032538048877564, "learning_rate": 5.840507625535397e-08, "loss": 0.8162, "step": 14885 }, { "epoch": 0.95, "grad_norm": 2.0203133038284293, "learning_rate": 5.824721197291827e-08, "loss": 0.6593, "step": 14886 }, { "epoch": 0.95, "grad_norm": 1.8154099801846397, "learning_rate": 5.808956007663291e-08, "loss": 0.6674, "step": 14887 }, { "epoch": 0.95, "grad_norm": 1.8691959451893176, "learning_rate": 5.793212057327469e-08, "loss": 0.7817, "step": 14888 }, { "epoch": 0.95, "grad_norm": 1.6009789102641365, "learning_rate": 5.777489346960874e-08, "loss": 0.6382, "step": 14889 }, { "epoch": 0.95, "grad_norm": 1.1203811958422054, "learning_rate": 5.7617878772392445e-08, "loss": 0.5173, "step": 14890 }, { "epoch": 0.95, "grad_norm": 2.2259704687937365, "learning_rate": 5.7461076488374844e-08, "loss": 0.6155, "step": 14891 }, { "epoch": 0.95, "grad_norm": 1.7428004703916695, "learning_rate": 5.730448662429444e-08, "loss": 0.8561, "step": 14892 }, { "epoch": 0.95, "grad_norm": 3.247836134380248, "learning_rate": 5.7148109186880854e-08, "loss": 0.5945, "step": 14893 }, { "epoch": 0.95, "grad_norm": 1.8668362005319472, "learning_rate": 5.699194418285592e-08, "loss": 0.7216, "step": 14894 }, { "epoch": 0.95, "grad_norm": 1.4091350660380675, "learning_rate": 5.683599161892928e-08, "loss": 0.5872, "step": 14895 }, { "epoch": 0.95, "grad_norm": 1.1408995501855428, "learning_rate": 5.6680251501805564e-08, "loss": 0.5959, "step": 14896 }, { "epoch": 0.95, "grad_norm": 1.492186697879198, "learning_rate": 5.65247238381772e-08, "loss": 0.7714, "step": 14897 }, { "epoch": 0.95, "grad_norm": 1.469335801984678, "learning_rate": 5.636940863472884e-08, "loss": 0.658, "step": 14898 }, { "epoch": 0.95, "grad_norm": 1.676793057570672, "learning_rate": 5.621430589813459e-08, "loss": 0.7431, "step": 14899 }, { "epoch": 0.95, "grad_norm": 1.6731249518774387, "learning_rate": 5.6059415635061896e-08, "loss": 0.787, "step": 14900 }, { "epoch": 0.95, "grad_norm": 1.654231006089373, "learning_rate": 5.5904737852166545e-08, "loss": 0.808, "step": 14901 }, { "epoch": 0.95, "grad_norm": 1.7334490222655252, "learning_rate": 5.5750272556095996e-08, "loss": 0.6503, "step": 14902 }, { "epoch": 0.95, "grad_norm": 1.645045806519633, "learning_rate": 5.559601975348994e-08, "loss": 0.6784, "step": 14903 }, { "epoch": 0.95, "grad_norm": 1.7380469888451422, "learning_rate": 5.544197945097751e-08, "loss": 0.7784, "step": 14904 }, { "epoch": 0.95, "grad_norm": 1.1054394994354169, "learning_rate": 5.5288151655178427e-08, "loss": 0.6336, "step": 14905 }, { "epoch": 0.95, "grad_norm": 1.8429004870963803, "learning_rate": 5.51345363727046e-08, "loss": 0.595, "step": 14906 }, { "epoch": 0.95, "grad_norm": 1.7034652037433033, "learning_rate": 5.4981133610158e-08, "loss": 0.6537, "step": 14907 }, { "epoch": 0.95, "grad_norm": 1.9294461249263388, "learning_rate": 5.482794337413111e-08, "loss": 0.6983, "step": 14908 }, { "epoch": 0.95, "grad_norm": 1.9568676682646726, "learning_rate": 5.4674965671208115e-08, "loss": 0.7013, "step": 14909 }, { "epoch": 0.95, "grad_norm": 1.5982280013410255, "learning_rate": 5.45222005079632e-08, "loss": 0.6526, "step": 14910 }, { "epoch": 0.95, "grad_norm": 1.9711879326759434, "learning_rate": 5.436964789096222e-08, "loss": 0.7834, "step": 14911 }, { "epoch": 0.95, "grad_norm": 1.5256757929452016, "learning_rate": 5.4217307826762155e-08, "loss": 0.6978, "step": 14912 }, { "epoch": 0.95, "grad_norm": 2.444509374202466, "learning_rate": 5.406518032190944e-08, "loss": 0.742, "step": 14913 }, { "epoch": 0.95, "grad_norm": 1.5859230632163788, "learning_rate": 5.391326538294217e-08, "loss": 0.6641, "step": 14914 }, { "epoch": 0.95, "grad_norm": 1.314261212414087, "learning_rate": 5.3761563016389576e-08, "loss": 0.6809, "step": 14915 }, { "epoch": 0.95, "grad_norm": 1.6696417240508319, "learning_rate": 5.361007322877199e-08, "loss": 0.7635, "step": 14916 }, { "epoch": 0.95, "grad_norm": 1.9790457825208925, "learning_rate": 5.34587960265992e-08, "loss": 0.6194, "step": 14917 }, { "epoch": 0.95, "grad_norm": 1.7113159646460647, "learning_rate": 5.330773141637324e-08, "loss": 0.6571, "step": 14918 }, { "epoch": 0.95, "grad_norm": 1.6731317480308836, "learning_rate": 5.315687940458669e-08, "loss": 0.7286, "step": 14919 }, { "epoch": 0.95, "grad_norm": 1.7518372343230946, "learning_rate": 5.3006239997722694e-08, "loss": 0.743, "step": 14920 }, { "epoch": 0.96, "grad_norm": 3.545613121078165, "learning_rate": 5.285581320225552e-08, "loss": 0.7033, "step": 14921 }, { "epoch": 0.96, "grad_norm": 1.711330977285329, "learning_rate": 5.270559902465e-08, "loss": 0.7749, "step": 14922 }, { "epoch": 0.96, "grad_norm": 1.6772351458616812, "learning_rate": 5.255559747136263e-08, "loss": 1.0136, "step": 14923 }, { "epoch": 0.96, "grad_norm": 1.8728576485999953, "learning_rate": 5.240580854883881e-08, "loss": 0.712, "step": 14924 }, { "epoch": 0.96, "grad_norm": 1.8459616343839365, "learning_rate": 5.2256232263517835e-08, "loss": 0.7962, "step": 14925 }, { "epoch": 0.96, "grad_norm": 1.7911448604776286, "learning_rate": 5.210686862182679e-08, "loss": 0.6967, "step": 14926 }, { "epoch": 0.96, "grad_norm": 2.527475084792193, "learning_rate": 5.195771763018609e-08, "loss": 0.7527, "step": 14927 }, { "epoch": 0.96, "grad_norm": 1.4591426418964881, "learning_rate": 5.18087792950045e-08, "loss": 0.6643, "step": 14928 }, { "epoch": 0.96, "grad_norm": 1.550537091220948, "learning_rate": 5.166005362268467e-08, "loss": 0.6692, "step": 14929 }, { "epoch": 0.96, "grad_norm": 1.2068125952631827, "learning_rate": 5.151154061961761e-08, "loss": 0.5722, "step": 14930 }, { "epoch": 0.96, "grad_norm": 0.9776509943798828, "learning_rate": 5.1363240292186535e-08, "loss": 0.4939, "step": 14931 }, { "epoch": 0.96, "grad_norm": 1.7206872461175735, "learning_rate": 5.121515264676524e-08, "loss": 0.7109, "step": 14932 }, { "epoch": 0.96, "grad_norm": 1.8911493296152504, "learning_rate": 5.1067277689716974e-08, "loss": 0.6816, "step": 14933 }, { "epoch": 0.96, "grad_norm": 1.2573457457319284, "learning_rate": 5.091961542739887e-08, "loss": 0.7704, "step": 14934 }, { "epoch": 0.96, "grad_norm": 2.0800762171804097, "learning_rate": 5.07721658661553e-08, "loss": 0.6473, "step": 14935 }, { "epoch": 0.96, "grad_norm": 1.8693950366337206, "learning_rate": 5.0624929012325076e-08, "loss": 0.6811, "step": 14936 }, { "epoch": 0.96, "grad_norm": 1.812363559472419, "learning_rate": 5.0477904872234804e-08, "loss": 0.7409, "step": 14937 }, { "epoch": 0.96, "grad_norm": 1.8298271505455725, "learning_rate": 5.033109345220388e-08, "loss": 0.6956, "step": 14938 }, { "epoch": 0.96, "grad_norm": 1.8439067562172964, "learning_rate": 5.018449475854226e-08, "loss": 0.7777, "step": 14939 }, { "epoch": 0.96, "grad_norm": 1.241480787702866, "learning_rate": 5.003810879754933e-08, "loss": 0.6954, "step": 14940 }, { "epoch": 0.96, "grad_norm": 1.1399161094438248, "learning_rate": 4.9891935575517856e-08, "loss": 0.603, "step": 14941 }, { "epoch": 0.96, "grad_norm": 1.778172172261747, "learning_rate": 4.974597509872892e-08, "loss": 0.7143, "step": 14942 }, { "epoch": 0.96, "grad_norm": 1.5734050037225862, "learning_rate": 4.9600227373456936e-08, "loss": 0.6877, "step": 14943 }, { "epoch": 0.96, "grad_norm": 1.8350101656379394, "learning_rate": 4.945469240596412e-08, "loss": 0.7357, "step": 14944 }, { "epoch": 0.96, "grad_norm": 1.5225921427877145, "learning_rate": 4.930937020250604e-08, "loss": 0.7878, "step": 14945 }, { "epoch": 0.96, "grad_norm": 2.09300608370164, "learning_rate": 4.9164260769328785e-08, "loss": 0.7264, "step": 14946 }, { "epoch": 0.96, "grad_norm": 1.4602049659597929, "learning_rate": 4.90193641126685e-08, "loss": 0.7396, "step": 14947 }, { "epoch": 0.96, "grad_norm": 1.838488440244863, "learning_rate": 4.887468023875241e-08, "loss": 0.9903, "step": 14948 }, { "epoch": 0.96, "grad_norm": 1.9341953650624184, "learning_rate": 4.873020915379834e-08, "loss": 0.7155, "step": 14949 }, { "epoch": 0.96, "grad_norm": 1.5649001107927294, "learning_rate": 4.858595086401685e-08, "loss": 0.7666, "step": 14950 }, { "epoch": 0.96, "grad_norm": 1.6246363977178107, "learning_rate": 4.844190537560578e-08, "loss": 0.7326, "step": 14951 }, { "epoch": 0.96, "grad_norm": 1.4876880646611976, "learning_rate": 4.829807269475739e-08, "loss": 0.631, "step": 14952 }, { "epoch": 0.96, "grad_norm": 1.181086246266224, "learning_rate": 4.8154452827652854e-08, "loss": 0.6618, "step": 14953 }, { "epoch": 0.96, "grad_norm": 1.8420715780143297, "learning_rate": 4.8011045780465e-08, "loss": 0.7568, "step": 14954 }, { "epoch": 0.96, "grad_norm": 1.6867761789274154, "learning_rate": 4.7867851559356694e-08, "loss": 0.7217, "step": 14955 }, { "epoch": 0.96, "grad_norm": 1.6330955635018483, "learning_rate": 4.772487017048189e-08, "loss": 0.7555, "step": 14956 }, { "epoch": 0.96, "grad_norm": 1.6998324055838305, "learning_rate": 4.758210161998622e-08, "loss": 0.6968, "step": 14957 }, { "epoch": 0.96, "grad_norm": 1.9928624136376836, "learning_rate": 4.7439545914005345e-08, "loss": 0.7671, "step": 14958 }, { "epoch": 0.96, "grad_norm": 1.6616248611873903, "learning_rate": 4.729720305866603e-08, "loss": 0.6238, "step": 14959 }, { "epoch": 0.96, "grad_norm": 2.326267986281959, "learning_rate": 4.7155073060086156e-08, "loss": 0.7385, "step": 14960 }, { "epoch": 0.96, "grad_norm": 1.60337153217048, "learning_rate": 4.701315592437361e-08, "loss": 0.9094, "step": 14961 }, { "epoch": 0.96, "grad_norm": 1.662568053947079, "learning_rate": 4.687145165762797e-08, "loss": 0.6749, "step": 14962 }, { "epoch": 0.96, "grad_norm": 1.5449715344352053, "learning_rate": 4.6729960265939344e-08, "loss": 0.7361, "step": 14963 }, { "epoch": 0.96, "grad_norm": 1.6617923902599758, "learning_rate": 4.6588681755388445e-08, "loss": 0.8098, "step": 14964 }, { "epoch": 0.96, "grad_norm": 1.7608621120537589, "learning_rate": 4.644761613204818e-08, "loss": 0.8445, "step": 14965 }, { "epoch": 0.96, "grad_norm": 1.8219530188693165, "learning_rate": 4.630676340198037e-08, "loss": 0.669, "step": 14966 }, { "epoch": 0.96, "grad_norm": 1.7523838075139528, "learning_rate": 4.6166123571237955e-08, "loss": 0.8462, "step": 14967 }, { "epoch": 0.96, "grad_norm": 1.7971280323919674, "learning_rate": 4.6025696645866646e-08, "loss": 0.6306, "step": 14968 }, { "epoch": 0.96, "grad_norm": 2.676489486504024, "learning_rate": 4.588548263190107e-08, "loss": 0.6236, "step": 14969 }, { "epoch": 0.96, "grad_norm": 1.2334232755412184, "learning_rate": 4.574548153536806e-08, "loss": 0.6146, "step": 14970 }, { "epoch": 0.96, "grad_norm": 1.8226125166511669, "learning_rate": 4.560569336228338e-08, "loss": 0.7601, "step": 14971 }, { "epoch": 0.96, "grad_norm": 1.7075410521951, "learning_rate": 4.546611811865498e-08, "loss": 0.687, "step": 14972 }, { "epoch": 0.96, "grad_norm": 1.567934779158677, "learning_rate": 4.5326755810482514e-08, "loss": 0.6167, "step": 14973 }, { "epoch": 0.96, "grad_norm": 2.3920700123655356, "learning_rate": 4.518760644375508e-08, "loss": 0.8928, "step": 14974 }, { "epoch": 0.96, "grad_norm": 1.5826964168298963, "learning_rate": 4.504867002445179e-08, "loss": 0.7068, "step": 14975 }, { "epoch": 0.96, "grad_norm": 1.5192795307157831, "learning_rate": 4.4909946558545634e-08, "loss": 0.7167, "step": 14976 }, { "epoch": 0.96, "grad_norm": 1.684664704277639, "learning_rate": 4.477143605199796e-08, "loss": 0.7464, "step": 14977 }, { "epoch": 0.96, "grad_norm": 1.8419089176641246, "learning_rate": 4.463313851076123e-08, "loss": 0.7397, "step": 14978 }, { "epoch": 0.96, "grad_norm": 2.4204864067352836, "learning_rate": 4.449505394078013e-08, "loss": 0.725, "step": 14979 }, { "epoch": 0.96, "grad_norm": 1.7984483645459246, "learning_rate": 4.435718234798825e-08, "loss": 0.7368, "step": 14980 }, { "epoch": 0.96, "grad_norm": 1.767042652610999, "learning_rate": 4.4219523738311396e-08, "loss": 0.6672, "step": 14981 }, { "epoch": 0.96, "grad_norm": 1.2690647645366335, "learning_rate": 4.408207811766596e-08, "loss": 0.5573, "step": 14982 }, { "epoch": 0.96, "grad_norm": 1.8061190467133617, "learning_rate": 4.3944845491958874e-08, "loss": 0.5863, "step": 14983 }, { "epoch": 0.96, "grad_norm": 1.7010120420888466, "learning_rate": 4.3807825867088204e-08, "loss": 0.7356, "step": 14984 }, { "epoch": 0.96, "grad_norm": 2.4936543209511717, "learning_rate": 4.3671019248943126e-08, "loss": 0.7226, "step": 14985 }, { "epoch": 0.96, "grad_norm": 2.6284102221030006, "learning_rate": 4.353442564340282e-08, "loss": 0.6981, "step": 14986 }, { "epoch": 0.96, "grad_norm": 1.7839337408973137, "learning_rate": 4.3398045056337604e-08, "loss": 0.6327, "step": 14987 }, { "epoch": 0.96, "grad_norm": 1.9245027460126778, "learning_rate": 4.326187749360944e-08, "loss": 0.9022, "step": 14988 }, { "epoch": 0.96, "grad_norm": 1.6371405117556155, "learning_rate": 4.312592296106977e-08, "loss": 0.8049, "step": 14989 }, { "epoch": 0.96, "grad_norm": 2.234189006816427, "learning_rate": 4.2990181464562795e-08, "loss": 0.7881, "step": 14990 }, { "epoch": 0.96, "grad_norm": 2.1857937246042316, "learning_rate": 4.285465300992164e-08, "loss": 0.7059, "step": 14991 }, { "epoch": 0.96, "grad_norm": 1.6512230620880632, "learning_rate": 4.271933760297109e-08, "loss": 0.6437, "step": 14992 }, { "epoch": 0.96, "grad_norm": 1.6820004894622442, "learning_rate": 4.258423524952648e-08, "loss": 0.7312, "step": 14993 }, { "epoch": 0.96, "grad_norm": 1.8741690959712234, "learning_rate": 4.2449345955394295e-08, "loss": 0.6767, "step": 14994 }, { "epoch": 0.96, "grad_norm": 1.7138150758794397, "learning_rate": 4.231466972637211e-08, "loss": 0.6677, "step": 14995 }, { "epoch": 0.96, "grad_norm": 1.6782936479370238, "learning_rate": 4.2180206568248064e-08, "loss": 0.7234, "step": 14996 }, { "epoch": 0.96, "grad_norm": 1.6771572389159526, "learning_rate": 4.2045956486800877e-08, "loss": 0.6859, "step": 14997 }, { "epoch": 0.96, "grad_norm": 1.6424779089327561, "learning_rate": 4.191191948780038e-08, "loss": 0.6807, "step": 14998 }, { "epoch": 0.96, "grad_norm": 2.260233526636148, "learning_rate": 4.177809557700752e-08, "loss": 0.8582, "step": 14999 }, { "epoch": 0.96, "grad_norm": 1.0051838502919759, "learning_rate": 4.164448476017269e-08, "loss": 0.6426, "step": 15000 }, { "epoch": 0.96, "grad_norm": 1.198898130306916, "learning_rate": 4.1511087043039635e-08, "loss": 0.6092, "step": 15001 }, { "epoch": 0.96, "grad_norm": 2.469628971080507, "learning_rate": 4.1377902431340434e-08, "loss": 0.8078, "step": 15002 }, { "epoch": 0.96, "grad_norm": 1.5329652042296136, "learning_rate": 4.12449309307994e-08, "loss": 0.6447, "step": 15003 }, { "epoch": 0.96, "grad_norm": 1.191533498239994, "learning_rate": 4.111217254713196e-08, "loss": 0.6288, "step": 15004 }, { "epoch": 0.96, "grad_norm": 1.8115363318965114, "learning_rate": 4.097962728604299e-08, "loss": 0.8072, "step": 15005 }, { "epoch": 0.96, "grad_norm": 1.2294267587152443, "learning_rate": 4.0847295153229603e-08, "loss": 0.6843, "step": 15006 }, { "epoch": 0.96, "grad_norm": 1.5204187186301166, "learning_rate": 4.0715176154378366e-08, "loss": 0.9718, "step": 15007 }, { "epoch": 0.96, "grad_norm": 2.0303474342580277, "learning_rate": 4.0583270295168066e-08, "loss": 0.8262, "step": 15008 }, { "epoch": 0.96, "grad_norm": 1.7264432108868575, "learning_rate": 4.04515775812675e-08, "loss": 0.707, "step": 15009 }, { "epoch": 0.96, "grad_norm": 1.9037306178108966, "learning_rate": 4.0320098018337136e-08, "loss": 0.7531, "step": 15010 }, { "epoch": 0.96, "grad_norm": 2.009631321244041, "learning_rate": 4.018883161202691e-08, "loss": 0.5565, "step": 15011 }, { "epoch": 0.96, "grad_norm": 1.70561252944978, "learning_rate": 4.0057778367978974e-08, "loss": 0.8053, "step": 15012 }, { "epoch": 0.96, "grad_norm": 1.6095590083868014, "learning_rate": 3.992693829182548e-08, "loss": 0.7059, "step": 15013 }, { "epoch": 0.96, "grad_norm": 1.8280838493479101, "learning_rate": 3.979631138918916e-08, "loss": 0.8115, "step": 15014 }, { "epoch": 0.96, "grad_norm": 2.506298486391351, "learning_rate": 3.966589766568496e-08, "loss": 0.6808, "step": 15015 }, { "epoch": 0.96, "grad_norm": 1.1387026038830097, "learning_rate": 3.9535697126917296e-08, "loss": 0.6747, "step": 15016 }, { "epoch": 0.96, "grad_norm": 1.8582531750638516, "learning_rate": 3.940570977848168e-08, "loss": 0.7111, "step": 15017 }, { "epoch": 0.96, "grad_norm": 1.6242966596455741, "learning_rate": 3.9275935625965325e-08, "loss": 0.6601, "step": 15018 }, { "epoch": 0.96, "grad_norm": 1.8490307913367299, "learning_rate": 3.914637467494542e-08, "loss": 0.6283, "step": 15019 }, { "epoch": 0.96, "grad_norm": 2.160811794489983, "learning_rate": 3.90170269309903e-08, "loss": 0.6405, "step": 15020 }, { "epoch": 0.96, "grad_norm": 1.6024775458901848, "learning_rate": 3.888789239965885e-08, "loss": 0.6817, "step": 15021 }, { "epoch": 0.96, "grad_norm": 1.6220725698540608, "learning_rate": 3.875897108650051e-08, "loss": 0.6768, "step": 15022 }, { "epoch": 0.96, "grad_norm": 1.5109403758937534, "learning_rate": 3.863026299705697e-08, "loss": 0.6062, "step": 15023 }, { "epoch": 0.96, "grad_norm": 1.1668977526180653, "learning_rate": 3.850176813685935e-08, "loss": 0.6726, "step": 15024 }, { "epoch": 0.96, "grad_norm": 3.041282748667722, "learning_rate": 3.8373486511429916e-08, "loss": 0.84, "step": 15025 }, { "epoch": 0.96, "grad_norm": 2.230178955628758, "learning_rate": 3.824541812628258e-08, "loss": 0.7089, "step": 15026 }, { "epoch": 0.96, "grad_norm": 1.7925660424404832, "learning_rate": 3.811756298692126e-08, "loss": 0.6987, "step": 15027 }, { "epoch": 0.96, "grad_norm": 1.6760427448633022, "learning_rate": 3.7989921098840476e-08, "loss": 0.6564, "step": 15028 }, { "epoch": 0.96, "grad_norm": 1.724909848323695, "learning_rate": 3.7862492467526376e-08, "loss": 0.734, "step": 15029 }, { "epoch": 0.96, "grad_norm": 1.5764236368782578, "learning_rate": 3.77352770984557e-08, "loss": 0.7797, "step": 15030 }, { "epoch": 0.96, "grad_norm": 1.7482114551833046, "learning_rate": 3.7608274997095187e-08, "loss": 0.7427, "step": 15031 }, { "epoch": 0.96, "grad_norm": 1.8718214428792508, "learning_rate": 3.748148616890379e-08, "loss": 0.7266, "step": 15032 }, { "epoch": 0.96, "grad_norm": 1.7448034080257275, "learning_rate": 3.735491061932994e-08, "loss": 0.8124, "step": 15033 }, { "epoch": 0.96, "grad_norm": 1.711054229677796, "learning_rate": 3.7228548353814844e-08, "loss": 0.6977, "step": 15034 }, { "epoch": 0.96, "grad_norm": 1.718131111879254, "learning_rate": 3.710239937778803e-08, "loss": 0.6396, "step": 15035 }, { "epoch": 0.96, "grad_norm": 1.9736385801825878, "learning_rate": 3.697646369667185e-08, "loss": 0.7549, "step": 15036 }, { "epoch": 0.96, "grad_norm": 1.8604019505026785, "learning_rate": 3.685074131587863e-08, "loss": 0.766, "step": 15037 }, { "epoch": 0.96, "grad_norm": 1.5955986546170533, "learning_rate": 3.672523224081126e-08, "loss": 0.62, "step": 15038 }, { "epoch": 0.96, "grad_norm": 1.8781338813224433, "learning_rate": 3.6599936476864325e-08, "loss": 0.681, "step": 15039 }, { "epoch": 0.96, "grad_norm": 1.7276471986383042, "learning_rate": 3.647485402942241e-08, "loss": 0.759, "step": 15040 }, { "epoch": 0.96, "grad_norm": 1.8892336853375913, "learning_rate": 3.6349984903861214e-08, "loss": 0.6936, "step": 15041 }, { "epoch": 0.96, "grad_norm": 1.4640548972463947, "learning_rate": 3.6225329105548105e-08, "loss": 0.7214, "step": 15042 }, { "epoch": 0.96, "grad_norm": 1.9535051105101033, "learning_rate": 3.6100886639839904e-08, "loss": 0.7665, "step": 15043 }, { "epoch": 0.96, "grad_norm": 1.7793309892916314, "learning_rate": 3.597665751208512e-08, "loss": 0.5944, "step": 15044 }, { "epoch": 0.96, "grad_norm": 1.672337646159415, "learning_rate": 3.5852641727622264e-08, "loss": 0.7461, "step": 15045 }, { "epoch": 0.96, "grad_norm": 1.696938909903683, "learning_rate": 3.5728839291782614e-08, "loss": 0.7216, "step": 15046 }, { "epoch": 0.96, "grad_norm": 1.7359035704097212, "learning_rate": 3.5605250209885256e-08, "loss": 0.6827, "step": 15047 }, { "epoch": 0.96, "grad_norm": 1.7381183500216868, "learning_rate": 3.548187448724316e-08, "loss": 0.7219, "step": 15048 }, { "epoch": 0.96, "grad_norm": 1.858514937473993, "learning_rate": 3.535871212915765e-08, "loss": 0.7298, "step": 15049 }, { "epoch": 0.96, "grad_norm": 1.8125407441468422, "learning_rate": 3.523576314092281e-08, "loss": 0.7454, "step": 15050 }, { "epoch": 0.96, "grad_norm": 1.184894710548509, "learning_rate": 3.51130275278222e-08, "loss": 0.6927, "step": 15051 }, { "epoch": 0.96, "grad_norm": 1.7369816961866196, "learning_rate": 3.4990505295131594e-08, "loss": 0.7522, "step": 15052 }, { "epoch": 0.96, "grad_norm": 1.0871821199299088, "learning_rate": 3.4868196448115675e-08, "loss": 0.6816, "step": 15053 }, { "epoch": 0.96, "grad_norm": 1.8368609745857853, "learning_rate": 3.4746100992031354e-08, "loss": 0.6297, "step": 15054 }, { "epoch": 0.96, "grad_norm": 1.6995198342388211, "learning_rate": 3.46242189321272e-08, "loss": 0.7727, "step": 15055 }, { "epoch": 0.96, "grad_norm": 1.7056720440725215, "learning_rate": 3.4502550273639026e-08, "loss": 0.6609, "step": 15056 }, { "epoch": 0.96, "grad_norm": 1.6212266463050244, "learning_rate": 3.4381095021798203e-08, "loss": 0.7787, "step": 15057 }, { "epoch": 0.96, "grad_norm": 1.6510654770852102, "learning_rate": 3.4259853181823345e-08, "loss": 0.7486, "step": 15058 }, { "epoch": 0.96, "grad_norm": 1.7806514589904376, "learning_rate": 3.4138824758925826e-08, "loss": 0.7142, "step": 15059 }, { "epoch": 0.96, "grad_norm": 1.8385226440452611, "learning_rate": 3.401800975830705e-08, "loss": 0.6898, "step": 15060 }, { "epoch": 0.96, "grad_norm": 1.705249756400537, "learning_rate": 3.389740818515841e-08, "loss": 0.7144, "step": 15061 }, { "epoch": 0.96, "grad_norm": 1.9560054644979656, "learning_rate": 3.3777020044664655e-08, "loss": 0.7141, "step": 15062 }, { "epoch": 0.96, "grad_norm": 1.5014904000439866, "learning_rate": 3.3656845341998865e-08, "loss": 0.5484, "step": 15063 }, { "epoch": 0.96, "grad_norm": 1.9341975695283458, "learning_rate": 3.353688408232636e-08, "loss": 0.7418, "step": 15064 }, { "epoch": 0.96, "grad_norm": 1.7990315777329993, "learning_rate": 3.341713627080245e-08, "loss": 0.689, "step": 15065 }, { "epoch": 0.96, "grad_norm": 1.5329704070391874, "learning_rate": 3.3297601912573584e-08, "loss": 0.6008, "step": 15066 }, { "epoch": 0.96, "grad_norm": 1.6499019074537986, "learning_rate": 3.317828101277787e-08, "loss": 0.6652, "step": 15067 }, { "epoch": 0.96, "grad_norm": 1.5810134761359262, "learning_rate": 3.305917357654232e-08, "loss": 0.7009, "step": 15068 }, { "epoch": 0.96, "grad_norm": 1.1982030394471135, "learning_rate": 3.2940279608986714e-08, "loss": 0.6876, "step": 15069 }, { "epoch": 0.96, "grad_norm": 1.818184807356726, "learning_rate": 3.282159911522087e-08, "loss": 0.7644, "step": 15070 }, { "epoch": 0.96, "grad_norm": 2.209178779337864, "learning_rate": 3.270313210034515e-08, "loss": 0.852, "step": 15071 }, { "epoch": 0.96, "grad_norm": 1.8842330486672136, "learning_rate": 3.2584878569450474e-08, "loss": 0.7879, "step": 15072 }, { "epoch": 0.96, "grad_norm": 1.8291176497392647, "learning_rate": 3.246683852762056e-08, "loss": 0.7817, "step": 15073 }, { "epoch": 0.96, "grad_norm": 1.51271369650722, "learning_rate": 3.234901197992746e-08, "loss": 0.7081, "step": 15074 }, { "epoch": 0.96, "grad_norm": 1.8945486084055148, "learning_rate": 3.22313989314349e-08, "loss": 0.7896, "step": 15075 }, { "epoch": 0.96, "grad_norm": 1.6787965850646578, "learning_rate": 3.211399938719883e-08, "loss": 0.747, "step": 15076 }, { "epoch": 0.97, "grad_norm": 1.7970382466967598, "learning_rate": 3.199681335226357e-08, "loss": 0.7202, "step": 15077 }, { "epoch": 0.97, "grad_norm": 1.8935930524681366, "learning_rate": 3.1879840831666175e-08, "loss": 0.6765, "step": 15078 }, { "epoch": 0.97, "grad_norm": 1.6757447920079065, "learning_rate": 3.176308183043375e-08, "loss": 0.7374, "step": 15079 }, { "epoch": 0.97, "grad_norm": 1.6211505053688653, "learning_rate": 3.1646536353584503e-08, "loss": 0.72, "step": 15080 }, { "epoch": 0.97, "grad_norm": 2.1458582535694943, "learning_rate": 3.1530204406127196e-08, "loss": 0.7484, "step": 15081 }, { "epoch": 0.97, "grad_norm": 1.0446426218817186, "learning_rate": 3.141408599306117e-08, "loss": 0.6237, "step": 15082 }, { "epoch": 0.97, "grad_norm": 1.764220502596686, "learning_rate": 3.129818111937744e-08, "loss": 0.7734, "step": 15083 }, { "epoch": 0.97, "grad_norm": 1.5129790980125144, "learning_rate": 3.1182489790057555e-08, "loss": 0.6196, "step": 15084 }, { "epoch": 0.97, "grad_norm": 1.8235332168387133, "learning_rate": 3.1067012010073114e-08, "loss": 0.6294, "step": 15085 }, { "epoch": 0.97, "grad_norm": 1.7170875801553462, "learning_rate": 3.0951747784387363e-08, "loss": 0.6412, "step": 15086 }, { "epoch": 0.97, "grad_norm": 1.6434362764585804, "learning_rate": 3.0836697117954115e-08, "loss": 0.6736, "step": 15087 }, { "epoch": 0.97, "grad_norm": 1.7059355151562765, "learning_rate": 3.072186001571775e-08, "loss": 0.7483, "step": 15088 }, { "epoch": 0.97, "grad_norm": 1.859771579908006, "learning_rate": 3.0607236482613764e-08, "loss": 0.7472, "step": 15089 }, { "epoch": 0.97, "grad_norm": 1.5096769966445978, "learning_rate": 3.049282652356878e-08, "loss": 0.7335, "step": 15090 }, { "epoch": 0.97, "grad_norm": 1.636788809068009, "learning_rate": 3.037863014349995e-08, "loss": 0.8669, "step": 15091 }, { "epoch": 0.97, "grad_norm": 1.5392019091732265, "learning_rate": 3.0264647347315044e-08, "loss": 0.727, "step": 15092 }, { "epoch": 0.97, "grad_norm": 1.6305489715254697, "learning_rate": 3.0150878139912906e-08, "loss": 0.7417, "step": 15093 }, { "epoch": 0.97, "grad_norm": 1.880102491382143, "learning_rate": 3.003732252618241e-08, "loss": 0.6232, "step": 15094 }, { "epoch": 0.97, "grad_norm": 1.8546590096038034, "learning_rate": 2.9923980511004645e-08, "loss": 0.6733, "step": 15095 }, { "epoch": 0.97, "grad_norm": 1.778391286866921, "learning_rate": 2.981085209925072e-08, "loss": 0.6628, "step": 15096 }, { "epoch": 0.97, "grad_norm": 1.7695507425715185, "learning_rate": 2.96979372957823e-08, "loss": 0.6696, "step": 15097 }, { "epoch": 0.97, "grad_norm": 1.9935608549804118, "learning_rate": 2.958523610545272e-08, "loss": 0.6044, "step": 15098 }, { "epoch": 0.97, "grad_norm": 1.8131177423433251, "learning_rate": 2.947274853310589e-08, "loss": 0.8967, "step": 15099 }, { "epoch": 0.97, "grad_norm": 1.8064270725401685, "learning_rate": 2.936047458357516e-08, "loss": 0.7005, "step": 15100 }, { "epoch": 0.97, "grad_norm": 1.6849236684629993, "learning_rate": 2.9248414261686674e-08, "loss": 0.7408, "step": 15101 }, { "epoch": 0.97, "grad_norm": 1.2268498001453325, "learning_rate": 2.9136567572256024e-08, "loss": 0.6142, "step": 15102 }, { "epoch": 0.97, "grad_norm": 1.067230528838717, "learning_rate": 2.902493452009103e-08, "loss": 0.6314, "step": 15103 }, { "epoch": 0.97, "grad_norm": 2.182876125383885, "learning_rate": 2.8913515109988966e-08, "loss": 0.775, "step": 15104 }, { "epoch": 0.97, "grad_norm": 1.5678782701105614, "learning_rate": 2.8802309346737666e-08, "loss": 0.7126, "step": 15105 }, { "epoch": 0.97, "grad_norm": 1.61321027232828, "learning_rate": 2.86913172351172e-08, "loss": 0.6415, "step": 15106 }, { "epoch": 0.97, "grad_norm": 1.6735571906820068, "learning_rate": 2.8580538779898192e-08, "loss": 0.6808, "step": 15107 }, { "epoch": 0.97, "grad_norm": 1.7802105433324966, "learning_rate": 2.8469973985841283e-08, "loss": 0.7644, "step": 15108 }, { "epoch": 0.97, "grad_norm": 1.6080326774705682, "learning_rate": 2.8359622857698223e-08, "loss": 0.7228, "step": 15109 }, { "epoch": 0.97, "grad_norm": 1.525682022522553, "learning_rate": 2.824948540021133e-08, "loss": 0.748, "step": 15110 }, { "epoch": 0.97, "grad_norm": 1.7998984602226613, "learning_rate": 2.813956161811515e-08, "loss": 0.7013, "step": 15111 }, { "epoch": 0.97, "grad_norm": 1.9567101895659855, "learning_rate": 2.8029851516132577e-08, "loss": 0.8007, "step": 15112 }, { "epoch": 0.97, "grad_norm": 1.5006377951471155, "learning_rate": 2.7920355098979835e-08, "loss": 0.6275, "step": 15113 }, { "epoch": 0.97, "grad_norm": 1.673482738803647, "learning_rate": 2.7811072371362048e-08, "loss": 0.6055, "step": 15114 }, { "epoch": 0.97, "grad_norm": 2.2002198254073155, "learning_rate": 2.7702003337977124e-08, "loss": 0.6492, "step": 15115 }, { "epoch": 0.97, "grad_norm": 1.483839913221215, "learning_rate": 2.7593148003511317e-08, "loss": 0.7564, "step": 15116 }, { "epoch": 0.97, "grad_norm": 1.6213273364884389, "learning_rate": 2.748450637264366e-08, "loss": 0.676, "step": 15117 }, { "epoch": 0.97, "grad_norm": 1.6518699320464874, "learning_rate": 2.7376078450043198e-08, "loss": 0.7168, "step": 15118 }, { "epoch": 0.97, "grad_norm": 1.7980814198393151, "learning_rate": 2.7267864240369533e-08, "loss": 0.6506, "step": 15119 }, { "epoch": 0.97, "grad_norm": 1.5936126421775034, "learning_rate": 2.7159863748274506e-08, "loss": 0.6663, "step": 15120 }, { "epoch": 0.97, "grad_norm": 1.5695026543721824, "learning_rate": 2.7052076978398844e-08, "loss": 0.641, "step": 15121 }, { "epoch": 0.97, "grad_norm": 1.338635470825811, "learning_rate": 2.694450393537551e-08, "loss": 0.605, "step": 15122 }, { "epoch": 0.97, "grad_norm": 1.702246158862914, "learning_rate": 2.683714462382747e-08, "loss": 0.6948, "step": 15123 }, { "epoch": 0.97, "grad_norm": 1.74608157928587, "learning_rate": 2.672999904836937e-08, "loss": 0.7098, "step": 15124 }, { "epoch": 0.97, "grad_norm": 1.7266608499854041, "learning_rate": 2.6623067213605302e-08, "loss": 0.6773, "step": 15125 }, { "epoch": 0.97, "grad_norm": 1.2309605586587768, "learning_rate": 2.6516349124131037e-08, "loss": 0.6339, "step": 15126 }, { "epoch": 0.97, "grad_norm": 1.7544069578294008, "learning_rate": 2.6409844784533456e-08, "loss": 0.7587, "step": 15127 }, { "epoch": 0.97, "grad_norm": 1.4690081069190768, "learning_rate": 2.6303554199390013e-08, "loss": 0.6698, "step": 15128 }, { "epoch": 0.97, "grad_norm": 1.8068374841568482, "learning_rate": 2.6197477373268722e-08, "loss": 0.7925, "step": 15129 }, { "epoch": 0.97, "grad_norm": 1.0121256144602881, "learning_rate": 2.6091614310727597e-08, "loss": 0.6109, "step": 15130 }, { "epoch": 0.97, "grad_norm": 1.4973403653270303, "learning_rate": 2.5985965016318004e-08, "loss": 0.6082, "step": 15131 }, { "epoch": 0.97, "grad_norm": 2.173597849707601, "learning_rate": 2.5880529494579643e-08, "loss": 0.7193, "step": 15132 }, { "epoch": 0.97, "grad_norm": 1.5377117301710288, "learning_rate": 2.577530775004389e-08, "loss": 0.6943, "step": 15133 }, { "epoch": 0.97, "grad_norm": 1.8266154309867129, "learning_rate": 2.5670299787233788e-08, "loss": 0.8004, "step": 15134 }, { "epoch": 0.97, "grad_norm": 1.7039679716125145, "learning_rate": 2.5565505610660734e-08, "loss": 0.7421, "step": 15135 }, { "epoch": 0.97, "grad_norm": 2.0444383677467197, "learning_rate": 2.5460925224830567e-08, "loss": 0.5714, "step": 15136 }, { "epoch": 0.97, "grad_norm": 1.7842600367605528, "learning_rate": 2.5356558634235806e-08, "loss": 0.7102, "step": 15137 }, { "epoch": 0.97, "grad_norm": 1.7893009747305058, "learning_rate": 2.525240584336397e-08, "loss": 0.7122, "step": 15138 }, { "epoch": 0.97, "grad_norm": 3.1753940538749967, "learning_rate": 2.5148466856689812e-08, "loss": 0.6293, "step": 15139 }, { "epoch": 0.97, "grad_norm": 1.7775519944101625, "learning_rate": 2.504474167868087e-08, "loss": 0.6788, "step": 15140 }, { "epoch": 0.97, "grad_norm": 1.8500514511613, "learning_rate": 2.4941230313795252e-08, "loss": 0.7749, "step": 15141 }, { "epoch": 0.97, "grad_norm": 1.5067089103460285, "learning_rate": 2.4837932766481608e-08, "loss": 0.8234, "step": 15142 }, { "epoch": 0.97, "grad_norm": 1.6213496297168088, "learning_rate": 2.4734849041179176e-08, "loss": 0.9611, "step": 15143 }, { "epoch": 0.97, "grad_norm": 1.6762582051356838, "learning_rate": 2.463197914231885e-08, "loss": 0.7483, "step": 15144 }, { "epoch": 0.97, "grad_norm": 1.0127224863912083, "learning_rate": 2.4529323074320988e-08, "loss": 0.6103, "step": 15145 }, { "epoch": 0.97, "grad_norm": 1.496539865420543, "learning_rate": 2.4426880841598165e-08, "loss": 0.6799, "step": 15146 }, { "epoch": 0.97, "grad_norm": 1.8959729588273952, "learning_rate": 2.432465244855242e-08, "loss": 0.7317, "step": 15147 }, { "epoch": 0.97, "grad_norm": 1.6584020500335317, "learning_rate": 2.4222637899578015e-08, "loss": 0.6961, "step": 15148 }, { "epoch": 0.97, "grad_norm": 1.2375715867943713, "learning_rate": 2.412083719905922e-08, "loss": 0.7014, "step": 15149 }, { "epoch": 0.97, "grad_norm": 1.7965024021901532, "learning_rate": 2.4019250351371427e-08, "loss": 0.8685, "step": 15150 }, { "epoch": 0.97, "grad_norm": 1.922263386146058, "learning_rate": 2.3917877360879472e-08, "loss": 0.7991, "step": 15151 }, { "epoch": 0.97, "grad_norm": 1.6605166252637729, "learning_rate": 2.3816718231941537e-08, "loss": 0.716, "step": 15152 }, { "epoch": 0.97, "grad_norm": 2.0688149166038206, "learning_rate": 2.371577296890415e-08, "loss": 1.0426, "step": 15153 }, { "epoch": 0.97, "grad_norm": 1.9345878473160152, "learning_rate": 2.3615041576106613e-08, "loss": 0.8024, "step": 15154 }, { "epoch": 0.97, "grad_norm": 1.8361995391164658, "learning_rate": 2.3514524057877685e-08, "loss": 0.671, "step": 15155 }, { "epoch": 0.97, "grad_norm": 1.6291665421603017, "learning_rate": 2.3414220418537804e-08, "loss": 0.6943, "step": 15156 }, { "epoch": 0.97, "grad_norm": 1.5971518593934162, "learning_rate": 2.331413066239685e-08, "loss": 0.7677, "step": 15157 }, { "epoch": 0.97, "grad_norm": 1.7082907440079416, "learning_rate": 2.3214254793757497e-08, "loss": 0.6899, "step": 15158 }, { "epoch": 0.97, "grad_norm": 1.1465949710118426, "learning_rate": 2.311459281691186e-08, "loss": 0.5644, "step": 15159 }, { "epoch": 0.97, "grad_norm": 1.444624431849572, "learning_rate": 2.301514473614319e-08, "loss": 0.6288, "step": 15160 }, { "epoch": 0.97, "grad_norm": 1.8767062226819027, "learning_rate": 2.2915910555725286e-08, "loss": 0.7307, "step": 15161 }, { "epoch": 0.97, "grad_norm": 1.1227211167618816, "learning_rate": 2.2816890279923064e-08, "loss": 0.6283, "step": 15162 }, { "epoch": 0.97, "grad_norm": 1.8290637924760502, "learning_rate": 2.2718083912992573e-08, "loss": 0.6729, "step": 15163 }, { "epoch": 0.97, "grad_norm": 1.8425910300259594, "learning_rate": 2.261949145918041e-08, "loss": 0.773, "step": 15164 }, { "epoch": 0.97, "grad_norm": 1.6594639026990363, "learning_rate": 2.2521112922723186e-08, "loss": 0.6918, "step": 15165 }, { "epoch": 0.97, "grad_norm": 2.0345298295871803, "learning_rate": 2.2422948307849746e-08, "loss": 0.7795, "step": 15166 }, { "epoch": 0.97, "grad_norm": 1.5438950720037787, "learning_rate": 2.2324997618778375e-08, "loss": 0.6081, "step": 15167 }, { "epoch": 0.97, "grad_norm": 1.5473664270178928, "learning_rate": 2.2227260859719047e-08, "loss": 0.7481, "step": 15168 }, { "epoch": 0.97, "grad_norm": 1.8841195369066777, "learning_rate": 2.212973803487284e-08, "loss": 0.7105, "step": 15169 }, { "epoch": 0.97, "grad_norm": 1.9294042841423669, "learning_rate": 2.2032429148429735e-08, "loss": 0.7494, "step": 15170 }, { "epoch": 0.97, "grad_norm": 1.7308300325317354, "learning_rate": 2.1935334204573056e-08, "loss": 0.5961, "step": 15171 }, { "epoch": 0.97, "grad_norm": 1.3365776969896406, "learning_rate": 2.1838453207475574e-08, "loss": 0.7256, "step": 15172 }, { "epoch": 0.97, "grad_norm": 2.8781568515122107, "learning_rate": 2.1741786161300628e-08, "loss": 0.7904, "step": 15173 }, { "epoch": 0.97, "grad_norm": 1.6800352006502703, "learning_rate": 2.1645333070203222e-08, "loss": 0.7177, "step": 15174 }, { "epoch": 0.97, "grad_norm": 1.8588555004136118, "learning_rate": 2.1549093938327826e-08, "loss": 0.7235, "step": 15175 }, { "epoch": 0.97, "grad_norm": 1.1714448327767135, "learning_rate": 2.145306876981168e-08, "loss": 0.6013, "step": 15176 }, { "epoch": 0.97, "grad_norm": 1.6553488794854154, "learning_rate": 2.135725756878093e-08, "loss": 0.6633, "step": 15177 }, { "epoch": 0.97, "grad_norm": 2.0636602266796547, "learning_rate": 2.1261660339354505e-08, "loss": 0.8013, "step": 15178 }, { "epoch": 0.97, "grad_norm": 1.5877240250192377, "learning_rate": 2.116627708563912e-08, "loss": 0.6429, "step": 15179 }, { "epoch": 0.97, "grad_norm": 1.740823501813251, "learning_rate": 2.1071107811735382e-08, "loss": 0.6497, "step": 15180 }, { "epoch": 0.97, "grad_norm": 1.6556268456833863, "learning_rate": 2.0976152521733905e-08, "loss": 0.6949, "step": 15181 }, { "epoch": 0.97, "grad_norm": 2.2329227433924097, "learning_rate": 2.088141121971421e-08, "loss": 0.6282, "step": 15182 }, { "epoch": 0.97, "grad_norm": 1.1805293403406467, "learning_rate": 2.07868839097497e-08, "loss": 0.6481, "step": 15183 }, { "epoch": 0.97, "grad_norm": 1.7623375801489192, "learning_rate": 2.0692570595901572e-08, "loss": 0.6789, "step": 15184 }, { "epoch": 0.97, "grad_norm": 1.713769526254557, "learning_rate": 2.059847128222381e-08, "loss": 0.8012, "step": 15185 }, { "epoch": 0.97, "grad_norm": 2.593914816770266, "learning_rate": 2.0504585972760394e-08, "loss": 0.7116, "step": 15186 }, { "epoch": 0.97, "grad_norm": 1.7953203072793587, "learning_rate": 2.041091467154699e-08, "loss": 0.6263, "step": 15187 }, { "epoch": 0.97, "grad_norm": 1.7784369970715233, "learning_rate": 2.0317457382608706e-08, "loss": 0.6859, "step": 15188 }, { "epoch": 0.97, "grad_norm": 1.6126203582559777, "learning_rate": 2.022421410996234e-08, "loss": 0.7506, "step": 15189 }, { "epoch": 0.97, "grad_norm": 1.5009535348351408, "learning_rate": 2.0131184857615783e-08, "loss": 0.7811, "step": 15190 }, { "epoch": 0.97, "grad_norm": 1.6453467044319476, "learning_rate": 2.0038369629565846e-08, "loss": 0.6475, "step": 15191 }, { "epoch": 0.97, "grad_norm": 1.749714211738048, "learning_rate": 1.9945768429803226e-08, "loss": 0.7309, "step": 15192 }, { "epoch": 0.97, "grad_norm": 1.5739127589347401, "learning_rate": 1.9853381262306405e-08, "loss": 0.741, "step": 15193 }, { "epoch": 0.97, "grad_norm": 1.5729089136506285, "learning_rate": 1.976120813104665e-08, "loss": 0.6932, "step": 15194 }, { "epoch": 0.97, "grad_norm": 1.3112917431451998, "learning_rate": 1.9669249039985794e-08, "loss": 0.676, "step": 15195 }, { "epoch": 0.97, "grad_norm": 1.6186273309496761, "learning_rate": 1.957750399307512e-08, "loss": 0.7468, "step": 15196 }, { "epoch": 0.97, "grad_norm": 1.1393987607067464, "learning_rate": 1.9485972994257584e-08, "loss": 0.7604, "step": 15197 }, { "epoch": 0.97, "grad_norm": 1.8319465134925383, "learning_rate": 1.9394656047467818e-08, "loss": 0.7065, "step": 15198 }, { "epoch": 0.97, "grad_norm": 1.8168738745940562, "learning_rate": 1.9303553156630462e-08, "loss": 0.8024, "step": 15199 }, { "epoch": 0.97, "grad_norm": 1.6218856858548092, "learning_rate": 1.92126643256596e-08, "loss": 0.6367, "step": 15200 }, { "epoch": 0.97, "grad_norm": 2.005986864858325, "learning_rate": 1.912198955846323e-08, "loss": 0.7888, "step": 15201 }, { "epoch": 0.97, "grad_norm": 0.9465605418513975, "learning_rate": 1.9031528858936556e-08, "loss": 0.6347, "step": 15202 }, { "epoch": 0.97, "grad_norm": 2.094066809060482, "learning_rate": 1.894128223096925e-08, "loss": 0.8426, "step": 15203 }, { "epoch": 0.97, "grad_norm": 1.6827188162764057, "learning_rate": 1.885124967843821e-08, "loss": 0.6766, "step": 15204 }, { "epoch": 0.97, "grad_norm": 0.9838929329481855, "learning_rate": 1.8761431205214232e-08, "loss": 0.6971, "step": 15205 }, { "epoch": 0.97, "grad_norm": 1.1877999873644998, "learning_rate": 1.8671826815156448e-08, "loss": 0.642, "step": 15206 }, { "epoch": 0.97, "grad_norm": 1.9149604072449737, "learning_rate": 1.8582436512116776e-08, "loss": 0.7661, "step": 15207 }, { "epoch": 0.97, "grad_norm": 1.2992752539941754, "learning_rate": 1.849326029993659e-08, "loss": 0.6194, "step": 15208 }, { "epoch": 0.97, "grad_norm": 1.2888448313436942, "learning_rate": 1.8404298182447823e-08, "loss": 0.6371, "step": 15209 }, { "epoch": 0.97, "grad_norm": 2.1732032034792588, "learning_rate": 1.8315550163475194e-08, "loss": 0.7343, "step": 15210 }, { "epoch": 0.97, "grad_norm": 0.9816528513885948, "learning_rate": 1.8227016246831764e-08, "loss": 0.6376, "step": 15211 }, { "epoch": 0.97, "grad_norm": 1.5976017002417833, "learning_rate": 1.8138696436323377e-08, "loss": 0.6844, "step": 15212 }, { "epoch": 0.97, "grad_norm": 1.4712318628578713, "learning_rate": 1.8050590735745334e-08, "loss": 0.6464, "step": 15213 }, { "epoch": 0.97, "grad_norm": 1.7089950352225247, "learning_rate": 1.79626991488846e-08, "loss": 0.9501, "step": 15214 }, { "epoch": 0.97, "grad_norm": 1.5628302288335256, "learning_rate": 1.7875021679518156e-08, "loss": 0.7289, "step": 15215 }, { "epoch": 0.97, "grad_norm": 1.4545677187974866, "learning_rate": 1.7787558331414655e-08, "loss": 0.7072, "step": 15216 }, { "epoch": 0.97, "grad_norm": 1.774858547724596, "learning_rate": 1.7700309108332204e-08, "loss": 0.7971, "step": 15217 }, { "epoch": 0.97, "grad_norm": 1.4356812143727524, "learning_rate": 1.7613274014021686e-08, "loss": 0.6479, "step": 15218 }, { "epoch": 0.97, "grad_norm": 2.2512401510464564, "learning_rate": 1.7526453052223446e-08, "loss": 0.7012, "step": 15219 }, { "epoch": 0.97, "grad_norm": 1.177831642482749, "learning_rate": 1.743984622666839e-08, "loss": 0.6569, "step": 15220 }, { "epoch": 0.97, "grad_norm": 1.2050033031335907, "learning_rate": 1.7353453541078534e-08, "loss": 0.6413, "step": 15221 }, { "epoch": 0.97, "grad_norm": 1.7349403462287192, "learning_rate": 1.7267274999168138e-08, "loss": 0.7476, "step": 15222 }, { "epoch": 0.97, "grad_norm": 1.3141822814601318, "learning_rate": 1.7181310604639236e-08, "loss": 0.6384, "step": 15223 }, { "epoch": 0.97, "grad_norm": 1.0693832155104814, "learning_rate": 1.7095560361187758e-08, "loss": 0.5918, "step": 15224 }, { "epoch": 0.97, "grad_norm": 1.823569307638152, "learning_rate": 1.701002427249854e-08, "loss": 0.7202, "step": 15225 }, { "epoch": 0.97, "grad_norm": 1.6601722927557534, "learning_rate": 1.6924702342247522e-08, "loss": 0.6877, "step": 15226 }, { "epoch": 0.97, "grad_norm": 0.9938100224580475, "learning_rate": 1.683959457410178e-08, "loss": 0.6014, "step": 15227 }, { "epoch": 0.97, "grad_norm": 1.7926508285227023, "learning_rate": 1.6754700971719496e-08, "loss": 0.7223, "step": 15228 }, { "epoch": 0.97, "grad_norm": 1.8886002476198498, "learning_rate": 1.667002153874886e-08, "loss": 0.7748, "step": 15229 }, { "epoch": 0.97, "grad_norm": 1.7044764820168716, "learning_rate": 1.658555627882974e-08, "loss": 0.6266, "step": 15230 }, { "epoch": 0.97, "grad_norm": 1.6426854739167283, "learning_rate": 1.65013051955909e-08, "loss": 0.6664, "step": 15231 }, { "epoch": 0.97, "grad_norm": 1.8499245763624874, "learning_rate": 1.6417268292655e-08, "loss": 0.7942, "step": 15232 }, { "epoch": 0.98, "grad_norm": 1.5902955865543322, "learning_rate": 1.6333445573632478e-08, "loss": 0.7503, "step": 15233 }, { "epoch": 0.98, "grad_norm": 1.549233372505196, "learning_rate": 1.624983704212657e-08, "loss": 0.6988, "step": 15234 }, { "epoch": 0.98, "grad_norm": 1.8062618654786533, "learning_rate": 1.6166442701730513e-08, "loss": 0.6624, "step": 15235 }, { "epoch": 0.98, "grad_norm": 1.7835209876135123, "learning_rate": 1.6083262556027545e-08, "loss": 0.7807, "step": 15236 }, { "epoch": 0.98, "grad_norm": 1.7188616355253983, "learning_rate": 1.6000296608594257e-08, "loss": 0.6432, "step": 15237 }, { "epoch": 0.98, "grad_norm": 1.7115389838394663, "learning_rate": 1.5917544862995016e-08, "loss": 0.8074, "step": 15238 }, { "epoch": 0.98, "grad_norm": 1.8170795563065862, "learning_rate": 1.5835007322786424e-08, "loss": 0.7377, "step": 15239 }, { "epoch": 0.98, "grad_norm": 1.1093104270770227, "learning_rate": 1.5752683991516195e-08, "loss": 0.6028, "step": 15240 }, { "epoch": 0.98, "grad_norm": 1.8179719413711932, "learning_rate": 1.567057487272261e-08, "loss": 0.7827, "step": 15241 }, { "epoch": 0.98, "grad_norm": 1.6421445093859097, "learning_rate": 1.5588679969933406e-08, "loss": 0.6858, "step": 15242 }, { "epoch": 0.98, "grad_norm": 1.771283724061197, "learning_rate": 1.5506999286669656e-08, "loss": 0.7005, "step": 15243 }, { "epoch": 0.98, "grad_norm": 2.074996948646249, "learning_rate": 1.5425532826441326e-08, "loss": 0.6981, "step": 15244 }, { "epoch": 0.98, "grad_norm": 1.7170956515587212, "learning_rate": 1.534428059274895e-08, "loss": 0.6375, "step": 15245 }, { "epoch": 0.98, "grad_norm": 1.1294241285507587, "learning_rate": 1.526324258908585e-08, "loss": 0.6637, "step": 15246 }, { "epoch": 0.98, "grad_norm": 1.6232920824858499, "learning_rate": 1.5182418818933676e-08, "loss": 0.5785, "step": 15247 }, { "epoch": 0.98, "grad_norm": 1.803646190627472, "learning_rate": 1.5101809285766877e-08, "loss": 0.7597, "step": 15248 }, { "epoch": 0.98, "grad_norm": 1.1508870554387856, "learning_rate": 1.502141399304935e-08, "loss": 0.6804, "step": 15249 }, { "epoch": 0.98, "grad_norm": 1.746815150900494, "learning_rate": 1.4941232944237217e-08, "loss": 0.7435, "step": 15250 }, { "epoch": 0.98, "grad_norm": 1.6992847160339053, "learning_rate": 1.4861266142775498e-08, "loss": 0.7339, "step": 15251 }, { "epoch": 0.98, "grad_norm": 1.8333081592087341, "learning_rate": 1.4781513592100893e-08, "loss": 0.7898, "step": 15252 }, { "epoch": 0.98, "grad_norm": 1.9116536033922873, "learning_rate": 1.4701975295641768e-08, "loss": 0.668, "step": 15253 }, { "epoch": 0.98, "grad_norm": 1.5541631845496786, "learning_rate": 1.46226512568165e-08, "loss": 0.7503, "step": 15254 }, { "epoch": 0.98, "grad_norm": 2.0761879864023314, "learning_rate": 1.4543541479033473e-08, "loss": 0.6782, "step": 15255 }, { "epoch": 0.98, "grad_norm": 1.7913852769283682, "learning_rate": 1.4464645965693303e-08, "loss": 0.7291, "step": 15256 }, { "epoch": 0.98, "grad_norm": 1.8883658482419874, "learning_rate": 1.4385964720187162e-08, "loss": 0.6557, "step": 15257 }, { "epoch": 0.98, "grad_norm": 2.395469043142189, "learning_rate": 1.4307497745895127e-08, "loss": 0.6295, "step": 15258 }, { "epoch": 0.98, "grad_norm": 1.7164398423455034, "learning_rate": 1.4229245046190609e-08, "loss": 0.6783, "step": 15259 }, { "epoch": 0.98, "grad_norm": 1.4551817802434581, "learning_rate": 1.415120662443703e-08, "loss": 0.6355, "step": 15260 }, { "epoch": 0.98, "grad_norm": 2.257272332749303, "learning_rate": 1.4073382483987819e-08, "loss": 0.8568, "step": 15261 }, { "epoch": 0.98, "grad_norm": 1.9485904091848154, "learning_rate": 1.399577262818752e-08, "loss": 0.7433, "step": 15262 }, { "epoch": 0.98, "grad_norm": 1.7737560974863837, "learning_rate": 1.3918377060371802e-08, "loss": 0.8242, "step": 15263 }, { "epoch": 0.98, "grad_norm": 1.8001007541164455, "learning_rate": 1.3841195783867444e-08, "loss": 0.6111, "step": 15264 }, { "epoch": 0.98, "grad_norm": 2.2426224958793286, "learning_rate": 1.3764228801990686e-08, "loss": 0.7113, "step": 15265 }, { "epoch": 0.98, "grad_norm": 1.7591342635177194, "learning_rate": 1.3687476118049991e-08, "loss": 0.6946, "step": 15266 }, { "epoch": 0.98, "grad_norm": 1.6886009217568805, "learning_rate": 1.3610937735344387e-08, "loss": 0.7371, "step": 15267 }, { "epoch": 0.98, "grad_norm": 1.5085037680937474, "learning_rate": 1.3534613657162354e-08, "loss": 0.6176, "step": 15268 }, { "epoch": 0.98, "grad_norm": 2.477027070790083, "learning_rate": 1.3458503886784603e-08, "loss": 0.7489, "step": 15269 }, { "epoch": 0.98, "grad_norm": 1.1365329524189134, "learning_rate": 1.3382608427482402e-08, "loss": 0.6638, "step": 15270 }, { "epoch": 0.98, "grad_norm": 1.571566027461759, "learning_rate": 1.3306927282517034e-08, "loss": 0.63, "step": 15271 }, { "epoch": 0.98, "grad_norm": 1.584042868511297, "learning_rate": 1.3231460455141453e-08, "loss": 0.8189, "step": 15272 }, { "epoch": 0.98, "grad_norm": 1.624413585595069, "learning_rate": 1.3156207948599176e-08, "loss": 0.6369, "step": 15273 }, { "epoch": 0.98, "grad_norm": 1.5122331004874683, "learning_rate": 1.308116976612428e-08, "loss": 0.6646, "step": 15274 }, { "epoch": 0.98, "grad_norm": 1.21048878634933, "learning_rate": 1.3006345910941964e-08, "loss": 0.6917, "step": 15275 }, { "epoch": 0.98, "grad_norm": 1.7665845502574122, "learning_rate": 1.2931736386267435e-08, "loss": 0.6901, "step": 15276 }, { "epoch": 0.98, "grad_norm": 1.6486996824685813, "learning_rate": 1.2857341195308126e-08, "loss": 0.7313, "step": 15277 }, { "epoch": 0.98, "grad_norm": 1.8412825329225317, "learning_rate": 1.278316034126037e-08, "loss": 0.6553, "step": 15278 }, { "epoch": 0.98, "grad_norm": 2.1823399828277883, "learning_rate": 1.2709193827312727e-08, "loss": 0.7425, "step": 15279 }, { "epoch": 0.98, "grad_norm": 1.2123806162783204, "learning_rate": 1.2635441656644876e-08, "loss": 0.6115, "step": 15280 }, { "epoch": 0.98, "grad_norm": 1.7783588876813077, "learning_rate": 1.2561903832424837e-08, "loss": 0.6612, "step": 15281 }, { "epoch": 0.98, "grad_norm": 2.8777159288747383, "learning_rate": 1.2488580357815083e-08, "loss": 0.6815, "step": 15282 }, { "epoch": 0.98, "grad_norm": 1.6395814827326574, "learning_rate": 1.2415471235965315e-08, "loss": 0.801, "step": 15283 }, { "epoch": 0.98, "grad_norm": 1.731782373107523, "learning_rate": 1.2342576470018575e-08, "loss": 0.6656, "step": 15284 }, { "epoch": 0.98, "grad_norm": 1.2629287436762866, "learning_rate": 1.2269896063107356e-08, "loss": 0.6258, "step": 15285 }, { "epoch": 0.98, "grad_norm": 1.5559958603249031, "learning_rate": 1.2197430018354717e-08, "loss": 0.6666, "step": 15286 }, { "epoch": 0.98, "grad_norm": 1.9128505776170246, "learning_rate": 1.2125178338876498e-08, "loss": 0.6513, "step": 15287 }, { "epoch": 0.98, "grad_norm": 1.9954518422664347, "learning_rate": 1.2053141027776883e-08, "loss": 0.7152, "step": 15288 }, { "epoch": 0.98, "grad_norm": 2.366663237342314, "learning_rate": 1.1981318088152283e-08, "loss": 0.8277, "step": 15289 }, { "epoch": 0.98, "grad_norm": 1.5009861661762196, "learning_rate": 1.190970952308912e-08, "loss": 0.7615, "step": 15290 }, { "epoch": 0.98, "grad_norm": 2.2591260137441167, "learning_rate": 1.1838315335664929e-08, "loss": 0.6995, "step": 15291 }, { "epoch": 0.98, "grad_norm": 1.8357379141346328, "learning_rate": 1.1767135528948925e-08, "loss": 0.716, "step": 15292 }, { "epoch": 0.98, "grad_norm": 1.8202502074758016, "learning_rate": 1.1696170105999772e-08, "loss": 0.7224, "step": 15293 }, { "epoch": 0.98, "grad_norm": 1.9424532452917058, "learning_rate": 1.1625419069867249e-08, "loss": 0.7528, "step": 15294 }, { "epoch": 0.98, "grad_norm": 1.2757929597537225, "learning_rate": 1.1554882423591706e-08, "loss": 0.8059, "step": 15295 }, { "epoch": 0.98, "grad_norm": 1.5648076798317636, "learning_rate": 1.1484560170205716e-08, "loss": 0.9138, "step": 15296 }, { "epoch": 0.98, "grad_norm": 1.6225838337995848, "learning_rate": 1.141445231273075e-08, "loss": 0.7509, "step": 15297 }, { "epoch": 0.98, "grad_norm": 1.724932037783085, "learning_rate": 1.1344558854179955e-08, "loss": 0.7748, "step": 15298 }, { "epoch": 0.98, "grad_norm": 1.7355367470569665, "learning_rate": 1.1274879797558148e-08, "loss": 0.6463, "step": 15299 }, { "epoch": 0.98, "grad_norm": 1.7356778754931217, "learning_rate": 1.120541514585849e-08, "loss": 0.6776, "step": 15300 }, { "epoch": 0.98, "grad_norm": 1.1578507082484832, "learning_rate": 1.1136164902067481e-08, "loss": 0.6027, "step": 15301 }, { "epoch": 0.98, "grad_norm": 1.6455382708472672, "learning_rate": 1.1067129069161076e-08, "loss": 0.7073, "step": 15302 }, { "epoch": 0.98, "grad_norm": 1.7302140297247053, "learning_rate": 1.0998307650106344e-08, "loss": 0.7595, "step": 15303 }, { "epoch": 0.98, "grad_norm": 1.8923144216027836, "learning_rate": 1.092970064786092e-08, "loss": 0.731, "step": 15304 }, { "epoch": 0.98, "grad_norm": 1.7780669190892289, "learning_rate": 1.0861308065373556e-08, "loss": 0.6313, "step": 15305 }, { "epoch": 0.98, "grad_norm": 1.71225460091858, "learning_rate": 1.0793129905583566e-08, "loss": 0.7554, "step": 15306 }, { "epoch": 0.98, "grad_norm": 1.7767159186879247, "learning_rate": 1.072516617142083e-08, "loss": 0.7028, "step": 15307 }, { "epoch": 0.98, "grad_norm": 2.02020560846693, "learning_rate": 1.0657416865806902e-08, "loss": 0.6829, "step": 15308 }, { "epoch": 0.98, "grad_norm": 1.8329664725373265, "learning_rate": 1.0589881991652784e-08, "loss": 0.7703, "step": 15309 }, { "epoch": 0.98, "grad_norm": 1.689694523754711, "learning_rate": 1.052256155186171e-08, "loss": 0.6916, "step": 15310 }, { "epoch": 0.98, "grad_norm": 1.5738711025746506, "learning_rate": 1.0455455549326366e-08, "loss": 0.6852, "step": 15311 }, { "epoch": 0.98, "grad_norm": 1.645750804030065, "learning_rate": 1.038856398693111e-08, "loss": 0.7775, "step": 15312 }, { "epoch": 0.98, "grad_norm": 1.6488268735777993, "learning_rate": 1.0321886867550868e-08, "loss": 0.79, "step": 15313 }, { "epoch": 0.98, "grad_norm": 1.7239558931098642, "learning_rate": 1.0255424194050567e-08, "loss": 0.7912, "step": 15314 }, { "epoch": 0.98, "grad_norm": 1.5167121070456042, "learning_rate": 1.0189175969287923e-08, "loss": 0.624, "step": 15315 }, { "epoch": 0.98, "grad_norm": 1.3324110607150288, "learning_rate": 1.0123142196108993e-08, "loss": 0.5947, "step": 15316 }, { "epoch": 0.98, "grad_norm": 1.8373001275471121, "learning_rate": 1.0057322877352616e-08, "loss": 0.7812, "step": 15317 }, { "epoch": 0.98, "grad_norm": 2.124133772976106, "learning_rate": 9.991718015847085e-09, "loss": 0.7486, "step": 15318 }, { "epoch": 0.98, "grad_norm": 1.7962304887542742, "learning_rate": 9.926327614411813e-09, "loss": 0.7832, "step": 15319 }, { "epoch": 0.98, "grad_norm": 2.1949392446288383, "learning_rate": 9.861151675857884e-09, "loss": 0.7576, "step": 15320 }, { "epoch": 0.98, "grad_norm": 2.1551173307520703, "learning_rate": 9.796190202985834e-09, "loss": 0.6196, "step": 15321 }, { "epoch": 0.98, "grad_norm": 1.8240709905838934, "learning_rate": 9.73144319858732e-09, "loss": 0.7326, "step": 15322 }, { "epoch": 0.98, "grad_norm": 1.4754873219310713, "learning_rate": 9.666910665445673e-09, "loss": 0.5996, "step": 15323 }, { "epoch": 0.98, "grad_norm": 1.2013778149495964, "learning_rate": 9.602592606333672e-09, "loss": 0.6646, "step": 15324 }, { "epoch": 0.98, "grad_norm": 1.950793677767734, "learning_rate": 9.538489024016328e-09, "loss": 0.711, "step": 15325 }, { "epoch": 0.98, "grad_norm": 1.6258148121195342, "learning_rate": 9.474599921248662e-09, "loss": 0.6332, "step": 15326 }, { "epoch": 0.98, "grad_norm": 1.0634788110425002, "learning_rate": 9.410925300775697e-09, "loss": 0.5996, "step": 15327 }, { "epoch": 0.98, "grad_norm": 1.6519568086465444, "learning_rate": 9.347465165334135e-09, "loss": 0.8076, "step": 15328 }, { "epoch": 0.98, "grad_norm": 1.5679489399295796, "learning_rate": 9.284219517652348e-09, "loss": 0.7126, "step": 15329 }, { "epoch": 0.98, "grad_norm": 1.7874081196443485, "learning_rate": 9.221188360447609e-09, "loss": 0.7848, "step": 15330 }, { "epoch": 0.98, "grad_norm": 2.364755973832276, "learning_rate": 9.158371696428859e-09, "loss": 0.6524, "step": 15331 }, { "epoch": 0.98, "grad_norm": 1.7543933899789437, "learning_rate": 9.095769528296716e-09, "loss": 0.7611, "step": 15332 }, { "epoch": 0.98, "grad_norm": 1.8499944171277685, "learning_rate": 9.033381858740697e-09, "loss": 0.6986, "step": 15333 }, { "epoch": 0.98, "grad_norm": 1.3235598152815329, "learning_rate": 8.971208690442545e-09, "loss": 0.773, "step": 15334 }, { "epoch": 0.98, "grad_norm": 1.5736596336654816, "learning_rate": 8.909250026074013e-09, "loss": 0.7555, "step": 15335 }, { "epoch": 0.98, "grad_norm": 2.1409251959198254, "learning_rate": 8.847505868298522e-09, "loss": 0.6233, "step": 15336 }, { "epoch": 0.98, "grad_norm": 1.2087207765525225, "learning_rate": 8.785976219768954e-09, "loss": 0.7264, "step": 15337 }, { "epoch": 0.98, "grad_norm": 1.8180549937741186, "learning_rate": 8.724661083130414e-09, "loss": 0.7463, "step": 15338 }, { "epoch": 0.98, "grad_norm": 1.5666418022382471, "learning_rate": 8.663560461018016e-09, "loss": 0.8112, "step": 15339 }, { "epoch": 0.98, "grad_norm": 1.8470078581438727, "learning_rate": 8.60267435605744e-09, "loss": 0.7296, "step": 15340 }, { "epoch": 0.98, "grad_norm": 1.2047035418559515, "learning_rate": 8.542002770865477e-09, "loss": 0.5927, "step": 15341 }, { "epoch": 0.98, "grad_norm": 2.075426560550734, "learning_rate": 8.481545708049488e-09, "loss": 0.722, "step": 15342 }, { "epoch": 0.98, "grad_norm": 1.580961595939401, "learning_rate": 8.421303170208505e-09, "loss": 0.7389, "step": 15343 }, { "epoch": 0.98, "grad_norm": 1.1553686587017846, "learning_rate": 8.36127515993046e-09, "loss": 0.6493, "step": 15344 }, { "epoch": 0.98, "grad_norm": 1.811053010191661, "learning_rate": 8.301461679796619e-09, "loss": 0.7309, "step": 15345 }, { "epoch": 0.98, "grad_norm": 1.196362695601662, "learning_rate": 8.241862732376593e-09, "loss": 0.6798, "step": 15346 }, { "epoch": 0.98, "grad_norm": 1.7027144258022848, "learning_rate": 8.182478320232223e-09, "loss": 0.6684, "step": 15347 }, { "epoch": 0.98, "grad_norm": 2.3627071122080676, "learning_rate": 8.123308445915912e-09, "loss": 0.749, "step": 15348 }, { "epoch": 0.98, "grad_norm": 2.365881749626145, "learning_rate": 8.06435311197007e-09, "loss": 0.8667, "step": 15349 }, { "epoch": 0.98, "grad_norm": 1.6298487719241799, "learning_rate": 8.005612320929335e-09, "loss": 0.7135, "step": 15350 }, { "epoch": 0.98, "grad_norm": 1.8016960119053227, "learning_rate": 7.947086075317246e-09, "loss": 0.7724, "step": 15351 }, { "epoch": 0.98, "grad_norm": 1.5939115822389085, "learning_rate": 7.888774377650122e-09, "loss": 0.7723, "step": 15352 }, { "epoch": 0.98, "grad_norm": 2.060064785776051, "learning_rate": 7.830677230433181e-09, "loss": 0.7415, "step": 15353 }, { "epoch": 0.98, "grad_norm": 1.807993784190373, "learning_rate": 7.772794636163872e-09, "loss": 0.704, "step": 15354 }, { "epoch": 0.98, "grad_norm": 1.186879305380237, "learning_rate": 7.715126597329648e-09, "loss": 0.7364, "step": 15355 }, { "epoch": 0.98, "grad_norm": 1.84683091880913, "learning_rate": 7.657673116409081e-09, "loss": 0.8073, "step": 15356 }, { "epoch": 0.98, "grad_norm": 1.59640132984708, "learning_rate": 7.600434195871864e-09, "loss": 0.6886, "step": 15357 }, { "epoch": 0.98, "grad_norm": 1.6034667651942094, "learning_rate": 7.54340983817714e-09, "loss": 0.6234, "step": 15358 }, { "epoch": 0.98, "grad_norm": 1.5496503714099479, "learning_rate": 7.486600045775728e-09, "loss": 0.6467, "step": 15359 }, { "epoch": 0.98, "grad_norm": 1.3527505571779055, "learning_rate": 7.430004821110115e-09, "loss": 0.692, "step": 15360 }, { "epoch": 0.98, "grad_norm": 1.775671099276383, "learning_rate": 7.373624166611137e-09, "loss": 0.7278, "step": 15361 }, { "epoch": 0.98, "grad_norm": 1.170710029097669, "learning_rate": 7.317458084704076e-09, "loss": 0.6659, "step": 15362 }, { "epoch": 0.98, "grad_norm": 1.9585532244685693, "learning_rate": 7.261506577800892e-09, "loss": 0.7852, "step": 15363 }, { "epoch": 0.98, "grad_norm": 2.007029299482747, "learning_rate": 7.2057696483068816e-09, "loss": 0.6182, "step": 15364 }, { "epoch": 0.98, "grad_norm": 2.1912932326272023, "learning_rate": 7.1502472986179075e-09, "loss": 0.7315, "step": 15365 }, { "epoch": 0.98, "grad_norm": 1.6006737014355525, "learning_rate": 7.094939531119838e-09, "loss": 0.807, "step": 15366 }, { "epoch": 0.98, "grad_norm": 1.5402548263343825, "learning_rate": 7.039846348189105e-09, "loss": 0.6283, "step": 15367 }, { "epoch": 0.98, "grad_norm": 1.7335183490810555, "learning_rate": 6.984967752194927e-09, "loss": 0.8297, "step": 15368 }, { "epoch": 0.98, "grad_norm": 1.3303728152953378, "learning_rate": 6.93030374549486e-09, "loss": 0.689, "step": 15369 }, { "epoch": 0.98, "grad_norm": 1.5479008368827942, "learning_rate": 6.875854330438136e-09, "loss": 0.7162, "step": 15370 }, { "epoch": 0.98, "grad_norm": 1.55935111651176, "learning_rate": 6.8216195093656624e-09, "loss": 0.7355, "step": 15371 }, { "epoch": 0.98, "grad_norm": 1.9480384633499217, "learning_rate": 6.76759928460724e-09, "loss": 0.7277, "step": 15372 }, { "epoch": 0.98, "grad_norm": 1.9128974043157836, "learning_rate": 6.713793658486012e-09, "loss": 0.7148, "step": 15373 }, { "epoch": 0.98, "grad_norm": 1.7039141943210192, "learning_rate": 6.6602026333129065e-09, "loss": 0.6703, "step": 15374 }, { "epoch": 0.98, "grad_norm": 1.887957109683674, "learning_rate": 6.606826211392192e-09, "loss": 0.7852, "step": 15375 }, { "epoch": 0.98, "grad_norm": 1.5817236232319674, "learning_rate": 6.553664395017589e-09, "loss": 0.8515, "step": 15376 }, { "epoch": 0.98, "grad_norm": 1.9987984214842047, "learning_rate": 6.5007171864733824e-09, "loss": 0.8712, "step": 15377 }, { "epoch": 0.98, "grad_norm": 1.1621745906254606, "learning_rate": 6.4479845880360824e-09, "loss": 0.6536, "step": 15378 }, { "epoch": 0.98, "grad_norm": 1.685752773158283, "learning_rate": 6.395466601971101e-09, "loss": 0.6171, "step": 15379 }, { "epoch": 0.98, "grad_norm": 2.1202488950441234, "learning_rate": 6.3431632305360754e-09, "loss": 0.8056, "step": 15380 }, { "epoch": 0.98, "grad_norm": 1.507723746563671, "learning_rate": 6.291074475978653e-09, "loss": 0.7099, "step": 15381 }, { "epoch": 0.98, "grad_norm": 1.6432491246876157, "learning_rate": 6.239200340537599e-09, "loss": 0.6796, "step": 15382 }, { "epoch": 0.98, "grad_norm": 1.4609997433311805, "learning_rate": 6.187540826442795e-09, "loss": 0.5388, "step": 15383 }, { "epoch": 0.98, "grad_norm": 1.853042177314243, "learning_rate": 6.136095935913578e-09, "loss": 0.8875, "step": 15384 }, { "epoch": 0.98, "grad_norm": 1.8860370177667138, "learning_rate": 6.084865671162066e-09, "loss": 0.7412, "step": 15385 }, { "epoch": 0.98, "grad_norm": 1.756635337603837, "learning_rate": 6.033850034388722e-09, "loss": 0.6394, "step": 15386 }, { "epoch": 0.98, "grad_norm": 1.5594333069784057, "learning_rate": 5.983049027786791e-09, "loss": 0.7333, "step": 15387 }, { "epoch": 0.98, "grad_norm": 1.436938736962764, "learning_rate": 5.932462653539528e-09, "loss": 0.604, "step": 15388 }, { "epoch": 0.98, "grad_norm": 1.6344771568600664, "learning_rate": 5.882090913821303e-09, "loss": 0.6885, "step": 15389 }, { "epoch": 0.99, "grad_norm": 1.0515852232688891, "learning_rate": 5.831933810796497e-09, "loss": 0.6626, "step": 15390 }, { "epoch": 0.99, "grad_norm": 1.793376905645657, "learning_rate": 5.781991346621163e-09, "loss": 0.6617, "step": 15391 }, { "epoch": 0.99, "grad_norm": 1.985211078155675, "learning_rate": 5.7322635234413615e-09, "loss": 0.734, "step": 15392 }, { "epoch": 0.99, "grad_norm": 1.77862231557189, "learning_rate": 5.682750343394827e-09, "loss": 0.6893, "step": 15393 }, { "epoch": 0.99, "grad_norm": 1.532773266376767, "learning_rate": 5.633451808608747e-09, "loss": 0.6874, "step": 15394 }, { "epoch": 0.99, "grad_norm": 1.6399621339449553, "learning_rate": 5.584367921202538e-09, "loss": 0.6387, "step": 15395 }, { "epoch": 0.99, "grad_norm": 1.919864467300447, "learning_rate": 5.535498683285623e-09, "loss": 0.6625, "step": 15396 }, { "epoch": 0.99, "grad_norm": 1.7644464156103032, "learning_rate": 5.486844096957988e-09, "loss": 0.7101, "step": 15397 }, { "epoch": 0.99, "grad_norm": 1.8687840973642174, "learning_rate": 5.43840416431074e-09, "loss": 0.7082, "step": 15398 }, { "epoch": 0.99, "grad_norm": 1.6938802894996803, "learning_rate": 5.390178887426101e-09, "loss": 0.759, "step": 15399 }, { "epoch": 0.99, "grad_norm": 1.8297277076638832, "learning_rate": 5.342168268376302e-09, "loss": 0.6803, "step": 15400 }, { "epoch": 0.99, "grad_norm": 1.0906704967657772, "learning_rate": 5.294372309224693e-09, "loss": 0.6081, "step": 15401 }, { "epoch": 0.99, "grad_norm": 1.6353790817328766, "learning_rate": 5.246791012025743e-09, "loss": 0.6134, "step": 15402 }, { "epoch": 0.99, "grad_norm": 1.640792023438102, "learning_rate": 5.19942437882448e-09, "loss": 0.7338, "step": 15403 }, { "epoch": 0.99, "grad_norm": 1.6357648450039848, "learning_rate": 5.1522724116565005e-09, "loss": 0.8295, "step": 15404 }, { "epoch": 0.99, "grad_norm": 1.6854727442880972, "learning_rate": 5.10533511254796e-09, "loss": 0.8632, "step": 15405 }, { "epoch": 0.99, "grad_norm": 1.7634798454769653, "learning_rate": 5.05861248351669e-09, "loss": 0.739, "step": 15406 }, { "epoch": 0.99, "grad_norm": 1.6736355061437598, "learning_rate": 5.012104526569972e-09, "loss": 0.6716, "step": 15407 }, { "epoch": 0.99, "grad_norm": 1.8657629550589951, "learning_rate": 4.965811243707319e-09, "loss": 0.7009, "step": 15408 }, { "epoch": 0.99, "grad_norm": 1.6886855928025417, "learning_rate": 4.919732636918251e-09, "loss": 0.6959, "step": 15409 }, { "epoch": 0.99, "grad_norm": 1.6677416976825754, "learning_rate": 4.873868708182849e-09, "loss": 0.6614, "step": 15410 }, { "epoch": 0.99, "grad_norm": 1.2129696712915625, "learning_rate": 4.8282194594723164e-09, "loss": 0.7185, "step": 15411 }, { "epoch": 0.99, "grad_norm": 1.8606266060681875, "learning_rate": 4.782784892748971e-09, "loss": 0.6813, "step": 15412 }, { "epoch": 0.99, "grad_norm": 1.6602231344818132, "learning_rate": 4.737565009964584e-09, "loss": 0.653, "step": 15413 }, { "epoch": 0.99, "grad_norm": 1.7115814427933302, "learning_rate": 4.692559813063713e-09, "loss": 0.6654, "step": 15414 }, { "epoch": 0.99, "grad_norm": 1.9858432952891878, "learning_rate": 4.647769303979255e-09, "loss": 0.6809, "step": 15415 }, { "epoch": 0.99, "grad_norm": 1.0817160495835163, "learning_rate": 4.6031934846380024e-09, "loss": 0.7017, "step": 15416 }, { "epoch": 0.99, "grad_norm": 1.708731250602223, "learning_rate": 4.558832356953979e-09, "loss": 0.6481, "step": 15417 }, { "epoch": 0.99, "grad_norm": 2.164916858181382, "learning_rate": 4.514685922834549e-09, "loss": 0.7206, "step": 15418 }, { "epoch": 0.99, "grad_norm": 1.0119975347357488, "learning_rate": 4.470754184177084e-09, "loss": 0.5406, "step": 15419 }, { "epoch": 0.99, "grad_norm": 1.5874892417252804, "learning_rate": 4.427037142869517e-09, "loss": 0.8254, "step": 15420 }, { "epoch": 0.99, "grad_norm": 1.661891535616906, "learning_rate": 4.383534800790901e-09, "loss": 0.701, "step": 15421 }, { "epoch": 0.99, "grad_norm": 1.1709810499884143, "learning_rate": 4.3402471598102956e-09, "loss": 0.7322, "step": 15422 }, { "epoch": 0.99, "grad_norm": 1.1247246794111896, "learning_rate": 4.297174221788991e-09, "loss": 0.5259, "step": 15423 }, { "epoch": 0.99, "grad_norm": 1.8712882468555274, "learning_rate": 4.254315988577729e-09, "loss": 0.7007, "step": 15424 }, { "epoch": 0.99, "grad_norm": 1.7732802294841972, "learning_rate": 4.211672462018368e-09, "loss": 0.7192, "step": 15425 }, { "epoch": 0.99, "grad_norm": 2.4231892167309046, "learning_rate": 4.169243643943333e-09, "loss": 0.6667, "step": 15426 }, { "epoch": 0.99, "grad_norm": 1.772891731689411, "learning_rate": 4.127029536177274e-09, "loss": 0.6911, "step": 15427 }, { "epoch": 0.99, "grad_norm": 1.7472392691998595, "learning_rate": 4.085030140533186e-09, "loss": 0.7424, "step": 15428 }, { "epoch": 0.99, "grad_norm": 2.538909048708131, "learning_rate": 4.043245458816847e-09, "loss": 0.695, "step": 15429 }, { "epoch": 0.99, "grad_norm": 1.53338718906666, "learning_rate": 4.001675492823487e-09, "loss": 0.7497, "step": 15430 }, { "epoch": 0.99, "grad_norm": 1.5777088456945505, "learning_rate": 3.960320244340565e-09, "loss": 0.7116, "step": 15431 }, { "epoch": 0.99, "grad_norm": 2.0963261837717186, "learning_rate": 3.919179715144439e-09, "loss": 0.7122, "step": 15432 }, { "epoch": 0.99, "grad_norm": 1.6991306969400852, "learning_rate": 3.878253907004248e-09, "loss": 0.733, "step": 15433 }, { "epoch": 0.99, "grad_norm": 1.9049400358199469, "learning_rate": 3.837542821678031e-09, "loss": 0.7569, "step": 15434 }, { "epoch": 0.99, "grad_norm": 2.022022449894478, "learning_rate": 3.797046460916054e-09, "loss": 0.7912, "step": 15435 }, { "epoch": 0.99, "grad_norm": 1.0416033965991558, "learning_rate": 3.7567648264585924e-09, "loss": 0.5002, "step": 15436 }, { "epoch": 0.99, "grad_norm": 1.6695468749856317, "learning_rate": 3.716697920036483e-09, "loss": 0.7376, "step": 15437 }, { "epoch": 0.99, "grad_norm": 1.8598111829463253, "learning_rate": 3.6768457433727924e-09, "loss": 0.7424, "step": 15438 }, { "epoch": 0.99, "grad_norm": 1.7100824625245554, "learning_rate": 3.637208298178929e-09, "loss": 0.9543, "step": 15439 }, { "epoch": 0.99, "grad_norm": 2.2085744373640757, "learning_rate": 3.597785586159086e-09, "loss": 0.6899, "step": 15440 }, { "epoch": 0.99, "grad_norm": 1.4165223052863587, "learning_rate": 3.558577609007463e-09, "loss": 0.6709, "step": 15441 }, { "epoch": 0.99, "grad_norm": 1.5745420087036135, "learning_rate": 3.519584368409379e-09, "loss": 0.7194, "step": 15442 }, { "epoch": 0.99, "grad_norm": 1.1442427439803715, "learning_rate": 3.480805866040715e-09, "loss": 0.7, "step": 15443 }, { "epoch": 0.99, "grad_norm": 1.8707181781110744, "learning_rate": 3.4422421035679167e-09, "loss": 0.7511, "step": 15444 }, { "epoch": 0.99, "grad_norm": 1.6801260932463034, "learning_rate": 3.403893082647991e-09, "loss": 0.7547, "step": 15445 }, { "epoch": 0.99, "grad_norm": 1.0238341199898175, "learning_rate": 3.36575880492962e-09, "loss": 0.5497, "step": 15446 }, { "epoch": 0.99, "grad_norm": 1.923467315946292, "learning_rate": 3.3278392720514917e-09, "loss": 0.8123, "step": 15447 }, { "epoch": 0.99, "grad_norm": 2.3471260447363305, "learning_rate": 3.2901344856434147e-09, "loss": 0.6695, "step": 15448 }, { "epoch": 0.99, "grad_norm": 1.7773625520578251, "learning_rate": 3.252644447325759e-09, "loss": 0.6201, "step": 15449 }, { "epoch": 0.99, "grad_norm": 1.5421253718140229, "learning_rate": 3.2153691587094583e-09, "loss": 0.6508, "step": 15450 }, { "epoch": 0.99, "grad_norm": 1.7031336418858332, "learning_rate": 3.1783086213976743e-09, "loss": 0.6252, "step": 15451 }, { "epoch": 0.99, "grad_norm": 1.6866851039561728, "learning_rate": 3.1414628369819123e-09, "loss": 0.676, "step": 15452 }, { "epoch": 0.99, "grad_norm": 3.554302955358292, "learning_rate": 3.10483180704646e-09, "loss": 0.8006, "step": 15453 }, { "epoch": 0.99, "grad_norm": 1.6793850279967504, "learning_rate": 3.0684155331650587e-09, "loss": 0.7032, "step": 15454 }, { "epoch": 0.99, "grad_norm": 1.9045310779256037, "learning_rate": 3.0322140169036784e-09, "loss": 0.8917, "step": 15455 }, { "epoch": 0.99, "grad_norm": 1.7528235885457621, "learning_rate": 2.996227259817186e-09, "loss": 0.7427, "step": 15456 }, { "epoch": 0.99, "grad_norm": 1.637435191263892, "learning_rate": 2.9604552634532325e-09, "loss": 0.7571, "step": 15457 }, { "epoch": 0.99, "grad_norm": 1.9321256953463506, "learning_rate": 2.9248980293483664e-09, "loss": 0.6533, "step": 15458 }, { "epoch": 0.99, "grad_norm": 1.8899838880683566, "learning_rate": 2.88955555903081e-09, "loss": 0.7089, "step": 15459 }, { "epoch": 0.99, "grad_norm": 1.6380723505906025, "learning_rate": 2.854427854019903e-09, "loss": 0.6971, "step": 15460 }, { "epoch": 0.99, "grad_norm": 1.516523377337587, "learning_rate": 2.81951491582555e-09, "loss": 0.6315, "step": 15461 }, { "epoch": 0.99, "grad_norm": 1.600451620174044, "learning_rate": 2.7848167459476605e-09, "loss": 0.7162, "step": 15462 }, { "epoch": 0.99, "grad_norm": 1.5878994482247533, "learning_rate": 2.7503333458778205e-09, "loss": 0.8054, "step": 15463 }, { "epoch": 0.99, "grad_norm": 1.9133332643104486, "learning_rate": 2.716064717098177e-09, "loss": 0.773, "step": 15464 }, { "epoch": 0.99, "grad_norm": 1.8527529938774963, "learning_rate": 2.682010861080886e-09, "loss": 0.7363, "step": 15465 }, { "epoch": 0.99, "grad_norm": 2.085228680327608, "learning_rate": 2.6481717792903316e-09, "loss": 0.6919, "step": 15466 }, { "epoch": 0.99, "grad_norm": 1.0254779917469685, "learning_rate": 2.614547473180351e-09, "loss": 0.6478, "step": 15467 }, { "epoch": 0.99, "grad_norm": 1.0630448421511998, "learning_rate": 2.5811379441964547e-09, "loss": 0.5998, "step": 15468 }, { "epoch": 0.99, "grad_norm": 2.6978763292761516, "learning_rate": 2.5479431937736055e-09, "loss": 0.5645, "step": 15469 }, { "epoch": 0.99, "grad_norm": 2.0078200436515625, "learning_rate": 2.5149632233395503e-09, "loss": 0.75, "step": 15470 }, { "epoch": 0.99, "grad_norm": 1.8596504120889485, "learning_rate": 2.482198034310934e-09, "loss": 0.7976, "step": 15471 }, { "epoch": 0.99, "grad_norm": 1.7734115915657684, "learning_rate": 2.44964762809663e-09, "loss": 0.7293, "step": 15472 }, { "epoch": 0.99, "grad_norm": 1.8109516107015822, "learning_rate": 2.417312006094963e-09, "loss": 0.84, "step": 15473 }, { "epoch": 0.99, "grad_norm": 1.7006981660939908, "learning_rate": 2.3851911696959327e-09, "loss": 0.7871, "step": 15474 }, { "epoch": 0.99, "grad_norm": 1.754183468390483, "learning_rate": 2.353285120279547e-09, "loss": 0.6571, "step": 15475 }, { "epoch": 0.99, "grad_norm": 2.302708623833881, "learning_rate": 2.321593859218041e-09, "loss": 0.8304, "step": 15476 }, { "epoch": 0.99, "grad_norm": 1.7968680667811219, "learning_rate": 2.290117387872548e-09, "loss": 0.6743, "step": 15477 }, { "epoch": 0.99, "grad_norm": 1.4590368283813118, "learning_rate": 2.258855707595875e-09, "loss": 0.8355, "step": 15478 }, { "epoch": 0.99, "grad_norm": 1.6584423162003699, "learning_rate": 2.227808819732502e-09, "loss": 0.6998, "step": 15479 }, { "epoch": 0.99, "grad_norm": 1.6813258934976485, "learning_rate": 2.196976725615807e-09, "loss": 0.7667, "step": 15480 }, { "epoch": 0.99, "grad_norm": 1.8108013279354382, "learning_rate": 2.166359426570841e-09, "loss": 0.7053, "step": 15481 }, { "epoch": 0.99, "grad_norm": 1.8903751883860107, "learning_rate": 2.1359569239143283e-09, "loss": 0.6579, "step": 15482 }, { "epoch": 0.99, "grad_norm": 1.7422332665754603, "learning_rate": 2.105769218952447e-09, "loss": 0.7752, "step": 15483 }, { "epoch": 0.99, "grad_norm": 1.6345502021989724, "learning_rate": 2.075796312982492e-09, "loss": 0.6867, "step": 15484 }, { "epoch": 0.99, "grad_norm": 1.7724751566788057, "learning_rate": 2.0460382072928775e-09, "loss": 0.6916, "step": 15485 }, { "epoch": 0.99, "grad_norm": 1.7326815384454608, "learning_rate": 2.016494903162025e-09, "loss": 0.8027, "step": 15486 }, { "epoch": 0.99, "grad_norm": 1.685573458821228, "learning_rate": 1.98716640186003e-09, "loss": 0.8038, "step": 15487 }, { "epoch": 0.99, "grad_norm": 1.9852340166934521, "learning_rate": 1.9580527046475506e-09, "loss": 0.7125, "step": 15488 }, { "epoch": 0.99, "grad_norm": 1.5944334232577664, "learning_rate": 1.929153812775808e-09, "loss": 0.821, "step": 15489 }, { "epoch": 0.99, "grad_norm": 1.961157539648635, "learning_rate": 1.900469727486587e-09, "loss": 0.6091, "step": 15490 }, { "epoch": 0.99, "grad_norm": 1.8229573652418456, "learning_rate": 1.8720004500122347e-09, "loss": 0.7164, "step": 15491 }, { "epoch": 0.99, "grad_norm": 1.5803364223705851, "learning_rate": 1.8437459815773274e-09, "loss": 0.7276, "step": 15492 }, { "epoch": 0.99, "grad_norm": 1.4843361318584398, "learning_rate": 1.8157063233953387e-09, "loss": 0.7085, "step": 15493 }, { "epoch": 0.99, "grad_norm": 2.4494549453838066, "learning_rate": 1.787881476671971e-09, "loss": 0.7203, "step": 15494 }, { "epoch": 0.99, "grad_norm": 1.1797140269562716, "learning_rate": 1.7602714426023792e-09, "loss": 0.6736, "step": 15495 }, { "epoch": 0.99, "grad_norm": 1.7535717030170361, "learning_rate": 1.732876222373947e-09, "loss": 0.7792, "step": 15496 }, { "epoch": 0.99, "grad_norm": 2.516250700853946, "learning_rate": 1.7056958171635108e-09, "loss": 0.7124, "step": 15497 }, { "epoch": 0.99, "grad_norm": 0.9540747956594029, "learning_rate": 1.67873022813958e-09, "loss": 0.587, "step": 15498 }, { "epoch": 0.99, "grad_norm": 1.2467244333935308, "learning_rate": 1.6519794564606728e-09, "loss": 0.5866, "step": 15499 }, { "epoch": 0.99, "grad_norm": 1.8055666083786497, "learning_rate": 1.6254435032764248e-09, "loss": 0.6663, "step": 15500 }, { "epoch": 0.99, "grad_norm": 1.633944125492383, "learning_rate": 1.5991223697281454e-09, "loss": 0.6703, "step": 15501 }, { "epoch": 0.99, "grad_norm": 1.2653174436305477, "learning_rate": 1.5730160569460418e-09, "loss": 0.6793, "step": 15502 }, { "epoch": 0.99, "grad_norm": 1.9058609934825341, "learning_rate": 1.5471245660531042e-09, "loss": 0.8581, "step": 15503 }, { "epoch": 0.99, "grad_norm": 2.4699994398205094, "learning_rate": 1.5214478981612212e-09, "loss": 0.7204, "step": 15504 }, { "epoch": 0.99, "grad_norm": 1.7072121275002998, "learning_rate": 1.4959860543739546e-09, "loss": 0.6864, "step": 15505 }, { "epoch": 0.99, "grad_norm": 1.9794224067163524, "learning_rate": 1.4707390357865391e-09, "loss": 0.7281, "step": 15506 }, { "epoch": 0.99, "grad_norm": 1.517655496038653, "learning_rate": 1.4457068434831078e-09, "loss": 0.6861, "step": 15507 }, { "epoch": 0.99, "grad_norm": 1.9225493058096614, "learning_rate": 1.4208894785394666e-09, "loss": 0.674, "step": 15508 }, { "epoch": 0.99, "grad_norm": 1.4883254529413246, "learning_rate": 1.3962869420230951e-09, "loss": 0.7173, "step": 15509 }, { "epoch": 0.99, "grad_norm": 1.7089001450377297, "learning_rate": 1.3718992349903704e-09, "loss": 0.7215, "step": 15510 }, { "epoch": 0.99, "grad_norm": 1.803363127427481, "learning_rate": 1.3477263584904533e-09, "loss": 0.7109, "step": 15511 }, { "epoch": 0.99, "grad_norm": 1.2070377099006993, "learning_rate": 1.323768313560847e-09, "loss": 0.5732, "step": 15512 }, { "epoch": 0.99, "grad_norm": 1.6244257028290998, "learning_rate": 1.3000251012323939e-09, "loss": 0.7641, "step": 15513 }, { "epoch": 0.99, "grad_norm": 1.6803267334342333, "learning_rate": 1.2764967225253888e-09, "loss": 0.7034, "step": 15514 }, { "epoch": 0.99, "grad_norm": 1.781354253076734, "learning_rate": 1.2531831784506898e-09, "loss": 0.6946, "step": 15515 }, { "epoch": 0.99, "grad_norm": 1.6300146211746243, "learning_rate": 1.2300844700097181e-09, "loss": 0.5906, "step": 15516 }, { "epoch": 0.99, "grad_norm": 1.0140799380398804, "learning_rate": 1.2072005981966783e-09, "loss": 0.5408, "step": 15517 }, { "epoch": 0.99, "grad_norm": 2.1313945745776577, "learning_rate": 1.1845315639935628e-09, "loss": 0.891, "step": 15518 }, { "epoch": 0.99, "grad_norm": 1.89601961952485, "learning_rate": 1.1620773683757025e-09, "loss": 0.8858, "step": 15519 }, { "epoch": 0.99, "grad_norm": 1.008708451650089, "learning_rate": 1.1398380123078812e-09, "loss": 0.6738, "step": 15520 }, { "epoch": 0.99, "grad_norm": 1.7301482355081272, "learning_rate": 1.1178134967454456e-09, "loss": 0.806, "step": 15521 }, { "epoch": 0.99, "grad_norm": 1.9020769254290315, "learning_rate": 1.0960038226354165e-09, "loss": 0.6129, "step": 15522 }, { "epoch": 0.99, "grad_norm": 1.6423594241672175, "learning_rate": 1.0744089909153765e-09, "loss": 0.7095, "step": 15523 }, { "epoch": 0.99, "grad_norm": 1.5017412209515795, "learning_rate": 1.0530290025123623e-09, "loss": 0.7204, "step": 15524 }, { "epoch": 0.99, "grad_norm": 1.4764026804179806, "learning_rate": 1.0318638583467489e-09, "loss": 0.5257, "step": 15525 }, { "epoch": 0.99, "grad_norm": 1.4777000237088247, "learning_rate": 1.0109135593266983e-09, "loss": 0.6331, "step": 15526 }, { "epoch": 0.99, "grad_norm": 1.750806129814846, "learning_rate": 9.901781063531568e-10, "loss": 0.7178, "step": 15527 }, { "epoch": 0.99, "grad_norm": 1.7228200547684758, "learning_rate": 9.696575003176334e-10, "loss": 0.6892, "step": 15528 }, { "epoch": 0.99, "grad_norm": 1.7120544153578998, "learning_rate": 9.493517421022003e-10, "loss": 0.7437, "step": 15529 }, { "epoch": 0.99, "grad_norm": 1.5902966387985595, "learning_rate": 9.292608325789376e-10, "loss": 0.7341, "step": 15530 }, { "epoch": 0.99, "grad_norm": 1.9138774727298449, "learning_rate": 9.093847726110439e-10, "loss": 0.7947, "step": 15531 }, { "epoch": 0.99, "grad_norm": 1.623363728061937, "learning_rate": 8.897235630539458e-10, "loss": 0.586, "step": 15532 }, { "epoch": 0.99, "grad_norm": 1.63428492559217, "learning_rate": 8.70277204751413e-10, "loss": 0.7427, "step": 15533 }, { "epoch": 0.99, "grad_norm": 1.6626509517191814, "learning_rate": 8.510456985399985e-10, "loss": 0.7203, "step": 15534 }, { "epoch": 0.99, "grad_norm": 2.7058754264843565, "learning_rate": 8.320290452462632e-10, "loss": 0.6642, "step": 15535 }, { "epoch": 0.99, "grad_norm": 1.840144107185414, "learning_rate": 8.132272456867762e-10, "loss": 0.751, "step": 15536 }, { "epoch": 0.99, "grad_norm": 1.9397182199445604, "learning_rate": 7.946403006703351e-10, "loss": 0.7254, "step": 15537 }, { "epoch": 0.99, "grad_norm": 1.8011989020182488, "learning_rate": 7.762682109951902e-10, "loss": 0.6974, "step": 15538 }, { "epoch": 0.99, "grad_norm": 1.5263172155147071, "learning_rate": 7.581109774512651e-10, "loss": 0.6952, "step": 15539 }, { "epoch": 0.99, "grad_norm": 1.0540729688451418, "learning_rate": 7.401686008190467e-10, "loss": 0.6751, "step": 15540 }, { "epoch": 0.99, "grad_norm": 1.9449209026366159, "learning_rate": 7.224410818695848e-10, "loss": 0.7018, "step": 15541 }, { "epoch": 0.99, "grad_norm": 1.9095567100265667, "learning_rate": 7.049284213644925e-10, "loss": 0.7001, "step": 15542 }, { "epoch": 0.99, "grad_norm": 1.4890727008769704, "learning_rate": 6.876306200565008e-10, "loss": 0.6747, "step": 15543 }, { "epoch": 0.99, "grad_norm": 1.6457467073061738, "learning_rate": 6.705476786894593e-10, "loss": 0.7085, "step": 15544 }, { "epoch": 0.99, "grad_norm": 1.7372245969435318, "learning_rate": 6.536795979966703e-10, "loss": 0.6544, "step": 15545 }, { "epoch": 1.0, "grad_norm": 1.2283534789353852, "learning_rate": 6.370263787042196e-10, "loss": 0.5731, "step": 15546 }, { "epoch": 1.0, "grad_norm": 1.8600820344783702, "learning_rate": 6.20588021527091e-10, "loss": 0.714, "step": 15547 }, { "epoch": 1.0, "grad_norm": 1.6618427798528403, "learning_rate": 6.043645271719411e-10, "loss": 0.6711, "step": 15548 }, { "epoch": 1.0, "grad_norm": 1.756986928622356, "learning_rate": 5.883558963359903e-10, "loss": 0.7261, "step": 15549 }, { "epoch": 1.0, "grad_norm": 1.7973363603361425, "learning_rate": 5.725621297075768e-10, "loss": 0.6523, "step": 15550 }, { "epoch": 1.0, "grad_norm": 1.1059064657392232, "learning_rate": 5.569832279644915e-10, "loss": 0.6698, "step": 15551 }, { "epoch": 1.0, "grad_norm": 1.6264270640759413, "learning_rate": 5.416191917778646e-10, "loss": 0.7958, "step": 15552 }, { "epoch": 1.0, "grad_norm": 1.830077875078633, "learning_rate": 5.264700218066133e-10, "loss": 0.7328, "step": 15553 }, { "epoch": 1.0, "grad_norm": 1.7601148365773802, "learning_rate": 5.115357187024383e-10, "loss": 0.8326, "step": 15554 }, { "epoch": 1.0, "grad_norm": 1.8699549593607707, "learning_rate": 4.968162831070489e-10, "loss": 0.7373, "step": 15555 }, { "epoch": 1.0, "grad_norm": 1.9307143794963761, "learning_rate": 4.823117156532719e-10, "loss": 0.6925, "step": 15556 }, { "epoch": 1.0, "grad_norm": 1.7398088016378237, "learning_rate": 4.680220169639427e-10, "loss": 0.7075, "step": 15557 }, { "epoch": 1.0, "grad_norm": 1.8535488224838959, "learning_rate": 4.539471876535695e-10, "loss": 0.8061, "step": 15558 }, { "epoch": 1.0, "grad_norm": 1.9036603704980284, "learning_rate": 4.4008722832722397e-10, "loss": 0.745, "step": 15559 }, { "epoch": 1.0, "grad_norm": 1.501941533491973, "learning_rate": 4.264421395805407e-10, "loss": 0.6001, "step": 15560 }, { "epoch": 1.0, "grad_norm": 1.751940757002258, "learning_rate": 4.1301192199971753e-10, "loss": 0.693, "step": 15561 }, { "epoch": 1.0, "grad_norm": 1.4490596687424417, "learning_rate": 3.9979657616207037e-10, "loss": 0.729, "step": 15562 }, { "epoch": 1.0, "grad_norm": 1.7600637232433867, "learning_rate": 3.867961026354783e-10, "loss": 0.721, "step": 15563 }, { "epoch": 1.0, "grad_norm": 1.986274137038226, "learning_rate": 3.740105019789386e-10, "loss": 0.8906, "step": 15564 }, { "epoch": 1.0, "grad_norm": 1.5924996677792753, "learning_rate": 3.6143977474201175e-10, "loss": 0.6767, "step": 15565 }, { "epoch": 1.0, "grad_norm": 1.7932139861483467, "learning_rate": 3.4908392146426606e-10, "loss": 0.7279, "step": 15566 }, { "epoch": 1.0, "grad_norm": 1.478886778470972, "learning_rate": 3.369429426769433e-10, "loss": 0.687, "step": 15567 }, { "epoch": 1.0, "grad_norm": 1.5014289604755167, "learning_rate": 3.250168389024033e-10, "loss": 0.6732, "step": 15568 }, { "epoch": 1.0, "grad_norm": 1.6708894085360728, "learning_rate": 3.1330561065301413e-10, "loss": 0.6813, "step": 15569 }, { "epoch": 1.0, "grad_norm": 2.0747173480017227, "learning_rate": 3.0180925843170674e-10, "loss": 0.7129, "step": 15570 }, { "epoch": 1.0, "grad_norm": 2.014159422851189, "learning_rate": 2.9052778273308546e-10, "loss": 0.7645, "step": 15571 }, { "epoch": 1.0, "grad_norm": 1.6488499426395649, "learning_rate": 2.794611840417627e-10, "loss": 0.6324, "step": 15572 }, { "epoch": 1.0, "grad_norm": 1.4931355981639793, "learning_rate": 2.686094628329139e-10, "loss": 0.7346, "step": 15573 }, { "epoch": 1.0, "grad_norm": 2.3892786321465977, "learning_rate": 2.579726195739429e-10, "loss": 0.7686, "step": 15574 }, { "epoch": 1.0, "grad_norm": 1.9508454386044858, "learning_rate": 2.475506547211515e-10, "loss": 0.8014, "step": 15575 }, { "epoch": 1.0, "grad_norm": 1.5293788336094372, "learning_rate": 2.373435687225145e-10, "loss": 0.6653, "step": 15576 }, { "epoch": 1.0, "grad_norm": 2.247893169003882, "learning_rate": 2.2735136201712527e-10, "loss": 0.653, "step": 15577 }, { "epoch": 1.0, "grad_norm": 1.9436538505678527, "learning_rate": 2.1757403503408492e-10, "loss": 0.7626, "step": 15578 }, { "epoch": 1.0, "grad_norm": 1.684306092443992, "learning_rate": 2.080115881936129e-10, "loss": 0.6346, "step": 15579 }, { "epoch": 1.0, "grad_norm": 1.8741547480162604, "learning_rate": 1.9866402190704682e-10, "loss": 0.7125, "step": 15580 }, { "epoch": 1.0, "grad_norm": 1.1750321263602195, "learning_rate": 1.895313365757323e-10, "loss": 0.6239, "step": 15581 }, { "epoch": 1.0, "grad_norm": 1.7577751567298667, "learning_rate": 1.8061353259213322e-10, "loss": 0.6291, "step": 15582 }, { "epoch": 1.0, "grad_norm": 1.8003399100897532, "learning_rate": 1.7191061033983157e-10, "loss": 0.7614, "step": 15583 }, { "epoch": 1.0, "grad_norm": 1.2632348969561191, "learning_rate": 1.634225701929726e-10, "loss": 0.6759, "step": 15584 }, { "epoch": 1.0, "grad_norm": 1.5419636992912296, "learning_rate": 1.5514941251570937e-10, "loss": 0.704, "step": 15585 }, { "epoch": 1.0, "grad_norm": 1.83927390532511, "learning_rate": 1.470911376644235e-10, "loss": 1.0154, "step": 15586 }, { "epoch": 1.0, "grad_norm": 1.928327636672388, "learning_rate": 1.392477459843944e-10, "loss": 0.6684, "step": 15587 }, { "epoch": 1.0, "grad_norm": 1.9261262713870717, "learning_rate": 1.3161923781424002e-10, "loss": 0.6857, "step": 15588 }, { "epoch": 1.0, "grad_norm": 1.1152602551888198, "learning_rate": 1.2420561348036598e-10, "loss": 0.6719, "step": 15589 }, { "epoch": 1.0, "grad_norm": 1.8232550749922205, "learning_rate": 1.1700687330196137e-10, "loss": 0.8126, "step": 15590 }, { "epoch": 1.0, "grad_norm": 1.6542636765288794, "learning_rate": 1.1002301758822331e-10, "loss": 0.6616, "step": 15591 }, { "epoch": 1.0, "grad_norm": 2.0255755371906674, "learning_rate": 1.0325404664002225e-10, "loss": 0.6904, "step": 15592 }, { "epoch": 1.0, "grad_norm": 1.7617281633416089, "learning_rate": 9.66999607471264e-11, "loss": 0.766, "step": 15593 }, { "epoch": 1.0, "grad_norm": 1.7396696939716325, "learning_rate": 9.03607601920875e-11, "loss": 0.7765, "step": 15594 }, { "epoch": 1.0, "grad_norm": 1.4576931557320565, "learning_rate": 8.423644524691021e-11, "loss": 0.6558, "step": 15595 }, { "epoch": 1.0, "grad_norm": 1.6911925500390572, "learning_rate": 7.83270161752725e-11, "loss": 0.746, "step": 15596 }, { "epoch": 1.0, "grad_norm": 1.7028821911462881, "learning_rate": 7.263247323086032e-11, "loss": 0.6723, "step": 15597 }, { "epoch": 1.0, "grad_norm": 1.8809734953573969, "learning_rate": 6.715281665847784e-11, "loss": 0.8459, "step": 15598 }, { "epoch": 1.0, "grad_norm": 1.5665552390122965, "learning_rate": 6.188804669349236e-11, "loss": 0.6841, "step": 15599 }, { "epoch": 1.0, "grad_norm": 1.751061891957668, "learning_rate": 5.683816356183425e-11, "loss": 0.7731, "step": 15600 }, { "epoch": 1.0, "grad_norm": 1.5301758705007986, "learning_rate": 5.200316748110723e-11, "loss": 0.7061, "step": 15601 }, { "epoch": 1.0, "grad_norm": 1.6241131750459379, "learning_rate": 4.738305865947812e-11, "loss": 0.7255, "step": 15602 }, { "epoch": 1.0, "grad_norm": 1.5348837706336014, "learning_rate": 4.297783729456662e-11, "loss": 0.6396, "step": 15603 }, { "epoch": 1.0, "grad_norm": 1.7152712446435796, "learning_rate": 3.8787503576220854e-11, "loss": 0.9416, "step": 15604 }, { "epoch": 1.0, "grad_norm": 1.9874715960323803, "learning_rate": 3.481205768429696e-11, "loss": 0.7388, "step": 15605 }, { "epoch": 1.0, "grad_norm": 1.1521744687165072, "learning_rate": 3.105149978976929e-11, "loss": 0.5865, "step": 15606 }, { "epoch": 1.0, "grad_norm": 1.0224958401349025, "learning_rate": 2.7505830054175287e-11, "loss": 0.7083, "step": 15607 }, { "epoch": 1.0, "grad_norm": 1.8838703422167835, "learning_rate": 2.4175048630170617e-11, "loss": 0.8035, "step": 15608 }, { "epoch": 1.0, "grad_norm": 1.824019826887828, "learning_rate": 2.105915566041894e-11, "loss": 0.7025, "step": 15609 }, { "epoch": 1.0, "grad_norm": 1.6472678360335173, "learning_rate": 1.8158151279812354e-11, "loss": 0.6909, "step": 15610 }, { "epoch": 1.0, "grad_norm": 1.1987076811312103, "learning_rate": 1.5472035611585613e-11, "loss": 0.6743, "step": 15611 }, { "epoch": 1.0, "grad_norm": 1.9926455420975224, "learning_rate": 1.3000808772312134e-11, "loss": 0.5917, "step": 15612 }, { "epoch": 1.0, "grad_norm": 1.2045767287364848, "learning_rate": 1.0744470868018219e-11, "loss": 0.6442, "step": 15613 }, { "epoch": 1.0, "grad_norm": 1.7214068661474287, "learning_rate": 8.70302199529327e-12, "loss": 0.6677, "step": 15614 }, { "epoch": 1.0, "grad_norm": 1.7144563444066225, "learning_rate": 6.876462241844906e-12, "loss": 0.8345, "step": 15615 }, { "epoch": 1.0, "grad_norm": 1.6548973586480604, "learning_rate": 5.264791687054072e-12, "loss": 0.6374, "step": 15616 }, { "epoch": 1.0, "grad_norm": 1.5246283188428853, "learning_rate": 3.868010399199484e-12, "loss": 0.6751, "step": 15617 }, { "epoch": 1.0, "grad_norm": 1.7904354329428962, "learning_rate": 2.686118438788299e-12, "loss": 0.6985, "step": 15618 }, { "epoch": 1.0, "grad_norm": 2.6087567521994166, "learning_rate": 1.7191158563356626e-12, "loss": 0.8651, "step": 15619 }, { "epoch": 1.0, "grad_norm": 1.9869609389733853, "learning_rate": 9.670026934749388e-13, "loss": 0.8151, "step": 15620 }, { "epoch": 1.0, "grad_norm": 1.8120772273997248, "learning_rate": 4.2977898240259554e-13, "loss": 0.6758, "step": 15621 }, { "epoch": 1.0, "grad_norm": 1.7519575208338434, "learning_rate": 1.0744474698842767e-13, "loss": 0.643, "step": 15622 }, { "epoch": 1.0, "grad_norm": 1.5008482865515222, "learning_rate": 0.0, "loss": 0.7215, "step": 15623 }, { "epoch": 1.0, "step": 15623, "total_flos": 3394122111524864.0, "train_loss": 0.7551554423673261, "train_runtime": 61333.7666, "train_samples_per_second": 16.303, "train_steps_per_second": 0.255 } ], "logging_steps": 1.0, "max_steps": 15623, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 3394122111524864.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }