|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.27237964506324147, |
|
"eval_steps": 348, |
|
"global_step": 1389, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00019609765663300324, |
|
"grad_norm": 20.33372688293457, |
|
"learning_rate": 2e-05, |
|
"loss": 3.0843, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00019609765663300324, |
|
"eval_loss": 1.1017773151397705, |
|
"eval_runtime": 79.9135, |
|
"eval_samples_per_second": 26.879, |
|
"eval_steps_per_second": 13.44, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0003921953132660065, |
|
"grad_norm": 19.32895278930664, |
|
"learning_rate": 4e-05, |
|
"loss": 3.2221, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0005882929698990097, |
|
"grad_norm": 18.6882266998291, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8951, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.000784390626532013, |
|
"grad_norm": 43.008060455322266, |
|
"learning_rate": 8e-05, |
|
"loss": 5.167, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0009804882831650162, |
|
"grad_norm": 21.642993927001953, |
|
"learning_rate": 0.0001, |
|
"loss": 3.1304, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0011765859397980193, |
|
"grad_norm": 29.79266929626465, |
|
"learning_rate": 0.00012, |
|
"loss": 4.5153, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0013726835964310226, |
|
"grad_norm": 25.503681182861328, |
|
"learning_rate": 0.00014, |
|
"loss": 3.8083, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.001568781253064026, |
|
"grad_norm": 32.35524368286133, |
|
"learning_rate": 0.00016, |
|
"loss": 4.253, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0017648789096970292, |
|
"grad_norm": 21.053390502929688, |
|
"learning_rate": 0.00018, |
|
"loss": 3.3757, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0019609765663300325, |
|
"grad_norm": 25.7067928314209, |
|
"learning_rate": 0.0002, |
|
"loss": 3.2484, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0021570742229630358, |
|
"grad_norm": 22.57227897644043, |
|
"learning_rate": 0.00019999974049780868, |
|
"loss": 2.8378, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0023531718795960386, |
|
"grad_norm": 19.06597900390625, |
|
"learning_rate": 0.00019999896199258152, |
|
"loss": 3.231, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.002549269536229042, |
|
"grad_norm": 17.590620040893555, |
|
"learning_rate": 0.000199997664488359, |
|
"loss": 2.2391, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.002745367192862045, |
|
"grad_norm": 8.627043724060059, |
|
"learning_rate": 0.00019999584799187522, |
|
"loss": 1.7095, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0029414648494950485, |
|
"grad_norm": 21.60858917236328, |
|
"learning_rate": 0.0001999935125125579, |
|
"loss": 3.9299, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.003137562506128052, |
|
"grad_norm": 8.075380325317383, |
|
"learning_rate": 0.00019999065806252829, |
|
"loss": 1.7939, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.003333660162761055, |
|
"grad_norm": 11.393594741821289, |
|
"learning_rate": 0.00019998728465660105, |
|
"loss": 1.601, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0035297578193940584, |
|
"grad_norm": 8.256339073181152, |
|
"learning_rate": 0.00019998339231228434, |
|
"loss": 3.1556, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0037258554760270617, |
|
"grad_norm": 20.03615951538086, |
|
"learning_rate": 0.0001999789810497796, |
|
"loss": 2.0883, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.003921953132660065, |
|
"grad_norm": 10.166353225708008, |
|
"learning_rate": 0.0001999740508919815, |
|
"loss": 3.5616, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.004118050789293068, |
|
"grad_norm": 15.80553913116455, |
|
"learning_rate": 0.0001999686018644777, |
|
"loss": 3.0344, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0043141484459260715, |
|
"grad_norm": 7.451974391937256, |
|
"learning_rate": 0.00019996263399554897, |
|
"loss": 2.1049, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.004510246102559075, |
|
"grad_norm": 5.434274673461914, |
|
"learning_rate": 0.00019995614731616875, |
|
"loss": 2.3178, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.004706343759192077, |
|
"grad_norm": 10.594315528869629, |
|
"learning_rate": 0.00019994914186000328, |
|
"loss": 1.7096, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0049024414158250805, |
|
"grad_norm": 5.348718166351318, |
|
"learning_rate": 0.0001999416176634111, |
|
"loss": 2.695, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.005098539072458084, |
|
"grad_norm": 17.776073455810547, |
|
"learning_rate": 0.00019993357476544312, |
|
"loss": 1.7411, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.005294636729091087, |
|
"grad_norm": 10.051606178283691, |
|
"learning_rate": 0.0001999250132078424, |
|
"loss": 2.6161, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.00549073438572409, |
|
"grad_norm": 26.03020668029785, |
|
"learning_rate": 0.00019991593303504376, |
|
"loss": 3.3977, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.005686832042357094, |
|
"grad_norm": 10.213540077209473, |
|
"learning_rate": 0.00019990633429417363, |
|
"loss": 1.2442, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.005882929698990097, |
|
"grad_norm": 11.69288444519043, |
|
"learning_rate": 0.00019989621703505, |
|
"loss": 1.4702, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0060790273556231, |
|
"grad_norm": 4.343452453613281, |
|
"learning_rate": 0.00019988558131018186, |
|
"loss": 1.0779, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.006275125012256104, |
|
"grad_norm": 9.106976509094238, |
|
"learning_rate": 0.00019987442717476906, |
|
"loss": 2.5887, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.006471222668889107, |
|
"grad_norm": 17.658370971679688, |
|
"learning_rate": 0.00019986275468670205, |
|
"loss": 2.2258, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.00666732032552211, |
|
"grad_norm": 6.7451090812683105, |
|
"learning_rate": 0.00019985056390656162, |
|
"loss": 1.7206, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0068634179821551134, |
|
"grad_norm": 28.07065200805664, |
|
"learning_rate": 0.00019983785489761837, |
|
"loss": 2.7356, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.007059515638788117, |
|
"grad_norm": 11.387879371643066, |
|
"learning_rate": 0.00019982462772583266, |
|
"loss": 1.973, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.00725561329542112, |
|
"grad_norm": 9.64372444152832, |
|
"learning_rate": 0.00019981088245985408, |
|
"loss": 2.7339, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.007451710952054123, |
|
"grad_norm": 9.302544593811035, |
|
"learning_rate": 0.00019979661917102115, |
|
"loss": 1.7498, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.007647808608687127, |
|
"grad_norm": 15.064400672912598, |
|
"learning_rate": 0.000199781837933361, |
|
"loss": 3.0109, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.00784390626532013, |
|
"grad_norm": 7.281099319458008, |
|
"learning_rate": 0.00019976653882358884, |
|
"loss": 1.3118, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.008040003921953132, |
|
"grad_norm": 6.4474873542785645, |
|
"learning_rate": 0.0001997507219211078, |
|
"loss": 1.408, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.008236101578586136, |
|
"grad_norm": 13.101079940795898, |
|
"learning_rate": 0.00019973438730800822, |
|
"loss": 2.3367, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.008432199235219139, |
|
"grad_norm": 5.951049327850342, |
|
"learning_rate": 0.00019971753506906753, |
|
"loss": 0.9101, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.008628296891852143, |
|
"grad_norm": 11.212276458740234, |
|
"learning_rate": 0.00019970016529174947, |
|
"loss": 2.7058, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.008824394548485145, |
|
"grad_norm": 8.68136978149414, |
|
"learning_rate": 0.0001996822780662041, |
|
"loss": 2.0276, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.00902049220511815, |
|
"grad_norm": 17.70038414001465, |
|
"learning_rate": 0.00019966387348526683, |
|
"loss": 2.7989, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.009216589861751152, |
|
"grad_norm": 10.247598648071289, |
|
"learning_rate": 0.00019964495164445824, |
|
"loss": 1.9618, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.009412687518384154, |
|
"grad_norm": 10.378255844116211, |
|
"learning_rate": 0.0001996255126419835, |
|
"loss": 1.8003, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.009608785175017159, |
|
"grad_norm": 31.620820999145508, |
|
"learning_rate": 0.0001996055565787319, |
|
"loss": 2.8785, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.009804882831650161, |
|
"grad_norm": 9.976147651672363, |
|
"learning_rate": 0.0001995850835582763, |
|
"loss": 2.5605, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.010000980488283165, |
|
"grad_norm": 11.751899719238281, |
|
"learning_rate": 0.00019956409368687258, |
|
"loss": 2.7556, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.010197078144916168, |
|
"grad_norm": 15.828932762145996, |
|
"learning_rate": 0.000199542587073459, |
|
"loss": 2.7773, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.010393175801549172, |
|
"grad_norm": 10.772979736328125, |
|
"learning_rate": 0.00019952056382965597, |
|
"loss": 1.9553, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.010589273458182174, |
|
"grad_norm": 10.821427345275879, |
|
"learning_rate": 0.00019949802406976495, |
|
"loss": 1.8528, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.010785371114815178, |
|
"grad_norm": 7.228662490844727, |
|
"learning_rate": 0.00019947496791076837, |
|
"loss": 1.1844, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01098146877144818, |
|
"grad_norm": 7.164773941040039, |
|
"learning_rate": 0.00019945139547232872, |
|
"loss": 1.0291, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.011177566428081185, |
|
"grad_norm": 13.927733421325684, |
|
"learning_rate": 0.0001994273068767879, |
|
"loss": 1.5417, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.011373664084714187, |
|
"grad_norm": 10.366493225097656, |
|
"learning_rate": 0.00019940270224916688, |
|
"loss": 1.5122, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.011569761741347192, |
|
"grad_norm": 11.2214994430542, |
|
"learning_rate": 0.00019937758171716468, |
|
"loss": 1.6003, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.011765859397980194, |
|
"grad_norm": 14.360090255737305, |
|
"learning_rate": 0.000199351945411158, |
|
"loss": 1.5651, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.011961957054613198, |
|
"grad_norm": 17.97150993347168, |
|
"learning_rate": 0.00019932579346420038, |
|
"loss": 1.6064, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0121580547112462, |
|
"grad_norm": 10.190518379211426, |
|
"learning_rate": 0.00019929912601202151, |
|
"loss": 1.9151, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.012354152367879203, |
|
"grad_norm": 13.573248863220215, |
|
"learning_rate": 0.00019927194319302677, |
|
"loss": 4.0602, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.012550250024512207, |
|
"grad_norm": 16.919841766357422, |
|
"learning_rate": 0.00019924424514829606, |
|
"loss": 2.8292, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01274634768114521, |
|
"grad_norm": 58.470252990722656, |
|
"learning_rate": 0.00019921603202158354, |
|
"loss": 1.9637, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.012942445337778214, |
|
"grad_norm": 18.334800720214844, |
|
"learning_rate": 0.00019918730395931649, |
|
"loss": 2.5609, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.013138542994411216, |
|
"grad_norm": 12.280759811401367, |
|
"learning_rate": 0.00019915806111059486, |
|
"loss": 1.2495, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.01333464065104422, |
|
"grad_norm": 8.015874862670898, |
|
"learning_rate": 0.0001991283036271903, |
|
"loss": 1.505, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.013530738307677223, |
|
"grad_norm": 7.713284969329834, |
|
"learning_rate": 0.0001990980316635455, |
|
"loss": 2.3898, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.013726835964310227, |
|
"grad_norm": 18.01800537109375, |
|
"learning_rate": 0.00019906724537677316, |
|
"loss": 3.0263, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01392293362094323, |
|
"grad_norm": 21.270421981811523, |
|
"learning_rate": 0.00019903594492665558, |
|
"loss": 3.2547, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.014119031277576233, |
|
"grad_norm": 21.60205841064453, |
|
"learning_rate": 0.0001990041304756434, |
|
"loss": 2.577, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.014315128934209236, |
|
"grad_norm": 10.01419734954834, |
|
"learning_rate": 0.00019897180218885507, |
|
"loss": 1.9092, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.01451122659084224, |
|
"grad_norm": 14.10943603515625, |
|
"learning_rate": 0.00019893896023407578, |
|
"loss": 2.2377, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.014707324247475242, |
|
"grad_norm": 11.310667037963867, |
|
"learning_rate": 0.0001989056047817567, |
|
"loss": 1.6645, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.014903421904108247, |
|
"grad_norm": 6.586666107177734, |
|
"learning_rate": 0.0001988717360050141, |
|
"loss": 2.2651, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.015099519560741249, |
|
"grad_norm": 4.402716159820557, |
|
"learning_rate": 0.00019883735407962846, |
|
"loss": 1.3483, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.015295617217374253, |
|
"grad_norm": 9.384387016296387, |
|
"learning_rate": 0.00019880245918404342, |
|
"loss": 2.6391, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.015491714874007256, |
|
"grad_norm": 6.753894329071045, |
|
"learning_rate": 0.000198767051499365, |
|
"loss": 2.9391, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.01568781253064026, |
|
"grad_norm": 6.399787902832031, |
|
"learning_rate": 0.00019873113120936074, |
|
"loss": 3.7452, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01588391018727326, |
|
"grad_norm": 8.880107879638672, |
|
"learning_rate": 0.00019869469850045842, |
|
"loss": 1.2771, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.016080007843906265, |
|
"grad_norm": 12.630661964416504, |
|
"learning_rate": 0.00019865775356174545, |
|
"loss": 2.2072, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.01627610550053927, |
|
"grad_norm": 7.974503993988037, |
|
"learning_rate": 0.00019862029658496762, |
|
"loss": 1.9795, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.016472203157172273, |
|
"grad_norm": 50.43594741821289, |
|
"learning_rate": 0.00019858232776452837, |
|
"loss": 1.5331, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.016668300813805274, |
|
"grad_norm": 7.273484230041504, |
|
"learning_rate": 0.00019854384729748746, |
|
"loss": 2.4005, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.016864398470438278, |
|
"grad_norm": 5.826492786407471, |
|
"learning_rate": 0.00019850485538356027, |
|
"loss": 2.1915, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.017060496127071282, |
|
"grad_norm": 9.881019592285156, |
|
"learning_rate": 0.0001984653522251165, |
|
"loss": 2.3309, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.017256593783704286, |
|
"grad_norm": 9.147713661193848, |
|
"learning_rate": 0.00019842533802717923, |
|
"loss": 1.1404, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.017452691440337287, |
|
"grad_norm": 13.98263931274414, |
|
"learning_rate": 0.00019838481299742398, |
|
"loss": 1.2166, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.01764878909697029, |
|
"grad_norm": 8.206791877746582, |
|
"learning_rate": 0.0001983437773461774, |
|
"loss": 2.6039, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.017844886753603295, |
|
"grad_norm": 10.445443153381348, |
|
"learning_rate": 0.00019830223128641637, |
|
"loss": 2.3554, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0180409844102363, |
|
"grad_norm": 11.756292343139648, |
|
"learning_rate": 0.00019826017503376666, |
|
"loss": 1.7371, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0182370820668693, |
|
"grad_norm": 7.509032249450684, |
|
"learning_rate": 0.00019821760880650214, |
|
"loss": 1.389, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.018433179723502304, |
|
"grad_norm": 8.619280815124512, |
|
"learning_rate": 0.00019817453282554333, |
|
"loss": 1.6818, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.01862927738013531, |
|
"grad_norm": 9.11640739440918, |
|
"learning_rate": 0.00019813094731445654, |
|
"loss": 1.631, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.01882537503676831, |
|
"grad_norm": 14.109521865844727, |
|
"learning_rate": 0.00019808685249945245, |
|
"loss": 2.0497, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.019021472693401313, |
|
"grad_norm": 10.804281234741211, |
|
"learning_rate": 0.00019804224860938506, |
|
"loss": 2.2364, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.019217570350034317, |
|
"grad_norm": 7.363731384277344, |
|
"learning_rate": 0.0001979971358757505, |
|
"loss": 1.0967, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.01941366800666732, |
|
"grad_norm": 15.269912719726562, |
|
"learning_rate": 0.0001979515145326859, |
|
"loss": 2.8752, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.019609765663300322, |
|
"grad_norm": 5.457535266876221, |
|
"learning_rate": 0.000197905384816968, |
|
"loss": 1.7098, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.019805863319933326, |
|
"grad_norm": 4.689967632293701, |
|
"learning_rate": 0.00019785874696801202, |
|
"loss": 2.2133, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.02000196097656633, |
|
"grad_norm": 10.993409156799316, |
|
"learning_rate": 0.00019781160122787046, |
|
"loss": 2.314, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.020198058633199335, |
|
"grad_norm": 8.199251174926758, |
|
"learning_rate": 0.00019776394784123177, |
|
"loss": 2.5164, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.020394156289832335, |
|
"grad_norm": 15.144885063171387, |
|
"learning_rate": 0.00019771578705541916, |
|
"loss": 2.0058, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.02059025394646534, |
|
"grad_norm": 5.252450466156006, |
|
"learning_rate": 0.00019766711912038915, |
|
"loss": 1.7012, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.020786351603098344, |
|
"grad_norm": 8.265049934387207, |
|
"learning_rate": 0.0001976179442887305, |
|
"loss": 1.8646, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.020982449259731348, |
|
"grad_norm": 8.365408897399902, |
|
"learning_rate": 0.00019756826281566272, |
|
"loss": 1.9615, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.02117854691636435, |
|
"grad_norm": 7.514213562011719, |
|
"learning_rate": 0.00019751807495903484, |
|
"loss": 1.4897, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.021374644572997353, |
|
"grad_norm": 15.234655380249023, |
|
"learning_rate": 0.00019746738097932407, |
|
"loss": 2.0467, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.021570742229630357, |
|
"grad_norm": 6.856448650360107, |
|
"learning_rate": 0.0001974161811396343, |
|
"loss": 1.4492, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.021766839886263357, |
|
"grad_norm": 7.893224716186523, |
|
"learning_rate": 0.00019736447570569503, |
|
"loss": 1.919, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.02196293754289636, |
|
"grad_norm": 8.966511726379395, |
|
"learning_rate": 0.0001973122649458597, |
|
"loss": 2.4484, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.022159035199529366, |
|
"grad_norm": 7.631579875946045, |
|
"learning_rate": 0.00019725954913110442, |
|
"loss": 1.4992, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.02235513285616237, |
|
"grad_norm": 7.418518543243408, |
|
"learning_rate": 0.0001972063285350266, |
|
"loss": 0.8401, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.02255123051279537, |
|
"grad_norm": 7.739930629730225, |
|
"learning_rate": 0.00019715260343384347, |
|
"loss": 2.0713, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.022747328169428375, |
|
"grad_norm": 6.441893100738525, |
|
"learning_rate": 0.00019709837410639063, |
|
"loss": 1.4438, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.02294342582606138, |
|
"grad_norm": 6.008083820343018, |
|
"learning_rate": 0.0001970436408341207, |
|
"loss": 1.3503, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.023139523482694383, |
|
"grad_norm": 7.100820541381836, |
|
"learning_rate": 0.00019698840390110176, |
|
"loss": 1.4726, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.023335621139327384, |
|
"grad_norm": 10.213756561279297, |
|
"learning_rate": 0.0001969326635940159, |
|
"loss": 0.8107, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.023531718795960388, |
|
"grad_norm": 5.251387119293213, |
|
"learning_rate": 0.00019687642020215775, |
|
"loss": 1.5542, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.023727816452593392, |
|
"grad_norm": 6.100740432739258, |
|
"learning_rate": 0.00019681967401743297, |
|
"loss": 1.2512, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.023923914109226396, |
|
"grad_norm": 7.356696128845215, |
|
"learning_rate": 0.00019676242533435678, |
|
"loss": 2.4725, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.024120011765859397, |
|
"grad_norm": 11.542431831359863, |
|
"learning_rate": 0.00019670467445005233, |
|
"loss": 3.0307, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0243161094224924, |
|
"grad_norm": 12.166086196899414, |
|
"learning_rate": 0.00019664642166424928, |
|
"loss": 1.2784, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.024512207079125405, |
|
"grad_norm": 5.222433090209961, |
|
"learning_rate": 0.00019658766727928206, |
|
"loss": 1.1759, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.024708304735758406, |
|
"grad_norm": 4.77174711227417, |
|
"learning_rate": 0.00019652841160008858, |
|
"loss": 1.1041, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.02490440239239141, |
|
"grad_norm": 4.879274368286133, |
|
"learning_rate": 0.0001964686549342084, |
|
"loss": 2.6326, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.025100500049024414, |
|
"grad_norm": 14.171689987182617, |
|
"learning_rate": 0.00019640839759178116, |
|
"loss": 3.4144, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02529659770565742, |
|
"grad_norm": 7.598373889923096, |
|
"learning_rate": 0.00019634763988554522, |
|
"loss": 2.0596, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.02549269536229042, |
|
"grad_norm": 6.88770866394043, |
|
"learning_rate": 0.00019628638213083565, |
|
"loss": 1.4691, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.025688793018923423, |
|
"grad_norm": 7.128096580505371, |
|
"learning_rate": 0.00019622462464558295, |
|
"loss": 1.3307, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.025884890675556427, |
|
"grad_norm": 6.430881500244141, |
|
"learning_rate": 0.00019616236775031113, |
|
"loss": 0.9491, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.02608098833218943, |
|
"grad_norm": 9.912070274353027, |
|
"learning_rate": 0.00019609961176813624, |
|
"loss": 2.5006, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.026277085988822432, |
|
"grad_norm": 8.550467491149902, |
|
"learning_rate": 0.0001960363570247645, |
|
"loss": 2.4952, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.026473183645455436, |
|
"grad_norm": 4.201476573944092, |
|
"learning_rate": 0.0001959726038484909, |
|
"loss": 0.9033, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02666928130208844, |
|
"grad_norm": 5.774847984313965, |
|
"learning_rate": 0.00019590835257019714, |
|
"loss": 2.1291, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.026865378958721445, |
|
"grad_norm": 8.179195404052734, |
|
"learning_rate": 0.00019584360352335023, |
|
"loss": 2.7527, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.027061476615354445, |
|
"grad_norm": 15.658841133117676, |
|
"learning_rate": 0.0001957783570440005, |
|
"loss": 1.8304, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.02725757427198745, |
|
"grad_norm": 5.7399163246154785, |
|
"learning_rate": 0.0001957126134707801, |
|
"loss": 1.7071, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.027453671928620454, |
|
"grad_norm": 5.0817389488220215, |
|
"learning_rate": 0.00019564637314490108, |
|
"loss": 1.8933, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.027649769585253458, |
|
"grad_norm": 5.634946346282959, |
|
"learning_rate": 0.0001955796364101535, |
|
"loss": 1.7343, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.02784586724188646, |
|
"grad_norm": 6.406938552856445, |
|
"learning_rate": 0.00019551240361290407, |
|
"loss": 2.3013, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.028041964898519463, |
|
"grad_norm": 8.239458084106445, |
|
"learning_rate": 0.00019544467510209388, |
|
"loss": 1.2177, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.028238062555152467, |
|
"grad_norm": 11.887965202331543, |
|
"learning_rate": 0.0001953764512292369, |
|
"loss": 2.4312, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.028434160211785468, |
|
"grad_norm": 7.482359409332275, |
|
"learning_rate": 0.00019530773234841803, |
|
"loss": 1.1083, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.028630257868418472, |
|
"grad_norm": 8.86729621887207, |
|
"learning_rate": 0.00019523851881629126, |
|
"loss": 1.6451, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.028826355525051476, |
|
"grad_norm": 7.395509719848633, |
|
"learning_rate": 0.0001951688109920778, |
|
"loss": 1.31, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.02902245318168448, |
|
"grad_norm": 4.955163955688477, |
|
"learning_rate": 0.00019509860923756442, |
|
"loss": 2.5206, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.02921855083831748, |
|
"grad_norm": 5.034746170043945, |
|
"learning_rate": 0.00019502791391710125, |
|
"loss": 0.9336, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.029414648494950485, |
|
"grad_norm": 12.375234603881836, |
|
"learning_rate": 0.00019495672539760007, |
|
"loss": 2.1276, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02961074615158349, |
|
"grad_norm": 5.832932949066162, |
|
"learning_rate": 0.00019488504404853248, |
|
"loss": 1.3252, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.029806843808216493, |
|
"grad_norm": 5.934417724609375, |
|
"learning_rate": 0.00019481287024192775, |
|
"loss": 1.5907, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.030002941464849494, |
|
"grad_norm": 9.238896369934082, |
|
"learning_rate": 0.00019474020435237117, |
|
"loss": 1.1184, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.030199039121482498, |
|
"grad_norm": 9.787931442260742, |
|
"learning_rate": 0.00019466704675700185, |
|
"loss": 1.4931, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.030395136778115502, |
|
"grad_norm": 7.260796070098877, |
|
"learning_rate": 0.00019459339783551094, |
|
"loss": 0.8924, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.030591234434748506, |
|
"grad_norm": 8.712836265563965, |
|
"learning_rate": 0.00019451925797013954, |
|
"loss": 1.586, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.030787332091381507, |
|
"grad_norm": 11.15104866027832, |
|
"learning_rate": 0.00019444462754567682, |
|
"loss": 1.5007, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.03098342974801451, |
|
"grad_norm": 7.158255100250244, |
|
"learning_rate": 0.00019436950694945798, |
|
"loss": 2.4118, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.031179527404647515, |
|
"grad_norm": 11.58385944366455, |
|
"learning_rate": 0.00019429389657136213, |
|
"loss": 2.1638, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.03137562506128052, |
|
"grad_norm": 7.469117641448975, |
|
"learning_rate": 0.00019421779680381054, |
|
"loss": 3.0682, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.031571722717913524, |
|
"grad_norm": 10.78966999053955, |
|
"learning_rate": 0.00019414120804176426, |
|
"loss": 1.1822, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.03176782037454652, |
|
"grad_norm": 9.68694019317627, |
|
"learning_rate": 0.00019406413068272238, |
|
"loss": 2.5351, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.031963918031179525, |
|
"grad_norm": 11.67428970336914, |
|
"learning_rate": 0.00019398656512671972, |
|
"loss": 1.9244, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.03216001568781253, |
|
"grad_norm": 12.72513198852539, |
|
"learning_rate": 0.00019390851177632497, |
|
"loss": 3.2138, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.03235611334444553, |
|
"grad_norm": 8.345921516418457, |
|
"learning_rate": 0.00019382997103663838, |
|
"loss": 2.6435, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.03255221100107854, |
|
"grad_norm": 7.740304470062256, |
|
"learning_rate": 0.0001937509433152899, |
|
"loss": 0.8189, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.03274830865771154, |
|
"grad_norm": 9.329862594604492, |
|
"learning_rate": 0.0001936714290224368, |
|
"loss": 1.4106, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.032944406314344546, |
|
"grad_norm": 7.179844379425049, |
|
"learning_rate": 0.00019359142857076176, |
|
"loss": 1.8125, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.03314050397097755, |
|
"grad_norm": 7.835447311401367, |
|
"learning_rate": 0.00019351094237547066, |
|
"loss": 1.6617, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.03333660162761055, |
|
"grad_norm": 6.018518924713135, |
|
"learning_rate": 0.0001934299708542904, |
|
"loss": 2.4333, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03353269928424355, |
|
"grad_norm": 8.176468849182129, |
|
"learning_rate": 0.00019334851442746664, |
|
"loss": 2.5915, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.033728796940876556, |
|
"grad_norm": 8.241739273071289, |
|
"learning_rate": 0.00019326657351776186, |
|
"loss": 1.666, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.03392489459750956, |
|
"grad_norm": 8.064835548400879, |
|
"learning_rate": 0.000193184148550453, |
|
"loss": 1.477, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.034120992254142564, |
|
"grad_norm": 5.790217399597168, |
|
"learning_rate": 0.00019310123995332917, |
|
"loss": 0.7703, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.03431708991077557, |
|
"grad_norm": 9.38430118560791, |
|
"learning_rate": 0.00019301784815668974, |
|
"loss": 1.5785, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03451318756740857, |
|
"grad_norm": 8.252826690673828, |
|
"learning_rate": 0.00019293397359334167, |
|
"loss": 2.1462, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.03470928522404157, |
|
"grad_norm": 12.65652847290039, |
|
"learning_rate": 0.00019284961669859766, |
|
"loss": 1.3009, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.034905382880674574, |
|
"grad_norm": 6.8490753173828125, |
|
"learning_rate": 0.00019276477791027374, |
|
"loss": 2.4905, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.03510148053730758, |
|
"grad_norm": 4.2581048011779785, |
|
"learning_rate": 0.0001926794576686869, |
|
"loss": 0.9042, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.03529757819394058, |
|
"grad_norm": 6.415445327758789, |
|
"learning_rate": 0.0001925936564166529, |
|
"loss": 2.238, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.035493675850573586, |
|
"grad_norm": 13.620756149291992, |
|
"learning_rate": 0.00019250737459948405, |
|
"loss": 1.5966, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.03568977350720659, |
|
"grad_norm": 10.609662055969238, |
|
"learning_rate": 0.00019242061266498675, |
|
"loss": 1.081, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.035885871163839594, |
|
"grad_norm": 8.404073715209961, |
|
"learning_rate": 0.00019233337106345925, |
|
"loss": 1.849, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.0360819688204726, |
|
"grad_norm": 5.560455322265625, |
|
"learning_rate": 0.00019224565024768926, |
|
"loss": 1.4533, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.036278066477105596, |
|
"grad_norm": 7.896220684051514, |
|
"learning_rate": 0.00019215745067295169, |
|
"loss": 2.482, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.0364741641337386, |
|
"grad_norm": 9.554024696350098, |
|
"learning_rate": 0.00019206877279700612, |
|
"loss": 1.9367, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.036670261790371604, |
|
"grad_norm": 3.333113193511963, |
|
"learning_rate": 0.00019197961708009473, |
|
"loss": 1.1477, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.03686635944700461, |
|
"grad_norm": 9.468240737915039, |
|
"learning_rate": 0.00019188998398493953, |
|
"loss": 1.0849, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.03706245710363761, |
|
"grad_norm": 10.807921409606934, |
|
"learning_rate": 0.00019179987397674022, |
|
"loss": 2.0192, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.03725855476027062, |
|
"grad_norm": 7.14724588394165, |
|
"learning_rate": 0.0001917092875231717, |
|
"loss": 2.1502, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03745465241690362, |
|
"grad_norm": 12.262707710266113, |
|
"learning_rate": 0.00019161822509438162, |
|
"loss": 2.423, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.03765075007353662, |
|
"grad_norm": 35.0489387512207, |
|
"learning_rate": 0.000191526687162988, |
|
"loss": 2.5959, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03784684773016962, |
|
"grad_norm": 6.615735054016113, |
|
"learning_rate": 0.0001914346742040767, |
|
"loss": 1.7733, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.038042945386802626, |
|
"grad_norm": 4.537426471710205, |
|
"learning_rate": 0.00019134218669519896, |
|
"loss": 1.0028, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.03823904304343563, |
|
"grad_norm": 5.247801303863525, |
|
"learning_rate": 0.00019124922511636912, |
|
"loss": 0.8412, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.038435140700068635, |
|
"grad_norm": 6.2183918952941895, |
|
"learning_rate": 0.00019115578995006173, |
|
"loss": 1.7212, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.03863123835670164, |
|
"grad_norm": 9.330825805664062, |
|
"learning_rate": 0.00019106188168120948, |
|
"loss": 1.5341, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.03882733601333464, |
|
"grad_norm": 9.86260986328125, |
|
"learning_rate": 0.00019096750079720037, |
|
"loss": 2.765, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.03902343366996765, |
|
"grad_norm": 10.341052055358887, |
|
"learning_rate": 0.00019087264778787534, |
|
"loss": 1.9024, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.039219531326600644, |
|
"grad_norm": 9.549159049987793, |
|
"learning_rate": 0.00019077732314552566, |
|
"loss": 1.2644, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03941562898323365, |
|
"grad_norm": 5.25094747543335, |
|
"learning_rate": 0.00019068152736489036, |
|
"loss": 1.334, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.03961172663986665, |
|
"grad_norm": 7.197662830352783, |
|
"learning_rate": 0.00019058526094315378, |
|
"loss": 1.9093, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.03980782429649966, |
|
"grad_norm": 8.476766586303711, |
|
"learning_rate": 0.0001904885243799429, |
|
"loss": 1.477, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.04000392195313266, |
|
"grad_norm": 8.232537269592285, |
|
"learning_rate": 0.00019039131817732462, |
|
"loss": 1.4013, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.040200019609765665, |
|
"grad_norm": 15.687997817993164, |
|
"learning_rate": 0.0001902936428398035, |
|
"loss": 1.6772, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.04039611726639867, |
|
"grad_norm": 7.573246479034424, |
|
"learning_rate": 0.00019019549887431877, |
|
"loss": 1.5007, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.040592214923031666, |
|
"grad_norm": 11.531679153442383, |
|
"learning_rate": 0.0001900968867902419, |
|
"loss": 2.6798, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.04078831257966467, |
|
"grad_norm": 6.225399494171143, |
|
"learning_rate": 0.00018999780709937398, |
|
"loss": 1.3078, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.040984410236297675, |
|
"grad_norm": 10.358306884765625, |
|
"learning_rate": 0.0001898982603159429, |
|
"loss": 1.7353, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.04118050789293068, |
|
"grad_norm": 8.146821975708008, |
|
"learning_rate": 0.00018979824695660087, |
|
"loss": 1.415, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04137660554956368, |
|
"grad_norm": 4.390834808349609, |
|
"learning_rate": 0.00018969776754042156, |
|
"loss": 1.7612, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.04157270320619669, |
|
"grad_norm": 7.958174228668213, |
|
"learning_rate": 0.0001895968225888976, |
|
"loss": 2.6614, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.04176880086282969, |
|
"grad_norm": 9.981225967407227, |
|
"learning_rate": 0.00018949541262593762, |
|
"loss": 2.0158, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.041964898519462696, |
|
"grad_norm": 4.456605911254883, |
|
"learning_rate": 0.00018939353817786387, |
|
"loss": 1.0621, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.04216099617609569, |
|
"grad_norm": 7.546274662017822, |
|
"learning_rate": 0.00018929119977340917, |
|
"loss": 1.7333, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.0423570938327287, |
|
"grad_norm": 11.629569053649902, |
|
"learning_rate": 0.0001891883979437143, |
|
"loss": 1.4268, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.0425531914893617, |
|
"grad_norm": 17.710948944091797, |
|
"learning_rate": 0.00018908513322232528, |
|
"loss": 2.8701, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.042749289145994705, |
|
"grad_norm": 6.267049789428711, |
|
"learning_rate": 0.00018898140614519054, |
|
"loss": 1.6313, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.04294538680262771, |
|
"grad_norm": 4.971591949462891, |
|
"learning_rate": 0.00018887721725065814, |
|
"loss": 2.0962, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.043141484459260714, |
|
"grad_norm": 5.603585243225098, |
|
"learning_rate": 0.00018877256707947306, |
|
"loss": 0.6683, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04333758211589372, |
|
"grad_norm": 6.029137134552002, |
|
"learning_rate": 0.00018866745617477423, |
|
"loss": 1.5375, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.043533679772526715, |
|
"grad_norm": 7.4105143547058105, |
|
"learning_rate": 0.00018856188508209183, |
|
"loss": 1.9524, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.04372977742915972, |
|
"grad_norm": 8.321500778198242, |
|
"learning_rate": 0.00018845585434934452, |
|
"loss": 2.1109, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.04392587508579272, |
|
"grad_norm": 9.238992691040039, |
|
"learning_rate": 0.00018834936452683638, |
|
"loss": 1.4247, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.04412197274242573, |
|
"grad_norm": 5.125700950622559, |
|
"learning_rate": 0.00018824241616725434, |
|
"loss": 1.1266, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.04431807039905873, |
|
"grad_norm": 7.538069725036621, |
|
"learning_rate": 0.000188135009825665, |
|
"loss": 2.1554, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.044514168055691736, |
|
"grad_norm": 8.309137344360352, |
|
"learning_rate": 0.00018802714605951199, |
|
"loss": 1.1435, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.04471026571232474, |
|
"grad_norm": 22.02942657470703, |
|
"learning_rate": 0.00018791882542861302, |
|
"loss": 1.8154, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.044906363368957744, |
|
"grad_norm": 7.017299652099609, |
|
"learning_rate": 0.0001878100484951569, |
|
"loss": 1.4998, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.04510246102559074, |
|
"grad_norm": 18.39406394958496, |
|
"learning_rate": 0.00018770081582370068, |
|
"loss": 2.1662, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.045298558682223745, |
|
"grad_norm": 9.11802864074707, |
|
"learning_rate": 0.0001875911279811667, |
|
"loss": 0.7446, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.04549465633885675, |
|
"grad_norm": 7.193735122680664, |
|
"learning_rate": 0.00018748098553683968, |
|
"loss": 1.9472, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.045690753995489754, |
|
"grad_norm": 23.407245635986328, |
|
"learning_rate": 0.0001873703890623637, |
|
"loss": 2.1782, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.04588685165212276, |
|
"grad_norm": 6.547053813934326, |
|
"learning_rate": 0.00018725933913173938, |
|
"loss": 1.9687, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.04608294930875576, |
|
"grad_norm": 10.576699256896973, |
|
"learning_rate": 0.00018714783632132068, |
|
"loss": 1.8832, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.046279046965388766, |
|
"grad_norm": 5.852027416229248, |
|
"learning_rate": 0.00018703588120981207, |
|
"loss": 1.8932, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.04647514462202176, |
|
"grad_norm": 7.023755073547363, |
|
"learning_rate": 0.00018692347437826548, |
|
"loss": 3.7953, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.04667124227865477, |
|
"grad_norm": 13.61612606048584, |
|
"learning_rate": 0.00018681061641007737, |
|
"loss": 1.9077, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.04686733993528777, |
|
"grad_norm": 5.3344526290893555, |
|
"learning_rate": 0.0001866973078909854, |
|
"loss": 1.4342, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.047063437591920776, |
|
"grad_norm": 38.80408477783203, |
|
"learning_rate": 0.00018658354940906586, |
|
"loss": 2.3665, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04725953524855378, |
|
"grad_norm": 9.670344352722168, |
|
"learning_rate": 0.00018646934155473022, |
|
"loss": 0.9006, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.047455632905186784, |
|
"grad_norm": 5.1102495193481445, |
|
"learning_rate": 0.00018635468492072228, |
|
"loss": 1.2289, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.04765173056181979, |
|
"grad_norm": 9.1209077835083, |
|
"learning_rate": 0.00018623958010211493, |
|
"loss": 1.6009, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.04784782821845279, |
|
"grad_norm": 16.793027877807617, |
|
"learning_rate": 0.0001861240276963073, |
|
"loss": 0.94, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.04804392587508579, |
|
"grad_norm": 6.90054988861084, |
|
"learning_rate": 0.00018600802830302134, |
|
"loss": 1.559, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.048240023531718794, |
|
"grad_norm": 13.111268043518066, |
|
"learning_rate": 0.0001858915825242991, |
|
"loss": 2.1186, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.0484361211883518, |
|
"grad_norm": 6.356579780578613, |
|
"learning_rate": 0.00018577469096449925, |
|
"loss": 1.6653, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.0486322188449848, |
|
"grad_norm": 9.505541801452637, |
|
"learning_rate": 0.00018565735423029404, |
|
"loss": 0.9774, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.048828316501617806, |
|
"grad_norm": 8.927581787109375, |
|
"learning_rate": 0.00018553957293066632, |
|
"loss": 2.6455, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.04902441415825081, |
|
"grad_norm": 7.568793773651123, |
|
"learning_rate": 0.00018542134767690616, |
|
"loss": 1.1464, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.049220511814883815, |
|
"grad_norm": 7.632232189178467, |
|
"learning_rate": 0.00018530267908260784, |
|
"loss": 1.2671, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.04941660947151681, |
|
"grad_norm": 4.4279561042785645, |
|
"learning_rate": 0.00018518356776366657, |
|
"loss": 2.0384, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.049612707128149816, |
|
"grad_norm": 10.818602561950684, |
|
"learning_rate": 0.00018506401433827528, |
|
"loss": 1.0559, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.04980880478478282, |
|
"grad_norm": 5.57148551940918, |
|
"learning_rate": 0.00018494401942692153, |
|
"loss": 0.9603, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.050004902441415824, |
|
"grad_norm": 11.1985502243042, |
|
"learning_rate": 0.00018482358365238413, |
|
"loss": 2.4928, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.05020100009804883, |
|
"grad_norm": 4.890799522399902, |
|
"learning_rate": 0.00018470270763973004, |
|
"loss": 1.4034, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.05039709775468183, |
|
"grad_norm": 6.2078680992126465, |
|
"learning_rate": 0.00018458139201631108, |
|
"loss": 1.782, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.05059319541131484, |
|
"grad_norm": 24.89278221130371, |
|
"learning_rate": 0.00018445963741176065, |
|
"loss": 3.7879, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.05078929306794784, |
|
"grad_norm": 5.363570213317871, |
|
"learning_rate": 0.00018433744445799045, |
|
"loss": 1.4292, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.05098539072458084, |
|
"grad_norm": 7.669764041900635, |
|
"learning_rate": 0.0001842148137891873, |
|
"loss": 2.0483, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05118148838121384, |
|
"grad_norm": 5.229150295257568, |
|
"learning_rate": 0.00018409174604180976, |
|
"loss": 3.2863, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.05137758603784685, |
|
"grad_norm": 5.850373268127441, |
|
"learning_rate": 0.0001839682418545848, |
|
"loss": 1.8197, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.05157368369447985, |
|
"grad_norm": 7.138283729553223, |
|
"learning_rate": 0.00018384430186850454, |
|
"loss": 2.7101, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.051769781351112855, |
|
"grad_norm": 10.918169975280762, |
|
"learning_rate": 0.000183719926726823, |
|
"loss": 1.8243, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.05196587900774586, |
|
"grad_norm": 9.205517768859863, |
|
"learning_rate": 0.00018359511707505258, |
|
"loss": 1.4992, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.05216197666437886, |
|
"grad_norm": 8.567139625549316, |
|
"learning_rate": 0.00018346987356096086, |
|
"loss": 1.051, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.05235807432101187, |
|
"grad_norm": 10.313075065612793, |
|
"learning_rate": 0.00018334419683456717, |
|
"loss": 2.6062, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.052554171977644865, |
|
"grad_norm": 7.515801906585693, |
|
"learning_rate": 0.0001832180875481392, |
|
"loss": 1.266, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.05275026963427787, |
|
"grad_norm": 5.345809459686279, |
|
"learning_rate": 0.00018309154635618965, |
|
"loss": 1.2526, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.05294636729091087, |
|
"grad_norm": 13.568882942199707, |
|
"learning_rate": 0.00018296457391547296, |
|
"loss": 2.5183, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05314246494754388, |
|
"grad_norm": 10.022235870361328, |
|
"learning_rate": 0.00018283717088498155, |
|
"loss": 2.2774, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.05333856260417688, |
|
"grad_norm": 6.537176132202148, |
|
"learning_rate": 0.0001827093379259428, |
|
"loss": 1.4989, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.053534660260809885, |
|
"grad_norm": 17.213987350463867, |
|
"learning_rate": 0.00018258107570181533, |
|
"loss": 2.4885, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.05373075791744289, |
|
"grad_norm": 6.48647403717041, |
|
"learning_rate": 0.00018245238487828573, |
|
"loss": 1.2309, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.05392685557407589, |
|
"grad_norm": 5.479822158813477, |
|
"learning_rate": 0.000182323266123265, |
|
"loss": 1.8959, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.05412295323070889, |
|
"grad_norm": 7.716124534606934, |
|
"learning_rate": 0.00018219372010688515, |
|
"loss": 1.8321, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.054319050887341895, |
|
"grad_norm": 9.968965530395508, |
|
"learning_rate": 0.00018206374750149567, |
|
"loss": 4.1652, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.0545151485439749, |
|
"grad_norm": 6.009235382080078, |
|
"learning_rate": 0.00018193334898166007, |
|
"loss": 0.8178, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.0547112462006079, |
|
"grad_norm": 8.031886100769043, |
|
"learning_rate": 0.00018180252522415242, |
|
"loss": 1.783, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.05490734385724091, |
|
"grad_norm": 5.5589680671691895, |
|
"learning_rate": 0.00018167127690795368, |
|
"loss": 1.3049, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05510344151387391, |
|
"grad_norm": 5.04995059967041, |
|
"learning_rate": 0.0001815396047142485, |
|
"loss": 0.8962, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.055299539170506916, |
|
"grad_norm": 5.3526692390441895, |
|
"learning_rate": 0.0001814075093264212, |
|
"loss": 1.201, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.05549563682713991, |
|
"grad_norm": 11.980429649353027, |
|
"learning_rate": 0.00018127499143005268, |
|
"loss": 0.6955, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.05569173448377292, |
|
"grad_norm": 38.28229904174805, |
|
"learning_rate": 0.00018114205171291663, |
|
"loss": 1.7335, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.05588783214040592, |
|
"grad_norm": 6.15138053894043, |
|
"learning_rate": 0.000181008690864976, |
|
"loss": 1.2766, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.056083929797038926, |
|
"grad_norm": 7.846836566925049, |
|
"learning_rate": 0.00018087490957837944, |
|
"loss": 1.155, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.05628002745367193, |
|
"grad_norm": 7.675628185272217, |
|
"learning_rate": 0.00018074070854745772, |
|
"loss": 1.6129, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.056476125110304934, |
|
"grad_norm": 12.245649337768555, |
|
"learning_rate": 0.00018060608846872005, |
|
"loss": 1.7585, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.05667222276693794, |
|
"grad_norm": 10.520101547241211, |
|
"learning_rate": 0.00018047105004085053, |
|
"loss": 1.9265, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.056868320423570935, |
|
"grad_norm": 7.400151252746582, |
|
"learning_rate": 0.00018033559396470454, |
|
"loss": 1.4189, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05706441808020394, |
|
"grad_norm": 12.058060646057129, |
|
"learning_rate": 0.00018019972094330503, |
|
"loss": 2.3312, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.057260515736836944, |
|
"grad_norm": 5.313794136047363, |
|
"learning_rate": 0.00018006343168183893, |
|
"loss": 2.0051, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.05745661339346995, |
|
"grad_norm": 11.182997703552246, |
|
"learning_rate": 0.0001799267268876535, |
|
"loss": 1.4779, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.05765271105010295, |
|
"grad_norm": 16.24866485595703, |
|
"learning_rate": 0.0001797896072702526, |
|
"loss": 2.4689, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.057848808706735956, |
|
"grad_norm": 7.471411228179932, |
|
"learning_rate": 0.00017965207354129307, |
|
"loss": 3.0599, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.05804490636336896, |
|
"grad_norm": 7.715878486633301, |
|
"learning_rate": 0.00017951412641458098, |
|
"loss": 0.8256, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.058241004020001964, |
|
"grad_norm": 22.084482192993164, |
|
"learning_rate": 0.000179375766606068, |
|
"loss": 2.457, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.05843710167663496, |
|
"grad_norm": 8.041847229003906, |
|
"learning_rate": 0.00017923699483384753, |
|
"loss": 1.5642, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.058633199333267966, |
|
"grad_norm": 12.814888000488281, |
|
"learning_rate": 0.00017909781181815117, |
|
"loss": 1.5129, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.05882929698990097, |
|
"grad_norm": 9.216371536254883, |
|
"learning_rate": 0.0001789582182813449, |
|
"loss": 2.0632, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.059025394646533974, |
|
"grad_norm": 12.80371379852295, |
|
"learning_rate": 0.00017881821494792528, |
|
"loss": 2.8705, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.05922149230316698, |
|
"grad_norm": 7.234943389892578, |
|
"learning_rate": 0.00017867780254451576, |
|
"loss": 2.6664, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.05941758995979998, |
|
"grad_norm": 11.168726921081543, |
|
"learning_rate": 0.00017853698179986282, |
|
"loss": 1.347, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.059613687616432987, |
|
"grad_norm": 19.369266510009766, |
|
"learning_rate": 0.00017839575344483238, |
|
"loss": 2.68, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.059809785273065984, |
|
"grad_norm": 7.1730570793151855, |
|
"learning_rate": 0.0001782541182124057, |
|
"loss": 2.3908, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.06000588292969899, |
|
"grad_norm": 7.243929862976074, |
|
"learning_rate": 0.0001781120768376759, |
|
"loss": 1.0056, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.06020198058633199, |
|
"grad_norm": 7.748988628387451, |
|
"learning_rate": 0.00017796963005784394, |
|
"loss": 2.1776, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.060398078242964996, |
|
"grad_norm": 13.446945190429688, |
|
"learning_rate": 0.0001778267786122148, |
|
"loss": 2.3275, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.060594175899598, |
|
"grad_norm": 10.720627784729004, |
|
"learning_rate": 0.0001776835232421938, |
|
"loss": 1.046, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.060790273556231005, |
|
"grad_norm": 11.274985313415527, |
|
"learning_rate": 0.00017753986469128257, |
|
"loss": 2.4269, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06098637121286401, |
|
"grad_norm": 8.671335220336914, |
|
"learning_rate": 0.00017739580370507532, |
|
"loss": 2.1488, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.06118246886949701, |
|
"grad_norm": 8.375978469848633, |
|
"learning_rate": 0.0001772513410312548, |
|
"loss": 1.8458, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.06137856652613001, |
|
"grad_norm": 11.178112983703613, |
|
"learning_rate": 0.00017710647741958868, |
|
"loss": 2.7169, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.061574664182763014, |
|
"grad_norm": 8.29799747467041, |
|
"learning_rate": 0.00017696121362192544, |
|
"loss": 1.455, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.06177076183939602, |
|
"grad_norm": 6.712766647338867, |
|
"learning_rate": 0.00017681555039219054, |
|
"loss": 1.2604, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.06196685949602902, |
|
"grad_norm": 7.891608238220215, |
|
"learning_rate": 0.00017666948848638257, |
|
"loss": 2.1795, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.06216295715266203, |
|
"grad_norm": 5.039219379425049, |
|
"learning_rate": 0.00017652302866256916, |
|
"loss": 0.9069, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.06235905480929503, |
|
"grad_norm": 9.421103477478027, |
|
"learning_rate": 0.00017637617168088325, |
|
"loss": 2.4256, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.06255515246592804, |
|
"grad_norm": 4.435902118682861, |
|
"learning_rate": 0.000176228918303519, |
|
"loss": 1.9269, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.06275125012256104, |
|
"grad_norm": 10.938987731933594, |
|
"learning_rate": 0.00017608126929472795, |
|
"loss": 1.4649, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06294734777919404, |
|
"grad_norm": 6.332970142364502, |
|
"learning_rate": 0.00017593322542081485, |
|
"loss": 2.0089, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.06314344543582705, |
|
"grad_norm": 6.731532573699951, |
|
"learning_rate": 0.00017578478745013392, |
|
"loss": 2.4046, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.06333954309246005, |
|
"grad_norm": 8.772012710571289, |
|
"learning_rate": 0.00017563595615308474, |
|
"loss": 1.4935, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.06353564074909304, |
|
"grad_norm": 5.693745136260986, |
|
"learning_rate": 0.00017548673230210823, |
|
"loss": 1.848, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.06373173840572605, |
|
"grad_norm": 15.056157112121582, |
|
"learning_rate": 0.0001753371166716828, |
|
"loss": 1.4598, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.06392783606235905, |
|
"grad_norm": 9.370506286621094, |
|
"learning_rate": 0.00017518711003832002, |
|
"loss": 1.4809, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.06412393371899205, |
|
"grad_norm": 19.398839950561523, |
|
"learning_rate": 0.000175036713180561, |
|
"loss": 1.0093, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.06432003137562506, |
|
"grad_norm": 4.393742084503174, |
|
"learning_rate": 0.00017488592687897193, |
|
"loss": 0.817, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.06451612903225806, |
|
"grad_norm": 6.7713799476623535, |
|
"learning_rate": 0.00017473475191614037, |
|
"loss": 2.1701, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.06471222668889107, |
|
"grad_norm": 5.920267581939697, |
|
"learning_rate": 0.00017458318907667098, |
|
"loss": 3.3491, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06490832434552407, |
|
"grad_norm": 15.095996856689453, |
|
"learning_rate": 0.0001744312391471816, |
|
"loss": 1.7637, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.06510442200215708, |
|
"grad_norm": 9.470211029052734, |
|
"learning_rate": 0.00017427890291629893, |
|
"loss": 2.7744, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.06530051965879008, |
|
"grad_norm": 9.082067489624023, |
|
"learning_rate": 0.00017412618117465477, |
|
"loss": 3.1791, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.06549661731542308, |
|
"grad_norm": 5.174635410308838, |
|
"learning_rate": 0.0001739730747148816, |
|
"loss": 1.2189, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.06569271497205609, |
|
"grad_norm": 5.053405284881592, |
|
"learning_rate": 0.00017381958433160865, |
|
"loss": 1.7119, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.06588881262868909, |
|
"grad_norm": 5.771046161651611, |
|
"learning_rate": 0.0001736657108214578, |
|
"loss": 1.4188, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.0660849102853221, |
|
"grad_norm": 8.400517463684082, |
|
"learning_rate": 0.00017351145498303925, |
|
"loss": 2.3167, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.0662810079419551, |
|
"grad_norm": 4.6646728515625, |
|
"learning_rate": 0.0001733568176169476, |
|
"loss": 1.2102, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.06647710559858809, |
|
"grad_norm": 8.288646697998047, |
|
"learning_rate": 0.0001732017995257575, |
|
"loss": 2.4803, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.0666732032552211, |
|
"grad_norm": 10.970074653625488, |
|
"learning_rate": 0.00017304640151401967, |
|
"loss": 2.5839, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.0668693009118541, |
|
"grad_norm": 6.0125732421875, |
|
"learning_rate": 0.00017289062438825665, |
|
"loss": 1.5807, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.0670653985684871, |
|
"grad_norm": 5.844028472900391, |
|
"learning_rate": 0.0001727344689569585, |
|
"loss": 3.34, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.06726149622512011, |
|
"grad_norm": 7.1026387214660645, |
|
"learning_rate": 0.00017257793603057871, |
|
"loss": 1.4347, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.06745759388175311, |
|
"grad_norm": 9.198262214660645, |
|
"learning_rate": 0.00017242102642153016, |
|
"loss": 1.834, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.06765369153838612, |
|
"grad_norm": 5.76854133605957, |
|
"learning_rate": 0.00017226374094418044, |
|
"loss": 0.9294, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.06784978919501912, |
|
"grad_norm": 10.319186210632324, |
|
"learning_rate": 0.0001721060804148482, |
|
"loss": 2.0088, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.06804588685165212, |
|
"grad_norm": 22.298240661621094, |
|
"learning_rate": 0.00017194804565179842, |
|
"loss": 2.6901, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.06824198450828513, |
|
"grad_norm": 11.38401985168457, |
|
"learning_rate": 0.00017178963747523847, |
|
"loss": 2.6342, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.06824198450828513, |
|
"eval_loss": 0.4400941729545593, |
|
"eval_runtime": 78.7276, |
|
"eval_samples_per_second": 27.284, |
|
"eval_steps_per_second": 13.642, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.06843808216491813, |
|
"grad_norm": 7.237430095672607, |
|
"learning_rate": 0.00017163085670731371, |
|
"loss": 1.6659, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.06863417982155114, |
|
"grad_norm": 10.189397811889648, |
|
"learning_rate": 0.00017147170417210333, |
|
"loss": 1.5962, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06883027747818414, |
|
"grad_norm": 11.650528907775879, |
|
"learning_rate": 0.00017131218069561593, |
|
"loss": 2.8224, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.06902637513481714, |
|
"grad_norm": 5.0007147789001465, |
|
"learning_rate": 0.00017115228710578534, |
|
"loss": 1.8055, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.06922247279145015, |
|
"grad_norm": 4.950777530670166, |
|
"learning_rate": 0.0001709920242324663, |
|
"loss": 1.1707, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.06941857044808314, |
|
"grad_norm": 11.658537864685059, |
|
"learning_rate": 0.0001708313929074302, |
|
"loss": 1.6848, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.06961466810471614, |
|
"grad_norm": 6.6815009117126465, |
|
"learning_rate": 0.00017067039396436058, |
|
"loss": 1.6768, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.06981076576134915, |
|
"grad_norm": 7.522084712982178, |
|
"learning_rate": 0.00017050902823884903, |
|
"loss": 1.2074, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.07000686341798215, |
|
"grad_norm": 9.470911026000977, |
|
"learning_rate": 0.00017034729656839078, |
|
"loss": 0.8594, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.07020296107461516, |
|
"grad_norm": 4.798830032348633, |
|
"learning_rate": 0.00017018519979238023, |
|
"loss": 1.6142, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.07039905873124816, |
|
"grad_norm": 4.626184940338135, |
|
"learning_rate": 0.0001700227387521068, |
|
"loss": 1.6397, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.07059515638788116, |
|
"grad_norm": 6.68535041809082, |
|
"learning_rate": 0.00016985991429075036, |
|
"loss": 0.623, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07079125404451417, |
|
"grad_norm": 3.028799533843994, |
|
"learning_rate": 0.00016969672725337706, |
|
"loss": 0.7686, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.07098735170114717, |
|
"grad_norm": 14.592923164367676, |
|
"learning_rate": 0.00016953317848693474, |
|
"loss": 1.5171, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.07118344935778018, |
|
"grad_norm": 9.847641944885254, |
|
"learning_rate": 0.00016936926884024864, |
|
"loss": 2.5365, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.07137954701441318, |
|
"grad_norm": 5.795401573181152, |
|
"learning_rate": 0.00016920499916401707, |
|
"loss": 1.2201, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.07157564467104618, |
|
"grad_norm": 10.178254127502441, |
|
"learning_rate": 0.0001690403703108068, |
|
"loss": 2.3089, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.07177174232767919, |
|
"grad_norm": 5.105298042297363, |
|
"learning_rate": 0.00016887538313504883, |
|
"loss": 1.3503, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.07196783998431219, |
|
"grad_norm": 5.165500164031982, |
|
"learning_rate": 0.00016871003849303382, |
|
"loss": 1.9088, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.0721639376409452, |
|
"grad_norm": 6.532622337341309, |
|
"learning_rate": 0.0001685443372429077, |
|
"loss": 1.2754, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.07236003529757819, |
|
"grad_norm": 6.758758068084717, |
|
"learning_rate": 0.0001683782802446672, |
|
"loss": 1.6721, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.07255613295421119, |
|
"grad_norm": 4.5956902503967285, |
|
"learning_rate": 0.0001682118683601555, |
|
"loss": 1.7976, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.0727522306108442, |
|
"grad_norm": 8.587203025817871, |
|
"learning_rate": 0.00016804510245305745, |
|
"loss": 1.7933, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.0729483282674772, |
|
"grad_norm": 8.018026351928711, |
|
"learning_rate": 0.00016787798338889552, |
|
"loss": 2.4964, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.0731444259241102, |
|
"grad_norm": 8.505922317504883, |
|
"learning_rate": 0.00016771051203502493, |
|
"loss": 2.9766, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.07334052358074321, |
|
"grad_norm": 7.051482200622559, |
|
"learning_rate": 0.00016754268926062938, |
|
"loss": 1.9882, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.07353662123737621, |
|
"grad_norm": 4.620705604553223, |
|
"learning_rate": 0.00016737451593671636, |
|
"loss": 2.3198, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.07373271889400922, |
|
"grad_norm": 8.614477157592773, |
|
"learning_rate": 0.00016720599293611286, |
|
"loss": 2.0611, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.07392881655064222, |
|
"grad_norm": 5.840315818786621, |
|
"learning_rate": 0.0001670371211334606, |
|
"loss": 1.5961, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.07412491420727522, |
|
"grad_norm": 6.045500755310059, |
|
"learning_rate": 0.00016686790140521164, |
|
"loss": 1.5132, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.07432101186390823, |
|
"grad_norm": 11.521757125854492, |
|
"learning_rate": 0.0001666983346296238, |
|
"loss": 2.0003, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.07451710952054123, |
|
"grad_norm": 7.7568230628967285, |
|
"learning_rate": 0.0001665284216867561, |
|
"loss": 2.4289, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07471320717717424, |
|
"grad_norm": 9.928805351257324, |
|
"learning_rate": 0.0001663581634584641, |
|
"loss": 2.2129, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.07490930483380724, |
|
"grad_norm": 6.011744022369385, |
|
"learning_rate": 0.00016618756082839554, |
|
"loss": 1.8396, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.07510540249044025, |
|
"grad_norm": 7.308403968811035, |
|
"learning_rate": 0.0001660166146819855, |
|
"loss": 0.9335, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.07530150014707324, |
|
"grad_norm": 10.000845909118652, |
|
"learning_rate": 0.0001658453259064519, |
|
"loss": 1.7884, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.07549759780370624, |
|
"grad_norm": 16.585180282592773, |
|
"learning_rate": 0.00016567369539079114, |
|
"loss": 3.0675, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.07569369546033924, |
|
"grad_norm": 10.055068969726562, |
|
"learning_rate": 0.000165501724025773, |
|
"loss": 2.0398, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.07588979311697225, |
|
"grad_norm": 9.56424331665039, |
|
"learning_rate": 0.0001653294127039365, |
|
"loss": 3.1253, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.07608589077360525, |
|
"grad_norm": 6.628775119781494, |
|
"learning_rate": 0.0001651567623195849, |
|
"loss": 0.869, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.07628198843023826, |
|
"grad_norm": 8.333356857299805, |
|
"learning_rate": 0.00016498377376878126, |
|
"loss": 2.1941, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.07647808608687126, |
|
"grad_norm": 6.76808500289917, |
|
"learning_rate": 0.0001648104479493437, |
|
"loss": 1.3802, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07667418374350427, |
|
"grad_norm": 7.221822738647461, |
|
"learning_rate": 0.0001646367857608409, |
|
"loss": 1.0955, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.07687028140013727, |
|
"grad_norm": 31.469024658203125, |
|
"learning_rate": 0.00016446278810458716, |
|
"loss": 2.4294, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.07706637905677027, |
|
"grad_norm": 5.735899448394775, |
|
"learning_rate": 0.000164288455883638, |
|
"loss": 2.1574, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.07726247671340328, |
|
"grad_norm": 6.462769031524658, |
|
"learning_rate": 0.00016411379000278524, |
|
"loss": 1.8524, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.07745857437003628, |
|
"grad_norm": 12.341320037841797, |
|
"learning_rate": 0.00016393879136855248, |
|
"loss": 2.2439, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.07765467202666929, |
|
"grad_norm": 6.433187007904053, |
|
"learning_rate": 0.0001637634608891903, |
|
"loss": 1.2662, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.07785076968330229, |
|
"grad_norm": 5.184267997741699, |
|
"learning_rate": 0.00016358779947467158, |
|
"loss": 1.0905, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.0780468673399353, |
|
"grad_norm": 8.464557647705078, |
|
"learning_rate": 0.00016341180803668674, |
|
"loss": 2.0666, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.07824296499656828, |
|
"grad_norm": 9.498639106750488, |
|
"learning_rate": 0.00016323548748863907, |
|
"loss": 2.2394, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.07843906265320129, |
|
"grad_norm": 7.747230052947998, |
|
"learning_rate": 0.00016305883874563994, |
|
"loss": 1.4313, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07863516030983429, |
|
"grad_norm": 8.759115219116211, |
|
"learning_rate": 0.0001628818627245041, |
|
"loss": 1.6428, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.0788312579664673, |
|
"grad_norm": 5.250945568084717, |
|
"learning_rate": 0.00016270456034374474, |
|
"loss": 0.7777, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.0790273556231003, |
|
"grad_norm": 9.001906394958496, |
|
"learning_rate": 0.00016252693252356916, |
|
"loss": 1.678, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.0792234532797333, |
|
"grad_norm": 7.177880764007568, |
|
"learning_rate": 0.00016234898018587337, |
|
"loss": 1.4463, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.07941955093636631, |
|
"grad_norm": 10.035067558288574, |
|
"learning_rate": 0.00016217070425423788, |
|
"loss": 2.3035, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.07961564859299931, |
|
"grad_norm": 4.974843978881836, |
|
"learning_rate": 0.0001619921056539226, |
|
"loss": 1.2633, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.07981174624963232, |
|
"grad_norm": 7.599184036254883, |
|
"learning_rate": 0.00016181318531186206, |
|
"loss": 0.8719, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.08000784390626532, |
|
"grad_norm": 11.066450119018555, |
|
"learning_rate": 0.0001616339441566607, |
|
"loss": 2.2938, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.08020394156289833, |
|
"grad_norm": 6.441056251525879, |
|
"learning_rate": 0.000161454383118588, |
|
"loss": 0.9166, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.08040003921953133, |
|
"grad_norm": 9.670071601867676, |
|
"learning_rate": 0.00016127450312957353, |
|
"loss": 2.2514, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08059613687616433, |
|
"grad_norm": 7.870420455932617, |
|
"learning_rate": 0.00016109430512320237, |
|
"loss": 1.417, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.08079223453279734, |
|
"grad_norm": 8.569132804870605, |
|
"learning_rate": 0.00016091379003471007, |
|
"loss": 1.6958, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.08098833218943034, |
|
"grad_norm": 4.8630900382995605, |
|
"learning_rate": 0.00016073295880097784, |
|
"loss": 1.8919, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.08118442984606333, |
|
"grad_norm": 10.894848823547363, |
|
"learning_rate": 0.0001605518123605277, |
|
"loss": 1.7899, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.08138052750269634, |
|
"grad_norm": 6.055443286895752, |
|
"learning_rate": 0.00016037035165351768, |
|
"loss": 1.9432, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.08157662515932934, |
|
"grad_norm": 7.106583118438721, |
|
"learning_rate": 0.0001601885776217367, |
|
"loss": 1.736, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.08177272281596235, |
|
"grad_norm": 7.7677764892578125, |
|
"learning_rate": 0.00016000649120860003, |
|
"loss": 2.4425, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.08196882047259535, |
|
"grad_norm": 5.940224647521973, |
|
"learning_rate": 0.00015982409335914407, |
|
"loss": 1.0954, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.08216491812922835, |
|
"grad_norm": 6.025548458099365, |
|
"learning_rate": 0.00015964138502002175, |
|
"loss": 0.93, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.08236101578586136, |
|
"grad_norm": 7.528570652008057, |
|
"learning_rate": 0.00015945836713949726, |
|
"loss": 1.8381, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08255711344249436, |
|
"grad_norm": 15.305673599243164, |
|
"learning_rate": 0.00015927504066744148, |
|
"loss": 1.7413, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.08275321109912737, |
|
"grad_norm": 4.707043647766113, |
|
"learning_rate": 0.0001590914065553268, |
|
"loss": 1.4186, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.08294930875576037, |
|
"grad_norm": 5.808046817779541, |
|
"learning_rate": 0.00015890746575622231, |
|
"loss": 1.095, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.08314540641239337, |
|
"grad_norm": 6.294839859008789, |
|
"learning_rate": 0.00015872321922478884, |
|
"loss": 1.918, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.08334150406902638, |
|
"grad_norm": 4.934250354766846, |
|
"learning_rate": 0.00015853866791727396, |
|
"loss": 1.2578, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.08353760172565938, |
|
"grad_norm": 8.470808982849121, |
|
"learning_rate": 0.00015835381279150705, |
|
"loss": 2.0231, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.08373369938229239, |
|
"grad_norm": 18.526151657104492, |
|
"learning_rate": 0.00015816865480689426, |
|
"loss": 4.1691, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.08392979703892539, |
|
"grad_norm": 7.958248615264893, |
|
"learning_rate": 0.0001579831949244137, |
|
"loss": 1.066, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.08412589469555838, |
|
"grad_norm": 6.250977516174316, |
|
"learning_rate": 0.00015779743410661033, |
|
"loss": 0.8756, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.08432199235219139, |
|
"grad_norm": 4.739164352416992, |
|
"learning_rate": 0.00015761137331759084, |
|
"loss": 1.3986, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08451809000882439, |
|
"grad_norm": 10.723671913146973, |
|
"learning_rate": 0.00015742501352301893, |
|
"loss": 2.3962, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.0847141876654574, |
|
"grad_norm": 5.097165584564209, |
|
"learning_rate": 0.00015723835569011007, |
|
"loss": 3.1665, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.0849102853220904, |
|
"grad_norm": 7.0065388679504395, |
|
"learning_rate": 0.00015705140078762665, |
|
"loss": 1.4634, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.0851063829787234, |
|
"grad_norm": 6.814738750457764, |
|
"learning_rate": 0.00015686414978587277, |
|
"loss": 0.9439, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.0853024806353564, |
|
"grad_norm": 8.381726264953613, |
|
"learning_rate": 0.0001566766036566893, |
|
"loss": 1.6046, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.08549857829198941, |
|
"grad_norm": 8.249631881713867, |
|
"learning_rate": 0.00015648876337344896, |
|
"loss": 1.6886, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.08569467594862241, |
|
"grad_norm": 9.020310401916504, |
|
"learning_rate": 0.00015630062991105098, |
|
"loss": 2.5881, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.08589077360525542, |
|
"grad_norm": 3.999058723449707, |
|
"learning_rate": 0.0001561122042459163, |
|
"loss": 1.8363, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.08608687126188842, |
|
"grad_norm": 7.487119674682617, |
|
"learning_rate": 0.00015592348735598237, |
|
"loss": 1.4798, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.08628296891852143, |
|
"grad_norm": 4.982146739959717, |
|
"learning_rate": 0.00015573448022069815, |
|
"loss": 2.7132, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08647906657515443, |
|
"grad_norm": 3.5383474826812744, |
|
"learning_rate": 0.00015554518382101892, |
|
"loss": 0.6142, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.08667516423178744, |
|
"grad_norm": 3.881410598754883, |
|
"learning_rate": 0.00015535559913940126, |
|
"loss": 0.8708, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.08687126188842044, |
|
"grad_norm": 5.159698963165283, |
|
"learning_rate": 0.00015516572715979806, |
|
"loss": 2.32, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.08706735954505343, |
|
"grad_norm": 9.29107666015625, |
|
"learning_rate": 0.00015497556886765316, |
|
"loss": 2.5744, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.08726345720168643, |
|
"grad_norm": 8.70083999633789, |
|
"learning_rate": 0.00015478512524989645, |
|
"loss": 0.8412, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.08745955485831944, |
|
"grad_norm": 6.164897918701172, |
|
"learning_rate": 0.00015459439729493865, |
|
"loss": 1.4857, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.08765565251495244, |
|
"grad_norm": 4.213920593261719, |
|
"learning_rate": 0.00015440338599266622, |
|
"loss": 1.2077, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.08785175017158545, |
|
"grad_norm": 14.897852897644043, |
|
"learning_rate": 0.00015421209233443617, |
|
"loss": 1.815, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.08804784782821845, |
|
"grad_norm": 6.773970127105713, |
|
"learning_rate": 0.00015402051731307093, |
|
"loss": 2.9485, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.08824394548485145, |
|
"grad_norm": 5.569252014160156, |
|
"learning_rate": 0.0001538286619228533, |
|
"loss": 1.1505, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08844004314148446, |
|
"grad_norm": 5.61276388168335, |
|
"learning_rate": 0.0001536365271595212, |
|
"loss": 1.2065, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.08863614079811746, |
|
"grad_norm": 3.9499528408050537, |
|
"learning_rate": 0.00015344411402026245, |
|
"loss": 1.3547, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.08883223845475047, |
|
"grad_norm": 11.833647727966309, |
|
"learning_rate": 0.00015325142350370967, |
|
"loss": 2.8476, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.08902833611138347, |
|
"grad_norm": 6.281819820404053, |
|
"learning_rate": 0.00015305845660993503, |
|
"loss": 1.0563, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.08922443376801648, |
|
"grad_norm": 9.338071823120117, |
|
"learning_rate": 0.00015286521434044526, |
|
"loss": 1.9847, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.08942053142464948, |
|
"grad_norm": 12.811955451965332, |
|
"learning_rate": 0.0001526716976981761, |
|
"loss": 1.2632, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.08961662908128248, |
|
"grad_norm": 5.077617168426514, |
|
"learning_rate": 0.0001524779076874875, |
|
"loss": 0.9728, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.08981272673791549, |
|
"grad_norm": 5.802744388580322, |
|
"learning_rate": 0.0001522838453141581, |
|
"loss": 0.9826, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.09000882439454848, |
|
"grad_norm": 6.643836498260498, |
|
"learning_rate": 0.00015208951158538004, |
|
"loss": 1.3868, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.09020492205118148, |
|
"grad_norm": 8.100834846496582, |
|
"learning_rate": 0.000151894907509754, |
|
"loss": 1.5572, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09040101970781449, |
|
"grad_norm": 8.695015907287598, |
|
"learning_rate": 0.00015170003409728356, |
|
"loss": 1.7788, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.09059711736444749, |
|
"grad_norm": 11.81883430480957, |
|
"learning_rate": 0.00015150489235937035, |
|
"loss": 1.7457, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.0907932150210805, |
|
"grad_norm": 8.165855407714844, |
|
"learning_rate": 0.00015130948330880847, |
|
"loss": 1.3886, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.0909893126777135, |
|
"grad_norm": 22.797456741333008, |
|
"learning_rate": 0.00015111380795977954, |
|
"loss": 1.8671, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.0911854103343465, |
|
"grad_norm": 5.275067329406738, |
|
"learning_rate": 0.00015091786732784716, |
|
"loss": 0.7186, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.09138150799097951, |
|
"grad_norm": 8.38714599609375, |
|
"learning_rate": 0.00015072166242995175, |
|
"loss": 1.7764, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.09157760564761251, |
|
"grad_norm": 8.916413307189941, |
|
"learning_rate": 0.0001505251942844054, |
|
"loss": 1.5589, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.09177370330424552, |
|
"grad_norm": 5.007913112640381, |
|
"learning_rate": 0.00015032846391088635, |
|
"loss": 0.6688, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.09196980096087852, |
|
"grad_norm": 10.685876846313477, |
|
"learning_rate": 0.0001501314723304339, |
|
"loss": 2.2118, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.09216589861751152, |
|
"grad_norm": 7.626431465148926, |
|
"learning_rate": 0.00014993422056544295, |
|
"loss": 1.1849, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09236199627414453, |
|
"grad_norm": 5.862109184265137, |
|
"learning_rate": 0.00014973670963965883, |
|
"loss": 1.4594, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.09255809393077753, |
|
"grad_norm": 5.194091320037842, |
|
"learning_rate": 0.00014953894057817188, |
|
"loss": 1.2243, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.09275419158741054, |
|
"grad_norm": 7.806075572967529, |
|
"learning_rate": 0.0001493409144074122, |
|
"loss": 1.2365, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.09295028924404353, |
|
"grad_norm": 5.004404067993164, |
|
"learning_rate": 0.00014914263215514431, |
|
"loss": 1.6081, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.09314638690067653, |
|
"grad_norm": 5.245405197143555, |
|
"learning_rate": 0.00014894409485046177, |
|
"loss": 2.2626, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.09334248455730954, |
|
"grad_norm": 6.017452716827393, |
|
"learning_rate": 0.00014874530352378194, |
|
"loss": 3.0452, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.09353858221394254, |
|
"grad_norm": 8.504718780517578, |
|
"learning_rate": 0.00014854625920684042, |
|
"loss": 1.4529, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.09373467987057554, |
|
"grad_norm": 7.850302696228027, |
|
"learning_rate": 0.00014834696293268603, |
|
"loss": 1.5511, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.09393077752720855, |
|
"grad_norm": 6.846382141113281, |
|
"learning_rate": 0.00014814741573567514, |
|
"loss": 2.0178, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.09412687518384155, |
|
"grad_norm": 8.834015846252441, |
|
"learning_rate": 0.00014794761865146648, |
|
"loss": 1.6438, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09432297284047456, |
|
"grad_norm": 7.127365589141846, |
|
"learning_rate": 0.00014774757271701557, |
|
"loss": 0.5588, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.09451907049710756, |
|
"grad_norm": 13.859881401062012, |
|
"learning_rate": 0.00014754727897056967, |
|
"loss": 2.9212, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.09471516815374056, |
|
"grad_norm": 5.914462089538574, |
|
"learning_rate": 0.0001473467384516621, |
|
"loss": 1.334, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.09491126581037357, |
|
"grad_norm": 3.947435140609741, |
|
"learning_rate": 0.0001471459522011069, |
|
"loss": 1.5284, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.09510736346700657, |
|
"grad_norm": 7.171452522277832, |
|
"learning_rate": 0.00014694492126099353, |
|
"loss": 1.7676, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.09530346112363958, |
|
"grad_norm": 7.364241600036621, |
|
"learning_rate": 0.0001467436466746814, |
|
"loss": 1.7551, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.09549955878027258, |
|
"grad_norm": 12.173360824584961, |
|
"learning_rate": 0.0001465421294867944, |
|
"loss": 2.5703, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.09569565643690559, |
|
"grad_norm": 14.59663200378418, |
|
"learning_rate": 0.00014634037074321557, |
|
"loss": 1.7594, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.09589175409353858, |
|
"grad_norm": 5.692611217498779, |
|
"learning_rate": 0.00014613837149108163, |
|
"loss": 2.5973, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.09608785175017158, |
|
"grad_norm": 6.9588470458984375, |
|
"learning_rate": 0.00014593613277877758, |
|
"loss": 2.6119, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.09628394940680458, |
|
"grad_norm": 14.370820999145508, |
|
"learning_rate": 0.0001457336556559312, |
|
"loss": 1.9495, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.09648004706343759, |
|
"grad_norm": 8.259851455688477, |
|
"learning_rate": 0.0001455309411734076, |
|
"loss": 2.2663, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.09667614472007059, |
|
"grad_norm": 11.025699615478516, |
|
"learning_rate": 0.00014532799038330385, |
|
"loss": 2.2184, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.0968722423767036, |
|
"grad_norm": 6.014841079711914, |
|
"learning_rate": 0.00014512480433894343, |
|
"loss": 2.3436, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.0970683400333366, |
|
"grad_norm": 11.881712913513184, |
|
"learning_rate": 0.00014492138409487085, |
|
"loss": 0.7869, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.0972644376899696, |
|
"grad_norm": 4.720561504364014, |
|
"learning_rate": 0.00014471773070684599, |
|
"loss": 2.417, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.09746053534660261, |
|
"grad_norm": 11.112417221069336, |
|
"learning_rate": 0.00014451384523183903, |
|
"loss": 2.155, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.09765663300323561, |
|
"grad_norm": 5.968791961669922, |
|
"learning_rate": 0.0001443097287280244, |
|
"loss": 0.8096, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.09785273065986862, |
|
"grad_norm": 7.543048858642578, |
|
"learning_rate": 0.0001441053822547757, |
|
"loss": 1.7723, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.09804882831650162, |
|
"grad_norm": 7.892668724060059, |
|
"learning_rate": 0.00014390080687266013, |
|
"loss": 1.4027, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09824492597313463, |
|
"grad_norm": 7.690077304840088, |
|
"learning_rate": 0.00014369600364343285, |
|
"loss": 1.901, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.09844102362976763, |
|
"grad_norm": 4.714064598083496, |
|
"learning_rate": 0.00014349097363003163, |
|
"loss": 1.6198, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.09863712128640063, |
|
"grad_norm": 4.764143466949463, |
|
"learning_rate": 0.0001432857178965712, |
|
"loss": 1.3231, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.09883321894303362, |
|
"grad_norm": 9.042250633239746, |
|
"learning_rate": 0.00014308023750833783, |
|
"loss": 2.1337, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.09902931659966663, |
|
"grad_norm": 7.619399547576904, |
|
"learning_rate": 0.00014287453353178372, |
|
"loss": 1.1525, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.09922541425629963, |
|
"grad_norm": 10.186138153076172, |
|
"learning_rate": 0.00014266860703452156, |
|
"loss": 2.5917, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.09942151191293264, |
|
"grad_norm": 6.784362316131592, |
|
"learning_rate": 0.00014246245908531882, |
|
"loss": 2.1194, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.09961760956956564, |
|
"grad_norm": 46.69243621826172, |
|
"learning_rate": 0.0001422560907540925, |
|
"loss": 2.0196, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.09981370722619864, |
|
"grad_norm": 5.98226261138916, |
|
"learning_rate": 0.00014204950311190318, |
|
"loss": 2.131, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.10000980488283165, |
|
"grad_norm": 19.974212646484375, |
|
"learning_rate": 0.00014184269723094988, |
|
"loss": 1.9812, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.10020590253946465, |
|
"grad_norm": 6.612239360809326, |
|
"learning_rate": 0.00014163567418456406, |
|
"loss": 1.2688, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.10040200019609766, |
|
"grad_norm": 5.024367332458496, |
|
"learning_rate": 0.0001414284350472045, |
|
"loss": 1.0559, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.10059809785273066, |
|
"grad_norm": 6.517631530761719, |
|
"learning_rate": 0.00014122098089445142, |
|
"loss": 1.2611, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.10079419550936367, |
|
"grad_norm": 7.715580463409424, |
|
"learning_rate": 0.0001410133128030009, |
|
"loss": 1.0419, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.10099029316599667, |
|
"grad_norm": 5.8916120529174805, |
|
"learning_rate": 0.00014080543185065943, |
|
"loss": 0.938, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.10118639082262967, |
|
"grad_norm": 5.587378978729248, |
|
"learning_rate": 0.0001405973391163383, |
|
"loss": 1.4988, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.10138248847926268, |
|
"grad_norm": 7.234095096588135, |
|
"learning_rate": 0.0001403890356800479, |
|
"loss": 2.0476, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.10157858613589568, |
|
"grad_norm": 5.781263828277588, |
|
"learning_rate": 0.00014018052262289223, |
|
"loss": 1.9736, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.10177468379252867, |
|
"grad_norm": 7.789620399475098, |
|
"learning_rate": 0.0001399718010270632, |
|
"loss": 1.2162, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.10197078144916168, |
|
"grad_norm": 2.692659378051758, |
|
"learning_rate": 0.00013976287197583494, |
|
"loss": 0.391, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.10216687910579468, |
|
"grad_norm": 7.2949676513671875, |
|
"learning_rate": 0.0001395537365535585, |
|
"loss": 1.2454, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.10236297676242768, |
|
"grad_norm": 14.400397300720215, |
|
"learning_rate": 0.00013934439584565583, |
|
"loss": 1.844, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.10255907441906069, |
|
"grad_norm": 5.972201347351074, |
|
"learning_rate": 0.0001391348509386144, |
|
"loss": 1.2853, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.1027551720756937, |
|
"grad_norm": 8.810315132141113, |
|
"learning_rate": 0.00013892510291998146, |
|
"loss": 2.7965, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.1029512697323267, |
|
"grad_norm": 8.922815322875977, |
|
"learning_rate": 0.00013871515287835839, |
|
"loss": 1.2606, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.1031473673889597, |
|
"grad_norm": 7.5427021980285645, |
|
"learning_rate": 0.00013850500190339514, |
|
"loss": 1.0504, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.1033434650455927, |
|
"grad_norm": 6.646986484527588, |
|
"learning_rate": 0.00013829465108578445, |
|
"loss": 1.1522, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.10353956270222571, |
|
"grad_norm": 8.387070655822754, |
|
"learning_rate": 0.0001380841015172563, |
|
"loss": 1.4298, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.10373566035885871, |
|
"grad_norm": 21.383251190185547, |
|
"learning_rate": 0.0001378733542905722, |
|
"loss": 1.9162, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.10393175801549172, |
|
"grad_norm": 5.776891231536865, |
|
"learning_rate": 0.00013766241049951948, |
|
"loss": 1.2837, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.10412785567212472, |
|
"grad_norm": 4.5066094398498535, |
|
"learning_rate": 0.00013745127123890565, |
|
"loss": 1.5433, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.10432395332875773, |
|
"grad_norm": 6.012543201446533, |
|
"learning_rate": 0.00013723993760455272, |
|
"loss": 1.3534, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.10452005098539073, |
|
"grad_norm": 8.182510375976562, |
|
"learning_rate": 0.0001370284106932915, |
|
"loss": 1.5422, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.10471614864202373, |
|
"grad_norm": 12.530463218688965, |
|
"learning_rate": 0.00013681669160295597, |
|
"loss": 1.651, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.10491224629865673, |
|
"grad_norm": 8.226496696472168, |
|
"learning_rate": 0.00013660478143237746, |
|
"loss": 1.8992, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.10510834395528973, |
|
"grad_norm": 4.896340847015381, |
|
"learning_rate": 0.00013639268128137907, |
|
"loss": 2.2539, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.10530444161192273, |
|
"grad_norm": 4.573820114135742, |
|
"learning_rate": 0.00013618039225076986, |
|
"loss": 2.4075, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.10550053926855574, |
|
"grad_norm": 4.920849323272705, |
|
"learning_rate": 0.0001359679154423392, |
|
"loss": 2.2481, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.10569663692518874, |
|
"grad_norm": 6.206282138824463, |
|
"learning_rate": 0.00013575525195885107, |
|
"loss": 2.5488, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.10589273458182175, |
|
"grad_norm": 9.469644546508789, |
|
"learning_rate": 0.0001355424029040382, |
|
"loss": 2.0894, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.10608883223845475, |
|
"grad_norm": 5.231925010681152, |
|
"learning_rate": 0.00013532936938259656, |
|
"loss": 1.2463, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.10628492989508775, |
|
"grad_norm": 6.779435157775879, |
|
"learning_rate": 0.0001351161525001795, |
|
"loss": 1.8802, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.10648102755172076, |
|
"grad_norm": 7.8035197257995605, |
|
"learning_rate": 0.00013490275336339188, |
|
"loss": 1.1796, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.10667712520835376, |
|
"grad_norm": 5.93556022644043, |
|
"learning_rate": 0.00013468917307978467, |
|
"loss": 1.1082, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.10687322286498677, |
|
"grad_norm": 8.9798002243042, |
|
"learning_rate": 0.00013447541275784887, |
|
"loss": 1.6289, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.10706932052161977, |
|
"grad_norm": 4.270269393920898, |
|
"learning_rate": 0.00013426147350700996, |
|
"loss": 3.2414, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.10726541817825277, |
|
"grad_norm": 7.755331516265869, |
|
"learning_rate": 0.00013404735643762192, |
|
"loss": 3.5263, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.10746151583488578, |
|
"grad_norm": 3.364109754562378, |
|
"learning_rate": 0.0001338330626609618, |
|
"loss": 1.1847, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.10765761349151878, |
|
"grad_norm": 7.73459529876709, |
|
"learning_rate": 0.0001336185932892237, |
|
"loss": 2.0363, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.10785371114815177, |
|
"grad_norm": 7.035179138183594, |
|
"learning_rate": 0.000133403949435513, |
|
"loss": 1.3317, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.10804980880478478, |
|
"grad_norm": 5.30495023727417, |
|
"learning_rate": 0.00013318913221384076, |
|
"loss": 1.3776, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.10824590646141778, |
|
"grad_norm": 6.3127593994140625, |
|
"learning_rate": 0.00013297414273911784, |
|
"loss": 0.9931, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.10844200411805079, |
|
"grad_norm": 3.763789653778076, |
|
"learning_rate": 0.00013275898212714889, |
|
"loss": 0.9685, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.10863810177468379, |
|
"grad_norm": 9.935264587402344, |
|
"learning_rate": 0.00013254365149462699, |
|
"loss": 1.1951, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.1088341994313168, |
|
"grad_norm": 6.151608943939209, |
|
"learning_rate": 0.00013232815195912754, |
|
"loss": 1.9978, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.1090302970879498, |
|
"grad_norm": 7.497893333435059, |
|
"learning_rate": 0.00013211248463910262, |
|
"loss": 2.1158, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.1092263947445828, |
|
"grad_norm": 4.703649520874023, |
|
"learning_rate": 0.00013189665065387507, |
|
"loss": 2.9424, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.1094224924012158, |
|
"grad_norm": 6.602818489074707, |
|
"learning_rate": 0.00013168065112363264, |
|
"loss": 1.9994, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.10961859005784881, |
|
"grad_norm": 5.672735691070557, |
|
"learning_rate": 0.00013146448716942245, |
|
"loss": 1.6569, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.10981468771448182, |
|
"grad_norm": 6.252768039703369, |
|
"learning_rate": 0.0001312481599131449, |
|
"loss": 1.0263, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11001078537111482, |
|
"grad_norm": 5.062610626220703, |
|
"learning_rate": 0.00013103167047754784, |
|
"loss": 1.8557, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.11020688302774782, |
|
"grad_norm": 17.327529907226562, |
|
"learning_rate": 0.000130815019986221, |
|
"loss": 2.2288, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.11040298068438083, |
|
"grad_norm": 9.787447929382324, |
|
"learning_rate": 0.00013059820956358998, |
|
"loss": 2.8692, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.11059907834101383, |
|
"grad_norm": 17.163454055786133, |
|
"learning_rate": 0.00013038124033491025, |
|
"loss": 2.3984, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.11079517599764682, |
|
"grad_norm": 7.947646617889404, |
|
"learning_rate": 0.00013016411342626168, |
|
"loss": 1.2144, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.11099127365427983, |
|
"grad_norm": 8.151859283447266, |
|
"learning_rate": 0.00012994682996454247, |
|
"loss": 1.7198, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.11118737131091283, |
|
"grad_norm": 8.358308792114258, |
|
"learning_rate": 0.00012972939107746325, |
|
"loss": 1.1448, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.11138346896754583, |
|
"grad_norm": 5.084352970123291, |
|
"learning_rate": 0.0001295117978935414, |
|
"loss": 2.3229, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.11157956662417884, |
|
"grad_norm": 4.846659183502197, |
|
"learning_rate": 0.0001292940515420951, |
|
"loss": 1.3941, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.11177566428081184, |
|
"grad_norm": 5.432335376739502, |
|
"learning_rate": 0.0001290761531532374, |
|
"loss": 1.9803, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11197176193744485, |
|
"grad_norm": 5.953497886657715, |
|
"learning_rate": 0.00012885810385787055, |
|
"loss": 1.0619, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.11216785959407785, |
|
"grad_norm": 4.770197868347168, |
|
"learning_rate": 0.00012863990478767994, |
|
"loss": 2.2999, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.11236395725071086, |
|
"grad_norm": 5.719841957092285, |
|
"learning_rate": 0.00012842155707512825, |
|
"loss": 2.0115, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.11256005490734386, |
|
"grad_norm": 6.13926362991333, |
|
"learning_rate": 0.00012820306185344976, |
|
"loss": 1.7, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.11275615256397686, |
|
"grad_norm": 14.388799667358398, |
|
"learning_rate": 0.0001279844202566442, |
|
"loss": 1.4477, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.11295225022060987, |
|
"grad_norm": 5.510779857635498, |
|
"learning_rate": 0.00012776563341947104, |
|
"loss": 1.145, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.11314834787724287, |
|
"grad_norm": 7.694248199462891, |
|
"learning_rate": 0.00012754670247744354, |
|
"loss": 2.0622, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.11334444553387588, |
|
"grad_norm": 10.04172420501709, |
|
"learning_rate": 0.0001273276285668229, |
|
"loss": 2.1074, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.11354054319050888, |
|
"grad_norm": 4.613293170928955, |
|
"learning_rate": 0.00012710841282461238, |
|
"loss": 1.3278, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.11373664084714187, |
|
"grad_norm": 7.878372669219971, |
|
"learning_rate": 0.0001268890563885512, |
|
"loss": 3.3353, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11393273850377487, |
|
"grad_norm": 4.252342224121094, |
|
"learning_rate": 0.0001266695603971089, |
|
"loss": 1.2478, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.11412883616040788, |
|
"grad_norm": 13.550718307495117, |
|
"learning_rate": 0.0001264499259894793, |
|
"loss": 2.3346, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.11432493381704088, |
|
"grad_norm": 5.674976348876953, |
|
"learning_rate": 0.0001262301543055746, |
|
"loss": 1.9865, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.11452103147367389, |
|
"grad_norm": 14.854180335998535, |
|
"learning_rate": 0.0001260102464860195, |
|
"loss": 2.1993, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.11471712913030689, |
|
"grad_norm": 5.227717399597168, |
|
"learning_rate": 0.0001257902036721452, |
|
"loss": 1.3025, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.1149132267869399, |
|
"grad_norm": 9.518735885620117, |
|
"learning_rate": 0.00012557002700598353, |
|
"loss": 1.9782, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.1151093244435729, |
|
"grad_norm": 15.842093467712402, |
|
"learning_rate": 0.00012534971763026104, |
|
"loss": 2.2285, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.1153054221002059, |
|
"grad_norm": 7.949627876281738, |
|
"learning_rate": 0.00012512927668839304, |
|
"loss": 1.0101, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.11550151975683891, |
|
"grad_norm": 10.285954475402832, |
|
"learning_rate": 0.00012490870532447774, |
|
"loss": 2.2186, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.11569761741347191, |
|
"grad_norm": 13.825008392333984, |
|
"learning_rate": 0.00012468800468329013, |
|
"loss": 1.9117, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.11589371507010492, |
|
"grad_norm": 4.062519073486328, |
|
"learning_rate": 0.00012446717591027624, |
|
"loss": 1.3969, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.11608981272673792, |
|
"grad_norm": 13.4169282913208, |
|
"learning_rate": 0.00012424622015154703, |
|
"loss": 1.6026, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.11628591038337092, |
|
"grad_norm": 5.054627895355225, |
|
"learning_rate": 0.0001240251385538726, |
|
"loss": 3.3848, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.11648200804000393, |
|
"grad_norm": 5.519535541534424, |
|
"learning_rate": 0.00012380393226467615, |
|
"loss": 2.0605, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.11667810569663692, |
|
"grad_norm": 8.747673034667969, |
|
"learning_rate": 0.000123582602432028, |
|
"loss": 1.945, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.11687420335326992, |
|
"grad_norm": 7.695986747741699, |
|
"learning_rate": 0.0001233611502046397, |
|
"loss": 1.2818, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.11707030100990293, |
|
"grad_norm": 6.775882720947266, |
|
"learning_rate": 0.000123139576731858, |
|
"loss": 1.5774, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.11726639866653593, |
|
"grad_norm": 6.141412258148193, |
|
"learning_rate": 0.00012291788316365888, |
|
"loss": 2.0431, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.11746249632316894, |
|
"grad_norm": 6.908384323120117, |
|
"learning_rate": 0.00012269607065064177, |
|
"loss": 2.298, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.11765859397980194, |
|
"grad_norm": 6.526587963104248, |
|
"learning_rate": 0.0001224741403440233, |
|
"loss": 0.7667, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11785469163643494, |
|
"grad_norm": 6.505580902099609, |
|
"learning_rate": 0.00012225209339563145, |
|
"loss": 2.2598, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.11805078929306795, |
|
"grad_norm": 5.94541072845459, |
|
"learning_rate": 0.00012202993095789966, |
|
"loss": 2.5347, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.11824688694970095, |
|
"grad_norm": 4.825118064880371, |
|
"learning_rate": 0.00012180765418386068, |
|
"loss": 1.3592, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.11844298460633396, |
|
"grad_norm": 10.260912895202637, |
|
"learning_rate": 0.00012158526422714076, |
|
"loss": 1.9816, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.11863908226296696, |
|
"grad_norm": 4.47437047958374, |
|
"learning_rate": 0.00012136276224195348, |
|
"loss": 1.3715, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.11883517991959996, |
|
"grad_norm": 5.319314002990723, |
|
"learning_rate": 0.00012114014938309393, |
|
"loss": 1.7164, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.11903127757623297, |
|
"grad_norm": 9.467094421386719, |
|
"learning_rate": 0.00012091742680593254, |
|
"loss": 1.711, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.11922737523286597, |
|
"grad_norm": 15.36571979522705, |
|
"learning_rate": 0.0001206945956664093, |
|
"loss": 2.1494, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.11942347288949898, |
|
"grad_norm": 5.132857322692871, |
|
"learning_rate": 0.00012047165712102759, |
|
"loss": 1.0723, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.11961957054613197, |
|
"grad_norm": 7.701923847198486, |
|
"learning_rate": 0.00012024861232684823, |
|
"loss": 0.6081, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.11981566820276497, |
|
"grad_norm": 8.608162879943848, |
|
"learning_rate": 0.00012002546244148345, |
|
"loss": 2.7694, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.12001176585939798, |
|
"grad_norm": 6.971923351287842, |
|
"learning_rate": 0.00011980220862309097, |
|
"loss": 2.4804, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.12020786351603098, |
|
"grad_norm": 6.421287536621094, |
|
"learning_rate": 0.00011957885203036785, |
|
"loss": 1.1601, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.12040396117266398, |
|
"grad_norm": 5.901442527770996, |
|
"learning_rate": 0.00011935539382254459, |
|
"loss": 1.1592, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.12060005882929699, |
|
"grad_norm": 8.312341690063477, |
|
"learning_rate": 0.00011913183515937916, |
|
"loss": 1.4096, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.12079615648592999, |
|
"grad_norm": 9.224618911743164, |
|
"learning_rate": 0.00011890817720115075, |
|
"loss": 1.8728, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.120992254142563, |
|
"grad_norm": 6.559041500091553, |
|
"learning_rate": 0.00011868442110865399, |
|
"loss": 1.4327, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.121188351799196, |
|
"grad_norm": 6.139137268066406, |
|
"learning_rate": 0.0001184605680431928, |
|
"loss": 2.3143, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.121384449455829, |
|
"grad_norm": 7.806881427764893, |
|
"learning_rate": 0.0001182366191665744, |
|
"loss": 2.0893, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.12158054711246201, |
|
"grad_norm": 5.146418571472168, |
|
"learning_rate": 0.00011801257564110329, |
|
"loss": 0.9106, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12177664476909501, |
|
"grad_norm": 10.43996810913086, |
|
"learning_rate": 0.00011778843862957514, |
|
"loss": 1.5653, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.12197274242572802, |
|
"grad_norm": 9.192870140075684, |
|
"learning_rate": 0.0001175642092952709, |
|
"loss": 1.097, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.12216884008236102, |
|
"grad_norm": 12.34306526184082, |
|
"learning_rate": 0.00011733988880195068, |
|
"loss": 0.8703, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.12236493773899403, |
|
"grad_norm": 6.297984600067139, |
|
"learning_rate": 0.00011711547831384761, |
|
"loss": 0.8209, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.12256103539562702, |
|
"grad_norm": 6.844716548919678, |
|
"learning_rate": 0.00011689097899566198, |
|
"loss": 1.2469, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.12275713305226002, |
|
"grad_norm": 4.610165119171143, |
|
"learning_rate": 0.00011666639201255506, |
|
"loss": 1.6045, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.12295323070889302, |
|
"grad_norm": 8.704841613769531, |
|
"learning_rate": 0.00011644171853014319, |
|
"loss": 1.5989, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.12314932836552603, |
|
"grad_norm": 14.035907745361328, |
|
"learning_rate": 0.00011621695971449154, |
|
"loss": 2.3428, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.12334542602215903, |
|
"grad_norm": 8.880375862121582, |
|
"learning_rate": 0.00011599211673210826, |
|
"loss": 1.6628, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.12354152367879204, |
|
"grad_norm": 5.904176235198975, |
|
"learning_rate": 0.00011576719074993827, |
|
"loss": 1.8985, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12373762133542504, |
|
"grad_norm": 5.038250923156738, |
|
"learning_rate": 0.00011554218293535725, |
|
"loss": 1.8648, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.12393371899205805, |
|
"grad_norm": 4.206717014312744, |
|
"learning_rate": 0.00011531709445616563, |
|
"loss": 1.7799, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.12412981664869105, |
|
"grad_norm": 9.580304145812988, |
|
"learning_rate": 0.00011509192648058249, |
|
"loss": 1.484, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.12432591430532405, |
|
"grad_norm": 5.086032867431641, |
|
"learning_rate": 0.00011486668017723949, |
|
"loss": 1.2848, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.12452201196195706, |
|
"grad_norm": 3.946556568145752, |
|
"learning_rate": 0.00011464135671517482, |
|
"loss": 0.6885, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.12471810961859006, |
|
"grad_norm": 5.43235969543457, |
|
"learning_rate": 0.0001144159572638271, |
|
"loss": 1.1324, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.12491420727522307, |
|
"grad_norm": 14.442760467529297, |
|
"learning_rate": 0.00011419048299302939, |
|
"loss": 2.5022, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.12511030493185607, |
|
"grad_norm": 5.514584541320801, |
|
"learning_rate": 0.00011396493507300303, |
|
"loss": 0.714, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.12530640258848907, |
|
"grad_norm": 9.007026672363281, |
|
"learning_rate": 0.00011373931467435159, |
|
"loss": 1.894, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.12550250024512208, |
|
"grad_norm": 4.5296196937561035, |
|
"learning_rate": 0.00011351362296805485, |
|
"loss": 1.8764, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12569859790175508, |
|
"grad_norm": 6.608786106109619, |
|
"learning_rate": 0.00011328786112546269, |
|
"loss": 1.8515, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.1258946955583881, |
|
"grad_norm": 5.0649261474609375, |
|
"learning_rate": 0.00011306203031828886, |
|
"loss": 1.1955, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.1260907932150211, |
|
"grad_norm": 7.887217998504639, |
|
"learning_rate": 0.00011283613171860525, |
|
"loss": 1.9566, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.1262868908716541, |
|
"grad_norm": 3.997429609298706, |
|
"learning_rate": 0.00011261016649883545, |
|
"loss": 0.4142, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.1264829885282871, |
|
"grad_norm": 6.192634582519531, |
|
"learning_rate": 0.0001123841358317489, |
|
"loss": 0.7429, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.1266790861849201, |
|
"grad_norm": 10.252041816711426, |
|
"learning_rate": 0.00011215804089045459, |
|
"loss": 1.5821, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.12687518384155308, |
|
"grad_norm": 15.608290672302246, |
|
"learning_rate": 0.00011193188284839517, |
|
"loss": 2.9049, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.12707128149818608, |
|
"grad_norm": 7.4965314865112305, |
|
"learning_rate": 0.00011170566287934088, |
|
"loss": 1.7288, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.1272673791548191, |
|
"grad_norm": 13.268543243408203, |
|
"learning_rate": 0.00011147938215738323, |
|
"loss": 2.6662, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.1274634768114521, |
|
"grad_norm": 6.431972503662109, |
|
"learning_rate": 0.00011125304185692907, |
|
"loss": 2.416, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.1276595744680851, |
|
"grad_norm": 5.82267951965332, |
|
"learning_rate": 0.0001110266431526945, |
|
"loss": 1.0295, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.1278556721247181, |
|
"grad_norm": 8.612664222717285, |
|
"learning_rate": 0.00011080018721969871, |
|
"loss": 2.5151, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.1280517697813511, |
|
"grad_norm": 10.103599548339844, |
|
"learning_rate": 0.00011057367523325792, |
|
"loss": 2.4646, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.1282478674379841, |
|
"grad_norm": 6.955473899841309, |
|
"learning_rate": 0.00011034710836897921, |
|
"loss": 1.3256, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.1284439650946171, |
|
"grad_norm": 6.604331016540527, |
|
"learning_rate": 0.00011012048780275463, |
|
"loss": 1.1124, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.12864006275125012, |
|
"grad_norm": 7.2269511222839355, |
|
"learning_rate": 0.00010989381471075482, |
|
"loss": 2.1239, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.12883616040788312, |
|
"grad_norm": 7.591269016265869, |
|
"learning_rate": 0.00010966709026942303, |
|
"loss": 1.9995, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"grad_norm": 5.342310428619385, |
|
"learning_rate": 0.00010944031565546906, |
|
"loss": 0.8718, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.12922835572114913, |
|
"grad_norm": 9.045146942138672, |
|
"learning_rate": 0.0001092134920458631, |
|
"loss": 0.9157, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.12942445337778213, |
|
"grad_norm": 4.300910472869873, |
|
"learning_rate": 0.00010898662061782965, |
|
"loss": 1.0317, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.12962055103441514, |
|
"grad_norm": 11.28966999053955, |
|
"learning_rate": 0.0001087597025488413, |
|
"loss": 0.7102, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.12981664869104814, |
|
"grad_norm": 7.143383502960205, |
|
"learning_rate": 0.00010853273901661285, |
|
"loss": 1.4524, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.13001274634768115, |
|
"grad_norm": 8.20251750946045, |
|
"learning_rate": 0.00010830573119909493, |
|
"loss": 1.825, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.13020884400431415, |
|
"grad_norm": 8.748811721801758, |
|
"learning_rate": 0.00010807868027446808, |
|
"loss": 0.6214, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.13040494166094715, |
|
"grad_norm": 6.7635040283203125, |
|
"learning_rate": 0.00010785158742113655, |
|
"loss": 1.158, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.13060103931758016, |
|
"grad_norm": 5.804599761962891, |
|
"learning_rate": 0.00010762445381772217, |
|
"loss": 1.2651, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.13079713697421316, |
|
"grad_norm": 6.046361923217773, |
|
"learning_rate": 0.00010739728064305834, |
|
"loss": 1.5022, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.13099323463084617, |
|
"grad_norm": 10.987192153930664, |
|
"learning_rate": 0.00010717006907618377, |
|
"loss": 1.5359, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.13118933228747917, |
|
"grad_norm": 8.694954872131348, |
|
"learning_rate": 0.00010694282029633647, |
|
"loss": 1.682, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.13138542994411218, |
|
"grad_norm": 15.458917617797852, |
|
"learning_rate": 0.00010671553548294753, |
|
"loss": 1.3832, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13158152760074518, |
|
"grad_norm": 3.7156457901000977, |
|
"learning_rate": 0.00010648821581563513, |
|
"loss": 2.4507, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.13177762525737818, |
|
"grad_norm": 9.7117338180542, |
|
"learning_rate": 0.00010626086247419826, |
|
"loss": 2.3141, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.1319737229140112, |
|
"grad_norm": 7.740649700164795, |
|
"learning_rate": 0.00010603347663861079, |
|
"loss": 2.1845, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.1321698205706442, |
|
"grad_norm": 7.079351425170898, |
|
"learning_rate": 0.00010580605948901514, |
|
"loss": 1.5912, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.1323659182272772, |
|
"grad_norm": 3.896484613418579, |
|
"learning_rate": 0.00010557861220571625, |
|
"loss": 0.8481, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.1325620158839102, |
|
"grad_norm": 12.182394981384277, |
|
"learning_rate": 0.00010535113596917556, |
|
"loss": 2.1278, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.13275811354054318, |
|
"grad_norm": 7.2080230712890625, |
|
"learning_rate": 0.00010512363196000465, |
|
"loss": 1.8256, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.13295421119717618, |
|
"grad_norm": 7.081282615661621, |
|
"learning_rate": 0.00010489610135895933, |
|
"loss": 1.5351, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.13315030885380919, |
|
"grad_norm": 4.274497985839844, |
|
"learning_rate": 0.00010466854534693335, |
|
"loss": 1.3264, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.1333464065104422, |
|
"grad_norm": 6.364016532897949, |
|
"learning_rate": 0.00010444096510495243, |
|
"loss": 1.751, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1335425041670752, |
|
"grad_norm": 6.791689872741699, |
|
"learning_rate": 0.00010421336181416797, |
|
"loss": 0.7944, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.1337386018237082, |
|
"grad_norm": 9.867424964904785, |
|
"learning_rate": 0.00010398573665585105, |
|
"loss": 1.6282, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.1339346994803412, |
|
"grad_norm": 12.289506912231445, |
|
"learning_rate": 0.0001037580908113862, |
|
"loss": 2.1015, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.1341307971369742, |
|
"grad_norm": 5.21271276473999, |
|
"learning_rate": 0.00010353042546226537, |
|
"loss": 1.0861, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.1343268947936072, |
|
"grad_norm": 6.344359874725342, |
|
"learning_rate": 0.00010330274179008161, |
|
"loss": 1.295, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.13452299245024021, |
|
"grad_norm": 4.4853196144104, |
|
"learning_rate": 0.00010307504097652323, |
|
"loss": 1.5746, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.13471909010687322, |
|
"grad_norm": 4.868480205535889, |
|
"learning_rate": 0.0001028473242033674, |
|
"loss": 0.7534, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.13491518776350622, |
|
"grad_norm": 33.27044677734375, |
|
"learning_rate": 0.00010261959265247419, |
|
"loss": 3.1026, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.13511128542013923, |
|
"grad_norm": 5.2553253173828125, |
|
"learning_rate": 0.0001023918475057803, |
|
"loss": 1.7087, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.13530738307677223, |
|
"grad_norm": 9.47342586517334, |
|
"learning_rate": 0.00010216408994529303, |
|
"loss": 1.2846, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.13550348073340523, |
|
"grad_norm": 5.372720241546631, |
|
"learning_rate": 0.00010193632115308411, |
|
"loss": 2.6163, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.13569957839003824, |
|
"grad_norm": 9.89535903930664, |
|
"learning_rate": 0.00010170854231128352, |
|
"loss": 2.3616, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.13589567604667124, |
|
"grad_norm": 8.19672966003418, |
|
"learning_rate": 0.00010148075460207347, |
|
"loss": 1.8404, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.13609177370330425, |
|
"grad_norm": 5.280313491821289, |
|
"learning_rate": 0.0001012529592076821, |
|
"loss": 2.2078, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.13628787135993725, |
|
"grad_norm": 7.142581462860107, |
|
"learning_rate": 0.00010102515731037758, |
|
"loss": 0.9421, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.13648396901657026, |
|
"grad_norm": 8.142248153686523, |
|
"learning_rate": 0.00010079735009246167, |
|
"loss": 0.83, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.13648396901657026, |
|
"eval_loss": 0.40602046251296997, |
|
"eval_runtime": 78.837, |
|
"eval_samples_per_second": 27.246, |
|
"eval_steps_per_second": 13.623, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.13668006667320326, |
|
"grad_norm": 3.3614909648895264, |
|
"learning_rate": 0.00010056953873626384, |
|
"loss": 0.7098, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.13687616432983626, |
|
"grad_norm": 6.330589294433594, |
|
"learning_rate": 0.00010034172442413501, |
|
"loss": 1.97, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.13707226198646927, |
|
"grad_norm": 6.881412506103516, |
|
"learning_rate": 0.00010011390833844143, |
|
"loss": 2.2338, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.13726835964310227, |
|
"grad_norm": 6.3669114112854, |
|
"learning_rate": 9.988609166155859e-05, |
|
"loss": 1.2176, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13746445729973528, |
|
"grad_norm": 4.456204414367676, |
|
"learning_rate": 9.965827557586498e-05, |
|
"loss": 2.5429, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.13766055495636828, |
|
"grad_norm": 11.206175804138184, |
|
"learning_rate": 9.943046126373618e-05, |
|
"loss": 0.9134, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.13785665261300128, |
|
"grad_norm": 3.9151828289031982, |
|
"learning_rate": 9.920264990753837e-05, |
|
"loss": 0.8662, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.1380527502696343, |
|
"grad_norm": 9.173954963684082, |
|
"learning_rate": 9.897484268962243e-05, |
|
"loss": 1.7328, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.1382488479262673, |
|
"grad_norm": 9.788105010986328, |
|
"learning_rate": 9.874704079231791e-05, |
|
"loss": 1.5085, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.1384449455829003, |
|
"grad_norm": 4.705010414123535, |
|
"learning_rate": 9.851924539792656e-05, |
|
"loss": 1.7789, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.1386410432395333, |
|
"grad_norm": 9.228858947753906, |
|
"learning_rate": 9.82914576887165e-05, |
|
"loss": 1.3564, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.13883714089616628, |
|
"grad_norm": 13.22546100616455, |
|
"learning_rate": 9.806367884691594e-05, |
|
"loss": 1.752, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.13903323855279928, |
|
"grad_norm": 8.163661003112793, |
|
"learning_rate": 9.783591005470698e-05, |
|
"loss": 1.3811, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.1392293362094323, |
|
"grad_norm": 8.644879341125488, |
|
"learning_rate": 9.760815249421973e-05, |
|
"loss": 1.8551, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1394254338660653, |
|
"grad_norm": 4.906625270843506, |
|
"learning_rate": 9.738040734752582e-05, |
|
"loss": 1.3423, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.1396215315226983, |
|
"grad_norm": 7.070640563964844, |
|
"learning_rate": 9.715267579663262e-05, |
|
"loss": 1.1465, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.1398176291793313, |
|
"grad_norm": 4.564503192901611, |
|
"learning_rate": 9.692495902347678e-05, |
|
"loss": 1.167, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.1400137268359643, |
|
"grad_norm": 6.235091686248779, |
|
"learning_rate": 9.669725820991841e-05, |
|
"loss": 1.0448, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.1402098244925973, |
|
"grad_norm": 6.636250019073486, |
|
"learning_rate": 9.646957453773469e-05, |
|
"loss": 1.1572, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.1404059221492303, |
|
"grad_norm": 4.873953342437744, |
|
"learning_rate": 9.62419091886138e-05, |
|
"loss": 1.6995, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.14060201980586332, |
|
"grad_norm": 7.3861589431762695, |
|
"learning_rate": 9.601426334414898e-05, |
|
"loss": 1.6366, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.14079811746249632, |
|
"grad_norm": 4.689702033996582, |
|
"learning_rate": 9.578663818583203e-05, |
|
"loss": 0.6136, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.14099421511912932, |
|
"grad_norm": 6.917242527008057, |
|
"learning_rate": 9.555903489504761e-05, |
|
"loss": 1.985, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.14119031277576233, |
|
"grad_norm": 11.42885971069336, |
|
"learning_rate": 9.533145465306667e-05, |
|
"loss": 1.2503, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14138641043239533, |
|
"grad_norm": 6.561046600341797, |
|
"learning_rate": 9.510389864104069e-05, |
|
"loss": 1.207, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.14158250808902834, |
|
"grad_norm": 5.668408393859863, |
|
"learning_rate": 9.487636803999538e-05, |
|
"loss": 1.5323, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.14177860574566134, |
|
"grad_norm": 5.94567346572876, |
|
"learning_rate": 9.464886403082445e-05, |
|
"loss": 1.6155, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.14197470340229434, |
|
"grad_norm": 5.67940092086792, |
|
"learning_rate": 9.442138779428376e-05, |
|
"loss": 1.1803, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.14217080105892735, |
|
"grad_norm": 5.852919101715088, |
|
"learning_rate": 9.419394051098489e-05, |
|
"loss": 2.2628, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.14236689871556035, |
|
"grad_norm": 5.615955352783203, |
|
"learning_rate": 9.396652336138922e-05, |
|
"loss": 2.6917, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.14256299637219336, |
|
"grad_norm": 14.781683921813965, |
|
"learning_rate": 9.373913752580175e-05, |
|
"loss": 1.8918, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.14275909402882636, |
|
"grad_norm": 6.98385763168335, |
|
"learning_rate": 9.351178418436488e-05, |
|
"loss": 0.8915, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.14295519168545937, |
|
"grad_norm": 11.97396469116211, |
|
"learning_rate": 9.328446451705249e-05, |
|
"loss": 1.3453, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.14315128934209237, |
|
"grad_norm": 7.939299583435059, |
|
"learning_rate": 9.305717970366358e-05, |
|
"loss": 1.7643, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14334738699872537, |
|
"grad_norm": 6.75924015045166, |
|
"learning_rate": 9.282993092381625e-05, |
|
"loss": 0.9198, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.14354348465535838, |
|
"grad_norm": 7.962716579437256, |
|
"learning_rate": 9.260271935694168e-05, |
|
"loss": 1.5201, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.14373958231199138, |
|
"grad_norm": 6.077849864959717, |
|
"learning_rate": 9.237554618227785e-05, |
|
"loss": 1.1027, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.14393567996862439, |
|
"grad_norm": 5.715548992156982, |
|
"learning_rate": 9.214841257886349e-05, |
|
"loss": 0.9475, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.1441317776252574, |
|
"grad_norm": 8.250515937805176, |
|
"learning_rate": 9.192131972553191e-05, |
|
"loss": 1.5568, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.1443278752818904, |
|
"grad_norm": 4.075748443603516, |
|
"learning_rate": 9.16942688009051e-05, |
|
"loss": 1.1728, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.1445239729385234, |
|
"grad_norm": 6.287231922149658, |
|
"learning_rate": 9.146726098338719e-05, |
|
"loss": 0.9493, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.14472007059515637, |
|
"grad_norm": 7.561791896820068, |
|
"learning_rate": 9.12402974511587e-05, |
|
"loss": 1.2801, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.14491616825178938, |
|
"grad_norm": 8.35126781463623, |
|
"learning_rate": 9.101337938217038e-05, |
|
"loss": 1.4908, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.14511226590842238, |
|
"grad_norm": 7.66392707824707, |
|
"learning_rate": 9.078650795413692e-05, |
|
"loss": 2.3328, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1453083635650554, |
|
"grad_norm": 8.209086418151855, |
|
"learning_rate": 9.055968434453097e-05, |
|
"loss": 1.1856, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.1455044612216884, |
|
"grad_norm": 6.804962635040283, |
|
"learning_rate": 9.0332909730577e-05, |
|
"loss": 1.8494, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.1457005588783214, |
|
"grad_norm": 8.357268333435059, |
|
"learning_rate": 9.01061852892452e-05, |
|
"loss": 2.0554, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.1458966565349544, |
|
"grad_norm": 9.071808815002441, |
|
"learning_rate": 8.98795121972454e-05, |
|
"loss": 1.455, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.1460927541915874, |
|
"grad_norm": 6.041928768157959, |
|
"learning_rate": 8.965289163102078e-05, |
|
"loss": 1.386, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.1462888518482204, |
|
"grad_norm": 9.787776947021484, |
|
"learning_rate": 8.942632476674211e-05, |
|
"loss": 1.4516, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.1464849495048534, |
|
"grad_norm": 11.039609909057617, |
|
"learning_rate": 8.919981278030133e-05, |
|
"loss": 2.5638, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.14668104716148642, |
|
"grad_norm": 7.651856899261475, |
|
"learning_rate": 8.89733568473055e-05, |
|
"loss": 2.2337, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.14687714481811942, |
|
"grad_norm": 5.052631855010986, |
|
"learning_rate": 8.874695814307094e-05, |
|
"loss": 2.1706, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.14707324247475242, |
|
"grad_norm": 4.033751964569092, |
|
"learning_rate": 8.852061784261678e-05, |
|
"loss": 0.7492, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.14726934013138543, |
|
"grad_norm": 7.171450138092041, |
|
"learning_rate": 8.829433712065914e-05, |
|
"loss": 1.1281, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.14746543778801843, |
|
"grad_norm": 4.533056735992432, |
|
"learning_rate": 8.806811715160485e-05, |
|
"loss": 1.1391, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.14766153544465144, |
|
"grad_norm": 6.083869934082031, |
|
"learning_rate": 8.784195910954545e-05, |
|
"loss": 1.0986, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.14785763310128444, |
|
"grad_norm": 8.265074729919434, |
|
"learning_rate": 8.761586416825117e-05, |
|
"loss": 2.8251, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.14805373075791745, |
|
"grad_norm": 4.621420860290527, |
|
"learning_rate": 8.738983350116454e-05, |
|
"loss": 1.3493, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.14824982841455045, |
|
"grad_norm": 9.803274154663086, |
|
"learning_rate": 8.716386828139477e-05, |
|
"loss": 1.6575, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.14844592607118345, |
|
"grad_norm": 8.033260345458984, |
|
"learning_rate": 8.693796968171113e-05, |
|
"loss": 1.4908, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.14864202372781646, |
|
"grad_norm": 10.312782287597656, |
|
"learning_rate": 8.671213887453735e-05, |
|
"loss": 1.0033, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.14883812138444946, |
|
"grad_norm": 4.6735615730285645, |
|
"learning_rate": 8.648637703194516e-05, |
|
"loss": 2.4385, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.14903421904108247, |
|
"grad_norm": 7.040375709533691, |
|
"learning_rate": 8.62606853256484e-05, |
|
"loss": 1.4345, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.14923031669771547, |
|
"grad_norm": 5.844077110290527, |
|
"learning_rate": 8.603506492699699e-05, |
|
"loss": 1.9578, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.14942641435434847, |
|
"grad_norm": 7.8477606773376465, |
|
"learning_rate": 8.58095170069706e-05, |
|
"loss": 1.915, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.14962251201098148, |
|
"grad_norm": 6.696018218994141, |
|
"learning_rate": 8.55840427361729e-05, |
|
"loss": 2.6032, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.14981860966761448, |
|
"grad_norm": 6.304353713989258, |
|
"learning_rate": 8.535864328482523e-05, |
|
"loss": 1.0302, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.1500147073242475, |
|
"grad_norm": 4.32615852355957, |
|
"learning_rate": 8.513331982276053e-05, |
|
"loss": 1.253, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.1502108049808805, |
|
"grad_norm": 5.056833744049072, |
|
"learning_rate": 8.490807351941753e-05, |
|
"loss": 0.9107, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.1504069026375135, |
|
"grad_norm": 11.254875183105469, |
|
"learning_rate": 8.468290554383436e-05, |
|
"loss": 2.1077, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.15060300029414647, |
|
"grad_norm": 5.245373249053955, |
|
"learning_rate": 8.445781706464277e-05, |
|
"loss": 0.7334, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.15079909795077948, |
|
"grad_norm": 6.346343994140625, |
|
"learning_rate": 8.423280925006178e-05, |
|
"loss": 2.2026, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.15099519560741248, |
|
"grad_norm": 9.745604515075684, |
|
"learning_rate": 8.400788326789175e-05, |
|
"loss": 0.8139, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.15119129326404548, |
|
"grad_norm": 4.820820331573486, |
|
"learning_rate": 8.378304028550848e-05, |
|
"loss": 1.2438, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.1513873909206785, |
|
"grad_norm": 7.597200393676758, |
|
"learning_rate": 8.355828146985684e-05, |
|
"loss": 2.3756, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.1515834885773115, |
|
"grad_norm": 4.522254943847656, |
|
"learning_rate": 8.333360798744496e-05, |
|
"loss": 1.9562, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.1517795862339445, |
|
"grad_norm": 11.164361000061035, |
|
"learning_rate": 8.310902100433809e-05, |
|
"loss": 2.1888, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.1519756838905775, |
|
"grad_norm": 8.457398414611816, |
|
"learning_rate": 8.288452168615242e-05, |
|
"loss": 0.7737, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.1521717815472105, |
|
"grad_norm": 10.31162166595459, |
|
"learning_rate": 8.266011119804936e-05, |
|
"loss": 1.1301, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.1523678792038435, |
|
"grad_norm": 6.120646953582764, |
|
"learning_rate": 8.243579070472909e-05, |
|
"loss": 0.6059, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.1525639768604765, |
|
"grad_norm": 7.931275844573975, |
|
"learning_rate": 8.221156137042489e-05, |
|
"loss": 2.2832, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.15276007451710952, |
|
"grad_norm": 8.665715217590332, |
|
"learning_rate": 8.198742435889674e-05, |
|
"loss": 1.5639, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.15295617217374252, |
|
"grad_norm": 6.210934638977051, |
|
"learning_rate": 8.176338083342561e-05, |
|
"loss": 0.5992, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15315226983037553, |
|
"grad_norm": 4.350475311279297, |
|
"learning_rate": 8.153943195680723e-05, |
|
"loss": 1.9462, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.15334836748700853, |
|
"grad_norm": 10.62283992767334, |
|
"learning_rate": 8.131557889134602e-05, |
|
"loss": 2.5713, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.15354446514364153, |
|
"grad_norm": 19.049753189086914, |
|
"learning_rate": 8.109182279884928e-05, |
|
"loss": 2.0163, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.15374056280027454, |
|
"grad_norm": 6.098108768463135, |
|
"learning_rate": 8.086816484062085e-05, |
|
"loss": 0.9044, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.15393666045690754, |
|
"grad_norm": 4.7711567878723145, |
|
"learning_rate": 8.064460617745542e-05, |
|
"loss": 1.8375, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.15413275811354055, |
|
"grad_norm": 6.954524993896484, |
|
"learning_rate": 8.042114796963219e-05, |
|
"loss": 1.4588, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.15432885577017355, |
|
"grad_norm": 4.3450422286987305, |
|
"learning_rate": 8.019779137690906e-05, |
|
"loss": 1.9173, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.15452495342680655, |
|
"grad_norm": 6.4524688720703125, |
|
"learning_rate": 7.997453755851658e-05, |
|
"loss": 0.9777, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.15472105108343956, |
|
"grad_norm": 4.7262139320373535, |
|
"learning_rate": 7.975138767315178e-05, |
|
"loss": 2.0938, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.15491714874007256, |
|
"grad_norm": 6.220945835113525, |
|
"learning_rate": 7.952834287897242e-05, |
|
"loss": 1.1835, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.15511324639670557, |
|
"grad_norm": 12.306231498718262, |
|
"learning_rate": 7.930540433359071e-05, |
|
"loss": 1.2498, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.15530934405333857, |
|
"grad_norm": 5.665555953979492, |
|
"learning_rate": 7.908257319406747e-05, |
|
"loss": 1.2846, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.15550544170997158, |
|
"grad_norm": 5.636224269866943, |
|
"learning_rate": 7.88598506169061e-05, |
|
"loss": 1.1359, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.15570153936660458, |
|
"grad_norm": 7.562717914581299, |
|
"learning_rate": 7.863723775804651e-05, |
|
"loss": 1.3149, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.15589763702323758, |
|
"grad_norm": 6.797637462615967, |
|
"learning_rate": 7.841473577285925e-05, |
|
"loss": 1.2934, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.1560937346798706, |
|
"grad_norm": 9.185527801513672, |
|
"learning_rate": 7.819234581613934e-05, |
|
"loss": 2.0876, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.1562898323365036, |
|
"grad_norm": 5.824263095855713, |
|
"learning_rate": 7.797006904210035e-05, |
|
"loss": 1.3984, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.15648592999313657, |
|
"grad_norm": 8.152481079101562, |
|
"learning_rate": 7.774790660436858e-05, |
|
"loss": 1.9248, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.15668202764976957, |
|
"grad_norm": 5.90303897857666, |
|
"learning_rate": 7.752585965597673e-05, |
|
"loss": 1.3629, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.15687812530640258, |
|
"grad_norm": 7.13554573059082, |
|
"learning_rate": 7.730392934935825e-05, |
|
"loss": 1.6686, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15707422296303558, |
|
"grad_norm": 14.757647514343262, |
|
"learning_rate": 7.708211683634112e-05, |
|
"loss": 3.0637, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.15727032061966859, |
|
"grad_norm": 6.754796504974365, |
|
"learning_rate": 7.686042326814205e-05, |
|
"loss": 1.6339, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.1574664182763016, |
|
"grad_norm": 4.844980716705322, |
|
"learning_rate": 7.663884979536035e-05, |
|
"loss": 1.799, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.1576625159329346, |
|
"grad_norm": 7.784121990203857, |
|
"learning_rate": 7.641739756797202e-05, |
|
"loss": 1.4723, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.1578586135895676, |
|
"grad_norm": 9.167228698730469, |
|
"learning_rate": 7.619606773532386e-05, |
|
"loss": 1.3234, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.1580547112462006, |
|
"grad_norm": 4.63117790222168, |
|
"learning_rate": 7.59748614461274e-05, |
|
"loss": 1.2373, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.1582508089028336, |
|
"grad_norm": 6.5301690101623535, |
|
"learning_rate": 7.5753779848453e-05, |
|
"loss": 2.5172, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.1584469065594666, |
|
"grad_norm": 6.791456699371338, |
|
"learning_rate": 7.553282408972382e-05, |
|
"loss": 1.563, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.15864300421609961, |
|
"grad_norm": 5.697129249572754, |
|
"learning_rate": 7.531199531670988e-05, |
|
"loss": 1.6731, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.15883910187273262, |
|
"grad_norm": 6.511412143707275, |
|
"learning_rate": 7.50912946755223e-05, |
|
"loss": 1.2208, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.15903519952936562, |
|
"grad_norm": 4.849149703979492, |
|
"learning_rate": 7.487072331160696e-05, |
|
"loss": 1.9081, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.15923129718599863, |
|
"grad_norm": 2.844851016998291, |
|
"learning_rate": 7.465028236973897e-05, |
|
"loss": 1.2562, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.15942739484263163, |
|
"grad_norm": 4.782979965209961, |
|
"learning_rate": 7.442997299401652e-05, |
|
"loss": 0.8038, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.15962349249926464, |
|
"grad_norm": 13.652055740356445, |
|
"learning_rate": 7.420979632785483e-05, |
|
"loss": 1.3842, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.15981959015589764, |
|
"grad_norm": 9.692163467407227, |
|
"learning_rate": 7.398975351398053e-05, |
|
"loss": 1.3089, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.16001568781253064, |
|
"grad_norm": 6.529005527496338, |
|
"learning_rate": 7.37698456944254e-05, |
|
"loss": 1.8166, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.16021178546916365, |
|
"grad_norm": 6.1307454109191895, |
|
"learning_rate": 7.355007401052072e-05, |
|
"loss": 1.6805, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.16040788312579665, |
|
"grad_norm": 23.4924259185791, |
|
"learning_rate": 7.333043960289113e-05, |
|
"loss": 1.8122, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.16060398078242966, |
|
"grad_norm": 5.680008411407471, |
|
"learning_rate": 7.311094361144881e-05, |
|
"loss": 1.7381, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.16080007843906266, |
|
"grad_norm": 6.790538787841797, |
|
"learning_rate": 7.289158717538765e-05, |
|
"loss": 1.9142, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.16099617609569566, |
|
"grad_norm": 7.620028495788574, |
|
"learning_rate": 7.267237143317706e-05, |
|
"loss": 1.8411, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.16119227375232867, |
|
"grad_norm": 5.989513397216797, |
|
"learning_rate": 7.245329752255647e-05, |
|
"loss": 1.7845, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.16138837140896167, |
|
"grad_norm": 5.91652774810791, |
|
"learning_rate": 7.223436658052898e-05, |
|
"loss": 1.7267, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.16158446906559468, |
|
"grad_norm": 6.713134765625, |
|
"learning_rate": 7.201557974335583e-05, |
|
"loss": 1.6799, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.16178056672222768, |
|
"grad_norm": 5.243997097015381, |
|
"learning_rate": 7.179693814655026e-05, |
|
"loss": 1.7857, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.16197666437886069, |
|
"grad_norm": 4.856655120849609, |
|
"learning_rate": 7.157844292487173e-05, |
|
"loss": 1.0967, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.1621727620354937, |
|
"grad_norm": 4.94920015335083, |
|
"learning_rate": 7.13600952123201e-05, |
|
"loss": 1.4571, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.16236885969212667, |
|
"grad_norm": 5.146086692810059, |
|
"learning_rate": 7.114189614212944e-05, |
|
"loss": 0.6922, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.16256495734875967, |
|
"grad_norm": 8.812291145324707, |
|
"learning_rate": 7.092384684676262e-05, |
|
"loss": 1.2531, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.16276105500539267, |
|
"grad_norm": 6.924501419067383, |
|
"learning_rate": 7.070594845790497e-05, |
|
"loss": 1.3011, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16295715266202568, |
|
"grad_norm": 10.452120780944824, |
|
"learning_rate": 7.048820210645862e-05, |
|
"loss": 1.1114, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.16315325031865868, |
|
"grad_norm": 8.212396621704102, |
|
"learning_rate": 7.027060892253679e-05, |
|
"loss": 0.9207, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.1633493479752917, |
|
"grad_norm": 17.70586585998535, |
|
"learning_rate": 7.005317003545754e-05, |
|
"loss": 1.3019, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.1635454456319247, |
|
"grad_norm": 12.174421310424805, |
|
"learning_rate": 6.983588657373833e-05, |
|
"loss": 2.1275, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.1637415432885577, |
|
"grad_norm": 3.9426016807556152, |
|
"learning_rate": 6.96187596650898e-05, |
|
"loss": 2.4098, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.1639376409451907, |
|
"grad_norm": 6.121764183044434, |
|
"learning_rate": 6.940179043641005e-05, |
|
"loss": 1.0717, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.1641337386018237, |
|
"grad_norm": 4.13683557510376, |
|
"learning_rate": 6.918498001377901e-05, |
|
"loss": 1.4934, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.1643298362584567, |
|
"grad_norm": 4.907169818878174, |
|
"learning_rate": 6.896832952245217e-05, |
|
"loss": 1.9002, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.1645259339150897, |
|
"grad_norm": 8.611984252929688, |
|
"learning_rate": 6.875184008685514e-05, |
|
"loss": 1.1805, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.16472203157172272, |
|
"grad_norm": 8.626840591430664, |
|
"learning_rate": 6.853551283057757e-05, |
|
"loss": 1.1437, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.16491812922835572, |
|
"grad_norm": 6.1718878746032715, |
|
"learning_rate": 6.831934887636738e-05, |
|
"loss": 1.5465, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.16511422688498872, |
|
"grad_norm": 6.6393961906433105, |
|
"learning_rate": 6.8103349346125e-05, |
|
"loss": 1.0682, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.16531032454162173, |
|
"grad_norm": 5.233375072479248, |
|
"learning_rate": 6.788751536089739e-05, |
|
"loss": 1.8218, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.16550642219825473, |
|
"grad_norm": 9.505565643310547, |
|
"learning_rate": 6.767184804087248e-05, |
|
"loss": 1.3462, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.16570251985488774, |
|
"grad_norm": 5.554503440856934, |
|
"learning_rate": 6.745634850537302e-05, |
|
"loss": 0.7193, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.16589861751152074, |
|
"grad_norm": 6.603649139404297, |
|
"learning_rate": 6.724101787285114e-05, |
|
"loss": 1.3164, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.16609471516815374, |
|
"grad_norm": 10.690621376037598, |
|
"learning_rate": 6.702585726088222e-05, |
|
"loss": 3.3145, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.16629081282478675, |
|
"grad_norm": 17.521020889282227, |
|
"learning_rate": 6.681086778615922e-05, |
|
"loss": 2.3221, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.16648691048141975, |
|
"grad_norm": 11.0389986038208, |
|
"learning_rate": 6.659605056448702e-05, |
|
"loss": 1.2903, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.16668300813805276, |
|
"grad_norm": 5.6815900802612305, |
|
"learning_rate": 6.638140671077633e-05, |
|
"loss": 1.1651, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.16687910579468576, |
|
"grad_norm": 14.61434555053711, |
|
"learning_rate": 6.616693733903823e-05, |
|
"loss": 2.5921, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.16707520345131877, |
|
"grad_norm": 6.983891487121582, |
|
"learning_rate": 6.595264356237812e-05, |
|
"loss": 1.2114, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.16727130110795177, |
|
"grad_norm": 4.762997627258301, |
|
"learning_rate": 6.573852649299009e-05, |
|
"loss": 0.8655, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.16746739876458477, |
|
"grad_norm": 8.16657829284668, |
|
"learning_rate": 6.552458724215114e-05, |
|
"loss": 1.584, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.16766349642121778, |
|
"grad_norm": 3.753600597381592, |
|
"learning_rate": 6.531082692021532e-05, |
|
"loss": 0.7213, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.16785959407785078, |
|
"grad_norm": 20.525659561157227, |
|
"learning_rate": 6.509724663660813e-05, |
|
"loss": 3.2134, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.1680556917344838, |
|
"grad_norm": 4.803929805755615, |
|
"learning_rate": 6.488384749982053e-05, |
|
"loss": 1.0283, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.16825178939111676, |
|
"grad_norm": 4.031068325042725, |
|
"learning_rate": 6.467063061740345e-05, |
|
"loss": 0.86, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.16844788704774977, |
|
"grad_norm": 8.304176330566406, |
|
"learning_rate": 6.445759709596182e-05, |
|
"loss": 0.7242, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.16864398470438277, |
|
"grad_norm": 5.160337448120117, |
|
"learning_rate": 6.424474804114895e-05, |
|
"loss": 1.7592, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16884008236101578, |
|
"grad_norm": 6.138307571411133, |
|
"learning_rate": 6.403208455766081e-05, |
|
"loss": 2.6926, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.16903618001764878, |
|
"grad_norm": 3.878741979598999, |
|
"learning_rate": 6.381960774923017e-05, |
|
"loss": 1.2294, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.16923227767428178, |
|
"grad_norm": 4.720643043518066, |
|
"learning_rate": 6.360731871862093e-05, |
|
"loss": 1.7562, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.1694283753309148, |
|
"grad_norm": 4.653330326080322, |
|
"learning_rate": 6.339521856762254e-05, |
|
"loss": 1.2522, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.1696244729875478, |
|
"grad_norm": 6.003101348876953, |
|
"learning_rate": 6.318330839704405e-05, |
|
"loss": 1.4006, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.1698205706441808, |
|
"grad_norm": 5.075412273406982, |
|
"learning_rate": 6.297158930670852e-05, |
|
"loss": 0.7885, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.1700166683008138, |
|
"grad_norm": 4.062684059143066, |
|
"learning_rate": 6.276006239544729e-05, |
|
"loss": 1.0201, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.1702127659574468, |
|
"grad_norm": 5.678539752960205, |
|
"learning_rate": 6.254872876109438e-05, |
|
"loss": 1.1822, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.1704088636140798, |
|
"grad_norm": 4.655857563018799, |
|
"learning_rate": 6.233758950048056e-05, |
|
"loss": 1.4193, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.1706049612707128, |
|
"grad_norm": 18.205698013305664, |
|
"learning_rate": 6.21266457094278e-05, |
|
"loss": 0.7067, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.17080105892734582, |
|
"grad_norm": 8.73768424987793, |
|
"learning_rate": 6.191589848274368e-05, |
|
"loss": 1.9048, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.17099715658397882, |
|
"grad_norm": 5.723482131958008, |
|
"learning_rate": 6.170534891421556e-05, |
|
"loss": 0.8121, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.17119325424061183, |
|
"grad_norm": 6.16123104095459, |
|
"learning_rate": 6.149499809660488e-05, |
|
"loss": 2.5358, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.17138935189724483, |
|
"grad_norm": 6.115005016326904, |
|
"learning_rate": 6.128484712164164e-05, |
|
"loss": 1.6489, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.17158544955387783, |
|
"grad_norm": 9.149282455444336, |
|
"learning_rate": 6.107489708001855e-05, |
|
"loss": 1.7977, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.17178154721051084, |
|
"grad_norm": 7.190993309020996, |
|
"learning_rate": 6.086514906138563e-05, |
|
"loss": 1.3526, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.17197764486714384, |
|
"grad_norm": 8.292972564697266, |
|
"learning_rate": 6.065560415434417e-05, |
|
"loss": 1.8477, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.17217374252377685, |
|
"grad_norm": 8.858931541442871, |
|
"learning_rate": 6.044626344644151e-05, |
|
"loss": 1.3605, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.17236984018040985, |
|
"grad_norm": 6.186944961547852, |
|
"learning_rate": 6.023712802416508e-05, |
|
"loss": 1.0484, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.17256593783704285, |
|
"grad_norm": 9.702549934387207, |
|
"learning_rate": 6.0028198972936836e-05, |
|
"loss": 1.3224, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.17276203549367586, |
|
"grad_norm": 12.195408821105957, |
|
"learning_rate": 5.981947737710779e-05, |
|
"loss": 1.8586, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.17295813315030886, |
|
"grad_norm": 6.8144025802612305, |
|
"learning_rate": 5.9610964319952104e-05, |
|
"loss": 1.5967, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.17315423080694187, |
|
"grad_norm": 10.37683391571045, |
|
"learning_rate": 5.940266088366173e-05, |
|
"loss": 1.7278, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.17335032846357487, |
|
"grad_norm": 5.85037899017334, |
|
"learning_rate": 5.919456814934061e-05, |
|
"loss": 1.3165, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.17354642612020788, |
|
"grad_norm": 7.210351943969727, |
|
"learning_rate": 5.8986687196999135e-05, |
|
"loss": 1.1156, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.17374252377684088, |
|
"grad_norm": 5.840338706970215, |
|
"learning_rate": 5.877901910554862e-05, |
|
"loss": 1.4444, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.17393862143347388, |
|
"grad_norm": 3.731435775756836, |
|
"learning_rate": 5.8571564952795475e-05, |
|
"loss": 0.9172, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.17413471909010686, |
|
"grad_norm": 13.559743881225586, |
|
"learning_rate": 5.8364325815435916e-05, |
|
"loss": 2.1312, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.17433081674673986, |
|
"grad_norm": 5.5318193435668945, |
|
"learning_rate": 5.815730276905014e-05, |
|
"loss": 1.735, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.17452691440337287, |
|
"grad_norm": 7.40202522277832, |
|
"learning_rate": 5.7950496888096795e-05, |
|
"loss": 1.42, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.17472301206000587, |
|
"grad_norm": 3.6923038959503174, |
|
"learning_rate": 5.7743909245907535e-05, |
|
"loss": 1.095, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.17491910971663888, |
|
"grad_norm": 5.6912713050842285, |
|
"learning_rate": 5.753754091468115e-05, |
|
"loss": 2.2399, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.17511520737327188, |
|
"grad_norm": 3.8538315296173096, |
|
"learning_rate": 5.7331392965478493e-05, |
|
"loss": 1.2043, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.17531130502990488, |
|
"grad_norm": 5.220914840698242, |
|
"learning_rate": 5.712546646821627e-05, |
|
"loss": 1.0669, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.1755074026865379, |
|
"grad_norm": 5.886853218078613, |
|
"learning_rate": 5.6919762491662164e-05, |
|
"loss": 1.3425, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.1757035003431709, |
|
"grad_norm": 7.820699214935303, |
|
"learning_rate": 5.671428210342884e-05, |
|
"loss": 1.6338, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.1758995979998039, |
|
"grad_norm": 9.36957836151123, |
|
"learning_rate": 5.650902636996837e-05, |
|
"loss": 1.1894, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.1760956956564369, |
|
"grad_norm": 3.9427287578582764, |
|
"learning_rate": 5.6303996356567177e-05, |
|
"loss": 1.179, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.1762917933130699, |
|
"grad_norm": 8.343499183654785, |
|
"learning_rate": 5.6099193127339864e-05, |
|
"loss": 1.9635, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.1764878909697029, |
|
"grad_norm": 7.9742279052734375, |
|
"learning_rate": 5.589461774522433e-05, |
|
"loss": 1.6162, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1766839886263359, |
|
"grad_norm": 4.933746337890625, |
|
"learning_rate": 5.5690271271975644e-05, |
|
"loss": 1.0064, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.17688008628296892, |
|
"grad_norm": 6.986325740814209, |
|
"learning_rate": 5.548615476816097e-05, |
|
"loss": 0.7052, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.17707618393960192, |
|
"grad_norm": 5.324771881103516, |
|
"learning_rate": 5.528226929315401e-05, |
|
"loss": 1.5658, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.17727228159623493, |
|
"grad_norm": 3.9526147842407227, |
|
"learning_rate": 5.507861590512916e-05, |
|
"loss": 1.0183, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.17746837925286793, |
|
"grad_norm": 8.242249488830566, |
|
"learning_rate": 5.48751956610566e-05, |
|
"loss": 1.9991, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.17766447690950093, |
|
"grad_norm": 6.519176483154297, |
|
"learning_rate": 5.467200961669619e-05, |
|
"loss": 1.2312, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.17786057456613394, |
|
"grad_norm": 3.49365496635437, |
|
"learning_rate": 5.446905882659243e-05, |
|
"loss": 0.5029, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.17805667222276694, |
|
"grad_norm": 5.808468818664551, |
|
"learning_rate": 5.426634434406883e-05, |
|
"loss": 1.1932, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.17825276987939995, |
|
"grad_norm": 6.1657328605651855, |
|
"learning_rate": 5.40638672212224e-05, |
|
"loss": 1.0127, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.17844886753603295, |
|
"grad_norm": 8.090532302856445, |
|
"learning_rate": 5.3861628508918384e-05, |
|
"loss": 1.0638, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.17864496519266596, |
|
"grad_norm": 6.026912689208984, |
|
"learning_rate": 5.3659629256784424e-05, |
|
"loss": 1.1598, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.17884106284929896, |
|
"grad_norm": 7.103152751922607, |
|
"learning_rate": 5.345787051320564e-05, |
|
"loss": 1.9001, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.17903716050593196, |
|
"grad_norm": 4.5661163330078125, |
|
"learning_rate": 5.325635332531864e-05, |
|
"loss": 1.2711, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.17923325816256497, |
|
"grad_norm": 6.035799980163574, |
|
"learning_rate": 5.305507873900649e-05, |
|
"loss": 2.3649, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.17942935581919797, |
|
"grad_norm": 6.231631278991699, |
|
"learning_rate": 5.2854047798893125e-05, |
|
"loss": 2.7058, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.17962545347583098, |
|
"grad_norm": 9.146753311157227, |
|
"learning_rate": 5.26532615483379e-05, |
|
"loss": 1.1388, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.17982155113246398, |
|
"grad_norm": 8.76220417022705, |
|
"learning_rate": 5.245272102943034e-05, |
|
"loss": 1.0848, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.18001764878909696, |
|
"grad_norm": 6.711489200592041, |
|
"learning_rate": 5.225242728298445e-05, |
|
"loss": 1.0747, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.18021374644572996, |
|
"grad_norm": 4.66898775100708, |
|
"learning_rate": 5.2052381348533564e-05, |
|
"loss": 0.6995, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.18040984410236297, |
|
"grad_norm": 6.504391193389893, |
|
"learning_rate": 5.1852584264324866e-05, |
|
"loss": 1.5352, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.18060594175899597, |
|
"grad_norm": 6.724686145782471, |
|
"learning_rate": 5.165303706731397e-05, |
|
"loss": 1.2562, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.18080203941562897, |
|
"grad_norm": 7.192296981811523, |
|
"learning_rate": 5.1453740793159586e-05, |
|
"loss": 1.0158, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.18099813707226198, |
|
"grad_norm": 5.173651218414307, |
|
"learning_rate": 5.12546964762181e-05, |
|
"loss": 1.3406, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.18119423472889498, |
|
"grad_norm": 8.315585136413574, |
|
"learning_rate": 5.105590514953824e-05, |
|
"loss": 2.0057, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.18139033238552799, |
|
"grad_norm": 4.940390110015869, |
|
"learning_rate": 5.085736784485571e-05, |
|
"loss": 1.6468, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.181586430042161, |
|
"grad_norm": 6.491610527038574, |
|
"learning_rate": 5.065908559258782e-05, |
|
"loss": 2.6567, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.181782527698794, |
|
"grad_norm": 8.9893798828125, |
|
"learning_rate": 5.046105942182815e-05, |
|
"loss": 1.2412, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.181978625355427, |
|
"grad_norm": 8.103008270263672, |
|
"learning_rate": 5.026329036034119e-05, |
|
"loss": 2.1319, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.18217472301206, |
|
"grad_norm": 8.022109985351562, |
|
"learning_rate": 5.006577943455706e-05, |
|
"loss": 1.4119, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.182370820668693, |
|
"grad_norm": 12.814486503601074, |
|
"learning_rate": 4.9868527669566113e-05, |
|
"loss": 1.9388, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.182566918325326, |
|
"grad_norm": 7.568852424621582, |
|
"learning_rate": 4.967153608911366e-05, |
|
"loss": 1.2114, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.18276301598195901, |
|
"grad_norm": 3.7613790035247803, |
|
"learning_rate": 4.947480571559462e-05, |
|
"loss": 1.2221, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.18295911363859202, |
|
"grad_norm": 8.301228523254395, |
|
"learning_rate": 4.927833757004826e-05, |
|
"loss": 1.2867, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.18315521129522502, |
|
"grad_norm": 5.2592244148254395, |
|
"learning_rate": 4.908213267215287e-05, |
|
"loss": 2.0937, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.18335130895185803, |
|
"grad_norm": 6.635796546936035, |
|
"learning_rate": 4.888619204022047e-05, |
|
"loss": 1.9856, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.18354740660849103, |
|
"grad_norm": 5.507410049438477, |
|
"learning_rate": 4.869051669119154e-05, |
|
"loss": 1.2305, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.18374350426512404, |
|
"grad_norm": 8.553701400756836, |
|
"learning_rate": 4.8495107640629675e-05, |
|
"loss": 1.6872, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.18393960192175704, |
|
"grad_norm": 7.652828693389893, |
|
"learning_rate": 4.829996590271646e-05, |
|
"loss": 1.4061, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.18413569957839004, |
|
"grad_norm": 6.595338821411133, |
|
"learning_rate": 4.810509249024604e-05, |
|
"loss": 1.2953, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.18433179723502305, |
|
"grad_norm": 4.507275581359863, |
|
"learning_rate": 4.7910488414619967e-05, |
|
"loss": 0.95, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.18452789489165605, |
|
"grad_norm": 7.35803747177124, |
|
"learning_rate": 4.7716154685841944e-05, |
|
"loss": 1.7958, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.18472399254828906, |
|
"grad_norm": 5.53951358795166, |
|
"learning_rate": 4.752209231251251e-05, |
|
"loss": 2.8191, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.18492009020492206, |
|
"grad_norm": 4.738945960998535, |
|
"learning_rate": 4.73283023018239e-05, |
|
"loss": 1.7728, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.18511618786155506, |
|
"grad_norm": 5.192052364349365, |
|
"learning_rate": 4.713478565955478e-05, |
|
"loss": 0.8087, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.18531228551818807, |
|
"grad_norm": 5.9963507652282715, |
|
"learning_rate": 4.694154339006501e-05, |
|
"loss": 1.1223, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.18550838317482107, |
|
"grad_norm": 14.023632049560547, |
|
"learning_rate": 4.6748576496290356e-05, |
|
"loss": 3.0858, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.18570448083145408, |
|
"grad_norm": 6.268606662750244, |
|
"learning_rate": 4.655588597973754e-05, |
|
"loss": 0.8988, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.18590057848808705, |
|
"grad_norm": 7.719700336456299, |
|
"learning_rate": 4.636347284047877e-05, |
|
"loss": 1.5117, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.18609667614472006, |
|
"grad_norm": 4.05161714553833, |
|
"learning_rate": 4.617133807714666e-05, |
|
"loss": 0.7625, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.18629277380135306, |
|
"grad_norm": 5.103604793548584, |
|
"learning_rate": 4.59794826869291e-05, |
|
"loss": 0.8914, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.18648887145798607, |
|
"grad_norm": 7.7601847648620605, |
|
"learning_rate": 4.578790766556386e-05, |
|
"loss": 0.8747, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.18668496911461907, |
|
"grad_norm": 12.439096450805664, |
|
"learning_rate": 4.559661400733383e-05, |
|
"loss": 2.2439, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.18688106677125207, |
|
"grad_norm": 7.002286911010742, |
|
"learning_rate": 4.5405602705061345e-05, |
|
"loss": 1.4917, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.18707716442788508, |
|
"grad_norm": 5.178001880645752, |
|
"learning_rate": 4.521487475010354e-05, |
|
"loss": 1.5737, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.18727326208451808, |
|
"grad_norm": 9.555988311767578, |
|
"learning_rate": 4.502443113234688e-05, |
|
"loss": 1.7495, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.1874693597411511, |
|
"grad_norm": 8.907003402709961, |
|
"learning_rate": 4.483427284020194e-05, |
|
"loss": 1.6053, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.1876654573977841, |
|
"grad_norm": 5.562190532684326, |
|
"learning_rate": 4.464440086059878e-05, |
|
"loss": 1.1854, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.1878615550544171, |
|
"grad_norm": 6.078771591186523, |
|
"learning_rate": 4.4454816178981115e-05, |
|
"loss": 1.3774, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.1880576527110501, |
|
"grad_norm": 12.021577835083008, |
|
"learning_rate": 4.426551977930191e-05, |
|
"loss": 1.6445, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.1882537503676831, |
|
"grad_norm": 7.406763553619385, |
|
"learning_rate": 4.407651264401763e-05, |
|
"loss": 2.0784, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.1884498480243161, |
|
"grad_norm": 3.961423635482788, |
|
"learning_rate": 4.38877957540837e-05, |
|
"loss": 1.5526, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.1886459456809491, |
|
"grad_norm": 7.410959243774414, |
|
"learning_rate": 4.3699370088949066e-05, |
|
"loss": 2.0247, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.18884204333758212, |
|
"grad_norm": 5.237401485443115, |
|
"learning_rate": 4.3511236626551047e-05, |
|
"loss": 2.199, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.18903814099421512, |
|
"grad_norm": 4.068509578704834, |
|
"learning_rate": 4.3323396343310715e-05, |
|
"loss": 2.3318, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.18923423865084812, |
|
"grad_norm": 5.523316383361816, |
|
"learning_rate": 4.313585021412724e-05, |
|
"loss": 1.3474, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.18943033630748113, |
|
"grad_norm": 5.052401542663574, |
|
"learning_rate": 4.294859921237339e-05, |
|
"loss": 1.1039, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.18962643396411413, |
|
"grad_norm": 8.468581199645996, |
|
"learning_rate": 4.2761644309889946e-05, |
|
"loss": 2.9943, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.18982253162074714, |
|
"grad_norm": 4.66163444519043, |
|
"learning_rate": 4.257498647698107e-05, |
|
"loss": 2.0152, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.19001862927738014, |
|
"grad_norm": 4.58842658996582, |
|
"learning_rate": 4.2388626682409194e-05, |
|
"loss": 1.2759, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.19021472693401315, |
|
"grad_norm": 3.5642545223236084, |
|
"learning_rate": 4.220256589338968e-05, |
|
"loss": 0.9877, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.19041082459064615, |
|
"grad_norm": 6.506111145019531, |
|
"learning_rate": 4.201680507558631e-05, |
|
"loss": 1.7171, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.19060692224727915, |
|
"grad_norm": 4.93696928024292, |
|
"learning_rate": 4.183134519310576e-05, |
|
"loss": 0.8798, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.19080301990391216, |
|
"grad_norm": 12.539702415466309, |
|
"learning_rate": 4.1646187208493005e-05, |
|
"loss": 3.0454, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.19099911756054516, |
|
"grad_norm": 9.189221382141113, |
|
"learning_rate": 4.146133208272608e-05, |
|
"loss": 1.9269, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.19119521521717817, |
|
"grad_norm": 5.362178802490234, |
|
"learning_rate": 4.1276780775211156e-05, |
|
"loss": 1.7086, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.19139131287381117, |
|
"grad_norm": 4.587559700012207, |
|
"learning_rate": 4.109253424377772e-05, |
|
"loss": 0.7529, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.19158741053044417, |
|
"grad_norm": 7.922691345214844, |
|
"learning_rate": 4.090859344467325e-05, |
|
"loss": 2.6641, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.19178350818707715, |
|
"grad_norm": 6.759201526641846, |
|
"learning_rate": 4.072495933255857e-05, |
|
"loss": 1.3197, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.19197960584371015, |
|
"grad_norm": 5.635506629943848, |
|
"learning_rate": 4.054163286050276e-05, |
|
"loss": 0.8712, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.19217570350034316, |
|
"grad_norm": 4.1558637619018555, |
|
"learning_rate": 4.035861497997828e-05, |
|
"loss": 2.3436, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.19237180115697616, |
|
"grad_norm": 9.821369171142578, |
|
"learning_rate": 4.017590664085593e-05, |
|
"loss": 1.6202, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.19256789881360917, |
|
"grad_norm": 7.816912651062012, |
|
"learning_rate": 3.999350879139997e-05, |
|
"loss": 1.1312, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.19276399647024217, |
|
"grad_norm": 5.585190296173096, |
|
"learning_rate": 3.981142237826332e-05, |
|
"loss": 1.9218, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.19296009412687518, |
|
"grad_norm": 4.785935878753662, |
|
"learning_rate": 3.962964834648236e-05, |
|
"loss": 0.8693, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.19315619178350818, |
|
"grad_norm": 5.3974385261535645, |
|
"learning_rate": 3.944818763947231e-05, |
|
"loss": 1.1065, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.19335228944014118, |
|
"grad_norm": 9.581192016601562, |
|
"learning_rate": 3.926704119902219e-05, |
|
"loss": 0.867, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.1935483870967742, |
|
"grad_norm": 10.229225158691406, |
|
"learning_rate": 3.9086209965289965e-05, |
|
"loss": 2.8435, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.1937444847534072, |
|
"grad_norm": 7.257898807525635, |
|
"learning_rate": 3.890569487679766e-05, |
|
"loss": 2.4263, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.1939405824100402, |
|
"grad_norm": 5.026792526245117, |
|
"learning_rate": 3.87254968704265e-05, |
|
"loss": 1.2831, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.1941366800666732, |
|
"grad_norm": 5.230473041534424, |
|
"learning_rate": 3.854561688141205e-05, |
|
"loss": 3.3858, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.1943327777233062, |
|
"grad_norm": 6.340061187744141, |
|
"learning_rate": 3.836605584333931e-05, |
|
"loss": 1.5421, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.1945288753799392, |
|
"grad_norm": 7.890949726104736, |
|
"learning_rate": 3.818681468813794e-05, |
|
"loss": 1.7882, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.1947249730365722, |
|
"grad_norm": 4.434299945831299, |
|
"learning_rate": 3.800789434607741e-05, |
|
"loss": 1.274, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.19492107069320522, |
|
"grad_norm": 5.311686038970947, |
|
"learning_rate": 3.782929574576213e-05, |
|
"loss": 1.7605, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.19511716834983822, |
|
"grad_norm": 22.297441482543945, |
|
"learning_rate": 3.7651019814126654e-05, |
|
"loss": 1.9094, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.19531326600647123, |
|
"grad_norm": 4.7996826171875, |
|
"learning_rate": 3.747306747643089e-05, |
|
"loss": 1.7196, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.19550936366310423, |
|
"grad_norm": 12.06876277923584, |
|
"learning_rate": 3.729543965625526e-05, |
|
"loss": 1.2351, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.19570546131973723, |
|
"grad_norm": 5.477703094482422, |
|
"learning_rate": 3.711813727549594e-05, |
|
"loss": 1.3366, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.19590155897637024, |
|
"grad_norm": 4.249327659606934, |
|
"learning_rate": 3.694116125436007e-05, |
|
"loss": 1.9066, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.19609765663300324, |
|
"grad_norm": 7.70191764831543, |
|
"learning_rate": 3.6764512511360935e-05, |
|
"loss": 1.3611, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19629375428963625, |
|
"grad_norm": 5.822709560394287, |
|
"learning_rate": 3.658819196331327e-05, |
|
"loss": 2.3203, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.19648985194626925, |
|
"grad_norm": 4.531265735626221, |
|
"learning_rate": 3.6412200525328435e-05, |
|
"loss": 0.7644, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.19668594960290225, |
|
"grad_norm": 4.714295387268066, |
|
"learning_rate": 3.623653911080971e-05, |
|
"loss": 1.6555, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.19688204725953526, |
|
"grad_norm": 7.089095592498779, |
|
"learning_rate": 3.606120863144753e-05, |
|
"loss": 1.6242, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.19707814491616826, |
|
"grad_norm": 2.268071174621582, |
|
"learning_rate": 3.588620999721477e-05, |
|
"loss": 0.3747, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.19727424257280127, |
|
"grad_norm": 9.244414329528809, |
|
"learning_rate": 3.571154411636203e-05, |
|
"loss": 1.8298, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.19747034022943427, |
|
"grad_norm": 4.8095808029174805, |
|
"learning_rate": 3.5537211895412846e-05, |
|
"loss": 0.8923, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.19766643788606725, |
|
"grad_norm": 7.700345516204834, |
|
"learning_rate": 3.536321423915913e-05, |
|
"loss": 0.691, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.19786253554270025, |
|
"grad_norm": 7.553009510040283, |
|
"learning_rate": 3.518955205065632e-05, |
|
"loss": 1.1634, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.19805863319933326, |
|
"grad_norm": 5.901485443115234, |
|
"learning_rate": 3.5016226231218774e-05, |
|
"loss": 2.3853, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.19825473085596626, |
|
"grad_norm": 5.342306137084961, |
|
"learning_rate": 3.4843237680415156e-05, |
|
"loss": 0.9747, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.19845082851259926, |
|
"grad_norm": 5.793070316314697, |
|
"learning_rate": 3.46705872960635e-05, |
|
"loss": 0.7877, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.19864692616923227, |
|
"grad_norm": 9.105026245117188, |
|
"learning_rate": 3.449827597422698e-05, |
|
"loss": 1.5736, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.19884302382586527, |
|
"grad_norm": 7.359753608703613, |
|
"learning_rate": 3.432630460920887e-05, |
|
"loss": 1.4213, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.19903912148249828, |
|
"grad_norm": 4.728425025939941, |
|
"learning_rate": 3.415467409354809e-05, |
|
"loss": 0.7005, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.19923521913913128, |
|
"grad_norm": 4.326775074005127, |
|
"learning_rate": 3.398338531801457e-05, |
|
"loss": 1.0779, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.19943131679576429, |
|
"grad_norm": 5.717733383178711, |
|
"learning_rate": 3.381243917160448e-05, |
|
"loss": 2.7467, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.1996274144523973, |
|
"grad_norm": 12.189995765686035, |
|
"learning_rate": 3.364183654153592e-05, |
|
"loss": 1.4596, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.1998235121090303, |
|
"grad_norm": 5.9200239181518555, |
|
"learning_rate": 3.3471578313243903e-05, |
|
"loss": 1.8764, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.2000196097656633, |
|
"grad_norm": 11.550288200378418, |
|
"learning_rate": 3.330166537037618e-05, |
|
"loss": 1.3655, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.2002157074222963, |
|
"grad_norm": 4.954752445220947, |
|
"learning_rate": 3.313209859478839e-05, |
|
"loss": 1.2426, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.2004118050789293, |
|
"grad_norm": 11.455079078674316, |
|
"learning_rate": 3.296287886653941e-05, |
|
"loss": 2.2854, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.2006079027355623, |
|
"grad_norm": 6.088212966918945, |
|
"learning_rate": 3.2794007063887186e-05, |
|
"loss": 1.3675, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.20080400039219531, |
|
"grad_norm": 7.098971366882324, |
|
"learning_rate": 3.262548406328365e-05, |
|
"loss": 1.0983, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.20100009804882832, |
|
"grad_norm": 3.8312485218048096, |
|
"learning_rate": 3.245731073937068e-05, |
|
"loss": 1.7243, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.20119619570546132, |
|
"grad_norm": 3.807969570159912, |
|
"learning_rate": 3.2289487964975076e-05, |
|
"loss": 1.0291, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.20139229336209433, |
|
"grad_norm": 4.957508087158203, |
|
"learning_rate": 3.212201661110449e-05, |
|
"loss": 1.1555, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.20158839101872733, |
|
"grad_norm": 10.736882209777832, |
|
"learning_rate": 3.1954897546942584e-05, |
|
"loss": 1.9095, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.20178448867536034, |
|
"grad_norm": 9.901065826416016, |
|
"learning_rate": 3.1788131639844534e-05, |
|
"loss": 2.2914, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.20198058633199334, |
|
"grad_norm": 5.642101764678955, |
|
"learning_rate": 3.162171975533282e-05, |
|
"loss": 1.3295, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.20217668398862634, |
|
"grad_norm": 6.677104949951172, |
|
"learning_rate": 3.1455662757092306e-05, |
|
"loss": 1.3554, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.20237278164525935, |
|
"grad_norm": 5.27210807800293, |
|
"learning_rate": 3.1289961506966214e-05, |
|
"loss": 1.0698, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.20256887930189235, |
|
"grad_norm": 6.362321853637695, |
|
"learning_rate": 3.11246168649512e-05, |
|
"loss": 1.135, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.20276497695852536, |
|
"grad_norm": 4.513217449188232, |
|
"learning_rate": 3.095962968919319e-05, |
|
"loss": 1.9138, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.20296107461515836, |
|
"grad_norm": 16.53858184814453, |
|
"learning_rate": 3.079500083598297e-05, |
|
"loss": 2.4364, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.20315717227179136, |
|
"grad_norm": 5.629922389984131, |
|
"learning_rate": 3.063073115975136e-05, |
|
"loss": 1.7899, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.20335326992842437, |
|
"grad_norm": 9.979622840881348, |
|
"learning_rate": 3.0466821513065314e-05, |
|
"loss": 1.7134, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.20354936758505734, |
|
"grad_norm": 8.400696754455566, |
|
"learning_rate": 3.030327274662298e-05, |
|
"loss": 1.6557, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.20374546524169035, |
|
"grad_norm": 9.457781791687012, |
|
"learning_rate": 3.0140085709249667e-05, |
|
"loss": 2.3906, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.20394156289832335, |
|
"grad_norm": 4.818408489227295, |
|
"learning_rate": 2.997726124789324e-05, |
|
"loss": 1.0997, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.20413766055495636, |
|
"grad_norm": 7.07374382019043, |
|
"learning_rate": 2.9814800207619774e-05, |
|
"loss": 1.441, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.20433375821158936, |
|
"grad_norm": 9.976203918457031, |
|
"learning_rate": 2.9652703431609263e-05, |
|
"loss": 2.2031, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.20452985586822237, |
|
"grad_norm": 8.051301002502441, |
|
"learning_rate": 2.9490971761151e-05, |
|
"loss": 1.2837, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.20472595352485537, |
|
"grad_norm": 3.8618202209472656, |
|
"learning_rate": 2.9329606035639458e-05, |
|
"loss": 1.5356, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.20472595352485537, |
|
"eval_loss": 0.3870697021484375, |
|
"eval_runtime": 78.8825, |
|
"eval_samples_per_second": 27.23, |
|
"eval_steps_per_second": 13.615, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.20492205118148837, |
|
"grad_norm": 4.138796329498291, |
|
"learning_rate": 2.9168607092569845e-05, |
|
"loss": 0.6768, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.20511814883812138, |
|
"grad_norm": 4.2951250076293945, |
|
"learning_rate": 2.9007975767533714e-05, |
|
"loss": 1.0667, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.20531424649475438, |
|
"grad_norm": 6.767472743988037, |
|
"learning_rate": 2.8847712894214686e-05, |
|
"loss": 2.2965, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.2055103441513874, |
|
"grad_norm": 5.017336845397949, |
|
"learning_rate": 2.8687819304384066e-05, |
|
"loss": 1.457, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.2057064418080204, |
|
"grad_norm": 5.76704740524292, |
|
"learning_rate": 2.852829582789669e-05, |
|
"loss": 1.3418, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.2059025394646534, |
|
"grad_norm": 5.123453617095947, |
|
"learning_rate": 2.8369143292686306e-05, |
|
"loss": 1.0328, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2060986371212864, |
|
"grad_norm": 6.851526260375977, |
|
"learning_rate": 2.821036252476156e-05, |
|
"loss": 2.0026, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.2062947347779194, |
|
"grad_norm": 5.268173694610596, |
|
"learning_rate": 2.8051954348201613e-05, |
|
"loss": 0.69, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.2064908324345524, |
|
"grad_norm": 6.25408935546875, |
|
"learning_rate": 2.789391958515183e-05, |
|
"loss": 1.2385, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.2066869300911854, |
|
"grad_norm": 4.305760383605957, |
|
"learning_rate": 2.7736259055819568e-05, |
|
"loss": 0.747, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.20688302774781842, |
|
"grad_norm": 9.248473167419434, |
|
"learning_rate": 2.757897357846988e-05, |
|
"loss": 1.4235, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.20707912540445142, |
|
"grad_norm": 4.8141984939575195, |
|
"learning_rate": 2.7422063969421285e-05, |
|
"loss": 0.5879, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.20727522306108442, |
|
"grad_norm": 4.95810604095459, |
|
"learning_rate": 2.7265531043041535e-05, |
|
"loss": 0.8899, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.20747132071771743, |
|
"grad_norm": 8.220407485961914, |
|
"learning_rate": 2.710937561174337e-05, |
|
"loss": 1.5904, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.20766741837435043, |
|
"grad_norm": 5.406398296356201, |
|
"learning_rate": 2.6953598485980336e-05, |
|
"loss": 2.2579, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.20786351603098344, |
|
"grad_norm": 7.9236273765563965, |
|
"learning_rate": 2.679820047424253e-05, |
|
"loss": 1.6289, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.20805961368761644, |
|
"grad_norm": 11.548074722290039, |
|
"learning_rate": 2.6643182383052446e-05, |
|
"loss": 1.3829, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.20825571134424944, |
|
"grad_norm": 6.566219329833984, |
|
"learning_rate": 2.6488545016960776e-05, |
|
"loss": 1.5911, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.20845180900088245, |
|
"grad_norm": 6.300790786743164, |
|
"learning_rate": 2.6334289178542226e-05, |
|
"loss": 0.8637, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.20864790665751545, |
|
"grad_norm": 5.563969135284424, |
|
"learning_rate": 2.6180415668391356e-05, |
|
"loss": 1.388, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.20884400431414846, |
|
"grad_norm": 6.617629528045654, |
|
"learning_rate": 2.602692528511843e-05, |
|
"loss": 1.1707, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.20904010197078146, |
|
"grad_norm": 6.338384628295898, |
|
"learning_rate": 2.5873818825345254e-05, |
|
"loss": 1.3016, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.20923619962741447, |
|
"grad_norm": 5.3434271812438965, |
|
"learning_rate": 2.5721097083701084e-05, |
|
"loss": 1.5446, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.20943229728404747, |
|
"grad_norm": 28.035884857177734, |
|
"learning_rate": 2.556876085281843e-05, |
|
"loss": 1.168, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.20962839494068045, |
|
"grad_norm": 14.401495933532715, |
|
"learning_rate": 2.5416810923329028e-05, |
|
"loss": 1.1242, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.20982449259731345, |
|
"grad_norm": 6.766130447387695, |
|
"learning_rate": 2.5265248083859648e-05, |
|
"loss": 1.4922, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.21002059025394645, |
|
"grad_norm": 5.8070292472839355, |
|
"learning_rate": 2.5114073121028093e-05, |
|
"loss": 1.7635, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.21021668791057946, |
|
"grad_norm": 5.450509548187256, |
|
"learning_rate": 2.4963286819439037e-05, |
|
"loss": 0.7839, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.21041278556721246, |
|
"grad_norm": 4.60319185256958, |
|
"learning_rate": 2.4812889961679986e-05, |
|
"loss": 1.8491, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.21060888322384547, |
|
"grad_norm": 3.9338133335113525, |
|
"learning_rate": 2.4662883328317222e-05, |
|
"loss": 0.9618, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.21080498088047847, |
|
"grad_norm": 7.811015605926514, |
|
"learning_rate": 2.451326769789176e-05, |
|
"loss": 1.5204, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.21100107853711147, |
|
"grad_norm": 6.122691631317139, |
|
"learning_rate": 2.4364043846915274e-05, |
|
"loss": 1.0331, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.21119717619374448, |
|
"grad_norm": 8.609393119812012, |
|
"learning_rate": 2.4215212549866116e-05, |
|
"loss": 1.9155, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.21139327385037748, |
|
"grad_norm": 7.292634963989258, |
|
"learning_rate": 2.4066774579185158e-05, |
|
"loss": 1.2971, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.2115893715070105, |
|
"grad_norm": 5.975107192993164, |
|
"learning_rate": 2.3918730705272064e-05, |
|
"loss": 1.4156, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.2117854691636435, |
|
"grad_norm": 14.805880546569824, |
|
"learning_rate": 2.377108169648098e-05, |
|
"loss": 1.0787, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2119815668202765, |
|
"grad_norm": 5.720880031585693, |
|
"learning_rate": 2.3623828319116748e-05, |
|
"loss": 1.3329, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.2121776644769095, |
|
"grad_norm": 7.515237331390381, |
|
"learning_rate": 2.3476971337430875e-05, |
|
"loss": 1.7147, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.2123737621335425, |
|
"grad_norm": 6.027990341186523, |
|
"learning_rate": 2.3330511513617448e-05, |
|
"loss": 1.9171, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.2125698597901755, |
|
"grad_norm": 4.526289463043213, |
|
"learning_rate": 2.318444960780949e-05, |
|
"loss": 0.6467, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.2127659574468085, |
|
"grad_norm": 9.184370040893555, |
|
"learning_rate": 2.3038786378074574e-05, |
|
"loss": 1.6125, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.21296205510344152, |
|
"grad_norm": 7.263781547546387, |
|
"learning_rate": 2.289352258041133e-05, |
|
"loss": 1.6612, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.21315815276007452, |
|
"grad_norm": 9.886373519897461, |
|
"learning_rate": 2.274865896874523e-05, |
|
"loss": 2.4533, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.21335425041670752, |
|
"grad_norm": 9.56613540649414, |
|
"learning_rate": 2.2604196294924694e-05, |
|
"loss": 2.0031, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.21355034807334053, |
|
"grad_norm": 4.8680925369262695, |
|
"learning_rate": 2.2460135308717445e-05, |
|
"loss": 1.2974, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.21374644572997353, |
|
"grad_norm": 4.032529830932617, |
|
"learning_rate": 2.231647675780619e-05, |
|
"loss": 0.7491, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.21394254338660654, |
|
"grad_norm": 5.186695098876953, |
|
"learning_rate": 2.2173221387785216e-05, |
|
"loss": 1.2152, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.21413864104323954, |
|
"grad_norm": 5.538721561431885, |
|
"learning_rate": 2.2030369942156072e-05, |
|
"loss": 1.6134, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.21433473869987255, |
|
"grad_norm": 8.213644027709961, |
|
"learning_rate": 2.1887923162324097e-05, |
|
"loss": 1.5288, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.21453083635650555, |
|
"grad_norm": 8.110743522644043, |
|
"learning_rate": 2.1745881787594334e-05, |
|
"loss": 0.8424, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.21472693401313855, |
|
"grad_norm": 6.066911697387695, |
|
"learning_rate": 2.1604246555167638e-05, |
|
"loss": 1.9787, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.21492303166977156, |
|
"grad_norm": 3.873046636581421, |
|
"learning_rate": 2.1463018200137196e-05, |
|
"loss": 1.7871, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.21511912932640456, |
|
"grad_norm": 7.206579208374023, |
|
"learning_rate": 2.1322197455484248e-05, |
|
"loss": 1.3871, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.21531522698303757, |
|
"grad_norm": 7.201257705688477, |
|
"learning_rate": 2.1181785052074756e-05, |
|
"loss": 1.2177, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.21551132463967054, |
|
"grad_norm": 5.7325239181518555, |
|
"learning_rate": 2.104178171865513e-05, |
|
"loss": 1.0547, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.21570742229630355, |
|
"grad_norm": 4.294105052947998, |
|
"learning_rate": 2.0902188181848838e-05, |
|
"loss": 1.9177, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21590351995293655, |
|
"grad_norm": 4.344897270202637, |
|
"learning_rate": 2.0763005166152517e-05, |
|
"loss": 1.2101, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.21609961760956956, |
|
"grad_norm": 10.920205116271973, |
|
"learning_rate": 2.0624233393932024e-05, |
|
"loss": 1.0783, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.21629571526620256, |
|
"grad_norm": 6.155653953552246, |
|
"learning_rate": 2.0485873585419035e-05, |
|
"loss": 2.6903, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.21649181292283556, |
|
"grad_norm": 7.211439609527588, |
|
"learning_rate": 2.0347926458706945e-05, |
|
"loss": 1.8343, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.21668791057946857, |
|
"grad_norm": 8.261204719543457, |
|
"learning_rate": 2.021039272974742e-05, |
|
"loss": 2.3539, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.21688400823610157, |
|
"grad_norm": 5.709137916564941, |
|
"learning_rate": 2.0073273112346526e-05, |
|
"loss": 1.0312, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.21708010589273458, |
|
"grad_norm": 6.048385143280029, |
|
"learning_rate": 1.9936568318161076e-05, |
|
"loss": 1.086, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.21727620354936758, |
|
"grad_norm": 4.2964677810668945, |
|
"learning_rate": 1.9800279056695005e-05, |
|
"loss": 0.7691, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.21747230120600058, |
|
"grad_norm": 4.8451972007751465, |
|
"learning_rate": 1.966440603529549e-05, |
|
"loss": 1.4864, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.2176683988626336, |
|
"grad_norm": 5.61348295211792, |
|
"learning_rate": 1.952894995914949e-05, |
|
"loss": 1.5675, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2178644965192666, |
|
"grad_norm": 6.666314601898193, |
|
"learning_rate": 1.9393911531279974e-05, |
|
"loss": 1.3273, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.2180605941758996, |
|
"grad_norm": 5.22393798828125, |
|
"learning_rate": 1.9259291452542293e-05, |
|
"loss": 1.4175, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.2182566918325326, |
|
"grad_norm": 7.283446311950684, |
|
"learning_rate": 1.9125090421620574e-05, |
|
"loss": 2.5381, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.2184527894891656, |
|
"grad_norm": 8.553428649902344, |
|
"learning_rate": 1.8991309135024004e-05, |
|
"loss": 2.0767, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.2186488871457986, |
|
"grad_norm": 6.280401706695557, |
|
"learning_rate": 1.8857948287083416e-05, |
|
"loss": 0.9069, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.2188449848024316, |
|
"grad_norm": 14.387880325317383, |
|
"learning_rate": 1.8725008569947365e-05, |
|
"loss": 1.6501, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.21904108245906462, |
|
"grad_norm": 8.282727241516113, |
|
"learning_rate": 1.8592490673578843e-05, |
|
"loss": 1.645, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.21923718011569762, |
|
"grad_norm": 7.460145950317383, |
|
"learning_rate": 1.8460395285751542e-05, |
|
"loss": 2.3436, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.21943327777233063, |
|
"grad_norm": 4.364814758300781, |
|
"learning_rate": 1.8328723092046317e-05, |
|
"loss": 1.1313, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.21962937542896363, |
|
"grad_norm": 16.233600616455078, |
|
"learning_rate": 1.8197474775847613e-05, |
|
"loss": 2.5969, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.21982547308559663, |
|
"grad_norm": 7.081477165222168, |
|
"learning_rate": 1.806665101833994e-05, |
|
"loss": 2.4986, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.22002157074222964, |
|
"grad_norm": 4.212589740753174, |
|
"learning_rate": 1.7936252498504356e-05, |
|
"loss": 1.3044, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.22021766839886264, |
|
"grad_norm": 6.28343391418457, |
|
"learning_rate": 1.7806279893114875e-05, |
|
"loss": 1.7792, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.22041376605549565, |
|
"grad_norm": 6.392016410827637, |
|
"learning_rate": 1.7676733876735018e-05, |
|
"loss": 1.6957, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.22060986371212865, |
|
"grad_norm": 8.44856071472168, |
|
"learning_rate": 1.754761512171429e-05, |
|
"loss": 2.5809, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.22080596136876166, |
|
"grad_norm": 6.063395977020264, |
|
"learning_rate": 1.741892429818468e-05, |
|
"loss": 1.1537, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.22100205902539466, |
|
"grad_norm": 22.53998374938965, |
|
"learning_rate": 1.729066207405722e-05, |
|
"loss": 2.6686, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.22119815668202766, |
|
"grad_norm": 4.210419178009033, |
|
"learning_rate": 1.7162829115018452e-05, |
|
"loss": 1.2635, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.22139425433866064, |
|
"grad_norm": 9.418660163879395, |
|
"learning_rate": 1.7035426084527062e-05, |
|
"loss": 2.5926, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.22159035199529364, |
|
"grad_norm": 8.299819946289062, |
|
"learning_rate": 1.690845364381034e-05, |
|
"loss": 1.5026, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.22178644965192665, |
|
"grad_norm": 7.302555084228516, |
|
"learning_rate": 1.6781912451860827e-05, |
|
"loss": 1.5762, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.22198254730855965, |
|
"grad_norm": 8.947975158691406, |
|
"learning_rate": 1.665580316543286e-05, |
|
"loss": 0.9516, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.22217864496519266, |
|
"grad_norm": 7.167786121368408, |
|
"learning_rate": 1.653012643903915e-05, |
|
"loss": 2.3246, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.22237474262182566, |
|
"grad_norm": 8.101836204528809, |
|
"learning_rate": 1.640488292494743e-05, |
|
"loss": 1.5492, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.22257084027845866, |
|
"grad_norm": 7.395213603973389, |
|
"learning_rate": 1.628007327317701e-05, |
|
"loss": 1.7188, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.22276693793509167, |
|
"grad_norm": 3.9345545768737793, |
|
"learning_rate": 1.6155698131495454e-05, |
|
"loss": 0.7864, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.22296303559172467, |
|
"grad_norm": 6.147872447967529, |
|
"learning_rate": 1.603175814541522e-05, |
|
"loss": 1.7044, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.22315913324835768, |
|
"grad_norm": 5.625443458557129, |
|
"learning_rate": 1.5908253958190256e-05, |
|
"loss": 2.0135, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.22335523090499068, |
|
"grad_norm": 8.224568367004395, |
|
"learning_rate": 1.5785186210812698e-05, |
|
"loss": 1.4209, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.22355132856162369, |
|
"grad_norm": 5.80567741394043, |
|
"learning_rate": 1.566255554200955e-05, |
|
"loss": 0.8693, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2237474262182567, |
|
"grad_norm": 7.716653347015381, |
|
"learning_rate": 1.5540362588239364e-05, |
|
"loss": 2.0515, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.2239435238748897, |
|
"grad_norm": 5.845829963684082, |
|
"learning_rate": 1.5418607983688927e-05, |
|
"loss": 1.3831, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.2241396215315227, |
|
"grad_norm": 5.728200435638428, |
|
"learning_rate": 1.529729236026999e-05, |
|
"loss": 1.6435, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.2243357191881557, |
|
"grad_norm": 9.004356384277344, |
|
"learning_rate": 1.5176416347615885e-05, |
|
"loss": 2.4094, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.2245318168447887, |
|
"grad_norm": 7.60123872756958, |
|
"learning_rate": 1.5055980573078487e-05, |
|
"loss": 1.6575, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.2247279145014217, |
|
"grad_norm": 6.572809219360352, |
|
"learning_rate": 1.4935985661724727e-05, |
|
"loss": 1.5638, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.22492401215805471, |
|
"grad_norm": 5.095000267028809, |
|
"learning_rate": 1.4816432236333444e-05, |
|
"loss": 1.6261, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.22512010981468772, |
|
"grad_norm": 5.581939220428467, |
|
"learning_rate": 1.4697320917392188e-05, |
|
"loss": 1.0154, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.22531620747132072, |
|
"grad_norm": 7.922572135925293, |
|
"learning_rate": 1.4578652323093855e-05, |
|
"loss": 3.1376, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.22551230512795373, |
|
"grad_norm": 8.814900398254395, |
|
"learning_rate": 1.4460427069333726e-05, |
|
"loss": 1.8381, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.22570840278458673, |
|
"grad_norm": 8.666311264038086, |
|
"learning_rate": 1.4342645769705977e-05, |
|
"loss": 0.7864, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.22590450044121974, |
|
"grad_norm": 4.692161560058594, |
|
"learning_rate": 1.4225309035500778e-05, |
|
"loss": 1.2963, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.22610059809785274, |
|
"grad_norm": 4.93988561630249, |
|
"learning_rate": 1.4108417475700908e-05, |
|
"loss": 0.8273, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.22629669575448574, |
|
"grad_norm": 5.998587131500244, |
|
"learning_rate": 1.3991971696978645e-05, |
|
"loss": 2.5023, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.22649279341111875, |
|
"grad_norm": 7.898712635040283, |
|
"learning_rate": 1.3875972303692752e-05, |
|
"loss": 1.7302, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.22668889106775175, |
|
"grad_norm": 4.3550190925598145, |
|
"learning_rate": 1.376041989788508e-05, |
|
"loss": 0.6245, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.22688498872438476, |
|
"grad_norm": 9.550539016723633, |
|
"learning_rate": 1.3645315079277765e-05, |
|
"loss": 1.7358, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.22708108638101776, |
|
"grad_norm": 7.5146164894104, |
|
"learning_rate": 1.3530658445269783e-05, |
|
"loss": 1.5159, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.22727718403765074, |
|
"grad_norm": 8.449292182922363, |
|
"learning_rate": 1.341645059093415e-05, |
|
"loss": 2.5644, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.22747328169428374, |
|
"grad_norm": 7.365999698638916, |
|
"learning_rate": 1.3302692109014625e-05, |
|
"loss": 1.3292, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.22766937935091675, |
|
"grad_norm": 4.936117172241211, |
|
"learning_rate": 1.3189383589922665e-05, |
|
"loss": 0.636, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.22786547700754975, |
|
"grad_norm": 22.70414161682129, |
|
"learning_rate": 1.3076525621734526e-05, |
|
"loss": 1.5574, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.22806157466418275, |
|
"grad_norm": 6.079709053039551, |
|
"learning_rate": 1.2964118790187929e-05, |
|
"loss": 1.8188, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.22825767232081576, |
|
"grad_norm": 6.864228248596191, |
|
"learning_rate": 1.2852163678679341e-05, |
|
"loss": 1.2751, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.22845376997744876, |
|
"grad_norm": 3.384364366531372, |
|
"learning_rate": 1.2740660868260633e-05, |
|
"loss": 1.3381, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.22864986763408177, |
|
"grad_norm": 4.77728796005249, |
|
"learning_rate": 1.2629610937636283e-05, |
|
"loss": 1.9661, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.22884596529071477, |
|
"grad_norm": 4.837019443511963, |
|
"learning_rate": 1.251901446316035e-05, |
|
"loss": 1.7805, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.22904206294734777, |
|
"grad_norm": 4.927963733673096, |
|
"learning_rate": 1.2408872018833296e-05, |
|
"loss": 0.8884, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.22923816060398078, |
|
"grad_norm": 7.3830647468566895, |
|
"learning_rate": 1.2299184176299339e-05, |
|
"loss": 1.1489, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.22943425826061378, |
|
"grad_norm": 4.659049987792969, |
|
"learning_rate": 1.2189951504843112e-05, |
|
"loss": 0.6723, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.2296303559172468, |
|
"grad_norm": 10.427599906921387, |
|
"learning_rate": 1.2081174571386989e-05, |
|
"loss": 1.1733, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.2298264535738798, |
|
"grad_norm": 13.781340599060059, |
|
"learning_rate": 1.1972853940488015e-05, |
|
"loss": 2.0666, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.2300225512305128, |
|
"grad_norm": 4.749293804168701, |
|
"learning_rate": 1.1864990174335012e-05, |
|
"loss": 1.657, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.2302186488871458, |
|
"grad_norm": 5.438106536865234, |
|
"learning_rate": 1.17575838327457e-05, |
|
"loss": 2.2866, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.2304147465437788, |
|
"grad_norm": 5.253787517547607, |
|
"learning_rate": 1.165063547316363e-05, |
|
"loss": 0.7286, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.2306108442004118, |
|
"grad_norm": 5.084885120391846, |
|
"learning_rate": 1.1544145650655514e-05, |
|
"loss": 1.5842, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.2308069418570448, |
|
"grad_norm": 6.295192718505859, |
|
"learning_rate": 1.1438114917908193e-05, |
|
"loss": 1.4375, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.23100303951367782, |
|
"grad_norm": 7.397315502166748, |
|
"learning_rate": 1.1332543825225806e-05, |
|
"loss": 1.5273, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.23119913717031082, |
|
"grad_norm": 4.620631217956543, |
|
"learning_rate": 1.122743292052697e-05, |
|
"loss": 2.3138, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.23139523482694382, |
|
"grad_norm": 6.895712852478027, |
|
"learning_rate": 1.1122782749341843e-05, |
|
"loss": 2.7047, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.23159133248357683, |
|
"grad_norm": 5.031332969665527, |
|
"learning_rate": 1.1018593854809478e-05, |
|
"loss": 1.3471, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.23178743014020983, |
|
"grad_norm": 5.385929584503174, |
|
"learning_rate": 1.0914866777674737e-05, |
|
"loss": 1.7638, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.23198352779684284, |
|
"grad_norm": 6.311648368835449, |
|
"learning_rate": 1.081160205628572e-05, |
|
"loss": 1.4686, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.23217962545347584, |
|
"grad_norm": 7.770483016967773, |
|
"learning_rate": 1.0708800226590854e-05, |
|
"loss": 0.9346, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.23237572311010884, |
|
"grad_norm": 7.618231773376465, |
|
"learning_rate": 1.0606461822136137e-05, |
|
"loss": 1.0662, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.23257182076674185, |
|
"grad_norm": 8.606494903564453, |
|
"learning_rate": 1.0504587374062391e-05, |
|
"loss": 1.5704, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.23276791842337485, |
|
"grad_norm": 25.36232566833496, |
|
"learning_rate": 1.0403177411102438e-05, |
|
"loss": 2.207, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.23296401608000786, |
|
"grad_norm": 8.098196983337402, |
|
"learning_rate": 1.0302232459578454e-05, |
|
"loss": 2.2944, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.23316011373664083, |
|
"grad_norm": 6.22314977645874, |
|
"learning_rate": 1.0201753043399143e-05, |
|
"loss": 1.62, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.23335621139327384, |
|
"grad_norm": 9.491933822631836, |
|
"learning_rate": 1.0101739684057098e-05, |
|
"loss": 1.392, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.23355230904990684, |
|
"grad_norm": 6.178860187530518, |
|
"learning_rate": 1.0002192900626028e-05, |
|
"loss": 1.6095, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.23374840670653985, |
|
"grad_norm": 5.492360591888428, |
|
"learning_rate": 9.903113209758096e-06, |
|
"loss": 2.4164, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.23394450436317285, |
|
"grad_norm": 7.380622386932373, |
|
"learning_rate": 9.804501125681243e-06, |
|
"loss": 1.8485, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.23414060201980585, |
|
"grad_norm": 6.217007637023926, |
|
"learning_rate": 9.70635716019651e-06, |
|
"loss": 1.5716, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.23433669967643886, |
|
"grad_norm": 3.790039539337158, |
|
"learning_rate": 9.608681822675381e-06, |
|
"loss": 1.5046, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.23453279733307186, |
|
"grad_norm": 4.153253555297852, |
|
"learning_rate": 9.51147562005713e-06, |
|
"loss": 0.7896, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.23472889498970487, |
|
"grad_norm": 5.173237323760986, |
|
"learning_rate": 9.414739056846222e-06, |
|
"loss": 1.1483, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.23492499264633787, |
|
"grad_norm": 6.67634916305542, |
|
"learning_rate": 9.318472635109653e-06, |
|
"loss": 1.0958, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.23512109030297088, |
|
"grad_norm": 5.527042388916016, |
|
"learning_rate": 9.222676854474365e-06, |
|
"loss": 2.349, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.23531718795960388, |
|
"grad_norm": 6.437062740325928, |
|
"learning_rate": 9.127352212124662e-06, |
|
"loss": 2.2577, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.23551328561623688, |
|
"grad_norm": 6.333834171295166, |
|
"learning_rate": 9.032499202799628e-06, |
|
"loss": 1.5634, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.2357093832728699, |
|
"grad_norm": 8.251330375671387, |
|
"learning_rate": 8.938118318790522e-06, |
|
"loss": 1.6758, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.2359054809295029, |
|
"grad_norm": 11.161762237548828, |
|
"learning_rate": 8.844210049938262e-06, |
|
"loss": 0.9043, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.2361015785861359, |
|
"grad_norm": 5.800900936126709, |
|
"learning_rate": 8.750774883630908e-06, |
|
"loss": 2.1929, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.2362976762427689, |
|
"grad_norm": 4.731690883636475, |
|
"learning_rate": 8.657813304801043e-06, |
|
"loss": 0.5826, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.2364937738994019, |
|
"grad_norm": 4.208858966827393, |
|
"learning_rate": 8.565325795923341e-06, |
|
"loss": 0.9173, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.2366898715560349, |
|
"grad_norm": 4.98720645904541, |
|
"learning_rate": 8.473312837012026e-06, |
|
"loss": 1.7575, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.2368859692126679, |
|
"grad_norm": 6.606943130493164, |
|
"learning_rate": 8.3817749056184e-06, |
|
"loss": 1.6911, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.23708206686930092, |
|
"grad_norm": 6.240574359893799, |
|
"learning_rate": 8.290712476828332e-06, |
|
"loss": 2.235, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.23727816452593392, |
|
"grad_norm": 7.585755348205566, |
|
"learning_rate": 8.200126023259791e-06, |
|
"loss": 1.4902, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.23747426218256693, |
|
"grad_norm": 7.400862693786621, |
|
"learning_rate": 8.110016015060484e-06, |
|
"loss": 1.9178, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.23767035983919993, |
|
"grad_norm": 9.042954444885254, |
|
"learning_rate": 8.020382919905278e-06, |
|
"loss": 2.6052, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.23786645749583293, |
|
"grad_norm": 15.022893905639648, |
|
"learning_rate": 7.931227202993873e-06, |
|
"loss": 1.697, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.23806255515246594, |
|
"grad_norm": 11.485679626464844, |
|
"learning_rate": 7.842549327048365e-06, |
|
"loss": 2.7337, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.23825865280909894, |
|
"grad_norm": 4.79671049118042, |
|
"learning_rate": 7.754349752310752e-06, |
|
"loss": 1.7505, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.23845475046573195, |
|
"grad_norm": 4.849589824676514, |
|
"learning_rate": 7.666628936540776e-06, |
|
"loss": 0.7821, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.23865084812236495, |
|
"grad_norm": 4.220393180847168, |
|
"learning_rate": 7.579387335013255e-06, |
|
"loss": 0.7812, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.23884694577899795, |
|
"grad_norm": 12.55904769897461, |
|
"learning_rate": 7.492625400515951e-06, |
|
"loss": 1.8666, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.23904304343563093, |
|
"grad_norm": 5.080347537994385, |
|
"learning_rate": 7.406343583347119e-06, |
|
"loss": 0.7878, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.23923914109226393, |
|
"grad_norm": 4.184628486633301, |
|
"learning_rate": 7.320542331313118e-06, |
|
"loss": 1.193, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.23943523874889694, |
|
"grad_norm": 4.931305885314941, |
|
"learning_rate": 7.235222089726279e-06, |
|
"loss": 0.9402, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.23963133640552994, |
|
"grad_norm": 8.1219482421875, |
|
"learning_rate": 7.15038330140233e-06, |
|
"loss": 2.1749, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.23982743406216295, |
|
"grad_norm": 5.320800304412842, |
|
"learning_rate": 7.066026406658355e-06, |
|
"loss": 1.0909, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.24002353171879595, |
|
"grad_norm": 5.9109907150268555, |
|
"learning_rate": 6.982151843310281e-06, |
|
"loss": 2.0466, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.24021962937542896, |
|
"grad_norm": 4.978448390960693, |
|
"learning_rate": 6.898760046670815e-06, |
|
"loss": 2.8843, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.24041572703206196, |
|
"grad_norm": 6.749833106994629, |
|
"learning_rate": 6.815851449547028e-06, |
|
"loss": 0.8198, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.24061182468869496, |
|
"grad_norm": 3.7781665325164795, |
|
"learning_rate": 6.7334264822381254e-06, |
|
"loss": 0.9111, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.24080792234532797, |
|
"grad_norm": 6.719789028167725, |
|
"learning_rate": 6.651485572533378e-06, |
|
"loss": 1.9275, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.24100402000196097, |
|
"grad_norm": 7.115839004516602, |
|
"learning_rate": 6.570029145709622e-06, |
|
"loss": 1.2663, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.24120011765859398, |
|
"grad_norm": 7.567049503326416, |
|
"learning_rate": 6.489057624529349e-06, |
|
"loss": 2.1002, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.24139621531522698, |
|
"grad_norm": 7.6808180809021, |
|
"learning_rate": 6.408571429238253e-06, |
|
"loss": 1.0844, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.24159231297185998, |
|
"grad_norm": 15.046584129333496, |
|
"learning_rate": 6.328570977563208e-06, |
|
"loss": 2.3513, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.241788410628493, |
|
"grad_norm": 9.350245475769043, |
|
"learning_rate": 6.24905668471013e-06, |
|
"loss": 1.81, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.241984508285126, |
|
"grad_norm": 5.964272499084473, |
|
"learning_rate": 6.170028963361618e-06, |
|
"loss": 2.0853, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.242180605941759, |
|
"grad_norm": 5.296453475952148, |
|
"learning_rate": 6.091488223675057e-06, |
|
"loss": 0.8561, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.242376703598392, |
|
"grad_norm": 20.43738555908203, |
|
"learning_rate": 6.013434873280288e-06, |
|
"loss": 3.0297, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.242572801255025, |
|
"grad_norm": 5.732899188995361, |
|
"learning_rate": 5.935869317277643e-06, |
|
"loss": 1.4933, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.242768898911658, |
|
"grad_norm": 3.8854384422302246, |
|
"learning_rate": 5.858791958235754e-06, |
|
"loss": 0.8324, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.24296499656829101, |
|
"grad_norm": 6.640622615814209, |
|
"learning_rate": 5.782203196189461e-06, |
|
"loss": 1.5419, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.24316109422492402, |
|
"grad_norm": 8.89739990234375, |
|
"learning_rate": 5.706103428637865e-06, |
|
"loss": 0.7287, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.24335719188155702, |
|
"grad_norm": 3.595503568649292, |
|
"learning_rate": 5.630493050542041e-06, |
|
"loss": 0.8245, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.24355328953819003, |
|
"grad_norm": 8.166557312011719, |
|
"learning_rate": 5.5553724543231825e-06, |
|
"loss": 1.5169, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.24374938719482303, |
|
"grad_norm": 11.863232612609863, |
|
"learning_rate": 5.480742029860464e-06, |
|
"loss": 2.4468, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.24394548485145603, |
|
"grad_norm": 4.747961044311523, |
|
"learning_rate": 5.406602164489072e-06, |
|
"loss": 1.1186, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.24414158250808904, |
|
"grad_norm": 3.378335952758789, |
|
"learning_rate": 5.332953242998151e-06, |
|
"loss": 1.222, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.24433768016472204, |
|
"grad_norm": 8.078283309936523, |
|
"learning_rate": 5.259795647628818e-06, |
|
"loss": 2.1882, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.24453377782135505, |
|
"grad_norm": 6.306114196777344, |
|
"learning_rate": 5.1871297580722515e-06, |
|
"loss": 1.9767, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.24472987547798805, |
|
"grad_norm": 4.042963981628418, |
|
"learning_rate": 5.114955951467537e-06, |
|
"loss": 1.6329, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.24492597313462103, |
|
"grad_norm": 4.9465179443359375, |
|
"learning_rate": 5.043274602399939e-06, |
|
"loss": 1.4165, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.24512207079125403, |
|
"grad_norm": 7.358532428741455, |
|
"learning_rate": 4.972086082898775e-06, |
|
"loss": 1.5899, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.24531816844788704, |
|
"grad_norm": 4.857394218444824, |
|
"learning_rate": 4.901390762435587e-06, |
|
"loss": 0.5367, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.24551426610452004, |
|
"grad_norm": 7.886226177215576, |
|
"learning_rate": 4.831189007922199e-06, |
|
"loss": 2.0356, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.24571036376115304, |
|
"grad_norm": 6.1324872970581055, |
|
"learning_rate": 4.761481183708783e-06, |
|
"loss": 1.1557, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.24590646141778605, |
|
"grad_norm": 4.222672939300537, |
|
"learning_rate": 4.692267651581994e-06, |
|
"loss": 1.5599, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.24610255907441905, |
|
"grad_norm": 6.940048694610596, |
|
"learning_rate": 4.6235487707631085e-06, |
|
"loss": 1.9772, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.24629865673105206, |
|
"grad_norm": 7.745510578155518, |
|
"learning_rate": 4.555324897906132e-06, |
|
"loss": 1.7293, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.24649475438768506, |
|
"grad_norm": 4.879486560821533, |
|
"learning_rate": 4.48759638709596e-06, |
|
"loss": 0.5805, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.24669085204431807, |
|
"grad_norm": 8.680879592895508, |
|
"learning_rate": 4.42036358984651e-06, |
|
"loss": 1.9445, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.24688694970095107, |
|
"grad_norm": 9.775550842285156, |
|
"learning_rate": 4.353626855098958e-06, |
|
"loss": 1.8064, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.24708304735758407, |
|
"grad_norm": 4.5769805908203125, |
|
"learning_rate": 4.287386529219894e-06, |
|
"loss": 1.3112, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.24727914501421708, |
|
"grad_norm": 9.00042724609375, |
|
"learning_rate": 4.221642955999494e-06, |
|
"loss": 1.9532, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.24747524267085008, |
|
"grad_norm": 5.064240455627441, |
|
"learning_rate": 4.156396476649782e-06, |
|
"loss": 1.262, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.24767134032748309, |
|
"grad_norm": 5.231726169586182, |
|
"learning_rate": 4.091647429802869e-06, |
|
"loss": 0.9197, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.2478674379841161, |
|
"grad_norm": 4.577012062072754, |
|
"learning_rate": 4.027396151509133e-06, |
|
"loss": 0.9262, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.2480635356407491, |
|
"grad_norm": 11.125844955444336, |
|
"learning_rate": 3.963642975235515e-06, |
|
"loss": 0.8612, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.2482596332973821, |
|
"grad_norm": 6.266839981079102, |
|
"learning_rate": 3.900388231863805e-06, |
|
"loss": 1.8149, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.2484557309540151, |
|
"grad_norm": 11.235099792480469, |
|
"learning_rate": 3.8376322496888825e-06, |
|
"loss": 1.9527, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.2486518286106481, |
|
"grad_norm": 5.78483247756958, |
|
"learning_rate": 3.7753753544170655e-06, |
|
"loss": 1.5316, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.2488479262672811, |
|
"grad_norm": 7.571983814239502, |
|
"learning_rate": 3.7136178691643433e-06, |
|
"loss": 1.8357, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.24904402392391412, |
|
"grad_norm": 10.283147811889648, |
|
"learning_rate": 3.6523601144548003e-06, |
|
"loss": 1.9926, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.24924012158054712, |
|
"grad_norm": 4.631470203399658, |
|
"learning_rate": 3.5916024082188414e-06, |
|
"loss": 0.7877, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.24943621923718012, |
|
"grad_norm": 5.498098373413086, |
|
"learning_rate": 3.531345065791636e-06, |
|
"loss": 3.0685, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.24963231689381313, |
|
"grad_norm": 5.340700149536133, |
|
"learning_rate": 3.471588399911441e-06, |
|
"loss": 1.7174, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.24982841455044613, |
|
"grad_norm": 8.994786262512207, |
|
"learning_rate": 3.4123327207179477e-06, |
|
"loss": 1.722, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.2500245122070791, |
|
"grad_norm": 4.499628067016602, |
|
"learning_rate": 3.3535783357507624e-06, |
|
"loss": 1.3553, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.25022060986371214, |
|
"grad_norm": 13.06672477722168, |
|
"learning_rate": 3.29532554994767e-06, |
|
"loss": 2.8721, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.2504167075203451, |
|
"grad_norm": 4.79543399810791, |
|
"learning_rate": 3.2375746656432284e-06, |
|
"loss": 1.0701, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.25061280517697815, |
|
"grad_norm": 5.686835765838623, |
|
"learning_rate": 3.180325982567034e-06, |
|
"loss": 1.2648, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.2508089028336111, |
|
"grad_norm": 4.657541275024414, |
|
"learning_rate": 3.1235797978422687e-06, |
|
"loss": 1.3918, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.25100500049024416, |
|
"grad_norm": 10.92974853515625, |
|
"learning_rate": 3.0673364059841338e-06, |
|
"loss": 2.4811, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.25120109814687713, |
|
"grad_norm": 5.082363128662109, |
|
"learning_rate": 3.0115960988982506e-06, |
|
"loss": 2.2096, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.25139719580351017, |
|
"grad_norm": 3.3826422691345215, |
|
"learning_rate": 2.9563591658793076e-06, |
|
"loss": 0.8372, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.25159329346014314, |
|
"grad_norm": 5.3609113693237305, |
|
"learning_rate": 2.901625893609361e-06, |
|
"loss": 1.2737, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.2517893911167762, |
|
"grad_norm": 4.7862043380737305, |
|
"learning_rate": 2.8473965661565347e-06, |
|
"loss": 0.7095, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.25198548877340915, |
|
"grad_norm": 8.444502830505371, |
|
"learning_rate": 2.793671464973413e-06, |
|
"loss": 2.1887, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.2521815864300422, |
|
"grad_norm": 5.742269039154053, |
|
"learning_rate": 2.740450868895583e-06, |
|
"loss": 1.1412, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.25237768408667516, |
|
"grad_norm": 7.201744079589844, |
|
"learning_rate": 2.687735054140317e-06, |
|
"loss": 1.3327, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.2525737817433082, |
|
"grad_norm": 7.618087291717529, |
|
"learning_rate": 2.63552429430497e-06, |
|
"loss": 1.9268, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.25276987939994117, |
|
"grad_norm": 7.360147953033447, |
|
"learning_rate": 2.5838188603657056e-06, |
|
"loss": 1.0334, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.2529659770565742, |
|
"grad_norm": 5.17711067199707, |
|
"learning_rate": 2.5326190206759527e-06, |
|
"loss": 1.784, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.2531620747132072, |
|
"grad_norm": 6.017335891723633, |
|
"learning_rate": 2.4819250409651607e-06, |
|
"loss": 0.9696, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.2533581723698402, |
|
"grad_norm": 6.2475433349609375, |
|
"learning_rate": 2.4317371843372904e-06, |
|
"loss": 2.4048, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.2535542700264732, |
|
"grad_norm": 6.189507484436035, |
|
"learning_rate": 2.3820557112695153e-06, |
|
"loss": 1.3096, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.25375036768310616, |
|
"grad_norm": 7.4594011306762695, |
|
"learning_rate": 2.3328808796108657e-06, |
|
"loss": 0.8887, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.2539464653397392, |
|
"grad_norm": 4.671147346496582, |
|
"learning_rate": 2.2842129445808546e-06, |
|
"loss": 1.3748, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.25414256299637217, |
|
"grad_norm": 4.567478179931641, |
|
"learning_rate": 2.2360521587682313e-06, |
|
"loss": 0.8291, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.2543386606530052, |
|
"grad_norm": 7.609241008758545, |
|
"learning_rate": 2.188398772129552e-06, |
|
"loss": 2.9765, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.2545347583096382, |
|
"grad_norm": 9.003718376159668, |
|
"learning_rate": 2.1412530319879887e-06, |
|
"loss": 3.6435, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.2547308559662712, |
|
"grad_norm": 6.3176164627075195, |
|
"learning_rate": 2.0946151830320224e-06, |
|
"loss": 1.2652, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.2549269536229042, |
|
"grad_norm": 12.08651065826416, |
|
"learning_rate": 2.0484854673140983e-06, |
|
"loss": 2.2016, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2551230512795372, |
|
"grad_norm": 3.733414649963379, |
|
"learning_rate": 2.002864124249504e-06, |
|
"loss": 2.3059, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.2553191489361702, |
|
"grad_norm": 8.422008514404297, |
|
"learning_rate": 1.9577513906149702e-06, |
|
"loss": 1.5297, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.2555152465928032, |
|
"grad_norm": 7.98868989944458, |
|
"learning_rate": 1.913147500547574e-06, |
|
"loss": 2.1541, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.2557113442494362, |
|
"grad_norm": 4.629753589630127, |
|
"learning_rate": 1.869052685543471e-06, |
|
"loss": 1.617, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.25590744190606923, |
|
"grad_norm": 4.7464823722839355, |
|
"learning_rate": 1.825467174456652e-06, |
|
"loss": 0.7396, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.2561035395627022, |
|
"grad_norm": 6.383626937866211, |
|
"learning_rate": 1.7823911934978898e-06, |
|
"loss": 2.6406, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.25629963721933524, |
|
"grad_norm": 3.751051902770996, |
|
"learning_rate": 1.73982496623335e-06, |
|
"loss": 2.1239, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.2564957348759682, |
|
"grad_norm": 8.627437591552734, |
|
"learning_rate": 1.6977687135836584e-06, |
|
"loss": 1.5667, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.25669183253260125, |
|
"grad_norm": 5.191834926605225, |
|
"learning_rate": 1.656222653822581e-06, |
|
"loss": 1.7599, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.2568879301892342, |
|
"grad_norm": 11.265420913696289, |
|
"learning_rate": 1.615187002576013e-06, |
|
"loss": 1.6896, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.25708402784586726, |
|
"grad_norm": 7.610350608825684, |
|
"learning_rate": 1.574661972820779e-06, |
|
"loss": 1.0143, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.25728012550250023, |
|
"grad_norm": 2.748887538909912, |
|
"learning_rate": 1.5346477748835354e-06, |
|
"loss": 0.3839, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.25747622315913327, |
|
"grad_norm": 9.816802024841309, |
|
"learning_rate": 1.4951446164397587e-06, |
|
"loss": 1.659, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.25767232081576624, |
|
"grad_norm": 11.259073257446289, |
|
"learning_rate": 1.4561527025125476e-06, |
|
"loss": 1.9394, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.2578684184723993, |
|
"grad_norm": 5.475277423858643, |
|
"learning_rate": 1.4176722354716455e-06, |
|
"loss": 1.1844, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.25806451612903225, |
|
"grad_norm": 3.6779868602752686, |
|
"learning_rate": 1.379703415032374e-06, |
|
"loss": 1.5793, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.2582606137856653, |
|
"grad_norm": 7.077935695648193, |
|
"learning_rate": 1.3422464382545797e-06, |
|
"loss": 2.019, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.25845671144229826, |
|
"grad_norm": 3.54400634765625, |
|
"learning_rate": 1.3053014995415891e-06, |
|
"loss": 1.0317, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.2586528090989313, |
|
"grad_norm": 8.784947395324707, |
|
"learning_rate": 1.268868790639277e-06, |
|
"loss": 0.6858, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.25884890675556427, |
|
"grad_norm": 3.767857789993286, |
|
"learning_rate": 1.2329485006349895e-06, |
|
"loss": 1.6571, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.2590450044121973, |
|
"grad_norm": 2.997652769088745, |
|
"learning_rate": 1.1975408159566103e-06, |
|
"loss": 0.7133, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.2592411020688303, |
|
"grad_norm": 13.322317123413086, |
|
"learning_rate": 1.1626459203715633e-06, |
|
"loss": 2.6333, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.2594371997254633, |
|
"grad_norm": 7.647657871246338, |
|
"learning_rate": 1.128263994985901e-06, |
|
"loss": 1.3197, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.2596332973820963, |
|
"grad_norm": 5.545785903930664, |
|
"learning_rate": 1.0943952182433048e-06, |
|
"loss": 1.8365, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.25982939503872926, |
|
"grad_norm": 6.008249759674072, |
|
"learning_rate": 1.0610397659242322e-06, |
|
"loss": 0.7947, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.2600254926953623, |
|
"grad_norm": 6.118869304656982, |
|
"learning_rate": 1.0281978111449375e-06, |
|
"loss": 2.1696, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.26022159035199527, |
|
"grad_norm": 5.802155017852783, |
|
"learning_rate": 9.958695243565853e-07, |
|
"loss": 1.3875, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.2604176880086283, |
|
"grad_norm": 5.157081604003906, |
|
"learning_rate": 9.640550733444275e-07, |
|
"loss": 1.7365, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.2606137856652613, |
|
"grad_norm": 4.770791053771973, |
|
"learning_rate": 9.327546232268392e-07, |
|
"loss": 1.0385, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.2608098833218943, |
|
"grad_norm": 6.321690559387207, |
|
"learning_rate": 9.019683364545395e-07, |
|
"loss": 1.4739, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.2610059809785273, |
|
"grad_norm": 10.451473236083984, |
|
"learning_rate": 8.71696372809705e-07, |
|
"loss": 2.4985, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.2612020786351603, |
|
"grad_norm": 4.130318641662598, |
|
"learning_rate": 8.419388894051472e-07, |
|
"loss": 0.9982, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.2613981762917933, |
|
"grad_norm": 6.775696277618408, |
|
"learning_rate": 8.126960406835249e-07, |
|
"loss": 1.3855, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.2615942739484263, |
|
"grad_norm": 4.182593822479248, |
|
"learning_rate": 7.839679784164778e-07, |
|
"loss": 1.0039, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.2617903716050593, |
|
"grad_norm": 8.561737060546875, |
|
"learning_rate": 7.557548517039381e-07, |
|
"loss": 2.7707, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.26198646926169233, |
|
"grad_norm": 7.187052249908447, |
|
"learning_rate": 7.28056806973243e-07, |
|
"loss": 2.3727, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.2621825669183253, |
|
"grad_norm": 6.421212673187256, |
|
"learning_rate": 7.008739879784787e-07, |
|
"loss": 1.4712, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.26237866457495834, |
|
"grad_norm": 5.969448089599609, |
|
"learning_rate": 6.742065357996486e-07, |
|
"loss": 2.0104, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.2625747622315913, |
|
"grad_norm": 3.251781463623047, |
|
"learning_rate": 6.480545888420176e-07, |
|
"loss": 0.7331, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.26277085988822435, |
|
"grad_norm": 8.010222434997559, |
|
"learning_rate": 6.224182828353242e-07, |
|
"loss": 2.0611, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.2629669575448573, |
|
"grad_norm": 14.670138359069824, |
|
"learning_rate": 5.972977508331368e-07, |
|
"loss": 1.6784, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.26316305520149036, |
|
"grad_norm": 3.959542751312256, |
|
"learning_rate": 5.726931232120869e-07, |
|
"loss": 1.1185, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.26335915285812334, |
|
"grad_norm": 3.680546998977661, |
|
"learning_rate": 5.486045276712926e-07, |
|
"loss": 1.0173, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.26355525051475637, |
|
"grad_norm": 8.655952453613281, |
|
"learning_rate": 5.250320892316252e-07, |
|
"loss": 1.4111, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.26375134817138934, |
|
"grad_norm": 5.929543495178223, |
|
"learning_rate": 5.019759302350547e-07, |
|
"loss": 1.7795, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.2639474458280224, |
|
"grad_norm": 6.462841987609863, |
|
"learning_rate": 4.794361703440719e-07, |
|
"loss": 0.7667, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.26414354348465535, |
|
"grad_norm": 11.463775634765625, |
|
"learning_rate": 4.57412926541001e-07, |
|
"loss": 1.8102, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 0.2643396411412884, |
|
"grad_norm": 8.908552169799805, |
|
"learning_rate": 4.3590631312746545e-07, |
|
"loss": 1.5951, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.26453573879792136, |
|
"grad_norm": 5.707728862762451, |
|
"learning_rate": 4.149164417237117e-07, |
|
"loss": 1.287, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 0.2647318364545544, |
|
"grad_norm": 18.28376579284668, |
|
"learning_rate": 3.944434212680981e-07, |
|
"loss": 3.0229, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.26492793411118737, |
|
"grad_norm": 7.537192344665527, |
|
"learning_rate": 3.744873580165176e-07, |
|
"loss": 1.6773, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 0.2651240317678204, |
|
"grad_norm": 5.802631378173828, |
|
"learning_rate": 3.5504835554177605e-07, |
|
"loss": 1.5217, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.2653201294244534, |
|
"grad_norm": 5.430902004241943, |
|
"learning_rate": 3.361265147331816e-07, |
|
"loss": 1.668, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 0.26551622708108635, |
|
"grad_norm": 8.240734100341797, |
|
"learning_rate": 3.177219337958892e-07, |
|
"loss": 1.1068, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.2657123247377194, |
|
"grad_norm": 10.376667976379395, |
|
"learning_rate": 2.998347082505126e-07, |
|
"loss": 2.2403, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.26590842239435236, |
|
"grad_norm": 6.063961505889893, |
|
"learning_rate": 2.8246493093250227e-07, |
|
"loss": 1.3754, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.2661045200509854, |
|
"grad_norm": 5.9845733642578125, |
|
"learning_rate": 2.6561269199179006e-07, |
|
"loss": 1.5861, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 0.26630061770761837, |
|
"grad_norm": 6.585875511169434, |
|
"learning_rate": 2.492780788922344e-07, |
|
"loss": 2.882, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.2664967153642514, |
|
"grad_norm": 5.5460968017578125, |
|
"learning_rate": 2.3346117641116494e-07, |
|
"loss": 1.7507, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 0.2666928130208844, |
|
"grad_norm": 6.179254531860352, |
|
"learning_rate": 2.1816206663902717e-07, |
|
"loss": 0.9413, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.2668889106775174, |
|
"grad_norm": 18.120824813842773, |
|
"learning_rate": 2.0338082897886079e-07, |
|
"loss": 2.7844, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 0.2670850083341504, |
|
"grad_norm": 4.4856157302856445, |
|
"learning_rate": 1.891175401459444e-07, |
|
"loss": 0.7925, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 0.2672811059907834, |
|
"grad_norm": 5.911020755767822, |
|
"learning_rate": 1.7537227416735135e-07, |
|
"loss": 1.7331, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 0.2674772036474164, |
|
"grad_norm": 4.898580551147461, |
|
"learning_rate": 1.6214510238163893e-07, |
|
"loss": 1.5288, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.2676733013040494, |
|
"grad_norm": 4.032687664031982, |
|
"learning_rate": 1.4943609343839316e-07, |
|
"loss": 1.6682, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.2678693989606824, |
|
"grad_norm": 4.88344144821167, |
|
"learning_rate": 1.37245313297929e-07, |
|
"loss": 0.8977, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 0.26806549661731544, |
|
"grad_norm": 5.699201583862305, |
|
"learning_rate": 1.2557282523094627e-07, |
|
"loss": 1.4562, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 0.2682615942739484, |
|
"grad_norm": 6.58184814453125, |
|
"learning_rate": 1.1441868981815207e-07, |
|
"loss": 0.9233, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.26845769193058144, |
|
"grad_norm": 7.048121929168701, |
|
"learning_rate": 1.0378296494999439e-07, |
|
"loss": 2.6469, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 0.2686537895872144, |
|
"grad_norm": 6.19679069519043, |
|
"learning_rate": 9.366570582637346e-08, |
|
"loss": 1.6399, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.26884988724384745, |
|
"grad_norm": 6.541812896728516, |
|
"learning_rate": 8.406696495627531e-08, |
|
"loss": 2.3217, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 0.26904598490048043, |
|
"grad_norm": 7.404893398284912, |
|
"learning_rate": 7.498679215761639e-08, |
|
"loss": 1.32, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.26924208255711346, |
|
"grad_norm": 6.199706077575684, |
|
"learning_rate": 6.642523455687721e-08, |
|
"loss": 1.0584, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 0.26943818021374644, |
|
"grad_norm": 4.55602502822876, |
|
"learning_rate": 5.838233658892467e-08, |
|
"loss": 1.0957, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 0.26963427787037947, |
|
"grad_norm": 8.639842987060547, |
|
"learning_rate": 5.08581399967345e-08, |
|
"loss": 1.0236, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.26983037552701244, |
|
"grad_norm": 4.639615535736084, |
|
"learning_rate": 4.3852683831235866e-08, |
|
"loss": 0.8647, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.2700264731836455, |
|
"grad_norm": 4.805739402770996, |
|
"learning_rate": 3.736600445104488e-08, |
|
"loss": 1.9424, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 0.27022257084027845, |
|
"grad_norm": 4.504196643829346, |
|
"learning_rate": 3.139813552230919e-08, |
|
"loss": 0.815, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 0.2704186684969115, |
|
"grad_norm": 8.973424911499023, |
|
"learning_rate": 2.5949108018530342e-08, |
|
"loss": 1.7934, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 0.27061476615354446, |
|
"grad_norm": 5.073940277099609, |
|
"learning_rate": 2.101895022040834e-08, |
|
"loss": 1.6873, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.2708108638101775, |
|
"grad_norm": 5.69566535949707, |
|
"learning_rate": 1.6607687715675113e-08, |
|
"loss": 1.5254, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 0.27100696146681047, |
|
"grad_norm": 2.8405473232269287, |
|
"learning_rate": 1.2715343398972402e-08, |
|
"loss": 1.5803, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 0.2712030591234435, |
|
"grad_norm": 3.1316022872924805, |
|
"learning_rate": 9.341937471740724e-09, |
|
"loss": 0.5618, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 0.2713991567800765, |
|
"grad_norm": 6.8218793869018555, |
|
"learning_rate": 6.487487442097262e-09, |
|
"loss": 0.5681, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.27159525443670945, |
|
"grad_norm": 6.514251232147217, |
|
"learning_rate": 4.1520081247803375e-09, |
|
"loss": 2.2744, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.2717913520933425, |
|
"grad_norm": 8.452823638916016, |
|
"learning_rate": 2.335511641005095e-09, |
|
"loss": 1.4333, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 0.27198744974997546, |
|
"grad_norm": 5.929966449737549, |
|
"learning_rate": 1.0380074184856981e-09, |
|
"loss": 1.6609, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 0.2721835474066085, |
|
"grad_norm": 3.682739019393921, |
|
"learning_rate": 2.595021913243123e-10, |
|
"loss": 1.7354, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.27237964506324147, |
|
"grad_norm": 6.1388397216796875, |
|
"learning_rate": 0.0, |
|
"loss": 0.7588, |
|
"step": 1389 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1389, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 348, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.976036715065508e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|