|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.99849510910459, |
|
"eval_steps": 500, |
|
"global_step": 332, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006019563581640331, |
|
"grad_norm": 41.037254333496094, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"loss": 4.2766, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012039127163280662, |
|
"grad_norm": 43.19103240966797, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 4.2655, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01805869074492099, |
|
"grad_norm": 41.71216583251953, |
|
"learning_rate": 4.411764705882353e-06, |
|
"loss": 4.1574, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.024078254326561323, |
|
"grad_norm": 51.17884063720703, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 3.3329, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.030097817908201655, |
|
"grad_norm": 28.706247329711914, |
|
"learning_rate": 7.3529411764705884e-06, |
|
"loss": 2.0066, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03611738148984198, |
|
"grad_norm": 19.702205657958984, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 1.2989, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.042136945071482315, |
|
"grad_norm": 10.646201133728027, |
|
"learning_rate": 1.0294117647058824e-05, |
|
"loss": 0.4697, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04815650865312265, |
|
"grad_norm": 7.015563488006592, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 0.167, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05417607223476298, |
|
"grad_norm": 2.405210494995117, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 0.054, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06019563581640331, |
|
"grad_norm": 2.9235267639160156, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 0.1401, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06621519939804364, |
|
"grad_norm": 3.1382505893707275, |
|
"learning_rate": 1.6176470588235296e-05, |
|
"loss": 0.0757, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07223476297968397, |
|
"grad_norm": 2.7751779556274414, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 0.089, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0782543265613243, |
|
"grad_norm": 1.7391453981399536, |
|
"learning_rate": 1.9117647058823528e-05, |
|
"loss": 0.11, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.08427389014296463, |
|
"grad_norm": 2.010361671447754, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.1479, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.09029345372460497, |
|
"grad_norm": 3.3061070442199707, |
|
"learning_rate": 2.2058823529411766e-05, |
|
"loss": 0.0665, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0963130173062453, |
|
"grad_norm": 2.8843464851379395, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 0.074, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.10233258088788563, |
|
"grad_norm": 0.8146764039993286, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0315, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.10835214446952596, |
|
"grad_norm": 2.45939040184021, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 0.0863, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.1143717080511663, |
|
"grad_norm": 2.4333105087280273, |
|
"learning_rate": 2.7941176470588236e-05, |
|
"loss": 0.1191, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.12039127163280662, |
|
"grad_norm": 1.5534690618515015, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 0.1213, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12641083521444696, |
|
"grad_norm": 1.2200188636779785, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 0.0619, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.13243039879608728, |
|
"grad_norm": 1.9440735578536987, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.1123, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1384499623777276, |
|
"grad_norm": 1.5800230503082275, |
|
"learning_rate": 3.382352941176471e-05, |
|
"loss": 0.0655, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.14446952595936793, |
|
"grad_norm": 0.6280108690261841, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 0.0309, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1504890895410083, |
|
"grad_norm": 1.1837276220321655, |
|
"learning_rate": 3.6764705882352945e-05, |
|
"loss": 0.082, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1565086531226486, |
|
"grad_norm": 3.0979809761047363, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 0.0754, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.16252821670428894, |
|
"grad_norm": 0.919219434261322, |
|
"learning_rate": 3.970588235294117e-05, |
|
"loss": 0.0652, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.16854778028592926, |
|
"grad_norm": 1.2674806118011475, |
|
"learning_rate": 4.11764705882353e-05, |
|
"loss": 0.0524, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1745673438675696, |
|
"grad_norm": 1.4973307847976685, |
|
"learning_rate": 4.2647058823529415e-05, |
|
"loss": 0.0739, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.18058690744920994, |
|
"grad_norm": 1.3600691556930542, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 0.0932, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18660647103085026, |
|
"grad_norm": 0.6800034046173096, |
|
"learning_rate": 4.558823529411765e-05, |
|
"loss": 0.0525, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.1926260346124906, |
|
"grad_norm": 0.27061381936073303, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 0.0183, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.1986455981941309, |
|
"grad_norm": 0.5821884870529175, |
|
"learning_rate": 4.8529411764705885e-05, |
|
"loss": 0.0342, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.20466516177577126, |
|
"grad_norm": 1.5963926315307617, |
|
"learning_rate": 5e-05, |
|
"loss": 0.086, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.2106847253574116, |
|
"grad_norm": 1.2303105592727661, |
|
"learning_rate": 4.983221476510067e-05, |
|
"loss": 0.1283, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.21670428893905191, |
|
"grad_norm": 0.7925997376441956, |
|
"learning_rate": 4.966442953020135e-05, |
|
"loss": 0.0465, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.22272385252069224, |
|
"grad_norm": 0.44674405455589294, |
|
"learning_rate": 4.9496644295302015e-05, |
|
"loss": 0.0208, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.2287434161023326, |
|
"grad_norm": 1.129119873046875, |
|
"learning_rate": 4.932885906040269e-05, |
|
"loss": 0.1034, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.23476297968397292, |
|
"grad_norm": 0.747196614742279, |
|
"learning_rate": 4.9161073825503354e-05, |
|
"loss": 0.1117, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.24078254326561324, |
|
"grad_norm": 1.0140711069107056, |
|
"learning_rate": 4.8993288590604034e-05, |
|
"loss": 0.0713, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24680210684725357, |
|
"grad_norm": 0.9150713086128235, |
|
"learning_rate": 4.88255033557047e-05, |
|
"loss": 0.1045, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.2528216704288939, |
|
"grad_norm": 0.7237759232521057, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.0399, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2588412340105342, |
|
"grad_norm": 0.4736149311065674, |
|
"learning_rate": 4.848993288590604e-05, |
|
"loss": 0.0283, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.26486079759217457, |
|
"grad_norm": 0.8596872091293335, |
|
"learning_rate": 4.832214765100672e-05, |
|
"loss": 0.0516, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2708803611738149, |
|
"grad_norm": 0.8274044394493103, |
|
"learning_rate": 4.8154362416107385e-05, |
|
"loss": 0.0866, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2768999247554552, |
|
"grad_norm": 1.1380550861358643, |
|
"learning_rate": 4.798657718120805e-05, |
|
"loss": 0.0628, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.28291948833709557, |
|
"grad_norm": 1.1349643468856812, |
|
"learning_rate": 4.7818791946308725e-05, |
|
"loss": 0.0997, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.28893905191873587, |
|
"grad_norm": 1.2396087646484375, |
|
"learning_rate": 4.76510067114094e-05, |
|
"loss": 0.0668, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2949586155003762, |
|
"grad_norm": 0.6159345507621765, |
|
"learning_rate": 4.748322147651007e-05, |
|
"loss": 0.0454, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.3009781790820166, |
|
"grad_norm": 0.9823417663574219, |
|
"learning_rate": 4.731543624161074e-05, |
|
"loss": 0.0358, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.30699774266365687, |
|
"grad_norm": 1.3460859060287476, |
|
"learning_rate": 4.714765100671141e-05, |
|
"loss": 0.1146, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.3130173062452972, |
|
"grad_norm": 0.8716734647750854, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.0996, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3190368698269376, |
|
"grad_norm": 0.8868650794029236, |
|
"learning_rate": 4.6812080536912756e-05, |
|
"loss": 0.0607, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.32505643340857787, |
|
"grad_norm": 0.5762543678283691, |
|
"learning_rate": 4.664429530201342e-05, |
|
"loss": 0.0603, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3310759969902182, |
|
"grad_norm": 0.5473377704620361, |
|
"learning_rate": 4.6476510067114095e-05, |
|
"loss": 0.031, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.3370955605718585, |
|
"grad_norm": 0.4517374634742737, |
|
"learning_rate": 4.630872483221477e-05, |
|
"loss": 0.0318, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.3431151241534989, |
|
"grad_norm": 1.007686734199524, |
|
"learning_rate": 4.6140939597315434e-05, |
|
"loss": 0.0596, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3491346877351392, |
|
"grad_norm": 0.5532180666923523, |
|
"learning_rate": 4.597315436241611e-05, |
|
"loss": 0.0917, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.3551542513167795, |
|
"grad_norm": 0.6608918309211731, |
|
"learning_rate": 4.580536912751678e-05, |
|
"loss": 0.0768, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.3611738148984199, |
|
"grad_norm": 0.9971833229064941, |
|
"learning_rate": 4.5637583892617453e-05, |
|
"loss": 0.0614, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3671933784800602, |
|
"grad_norm": 0.4296749532222748, |
|
"learning_rate": 4.546979865771812e-05, |
|
"loss": 0.0503, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.3732129420617005, |
|
"grad_norm": 0.5677506923675537, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.0644, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.3792325056433409, |
|
"grad_norm": 0.6360709071159363, |
|
"learning_rate": 4.5134228187919466e-05, |
|
"loss": 0.0989, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3852520692249812, |
|
"grad_norm": 1.348215937614441, |
|
"learning_rate": 4.496644295302014e-05, |
|
"loss": 0.0967, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3912716328066215, |
|
"grad_norm": 0.6315649747848511, |
|
"learning_rate": 4.4798657718120805e-05, |
|
"loss": 0.0489, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3972911963882618, |
|
"grad_norm": 0.6150538921356201, |
|
"learning_rate": 4.463087248322148e-05, |
|
"loss": 0.0634, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4033107599699022, |
|
"grad_norm": 0.3067854344844818, |
|
"learning_rate": 4.446308724832215e-05, |
|
"loss": 0.0181, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.40933032355154253, |
|
"grad_norm": 0.7488537430763245, |
|
"learning_rate": 4.4295302013422824e-05, |
|
"loss": 0.0847, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4153498871331828, |
|
"grad_norm": 0.769372284412384, |
|
"learning_rate": 4.412751677852349e-05, |
|
"loss": 0.037, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.4213694507148232, |
|
"grad_norm": 0.9909029006958008, |
|
"learning_rate": 4.395973154362416e-05, |
|
"loss": 0.0417, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.42738901429646353, |
|
"grad_norm": 0.7407757043838501, |
|
"learning_rate": 4.3791946308724836e-05, |
|
"loss": 0.051, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.43340857787810383, |
|
"grad_norm": 1.149268388748169, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.0409, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4394281414597442, |
|
"grad_norm": 0.9848851561546326, |
|
"learning_rate": 4.3456375838926176e-05, |
|
"loss": 0.0149, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.4454477050413845, |
|
"grad_norm": 1.4406760931015015, |
|
"learning_rate": 4.328859060402685e-05, |
|
"loss": 0.0762, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.45146726862302483, |
|
"grad_norm": 1.003056526184082, |
|
"learning_rate": 4.312080536912752e-05, |
|
"loss": 0.0865, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4574868322046652, |
|
"grad_norm": 1.0864567756652832, |
|
"learning_rate": 4.295302013422819e-05, |
|
"loss": 0.0535, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.4635063957863055, |
|
"grad_norm": 1.5504230260849, |
|
"learning_rate": 4.278523489932886e-05, |
|
"loss": 0.0679, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.46952595936794583, |
|
"grad_norm": 1.389381766319275, |
|
"learning_rate": 4.2617449664429534e-05, |
|
"loss": 0.1011, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.47554552294958613, |
|
"grad_norm": 0.06696069985628128, |
|
"learning_rate": 4.244966442953021e-05, |
|
"loss": 0.0017, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.4815650865312265, |
|
"grad_norm": 0.8552239537239075, |
|
"learning_rate": 4.228187919463087e-05, |
|
"loss": 0.1052, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.48758465011286684, |
|
"grad_norm": 1.8147671222686768, |
|
"learning_rate": 4.2114093959731546e-05, |
|
"loss": 0.0364, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.49360421369450713, |
|
"grad_norm": 0.7592940330505371, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0373, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.4996237772761475, |
|
"grad_norm": 0.7351986765861511, |
|
"learning_rate": 4.1778523489932886e-05, |
|
"loss": 0.1033, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.5056433408577878, |
|
"grad_norm": 0.3439026176929474, |
|
"learning_rate": 4.161073825503356e-05, |
|
"loss": 0.0166, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5116629044394282, |
|
"grad_norm": 0.41652995347976685, |
|
"learning_rate": 4.144295302013423e-05, |
|
"loss": 0.0591, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5176824680210684, |
|
"grad_norm": 0.5505680441856384, |
|
"learning_rate": 4.1275167785234905e-05, |
|
"loss": 0.0719, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5237020316027088, |
|
"grad_norm": 0.5010355114936829, |
|
"learning_rate": 4.110738255033557e-05, |
|
"loss": 0.0598, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5297215951843491, |
|
"grad_norm": 0.5710484385490417, |
|
"learning_rate": 4.0939597315436244e-05, |
|
"loss": 0.0354, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5357411587659895, |
|
"grad_norm": 0.815994381904602, |
|
"learning_rate": 4.077181208053692e-05, |
|
"loss": 0.0583, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.5417607223476298, |
|
"grad_norm": 0.5417527556419373, |
|
"learning_rate": 4.060402684563759e-05, |
|
"loss": 0.0551, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5477802859292701, |
|
"grad_norm": 0.6098296046257019, |
|
"learning_rate": 4.0436241610738256e-05, |
|
"loss": 0.0675, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5537998495109104, |
|
"grad_norm": 1.0332790613174438, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.0726, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5598194130925508, |
|
"grad_norm": 0.5562874674797058, |
|
"learning_rate": 4.01006711409396e-05, |
|
"loss": 0.0493, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5658389766741911, |
|
"grad_norm": 0.8887888789176941, |
|
"learning_rate": 3.993288590604027e-05, |
|
"loss": 0.0924, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5718585402558315, |
|
"grad_norm": 0.67585688829422, |
|
"learning_rate": 3.976510067114094e-05, |
|
"loss": 0.03, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5778781038374717, |
|
"grad_norm": 3.7685415744781494, |
|
"learning_rate": 3.959731543624161e-05, |
|
"loss": 0.0489, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5838976674191121, |
|
"grad_norm": 0.8312086462974548, |
|
"learning_rate": 3.942953020134229e-05, |
|
"loss": 0.071, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.5899172310007524, |
|
"grad_norm": 0.6857490539550781, |
|
"learning_rate": 3.9261744966442954e-05, |
|
"loss": 0.0166, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.5959367945823928, |
|
"grad_norm": 0.6559200882911682, |
|
"learning_rate": 3.909395973154363e-05, |
|
"loss": 0.1012, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.6019563581640331, |
|
"grad_norm": 1.4492859840393066, |
|
"learning_rate": 3.89261744966443e-05, |
|
"loss": 0.076, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6079759217456734, |
|
"grad_norm": 0.7843137383460999, |
|
"learning_rate": 3.875838926174497e-05, |
|
"loss": 0.0577, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.6139954853273137, |
|
"grad_norm": 0.794602632522583, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0626, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6200150489089541, |
|
"grad_norm": 1.4473546743392944, |
|
"learning_rate": 3.8422818791946305e-05, |
|
"loss": 0.0356, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.6260346124905944, |
|
"grad_norm": 0.4639027416706085, |
|
"learning_rate": 3.8255033557046985e-05, |
|
"loss": 0.0259, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6320541760722348, |
|
"grad_norm": 1.1497997045516968, |
|
"learning_rate": 3.808724832214765e-05, |
|
"loss": 0.0516, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6380737396538751, |
|
"grad_norm": 0.327901691198349, |
|
"learning_rate": 3.7919463087248324e-05, |
|
"loss": 0.0405, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6440933032355154, |
|
"grad_norm": 0.4509243369102478, |
|
"learning_rate": 3.775167785234899e-05, |
|
"loss": 0.0892, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.6501128668171557, |
|
"grad_norm": 0.6975520849227905, |
|
"learning_rate": 3.758389261744967e-05, |
|
"loss": 0.0978, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6561324303987961, |
|
"grad_norm": 0.6053667664527893, |
|
"learning_rate": 3.741610738255034e-05, |
|
"loss": 0.0318, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.6621519939804364, |
|
"grad_norm": 0.5161236524581909, |
|
"learning_rate": 3.724832214765101e-05, |
|
"loss": 0.0561, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6681715575620768, |
|
"grad_norm": 0.4180920124053955, |
|
"learning_rate": 3.7080536912751676e-05, |
|
"loss": 0.0404, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.674191121143717, |
|
"grad_norm": 0.4068116843700409, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.0399, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.6802106847253574, |
|
"grad_norm": 0.25368958711624146, |
|
"learning_rate": 3.674496644295302e-05, |
|
"loss": 0.0374, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.6862302483069977, |
|
"grad_norm": 0.4473256766796112, |
|
"learning_rate": 3.6577181208053695e-05, |
|
"loss": 0.0533, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6922498118886381, |
|
"grad_norm": 0.39927905797958374, |
|
"learning_rate": 3.640939597315436e-05, |
|
"loss": 0.0367, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.6982693754702785, |
|
"grad_norm": 0.5100545883178711, |
|
"learning_rate": 3.6241610738255034e-05, |
|
"loss": 0.0841, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7042889390519187, |
|
"grad_norm": 1.113686203956604, |
|
"learning_rate": 3.607382550335571e-05, |
|
"loss": 0.0798, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.710308502633559, |
|
"grad_norm": 0.4927540123462677, |
|
"learning_rate": 3.5906040268456373e-05, |
|
"loss": 0.0201, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7163280662151994, |
|
"grad_norm": 0.2962929904460907, |
|
"learning_rate": 3.5738255033557046e-05, |
|
"loss": 0.0413, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.7223476297968398, |
|
"grad_norm": 0.4307601749897003, |
|
"learning_rate": 3.557046979865772e-05, |
|
"loss": 0.022, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7283671933784801, |
|
"grad_norm": 0.5823848247528076, |
|
"learning_rate": 3.540268456375839e-05, |
|
"loss": 0.0357, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.7343867569601203, |
|
"grad_norm": 0.3515729010105133, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.016, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.7404063205417607, |
|
"grad_norm": 0.6808828115463257, |
|
"learning_rate": 3.506711409395974e-05, |
|
"loss": 0.0143, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.746425884123401, |
|
"grad_norm": 0.7892507910728455, |
|
"learning_rate": 3.4899328859060405e-05, |
|
"loss": 0.0264, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.7524454477050414, |
|
"grad_norm": 1.1977708339691162, |
|
"learning_rate": 3.473154362416108e-05, |
|
"loss": 0.0229, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7584650112866818, |
|
"grad_norm": 1.4794739484786987, |
|
"learning_rate": 3.4563758389261744e-05, |
|
"loss": 0.0504, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.764484574868322, |
|
"grad_norm": 0.9014606475830078, |
|
"learning_rate": 3.439597315436242e-05, |
|
"loss": 0.0223, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.7705041384499624, |
|
"grad_norm": 0.5018836855888367, |
|
"learning_rate": 3.422818791946309e-05, |
|
"loss": 0.0359, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.7765237020316027, |
|
"grad_norm": 1.3349637985229492, |
|
"learning_rate": 3.4060402684563756e-05, |
|
"loss": 0.0572, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.782543265613243, |
|
"grad_norm": 1.1911202669143677, |
|
"learning_rate": 3.389261744966443e-05, |
|
"loss": 0.0956, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7885628291948834, |
|
"grad_norm": 2.8993449211120605, |
|
"learning_rate": 3.37248322147651e-05, |
|
"loss": 0.1212, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.7945823927765236, |
|
"grad_norm": 0.6151400208473206, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.0641, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.800601956358164, |
|
"grad_norm": 1.7681182622909546, |
|
"learning_rate": 3.338926174496644e-05, |
|
"loss": 0.1288, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.8066215199398044, |
|
"grad_norm": 1.9313393831253052, |
|
"learning_rate": 3.3221476510067115e-05, |
|
"loss": 0.122, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8126410835214447, |
|
"grad_norm": 0.7092230916023254, |
|
"learning_rate": 3.305369127516779e-05, |
|
"loss": 0.0395, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.8186606471030851, |
|
"grad_norm": 0.6039671301841736, |
|
"learning_rate": 3.288590604026846e-05, |
|
"loss": 0.0517, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8246802106847254, |
|
"grad_norm": 0.6897003054618835, |
|
"learning_rate": 3.271812080536913e-05, |
|
"loss": 0.0507, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.8306997742663657, |
|
"grad_norm": 0.05981295928359032, |
|
"learning_rate": 3.25503355704698e-05, |
|
"loss": 0.0023, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.836719337848006, |
|
"grad_norm": 0.48830369114875793, |
|
"learning_rate": 3.238255033557047e-05, |
|
"loss": 0.0652, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.8427389014296464, |
|
"grad_norm": 1.0506463050842285, |
|
"learning_rate": 3.221476510067114e-05, |
|
"loss": 0.0709, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8487584650112867, |
|
"grad_norm": 0.3859744668006897, |
|
"learning_rate": 3.204697986577181e-05, |
|
"loss": 0.0396, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.8547780285929271, |
|
"grad_norm": 0.768587052822113, |
|
"learning_rate": 3.1879194630872485e-05, |
|
"loss": 0.0599, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.8607975921745673, |
|
"grad_norm": 0.6868757605552673, |
|
"learning_rate": 3.171140939597316e-05, |
|
"loss": 0.0373, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.8668171557562077, |
|
"grad_norm": 0.7010259628295898, |
|
"learning_rate": 3.1543624161073825e-05, |
|
"loss": 0.0717, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.872836719337848, |
|
"grad_norm": 0.5125268697738647, |
|
"learning_rate": 3.13758389261745e-05, |
|
"loss": 0.0457, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8788562829194884, |
|
"grad_norm": 0.9679777026176453, |
|
"learning_rate": 3.120805369127517e-05, |
|
"loss": 0.0988, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.8848758465011287, |
|
"grad_norm": 0.7588280439376831, |
|
"learning_rate": 3.1040268456375844e-05, |
|
"loss": 0.0691, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.890895410082769, |
|
"grad_norm": 0.4412769079208374, |
|
"learning_rate": 3.087248322147651e-05, |
|
"loss": 0.0243, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.8969149736644093, |
|
"grad_norm": 0.5840623378753662, |
|
"learning_rate": 3.070469798657718e-05, |
|
"loss": 0.0553, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.9029345372460497, |
|
"grad_norm": 0.34683701395988464, |
|
"learning_rate": 3.0536912751677856e-05, |
|
"loss": 0.0568, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.90895410082769, |
|
"grad_norm": 0.6545599102973938, |
|
"learning_rate": 3.0369127516778522e-05, |
|
"loss": 0.0523, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.9149736644093304, |
|
"grad_norm": 0.3024606704711914, |
|
"learning_rate": 3.02013422818792e-05, |
|
"loss": 0.04, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.9209932279909706, |
|
"grad_norm": 0.40984031558036804, |
|
"learning_rate": 3.0033557046979865e-05, |
|
"loss": 0.027, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.927012791572611, |
|
"grad_norm": 0.3794308602809906, |
|
"learning_rate": 2.986577181208054e-05, |
|
"loss": 0.0927, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.9330323551542513, |
|
"grad_norm": 0.6882305145263672, |
|
"learning_rate": 2.9697986577181207e-05, |
|
"loss": 0.0343, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9390519187358917, |
|
"grad_norm": 0.6028600335121155, |
|
"learning_rate": 2.9530201342281884e-05, |
|
"loss": 0.0479, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.945071482317532, |
|
"grad_norm": 3.9858436584472656, |
|
"learning_rate": 2.936241610738255e-05, |
|
"loss": 0.0709, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.9510910458991723, |
|
"grad_norm": 0.193309485912323, |
|
"learning_rate": 2.9194630872483227e-05, |
|
"loss": 0.0098, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.9571106094808126, |
|
"grad_norm": 0.6534713506698608, |
|
"learning_rate": 2.9026845637583893e-05, |
|
"loss": 0.0519, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.963130173062453, |
|
"grad_norm": 0.27771925926208496, |
|
"learning_rate": 2.885906040268457e-05, |
|
"loss": 0.0238, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9691497366440933, |
|
"grad_norm": 0.7335049510002136, |
|
"learning_rate": 2.8691275167785235e-05, |
|
"loss": 0.0677, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.9751693002257337, |
|
"grad_norm": 1.1832383871078491, |
|
"learning_rate": 2.8523489932885905e-05, |
|
"loss": 0.1371, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9811888638073739, |
|
"grad_norm": 0.7754644155502319, |
|
"learning_rate": 2.8355704697986578e-05, |
|
"loss": 0.0619, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.9872084273890143, |
|
"grad_norm": 0.6826126575469971, |
|
"learning_rate": 2.8187919463087248e-05, |
|
"loss": 0.0682, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.9932279909706546, |
|
"grad_norm": 0.9121167659759521, |
|
"learning_rate": 2.802013422818792e-05, |
|
"loss": 0.0234, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.999247554552295, |
|
"grad_norm": 0.6562134623527527, |
|
"learning_rate": 2.785234899328859e-05, |
|
"loss": 0.0615, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.0052671181339352, |
|
"grad_norm": 2.136812925338745, |
|
"learning_rate": 2.7684563758389263e-05, |
|
"loss": 0.0835, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.0112866817155757, |
|
"grad_norm": 0.430277019739151, |
|
"learning_rate": 2.7516778523489933e-05, |
|
"loss": 0.0205, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.017306245297216, |
|
"grad_norm": 0.37437501549720764, |
|
"learning_rate": 2.7348993288590606e-05, |
|
"loss": 0.0147, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.0233258088788564, |
|
"grad_norm": 0.09916182607412338, |
|
"learning_rate": 2.7181208053691276e-05, |
|
"loss": 0.0166, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0293453724604966, |
|
"grad_norm": 0.22763441503047943, |
|
"learning_rate": 2.701342281879195e-05, |
|
"loss": 0.0296, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.0353649360421369, |
|
"grad_norm": 0.3235589265823364, |
|
"learning_rate": 2.6845637583892618e-05, |
|
"loss": 0.0214, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.0413844996237773, |
|
"grad_norm": 0.09323552995920181, |
|
"learning_rate": 2.6677852348993288e-05, |
|
"loss": 0.0029, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.0474040632054176, |
|
"grad_norm": 0.18400466442108154, |
|
"learning_rate": 2.651006711409396e-05, |
|
"loss": 0.0227, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.053423626787058, |
|
"grad_norm": 0.4601859450340271, |
|
"learning_rate": 2.634228187919463e-05, |
|
"loss": 0.0247, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.0594431903686983, |
|
"grad_norm": 0.06925185769796371, |
|
"learning_rate": 2.6174496644295304e-05, |
|
"loss": 0.0028, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.0654627539503385, |
|
"grad_norm": 0.7103378772735596, |
|
"learning_rate": 2.6006711409395973e-05, |
|
"loss": 0.0679, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.071482317531979, |
|
"grad_norm": 0.2948612868785858, |
|
"learning_rate": 2.5838926174496646e-05, |
|
"loss": 0.0158, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.0775018811136192, |
|
"grad_norm": 0.5460957288742065, |
|
"learning_rate": 2.5671140939597316e-05, |
|
"loss": 0.0252, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.0835214446952597, |
|
"grad_norm": 0.13775992393493652, |
|
"learning_rate": 2.550335570469799e-05, |
|
"loss": 0.0125, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0895410082769, |
|
"grad_norm": 0.2737879753112793, |
|
"learning_rate": 2.533557046979866e-05, |
|
"loss": 0.0087, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.0955605718585402, |
|
"grad_norm": 0.37196484208106995, |
|
"learning_rate": 2.516778523489933e-05, |
|
"loss": 0.0579, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.1015801354401806, |
|
"grad_norm": 0.3493405282497406, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0126, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.1075996990218209, |
|
"grad_norm": 1.0219722986221313, |
|
"learning_rate": 2.4832214765100674e-05, |
|
"loss": 0.0701, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.1136192626034613, |
|
"grad_norm": 0.32175976037979126, |
|
"learning_rate": 2.4664429530201344e-05, |
|
"loss": 0.012, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.1196388261851016, |
|
"grad_norm": 0.33765479922294617, |
|
"learning_rate": 2.4496644295302017e-05, |
|
"loss": 0.0106, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.1256583897667418, |
|
"grad_norm": 0.17531374096870422, |
|
"learning_rate": 2.4328859060402687e-05, |
|
"loss": 0.0161, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.1316779533483823, |
|
"grad_norm": 0.1013503223657608, |
|
"learning_rate": 2.416107382550336e-05, |
|
"loss": 0.0057, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.1376975169300225, |
|
"grad_norm": 0.5186209082603455, |
|
"learning_rate": 2.3993288590604026e-05, |
|
"loss": 0.0189, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.143717080511663, |
|
"grad_norm": 0.577898383140564, |
|
"learning_rate": 2.38255033557047e-05, |
|
"loss": 0.0315, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1497366440933032, |
|
"grad_norm": 0.2543765604496002, |
|
"learning_rate": 2.365771812080537e-05, |
|
"loss": 0.0259, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.1557562076749435, |
|
"grad_norm": 1.04751718044281, |
|
"learning_rate": 2.348993288590604e-05, |
|
"loss": 0.0267, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.161775771256584, |
|
"grad_norm": 0.30151480436325073, |
|
"learning_rate": 2.332214765100671e-05, |
|
"loss": 0.016, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.1677953348382242, |
|
"grad_norm": 1.1602953672409058, |
|
"learning_rate": 2.3154362416107384e-05, |
|
"loss": 0.0342, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.1738148984198646, |
|
"grad_norm": 0.6510918140411377, |
|
"learning_rate": 2.2986577181208054e-05, |
|
"loss": 0.0367, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1798344620015049, |
|
"grad_norm": 0.2937709093093872, |
|
"learning_rate": 2.2818791946308727e-05, |
|
"loss": 0.0124, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.1858540255831453, |
|
"grad_norm": 0.3778565526008606, |
|
"learning_rate": 2.2651006711409396e-05, |
|
"loss": 0.0353, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.1918735891647856, |
|
"grad_norm": 0.34342288970947266, |
|
"learning_rate": 2.248322147651007e-05, |
|
"loss": 0.0228, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.1978931527464258, |
|
"grad_norm": 0.25225672125816345, |
|
"learning_rate": 2.231543624161074e-05, |
|
"loss": 0.0037, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.2039127163280663, |
|
"grad_norm": 0.3875395953655243, |
|
"learning_rate": 2.2147651006711412e-05, |
|
"loss": 0.024, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2099322799097065, |
|
"grad_norm": 0.48843473196029663, |
|
"learning_rate": 2.197986577181208e-05, |
|
"loss": 0.0411, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.215951843491347, |
|
"grad_norm": 0.008358384482562542, |
|
"learning_rate": 2.181208053691275e-05, |
|
"loss": 0.0002, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.2219714070729872, |
|
"grad_norm": 0.0617498978972435, |
|
"learning_rate": 2.1644295302013424e-05, |
|
"loss": 0.0016, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.2279909706546275, |
|
"grad_norm": 0.5839952826499939, |
|
"learning_rate": 2.1476510067114094e-05, |
|
"loss": 0.0255, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.234010534236268, |
|
"grad_norm": 0.6008470058441162, |
|
"learning_rate": 2.1308724832214767e-05, |
|
"loss": 0.0279, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.2400300978179082, |
|
"grad_norm": 0.08057394623756409, |
|
"learning_rate": 2.1140939597315437e-05, |
|
"loss": 0.014, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.2460496613995486, |
|
"grad_norm": 0.8297271728515625, |
|
"learning_rate": 2.097315436241611e-05, |
|
"loss": 0.0433, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.2520692249811889, |
|
"grad_norm": 1.0753511190414429, |
|
"learning_rate": 2.080536912751678e-05, |
|
"loss": 0.0342, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.2580887885628291, |
|
"grad_norm": 0.11652516573667526, |
|
"learning_rate": 2.0637583892617452e-05, |
|
"loss": 0.0122, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.2641083521444696, |
|
"grad_norm": 0.23289084434509277, |
|
"learning_rate": 2.0469798657718122e-05, |
|
"loss": 0.0229, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2701279157261098, |
|
"grad_norm": 0.5731219053268433, |
|
"learning_rate": 2.0302013422818795e-05, |
|
"loss": 0.0455, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.2761474793077503, |
|
"grad_norm": 0.8601072430610657, |
|
"learning_rate": 2.013422818791946e-05, |
|
"loss": 0.0294, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.2821670428893905, |
|
"grad_norm": 0.9172778129577637, |
|
"learning_rate": 1.9966442953020134e-05, |
|
"loss": 0.0803, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.2881866064710308, |
|
"grad_norm": 0.18378061056137085, |
|
"learning_rate": 1.9798657718120804e-05, |
|
"loss": 0.0151, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.2942061700526712, |
|
"grad_norm": 0.2338120937347412, |
|
"learning_rate": 1.9630872483221477e-05, |
|
"loss": 0.0214, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.3002257336343115, |
|
"grad_norm": 0.09691441804170609, |
|
"learning_rate": 1.946308724832215e-05, |
|
"loss": 0.0087, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.306245297215952, |
|
"grad_norm": 0.1699642539024353, |
|
"learning_rate": 1.929530201342282e-05, |
|
"loss": 0.0088, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.3122648607975922, |
|
"grad_norm": 0.014856619760394096, |
|
"learning_rate": 1.9127516778523493e-05, |
|
"loss": 0.0003, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.3182844243792324, |
|
"grad_norm": 0.17981240153312683, |
|
"learning_rate": 1.8959731543624162e-05, |
|
"loss": 0.0148, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.324303987960873, |
|
"grad_norm": 0.1564723402261734, |
|
"learning_rate": 1.8791946308724835e-05, |
|
"loss": 0.025, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.3303235515425131, |
|
"grad_norm": 0.05128008872270584, |
|
"learning_rate": 1.8624161073825505e-05, |
|
"loss": 0.0013, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.3363431151241536, |
|
"grad_norm": 0.018907951191067696, |
|
"learning_rate": 1.8456375838926178e-05, |
|
"loss": 0.0006, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.3423626787057938, |
|
"grad_norm": 0.047860465943813324, |
|
"learning_rate": 1.8288590604026847e-05, |
|
"loss": 0.0018, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.348382242287434, |
|
"grad_norm": 1.6964343786239624, |
|
"learning_rate": 1.8120805369127517e-05, |
|
"loss": 0.0834, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.3544018058690745, |
|
"grad_norm": 0.09841305017471313, |
|
"learning_rate": 1.7953020134228187e-05, |
|
"loss": 0.0018, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.3604213694507148, |
|
"grad_norm": 0.29318398237228394, |
|
"learning_rate": 1.778523489932886e-05, |
|
"loss": 0.0292, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.3664409330323553, |
|
"grad_norm": 0.6777030229568481, |
|
"learning_rate": 1.761744966442953e-05, |
|
"loss": 0.0453, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.3724604966139955, |
|
"grad_norm": 0.09742780774831772, |
|
"learning_rate": 1.7449664429530202e-05, |
|
"loss": 0.0022, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.3784800601956357, |
|
"grad_norm": 0.21270929276943207, |
|
"learning_rate": 1.7281879194630872e-05, |
|
"loss": 0.0216, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.3844996237772762, |
|
"grad_norm": 0.10257343202829361, |
|
"learning_rate": 1.7114093959731545e-05, |
|
"loss": 0.0032, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.3905191873589164, |
|
"grad_norm": 0.2899154722690582, |
|
"learning_rate": 1.6946308724832215e-05, |
|
"loss": 0.0336, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.396538750940557, |
|
"grad_norm": 0.560697615146637, |
|
"learning_rate": 1.6778523489932888e-05, |
|
"loss": 0.0167, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.4025583145221971, |
|
"grad_norm": 0.15792670845985413, |
|
"learning_rate": 1.6610738255033557e-05, |
|
"loss": 0.0161, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.4085778781038374, |
|
"grad_norm": 0.112309031188488, |
|
"learning_rate": 1.644295302013423e-05, |
|
"loss": 0.0081, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.4145974416854779, |
|
"grad_norm": 0.6623883247375488, |
|
"learning_rate": 1.62751677852349e-05, |
|
"loss": 0.0679, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.420617005267118, |
|
"grad_norm": 0.27897122502326965, |
|
"learning_rate": 1.610738255033557e-05, |
|
"loss": 0.0162, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.4266365688487586, |
|
"grad_norm": 0.08262226730585098, |
|
"learning_rate": 1.5939597315436243e-05, |
|
"loss": 0.0029, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.4326561324303988, |
|
"grad_norm": 0.13499091565608978, |
|
"learning_rate": 1.5771812080536912e-05, |
|
"loss": 0.0147, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.438675696012039, |
|
"grad_norm": 0.2563413977622986, |
|
"learning_rate": 1.5604026845637585e-05, |
|
"loss": 0.0186, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.4446952595936795, |
|
"grad_norm": 0.38309767842292786, |
|
"learning_rate": 1.5436241610738255e-05, |
|
"loss": 0.0042, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4507148231753197, |
|
"grad_norm": 0.5308915972709656, |
|
"learning_rate": 1.5268456375838928e-05, |
|
"loss": 0.023, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.4567343867569602, |
|
"grad_norm": 0.5418457984924316, |
|
"learning_rate": 1.51006711409396e-05, |
|
"loss": 0.0271, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.4627539503386005, |
|
"grad_norm": 0.16427500545978546, |
|
"learning_rate": 1.493288590604027e-05, |
|
"loss": 0.0167, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.4687735139202407, |
|
"grad_norm": 0.1764906644821167, |
|
"learning_rate": 1.4765100671140942e-05, |
|
"loss": 0.0041, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.4747930775018812, |
|
"grad_norm": 0.028177335858345032, |
|
"learning_rate": 1.4597315436241613e-05, |
|
"loss": 0.0011, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.4808126410835214, |
|
"grad_norm": 0.28984132409095764, |
|
"learning_rate": 1.4429530201342285e-05, |
|
"loss": 0.0037, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.4868322046651619, |
|
"grad_norm": 0.016668178141117096, |
|
"learning_rate": 1.4261744966442953e-05, |
|
"loss": 0.0007, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.492851768246802, |
|
"grad_norm": 0.11294250190258026, |
|
"learning_rate": 1.4093959731543624e-05, |
|
"loss": 0.0117, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.4988713318284423, |
|
"grad_norm": 0.18805146217346191, |
|
"learning_rate": 1.3926174496644295e-05, |
|
"loss": 0.0152, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.5048908954100828, |
|
"grad_norm": 0.15651652216911316, |
|
"learning_rate": 1.3758389261744966e-05, |
|
"loss": 0.0195, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.510910458991723, |
|
"grad_norm": 0.07596039772033691, |
|
"learning_rate": 1.3590604026845638e-05, |
|
"loss": 0.0035, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.5169300225733635, |
|
"grad_norm": 0.5767983198165894, |
|
"learning_rate": 1.3422818791946309e-05, |
|
"loss": 0.0209, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.5229495861550038, |
|
"grad_norm": 0.14054809510707855, |
|
"learning_rate": 1.325503355704698e-05, |
|
"loss": 0.0156, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.528969149736644, |
|
"grad_norm": 0.5480087995529175, |
|
"learning_rate": 1.3087248322147652e-05, |
|
"loss": 0.0372, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.5349887133182845, |
|
"grad_norm": 0.21327197551727295, |
|
"learning_rate": 1.2919463087248323e-05, |
|
"loss": 0.037, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.5410082768999247, |
|
"grad_norm": 0.8947880268096924, |
|
"learning_rate": 1.2751677852348994e-05, |
|
"loss": 0.0352, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.5470278404815652, |
|
"grad_norm": 0.16651660203933716, |
|
"learning_rate": 1.2583892617449666e-05, |
|
"loss": 0.0062, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.5530474040632054, |
|
"grad_norm": 0.18476413190364838, |
|
"learning_rate": 1.2416107382550337e-05, |
|
"loss": 0.0177, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.5590669676448456, |
|
"grad_norm": 0.013061203993856907, |
|
"learning_rate": 1.2248322147651008e-05, |
|
"loss": 0.0005, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.565086531226486, |
|
"grad_norm": 0.31754836440086365, |
|
"learning_rate": 1.208053691275168e-05, |
|
"loss": 0.0181, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5711060948081266, |
|
"grad_norm": 0.26034584641456604, |
|
"learning_rate": 1.191275167785235e-05, |
|
"loss": 0.0342, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.5771256583897668, |
|
"grad_norm": 0.15222539007663727, |
|
"learning_rate": 1.174496644295302e-05, |
|
"loss": 0.0081, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.583145221971407, |
|
"grad_norm": 0.1855451464653015, |
|
"learning_rate": 1.1577181208053692e-05, |
|
"loss": 0.0222, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.5891647855530473, |
|
"grad_norm": 0.47386428713798523, |
|
"learning_rate": 1.1409395973154363e-05, |
|
"loss": 0.0484, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.5951843491346878, |
|
"grad_norm": 0.05222529545426369, |
|
"learning_rate": 1.1241610738255035e-05, |
|
"loss": 0.0018, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.6012039127163282, |
|
"grad_norm": 0.36145541071891785, |
|
"learning_rate": 1.1073825503355706e-05, |
|
"loss": 0.0256, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.6072234762979685, |
|
"grad_norm": 0.2200651317834854, |
|
"learning_rate": 1.0906040268456376e-05, |
|
"loss": 0.0048, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.6132430398796087, |
|
"grad_norm": 0.2838999330997467, |
|
"learning_rate": 1.0738255033557047e-05, |
|
"loss": 0.0088, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.619262603461249, |
|
"grad_norm": 0.5340823531150818, |
|
"learning_rate": 1.0570469798657718e-05, |
|
"loss": 0.0054, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.6252821670428894, |
|
"grad_norm": 0.27307260036468506, |
|
"learning_rate": 1.040268456375839e-05, |
|
"loss": 0.014, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.6313017306245299, |
|
"grad_norm": 0.694962739944458, |
|
"learning_rate": 1.0234899328859061e-05, |
|
"loss": 0.0126, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.6373212942061701, |
|
"grad_norm": 0.19789136946201324, |
|
"learning_rate": 1.006711409395973e-05, |
|
"loss": 0.0172, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.6433408577878104, |
|
"grad_norm": 0.5267607569694519, |
|
"learning_rate": 9.899328859060402e-06, |
|
"loss": 0.0408, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.6493604213694506, |
|
"grad_norm": 0.015556755475699902, |
|
"learning_rate": 9.731543624161075e-06, |
|
"loss": 0.0006, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.655379984951091, |
|
"grad_norm": 0.5071566104888916, |
|
"learning_rate": 9.563758389261746e-06, |
|
"loss": 0.0281, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.6613995485327315, |
|
"grad_norm": 0.1441573202610016, |
|
"learning_rate": 9.395973154362418e-06, |
|
"loss": 0.0151, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.6674191121143718, |
|
"grad_norm": 0.22274713218212128, |
|
"learning_rate": 9.228187919463089e-06, |
|
"loss": 0.0174, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.673438675696012, |
|
"grad_norm": 1.108049988746643, |
|
"learning_rate": 9.060402684563759e-06, |
|
"loss": 0.0189, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.6794582392776523, |
|
"grad_norm": 0.47223615646362305, |
|
"learning_rate": 8.89261744966443e-06, |
|
"loss": 0.0261, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.6854778028592927, |
|
"grad_norm": 0.11383321136236191, |
|
"learning_rate": 8.724832214765101e-06, |
|
"loss": 0.0139, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.6914973664409332, |
|
"grad_norm": 0.01508291345089674, |
|
"learning_rate": 8.557046979865773e-06, |
|
"loss": 0.0006, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.6975169300225734, |
|
"grad_norm": 0.572291910648346, |
|
"learning_rate": 8.389261744966444e-06, |
|
"loss": 0.0587, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.7035364936042137, |
|
"grad_norm": 0.8027609586715698, |
|
"learning_rate": 8.221476510067115e-06, |
|
"loss": 0.0252, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.709556057185854, |
|
"grad_norm": 0.0062585920095443726, |
|
"learning_rate": 8.053691275167785e-06, |
|
"loss": 0.0002, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.7155756207674944, |
|
"grad_norm": 0.18026615679264069, |
|
"learning_rate": 7.885906040268456e-06, |
|
"loss": 0.0069, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.7215951843491348, |
|
"grad_norm": 0.02148846536874771, |
|
"learning_rate": 7.718120805369127e-06, |
|
"loss": 0.0006, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.727614747930775, |
|
"grad_norm": 0.40165480971336365, |
|
"learning_rate": 7.5503355704698e-06, |
|
"loss": 0.0483, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.7336343115124153, |
|
"grad_norm": 0.34756484627723694, |
|
"learning_rate": 7.382550335570471e-06, |
|
"loss": 0.0089, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.7396538750940556, |
|
"grad_norm": 0.8310803771018982, |
|
"learning_rate": 7.214765100671142e-06, |
|
"loss": 0.0349, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.745673438675696, |
|
"grad_norm": 0.21227827668190002, |
|
"learning_rate": 7.046979865771812e-06, |
|
"loss": 0.0098, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.7516930022573365, |
|
"grad_norm": 0.18423959612846375, |
|
"learning_rate": 6.879194630872483e-06, |
|
"loss": 0.0196, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.7577125658389767, |
|
"grad_norm": 0.13899557292461395, |
|
"learning_rate": 6.7114093959731546e-06, |
|
"loss": 0.0086, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.763732129420617, |
|
"grad_norm": 0.9509983658790588, |
|
"learning_rate": 6.543624161073826e-06, |
|
"loss": 0.0227, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.7697516930022572, |
|
"grad_norm": 0.2806952893733978, |
|
"learning_rate": 6.375838926174497e-06, |
|
"loss": 0.0308, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.7757712565838977, |
|
"grad_norm": 0.2584255337715149, |
|
"learning_rate": 6.2080536912751686e-06, |
|
"loss": 0.0078, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.7817908201655381, |
|
"grad_norm": 0.6496636867523193, |
|
"learning_rate": 6.04026845637584e-06, |
|
"loss": 0.0764, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.7878103837471784, |
|
"grad_norm": 2.131640672683716, |
|
"learning_rate": 5.87248322147651e-06, |
|
"loss": 0.0485, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.7938299473288186, |
|
"grad_norm": 0.25984302163124084, |
|
"learning_rate": 5.704697986577182e-06, |
|
"loss": 0.0256, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.7998495109104589, |
|
"grad_norm": 0.8501216769218445, |
|
"learning_rate": 5.536912751677853e-06, |
|
"loss": 0.0559, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.8058690744920993, |
|
"grad_norm": 0.3276824355125427, |
|
"learning_rate": 5.3691275167785235e-06, |
|
"loss": 0.0228, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8118886380737398, |
|
"grad_norm": 0.19804896414279938, |
|
"learning_rate": 5.201342281879195e-06, |
|
"loss": 0.0121, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.81790820165538, |
|
"grad_norm": 0.6229518055915833, |
|
"learning_rate": 5.033557046979865e-06, |
|
"loss": 0.0046, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.8239277652370203, |
|
"grad_norm": 0.16715337336063385, |
|
"learning_rate": 4.8657718120805375e-06, |
|
"loss": 0.0107, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.8299473288186605, |
|
"grad_norm": 0.14998656511306763, |
|
"learning_rate": 4.697986577181209e-06, |
|
"loss": 0.0041, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.835966892400301, |
|
"grad_norm": 0.3184771239757538, |
|
"learning_rate": 4.530201342281879e-06, |
|
"loss": 0.0107, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.8419864559819414, |
|
"grad_norm": 0.665398120880127, |
|
"learning_rate": 4.362416107382551e-06, |
|
"loss": 0.0227, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.8480060195635817, |
|
"grad_norm": 0.19727759063243866, |
|
"learning_rate": 4.194630872483222e-06, |
|
"loss": 0.0122, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.854025583145222, |
|
"grad_norm": 0.13394923508167267, |
|
"learning_rate": 4.026845637583892e-06, |
|
"loss": 0.0053, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.8600451467268622, |
|
"grad_norm": 0.17236709594726562, |
|
"learning_rate": 3.859060402684564e-06, |
|
"loss": 0.0184, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.8660647103085026, |
|
"grad_norm": 1.0638315677642822, |
|
"learning_rate": 3.6912751677852355e-06, |
|
"loss": 0.0311, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.872084273890143, |
|
"grad_norm": 0.06651457399129868, |
|
"learning_rate": 3.523489932885906e-06, |
|
"loss": 0.0023, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.8781038374717833, |
|
"grad_norm": 0.19191402196884155, |
|
"learning_rate": 3.3557046979865773e-06, |
|
"loss": 0.0195, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.8841234010534236, |
|
"grad_norm": 0.004882109817117453, |
|
"learning_rate": 3.1879194630872486e-06, |
|
"loss": 0.0002, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.8901429646350638, |
|
"grad_norm": 0.2256098985671997, |
|
"learning_rate": 3.02013422818792e-06, |
|
"loss": 0.0145, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.8961625282167043, |
|
"grad_norm": 0.49490997195243835, |
|
"learning_rate": 2.852348993288591e-06, |
|
"loss": 0.0378, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.9021820917983447, |
|
"grad_norm": 0.2518860101699829, |
|
"learning_rate": 2.6845637583892617e-06, |
|
"loss": 0.0424, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.908201655379985, |
|
"grad_norm": 0.01092343870550394, |
|
"learning_rate": 2.5167785234899326e-06, |
|
"loss": 0.0003, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.9142212189616252, |
|
"grad_norm": 0.45049425959587097, |
|
"learning_rate": 2.3489932885906044e-06, |
|
"loss": 0.053, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.9202407825432655, |
|
"grad_norm": 0.01676754839718342, |
|
"learning_rate": 2.1812080536912753e-06, |
|
"loss": 0.0007, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.926260346124906, |
|
"grad_norm": 0.08567023277282715, |
|
"learning_rate": 2.013422818791946e-06, |
|
"loss": 0.0022, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.9322799097065464, |
|
"grad_norm": 0.243726447224617, |
|
"learning_rate": 1.8456375838926177e-06, |
|
"loss": 0.022, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.9382994732881866, |
|
"grad_norm": 0.20735050737857819, |
|
"learning_rate": 1.6778523489932886e-06, |
|
"loss": 0.0113, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.9443190368698269, |
|
"grad_norm": 0.013643703423440456, |
|
"learning_rate": 1.51006711409396e-06, |
|
"loss": 0.0005, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.9503386004514671, |
|
"grad_norm": 0.14169737696647644, |
|
"learning_rate": 1.3422818791946309e-06, |
|
"loss": 0.004, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.9563581640331076, |
|
"grad_norm": 0.21097888052463531, |
|
"learning_rate": 1.1744966442953022e-06, |
|
"loss": 0.0163, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.962377727614748, |
|
"grad_norm": 0.3481338620185852, |
|
"learning_rate": 1.006711409395973e-06, |
|
"loss": 0.0328, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.9683972911963883, |
|
"grad_norm": 0.639370858669281, |
|
"learning_rate": 8.389261744966443e-07, |
|
"loss": 0.0385, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.9744168547780285, |
|
"grad_norm": 0.01708345301449299, |
|
"learning_rate": 6.711409395973154e-07, |
|
"loss": 0.0006, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.9804364183596688, |
|
"grad_norm": 1.5634732246398926, |
|
"learning_rate": 5.033557046979866e-07, |
|
"loss": 0.0966, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.9864559819413092, |
|
"grad_norm": 0.03818768635392189, |
|
"learning_rate": 3.355704697986577e-07, |
|
"loss": 0.001, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.9924755455229497, |
|
"grad_norm": 1.7485132217407227, |
|
"learning_rate": 1.6778523489932886e-07, |
|
"loss": 0.0881, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.99849510910459, |
|
"grad_norm": 2.490537643432617, |
|
"learning_rate": 0.0, |
|
"loss": 0.0464, |
|
"step": 332 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 332, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.806882647064781e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|