diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,24543 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8321659893760142, + "eval_steps": 500, + "global_step": 15000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005547773262506761, + "grad_norm": 86.47028350830078, + "learning_rate": 9.999992405676425e-08, + "logits/chosen": 0.04256238415837288, + "logits/rejected": -0.03134341910481453, + "logps/chosen": -188.57350158691406, + "logps/rejected": -175.35324096679688, + "loss": 2.5855, + "nll_loss": 1.9216177463531494, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0365375280380249, + "rewards/margins": 0.013619521632790565, + "rewards/rejected": 0.022918006405234337, + "step": 10 + }, + { + "epoch": 0.0011095546525013522, + "grad_norm": 105.78651428222656, + "learning_rate": 9.999969622728772e-08, + "logits/chosen": -0.11089731752872467, + "logits/rejected": -0.15940730273723602, + "logps/chosen": -214.8294677734375, + "logps/rejected": -232.4839324951172, + "loss": 2.5074, + "nll_loss": 1.7895090579986572, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.16805259883403778, + "rewards/margins": 0.01384829543530941, + "rewards/rejected": 0.154204323887825, + "step": 20 + }, + { + "epoch": 0.0016643319787520284, + "grad_norm": 114.77359771728516, + "learning_rate": 9.99993165122625e-08, + "logits/chosen": -0.24033674597740173, + "logits/rejected": -0.35203057527542114, + "logps/chosen": -260.21307373046875, + "logps/rejected": -285.6676025390625, + "loss": 2.2734, + "nll_loss": 1.5780918598175049, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.5284448266029358, + "rewards/margins": 0.022413188591599464, + "rewards/rejected": 0.5060315728187561, + "step": 30 + }, + { + "epoch": 0.0022191093050027044, + "grad_norm": 103.22981262207031, + "learning_rate": 9.999878491284204e-08, + "logits/chosen": -0.11801149696111679, + "logits/rejected": -0.18273355066776276, + "logps/chosen": -189.36073303222656, + "logps/rejected": -205.96383666992188, + "loss": 2.1295, + "nll_loss": 1.3643723726272583, + "rewards/accuracies": 0.32499998807907104, + "rewards/chosen": 0.8830834627151489, + "rewards/margins": -0.03812415525317192, + "rewards/rejected": 0.9212075471878052, + "step": 40 + }, + { + "epoch": 0.002773886631253381, + "grad_norm": 66.34463500976562, + "learning_rate": 9.99981014306412e-08, + "logits/chosen": -0.2742760479450226, + "logits/rejected": -0.3168026804924011, + "logps/chosen": -194.3908233642578, + "logps/rejected": -215.14193725585938, + "loss": 1.8903, + "nll_loss": 1.2410157918930054, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 1.4482768774032593, + "rewards/margins": 0.08231005072593689, + "rewards/rejected": 1.365966796875, + "step": 50 + }, + { + "epoch": 0.003328663957504057, + "grad_norm": 53.22880935668945, + "learning_rate": 9.999726606773624e-08, + "logits/chosen": -0.3148336708545685, + "logits/rejected": -0.40042591094970703, + "logps/chosen": -217.2548065185547, + "logps/rejected": -261.5478210449219, + "loss": 1.8681, + "nll_loss": 1.2332053184509277, + "rewards/accuracies": 0.5, + "rewards/chosen": 1.531562089920044, + "rewards/margins": 0.05171762779355049, + "rewards/rejected": 1.4798444509506226, + "step": 60 + }, + { + "epoch": 0.003883441283754733, + "grad_norm": 62.05373764038086, + "learning_rate": 9.999627882666472e-08, + "logits/chosen": -0.39523762464523315, + "logits/rejected": -0.46755728125572205, + "logps/chosen": -224.984130859375, + "logps/rejected": -253.46240234375, + "loss": 1.8248, + "nll_loss": 1.3099695444107056, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 1.6396353244781494, + "rewards/margins": 0.09223873913288116, + "rewards/rejected": 1.5473965406417847, + "step": 70 + }, + { + "epoch": 0.004438218610005409, + "grad_norm": 59.43037796020508, + "learning_rate": 9.999513971042565e-08, + "logits/chosen": -0.33986055850982666, + "logits/rejected": -0.37254834175109863, + "logps/chosen": -210.43704223632812, + "logps/rejected": -217.45449829101562, + "loss": 1.8836, + "nll_loss": 1.2146633863449097, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 1.6813926696777344, + "rewards/margins": 0.15952646732330322, + "rewards/rejected": 1.5218660831451416, + "step": 80 + }, + { + "epoch": 0.004992995936256085, + "grad_norm": 62.62155532836914, + "learning_rate": 9.999384872247934e-08, + "logits/chosen": -0.17636926472187042, + "logits/rejected": -0.25223809480667114, + "logps/chosen": -157.65005493164062, + "logps/rejected": -188.19406127929688, + "loss": 1.792, + "nll_loss": 1.0303140878677368, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 1.56096613407135, + "rewards/margins": 0.1818581521511078, + "rewards/rejected": 1.37910795211792, + "step": 90 + }, + { + "epoch": 0.005547773262506762, + "grad_norm": 56.658512115478516, + "learning_rate": 9.999240586674747e-08, + "logits/chosen": -0.32734403014183044, + "logits/rejected": -0.40522679686546326, + "logps/chosen": -182.51206970214844, + "logps/rejected": -214.18594360351562, + "loss": 1.8367, + "nll_loss": 1.136197805404663, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 1.675817847251892, + "rewards/margins": 0.09024398773908615, + "rewards/rejected": 1.5855739116668701, + "step": 100 + }, + { + "epoch": 0.006102550588757438, + "grad_norm": 59.043766021728516, + "learning_rate": 9.999081114761303e-08, + "logits/chosen": -0.28781935572624207, + "logits/rejected": -0.3697580397129059, + "logps/chosen": -198.2109375, + "logps/rejected": -214.5436553955078, + "loss": 1.8053, + "nll_loss": 1.127979040145874, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 1.7802499532699585, + "rewards/margins": 0.28318971395492554, + "rewards/rejected": 1.4970601797103882, + "step": 110 + }, + { + "epoch": 0.006657327915008114, + "grad_norm": 50.7485466003418, + "learning_rate": 9.998906456992035e-08, + "logits/chosen": -0.35893505811691284, + "logits/rejected": -0.46098464727401733, + "logps/chosen": -207.06576538085938, + "logps/rejected": -245.782958984375, + "loss": 1.8427, + "nll_loss": 1.1750494241714478, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.7762939929962158, + "rewards/margins": 0.3124890923500061, + "rewards/rejected": 1.463804841041565, + "step": 120 + }, + { + "epoch": 0.00721210524125879, + "grad_norm": 59.86566162109375, + "learning_rate": 9.998716613897509e-08, + "logits/chosen": -0.29412102699279785, + "logits/rejected": -0.38112324476242065, + "logps/chosen": -203.17233276367188, + "logps/rejected": -233.68563842773438, + "loss": 1.8138, + "nll_loss": 1.1727923154830933, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 1.8533748388290405, + "rewards/margins": 0.33772191405296326, + "rewards/rejected": 1.5156530141830444, + "step": 130 + }, + { + "epoch": 0.007766882567509466, + "grad_norm": 53.81575393676758, + "learning_rate": 9.998511586054413e-08, + "logits/chosen": -0.310435950756073, + "logits/rejected": -0.407255083322525, + "logps/chosen": -203.92947387695312, + "logps/rejected": -233.2860870361328, + "loss": 1.7059, + "nll_loss": 1.1747105121612549, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.8009592294692993, + "rewards/margins": 0.29193150997161865, + "rewards/rejected": 1.5090277194976807, + "step": 140 + }, + { + "epoch": 0.008321659893760143, + "grad_norm": 58.20719528198242, + "learning_rate": 9.998291374085569e-08, + "logits/chosen": -0.4028782844543457, + "logits/rejected": -0.4347217082977295, + "logps/chosen": -206.7960205078125, + "logps/rejected": -217.45614624023438, + "loss": 1.7235, + "nll_loss": 1.26735520362854, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.8107702732086182, + "rewards/margins": 0.17375993728637695, + "rewards/rejected": 1.6370102167129517, + "step": 150 + }, + { + "epoch": 0.008876437220010818, + "grad_norm": 77.8411865234375, + "learning_rate": 9.99805597865992e-08, + "logits/chosen": -0.4058915674686432, + "logits/rejected": -0.5049432516098022, + "logps/chosen": -199.4020233154297, + "logps/rejected": -243.2211151123047, + "loss": 1.7651, + "nll_loss": 1.1955233812332153, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.9076124429702759, + "rewards/margins": 0.31628042459487915, + "rewards/rejected": 1.5913320779800415, + "step": 160 + }, + { + "epoch": 0.009431214546261495, + "grad_norm": 66.69096374511719, + "learning_rate": 9.997805400492532e-08, + "logits/chosen": -0.34897083044052124, + "logits/rejected": -0.4341156482696533, + "logps/chosen": -185.31228637695312, + "logps/rejected": -222.63546752929688, + "loss": 1.7043, + "nll_loss": 1.1404896974563599, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.8629522323608398, + "rewards/margins": 0.28871744871139526, + "rewards/rejected": 1.5742347240447998, + "step": 170 + }, + { + "epoch": 0.00998599187251217, + "grad_norm": 54.87003707885742, + "learning_rate": 9.997539640344596e-08, + "logits/chosen": -0.1554606407880783, + "logits/rejected": -0.21996864676475525, + "logps/chosen": -144.2554168701172, + "logps/rejected": -139.35382080078125, + "loss": 1.7168, + "nll_loss": 0.9475277662277222, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 1.7399733066558838, + "rewards/margins": 0.28856176137924194, + "rewards/rejected": 1.451411485671997, + "step": 180 + }, + { + "epoch": 0.010540769198762847, + "grad_norm": 69.4969482421875, + "learning_rate": 9.99725869902342e-08, + "logits/chosen": -0.37828877568244934, + "logits/rejected": -0.38139861822128296, + "logps/chosen": -236.5293426513672, + "logps/rejected": -235.115966796875, + "loss": 1.6938, + "nll_loss": 1.2049442529678345, + "rewards/accuracies": 0.75, + "rewards/chosen": 2.005309581756592, + "rewards/margins": 0.38655903935432434, + "rewards/rejected": 1.6187505722045898, + "step": 190 + }, + { + "epoch": 0.011095546525013524, + "grad_norm": 73.94921875, + "learning_rate": 9.996962577382426e-08, + "logits/chosen": -0.19098533689975739, + "logits/rejected": -0.2874911427497864, + "logps/chosen": -151.21435546875, + "logps/rejected": -179.45016479492188, + "loss": 1.6993, + "nll_loss": 1.0086549520492554, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.8682178258895874, + "rewards/margins": 0.3940388262271881, + "rewards/rejected": 1.4741789102554321, + "step": 200 + }, + { + "epoch": 0.011650323851264199, + "grad_norm": 51.379154205322266, + "learning_rate": 9.996651276321152e-08, + "logits/chosen": -0.3097096383571625, + "logits/rejected": -0.3990236222743988, + "logps/chosen": -189.82765197753906, + "logps/rejected": -216.7052764892578, + "loss": 1.6636, + "nll_loss": 1.052120327949524, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 1.9751056432724, + "rewards/margins": 0.4109489917755127, + "rewards/rejected": 1.5641567707061768, + "step": 210 + }, + { + "epoch": 0.012205101177514876, + "grad_norm": 55.620426177978516, + "learning_rate": 9.996324796785246e-08, + "logits/chosen": -0.4845232367515564, + "logits/rejected": -0.5828734040260315, + "logps/chosen": -235.7965850830078, + "logps/rejected": -251.658935546875, + "loss": 1.7518, + "nll_loss": 1.1625698804855347, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 2.1334118843078613, + "rewards/margins": 0.5587369203567505, + "rewards/rejected": 1.5746748447418213, + "step": 220 + }, + { + "epoch": 0.01275987850376555, + "grad_norm": 65.55672454833984, + "learning_rate": 9.995983139766464e-08, + "logits/chosen": -0.5563563704490662, + "logits/rejected": -0.643342137336731, + "logps/chosen": -214.36947631835938, + "logps/rejected": -240.8154754638672, + "loss": 1.7696, + "nll_loss": 1.4078218936920166, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 2.1256346702575684, + "rewards/margins": 0.37678810954093933, + "rewards/rejected": 1.748846411705017, + "step": 230 + }, + { + "epoch": 0.013314655830016228, + "grad_norm": 58.193077087402344, + "learning_rate": 9.99562630630267e-08, + "logits/chosen": -0.3747154176235199, + "logits/rejected": -0.46808844804763794, + "logps/chosen": -179.42970275878906, + "logps/rejected": -219.7896270751953, + "loss": 1.6669, + "nll_loss": 1.1212873458862305, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 2.1608200073242188, + "rewards/margins": 0.5892239809036255, + "rewards/rejected": 1.5715959072113037, + "step": 240 + }, + { + "epoch": 0.013869433156266903, + "grad_norm": 77.5068588256836, + "learning_rate": 9.995254297477825e-08, + "logits/chosen": -0.3856232464313507, + "logits/rejected": -0.5023887753486633, + "logps/chosen": -181.36752319335938, + "logps/rejected": -235.1916046142578, + "loss": 1.6287, + "nll_loss": 1.062013030052185, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 2.17362904548645, + "rewards/margins": 0.7540701031684875, + "rewards/rejected": 1.4195587635040283, + "step": 250 + }, + { + "epoch": 0.01442421048251758, + "grad_norm": 61.57682418823242, + "learning_rate": 9.994867114421993e-08, + "logits/chosen": -0.46952685713768005, + "logits/rejected": -0.5864278078079224, + "logps/chosen": -212.41775512695312, + "logps/rejected": -256.9131774902344, + "loss": 1.6886, + "nll_loss": 1.1452209949493408, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 2.1298396587371826, + "rewards/margins": 0.6472753286361694, + "rewards/rejected": 1.4825643301010132, + "step": 260 + }, + { + "epoch": 0.014978987808768256, + "grad_norm": 79.09966278076172, + "learning_rate": 9.99446475831133e-08, + "logits/chosen": -0.32850465178489685, + "logits/rejected": -0.46790608763694763, + "logps/chosen": -152.0748291015625, + "logps/rejected": -170.48959350585938, + "loss": 1.6692, + "nll_loss": 0.9634621739387512, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.94126296043396, + "rewards/margins": 0.6225162148475647, + "rewards/rejected": 1.31874680519104, + "step": 270 + }, + { + "epoch": 0.015533765135018932, + "grad_norm": 72.3378677368164, + "learning_rate": 9.994047230368086e-08, + "logits/chosen": -0.4431266188621521, + "logits/rejected": -0.5226654410362244, + "logps/chosen": -200.44613647460938, + "logps/rejected": -239.3496551513672, + "loss": 1.6507, + "nll_loss": 1.104430913925171, + "rewards/accuracies": 0.75, + "rewards/chosen": 2.2142229080200195, + "rewards/margins": 0.6966776251792908, + "rewards/rejected": 1.5175453424453735, + "step": 280 + }, + { + "epoch": 0.016088542461269607, + "grad_norm": 61.740116119384766, + "learning_rate": 9.993614531860596e-08, + "logits/chosen": -0.46498972177505493, + "logits/rejected": -0.5346661806106567, + "logps/chosen": -194.7556610107422, + "logps/rejected": -229.0714874267578, + "loss": 1.6973, + "nll_loss": 1.1759881973266602, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 2.4024055004119873, + "rewards/margins": 0.8359010815620422, + "rewards/rejected": 1.566504716873169, + "step": 290 + }, + { + "epoch": 0.016643319787520285, + "grad_norm": 73.7476577758789, + "learning_rate": 9.993166664103283e-08, + "logits/chosen": -0.511029064655304, + "logits/rejected": -0.6332219839096069, + "logps/chosen": -206.7277374267578, + "logps/rejected": -246.34201049804688, + "loss": 1.6736, + "nll_loss": 1.2045782804489136, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 2.3775553703308105, + "rewards/margins": 0.7974990010261536, + "rewards/rejected": 1.5800564289093018, + "step": 300 + }, + { + "epoch": 0.01719809711377096, + "grad_norm": 91.33706665039062, + "learning_rate": 9.992703628456647e-08, + "logits/chosen": -0.5446246862411499, + "logits/rejected": -0.5947648286819458, + "logps/chosen": -202.0845489501953, + "logps/rejected": -219.13375854492188, + "loss": 1.6406, + "nll_loss": 1.200272798538208, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 2.4559853076934814, + "rewards/margins": 0.7933844327926636, + "rewards/rejected": 1.6626008749008179, + "step": 310 + }, + { + "epoch": 0.017752874440021636, + "grad_norm": 61.63032150268555, + "learning_rate": 9.992225426327267e-08, + "logits/chosen": -0.606124997138977, + "logits/rejected": -0.6858624219894409, + "logps/chosen": -198.80601501464844, + "logps/rejected": -236.6532440185547, + "loss": 1.5924, + "nll_loss": 1.2124780416488647, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 2.247375011444092, + "rewards/margins": 0.5193211436271667, + "rewards/rejected": 1.7280542850494385, + "step": 320 + }, + { + "epoch": 0.018307651766272314, + "grad_norm": 79.13629150390625, + "learning_rate": 9.991732059167788e-08, + "logits/chosen": -0.39404815435409546, + "logits/rejected": -0.4788663387298584, + "logps/chosen": -189.56886291503906, + "logps/rejected": -212.91897583007812, + "loss": 1.6604, + "nll_loss": 1.1193745136260986, + "rewards/accuracies": 0.75, + "rewards/chosen": 2.2288386821746826, + "rewards/margins": 0.758010983467102, + "rewards/rejected": 1.4708276987075806, + "step": 330 + }, + { + "epoch": 0.01886242909252299, + "grad_norm": 99.47632598876953, + "learning_rate": 9.99122352847693e-08, + "logits/chosen": -0.3280274271965027, + "logits/rejected": -0.3921307325363159, + "logps/chosen": -163.58642578125, + "logps/rejected": -182.4820098876953, + "loss": 1.6261, + "nll_loss": 0.9385612607002258, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 2.386798620223999, + "rewards/margins": 0.7635468244552612, + "rewards/rejected": 1.6232519149780273, + "step": 340 + }, + { + "epoch": 0.019417206418773664, + "grad_norm": 78.503662109375, + "learning_rate": 9.990699835799469e-08, + "logits/chosen": -0.579135000705719, + "logits/rejected": -0.6546199917793274, + "logps/chosen": -209.2771759033203, + "logps/rejected": -240.32766723632812, + "loss": 1.6131, + "nll_loss": 1.2728625535964966, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 2.6339731216430664, + "rewards/margins": 0.9537647366523743, + "rewards/rejected": 1.6802085638046265, + "step": 350 + }, + { + "epoch": 0.01997198374502434, + "grad_norm": 69.08805084228516, + "learning_rate": 9.99016098272624e-08, + "logits/chosen": -0.49542540311813354, + "logits/rejected": -0.6185147762298584, + "logps/chosen": -178.43887329101562, + "logps/rejected": -234.18759155273438, + "loss": 1.6395, + "nll_loss": 1.087710976600647, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 2.4325428009033203, + "rewards/margins": 1.0164880752563477, + "rewards/rejected": 1.4160544872283936, + "step": 360 + }, + { + "epoch": 0.020526761071275018, + "grad_norm": 59.481170654296875, + "learning_rate": 9.98960697089414e-08, + "logits/chosen": -0.5423728227615356, + "logits/rejected": -0.6052581071853638, + "logps/chosen": -170.61349487304688, + "logps/rejected": -195.85025024414062, + "loss": 1.6007, + "nll_loss": 1.1169687509536743, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 2.3467555046081543, + "rewards/margins": 0.7417221069335938, + "rewards/rejected": 1.605033278465271, + "step": 370 + }, + { + "epoch": 0.021081538397525693, + "grad_norm": 51.370872497558594, + "learning_rate": 9.9890378019861e-08, + "logits/chosen": -0.7442265748977661, + "logits/rejected": -0.8148695826530457, + "logps/chosen": -241.3246612548828, + "logps/rejected": -275.6942443847656, + "loss": 1.5691, + "nll_loss": 1.3698346614837646, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 2.58601450920105, + "rewards/margins": 0.5415834188461304, + "rewards/rejected": 2.044431209564209, + "step": 380 + }, + { + "epoch": 0.02163631572377637, + "grad_norm": 57.66556167602539, + "learning_rate": 9.988453477731102e-08, + "logits/chosen": -0.5455671548843384, + "logits/rejected": -0.7036603689193726, + "logps/chosen": -176.88845825195312, + "logps/rejected": -220.8589630126953, + "loss": 1.5857, + "nll_loss": 1.0153230428695679, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 2.3425281047821045, + "rewards/margins": 1.0104955434799194, + "rewards/rejected": 1.3320326805114746, + "step": 390 + }, + { + "epoch": 0.022191093050027047, + "grad_norm": 70.84477996826172, + "learning_rate": 9.987853999904168e-08, + "logits/chosen": -0.610481858253479, + "logits/rejected": -0.6613737344741821, + "logps/chosen": -218.7194366455078, + "logps/rejected": -236.94515991210938, + "loss": 1.6499, + "nll_loss": 1.2476584911346436, + "rewards/accuracies": 0.75, + "rewards/chosen": 2.5958473682403564, + "rewards/margins": 0.9840685725212097, + "rewards/rejected": 1.6117788553237915, + "step": 400 + }, + { + "epoch": 0.022745870376277722, + "grad_norm": 116.12479400634766, + "learning_rate": 9.987239370326348e-08, + "logits/chosen": -0.5972326993942261, + "logits/rejected": -0.6860564947128296, + "logps/chosen": -182.69808959960938, + "logps/rejected": -217.4706268310547, + "loss": 1.6819, + "nll_loss": 1.1688315868377686, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 2.6170666217803955, + "rewards/margins": 1.3096208572387695, + "rewards/rejected": 1.3074455261230469, + "step": 410 + }, + { + "epoch": 0.023300647702528397, + "grad_norm": 63.733253479003906, + "learning_rate": 9.986609590864719e-08, + "logits/chosen": -0.38191336393356323, + "logits/rejected": -0.4844127297401428, + "logps/chosen": -166.2962188720703, + "logps/rejected": -198.14926147460938, + "loss": 1.5327, + "nll_loss": 1.038710117340088, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 2.370504856109619, + "rewards/margins": 0.9496327638626099, + "rewards/rejected": 1.4208720922470093, + "step": 420 + }, + { + "epoch": 0.023855425028779072, + "grad_norm": 75.02265167236328, + "learning_rate": 9.985964663432382e-08, + "logits/chosen": -0.6233263611793518, + "logits/rejected": -0.6644175052642822, + "logps/chosen": -193.20115661621094, + "logps/rejected": -218.0725555419922, + "loss": 1.6417, + "nll_loss": 1.2874863147735596, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 2.6517550945281982, + "rewards/margins": 0.9659770727157593, + "rewards/rejected": 1.685778021812439, + "step": 430 + }, + { + "epoch": 0.02441020235502975, + "grad_norm": 65.05445098876953, + "learning_rate": 9.985304589988453e-08, + "logits/chosen": -0.606865406036377, + "logits/rejected": -0.6933251619338989, + "logps/chosen": -217.9680633544922, + "logps/rejected": -251.608642578125, + "loss": 1.5824, + "nll_loss": 1.2322168350219727, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 2.7905972003936768, + "rewards/margins": 1.4538475275039673, + "rewards/rejected": 1.3367496728897095, + "step": 440 + }, + { + "epoch": 0.024964979681280426, + "grad_norm": 74.69596099853516, + "learning_rate": 9.984629372538054e-08, + "logits/chosen": -0.5925602912902832, + "logits/rejected": -0.6561893224716187, + "logps/chosen": -231.200439453125, + "logps/rejected": -264.5725402832031, + "loss": 1.6613, + "nll_loss": 1.2283960580825806, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 2.739058256149292, + "rewards/margins": 0.8342123031616211, + "rewards/rejected": 1.904845952987671, + "step": 450 + }, + { + "epoch": 0.0255197570075311, + "grad_norm": 75.31664276123047, + "learning_rate": 9.983939013132314e-08, + "logits/chosen": -0.44524669647216797, + "logits/rejected": -0.5208362340927124, + "logps/chosen": -167.56126403808594, + "logps/rejected": -195.00735473632812, + "loss": 1.5971, + "nll_loss": 1.096200942993164, + "rewards/accuracies": 0.75, + "rewards/chosen": 2.3428120613098145, + "rewards/margins": 0.7069460153579712, + "rewards/rejected": 1.6358659267425537, + "step": 460 + }, + { + "epoch": 0.02607453433378178, + "grad_norm": 63.74409484863281, + "learning_rate": 9.98323351386836e-08, + "logits/chosen": -0.5915762782096863, + "logits/rejected": -0.6436539888381958, + "logps/chosen": -196.46066284179688, + "logps/rejected": -213.55349731445312, + "loss": 1.5516, + "nll_loss": 1.2401580810546875, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 2.625981092453003, + "rewards/margins": 1.0905460119247437, + "rewards/rejected": 1.5354353189468384, + "step": 470 + }, + { + "epoch": 0.026629311660032455, + "grad_norm": 87.08705139160156, + "learning_rate": 9.982512876889306e-08, + "logits/chosen": -0.4064413905143738, + "logits/rejected": -0.5316869020462036, + "logps/chosen": -157.93026733398438, + "logps/rejected": -187.75381469726562, + "loss": 1.5626, + "nll_loss": 0.9701600074768066, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 2.28480863571167, + "rewards/margins": 0.9264081120491028, + "rewards/rejected": 1.3584007024765015, + "step": 480 + }, + { + "epoch": 0.02718408898628313, + "grad_norm": 102.86444091796875, + "learning_rate": 9.981777104384251e-08, + "logits/chosen": -0.4961855411529541, + "logits/rejected": -0.5452759265899658, + "logps/chosen": -196.3448028564453, + "logps/rejected": -235.82666015625, + "loss": 1.6135, + "nll_loss": 1.1173430681228638, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 2.8101401329040527, + "rewards/margins": 1.1897588968276978, + "rewards/rejected": 1.6203811168670654, + "step": 490 + }, + { + "epoch": 0.027738866312533805, + "grad_norm": 93.84529876708984, + "learning_rate": 9.981026198588274e-08, + "logits/chosen": -0.6152495741844177, + "logits/rejected": -0.6937671899795532, + "logps/chosen": -199.5059356689453, + "logps/rejected": -227.841552734375, + "loss": 1.6164, + "nll_loss": 1.1395609378814697, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.346026659011841, + "rewards/margins": 1.5802319049835205, + "rewards/rejected": 1.765795111656189, + "step": 500 + }, + { + "epoch": 0.027738866312533805, + "eval_logits/chosen": -0.6478434801101685, + "eval_logits/rejected": -0.7143262028694153, + "eval_logps/chosen": -226.89523315429688, + "eval_logps/rejected": -264.53521728515625, + "eval_loss": 1.531517505645752, + "eval_nll_loss": 1.1829402446746826, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 3.1065964698791504, + "eval_rewards/margins": 1.6522696018218994, + "eval_rewards/rejected": 1.4543266296386719, + "eval_runtime": 17.055, + "eval_samples_per_second": 15.01, + "eval_steps_per_second": 1.876, + "step": 500 + }, + { + "epoch": 0.028293643638784484, + "grad_norm": 74.31314849853516, + "learning_rate": 9.980260161782426e-08, + "logits/chosen": -0.6298533082008362, + "logits/rejected": -0.6791015863418579, + "logps/chosen": -218.2083282470703, + "logps/rejected": -267.1485290527344, + "loss": 1.5716, + "nll_loss": 1.2364524602890015, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.068859577178955, + "rewards/margins": 1.1550592184066772, + "rewards/rejected": 1.9138002395629883, + "step": 510 + }, + { + "epoch": 0.02884842096503516, + "grad_norm": 68.77310943603516, + "learning_rate": 9.979478996293715e-08, + "logits/chosen": -0.6073177456855774, + "logits/rejected": -0.7440160512924194, + "logps/chosen": -205.8466339111328, + "logps/rejected": -260.246337890625, + "loss": 1.602, + "nll_loss": 1.171318769454956, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 3.0000762939453125, + "rewards/margins": 1.3454234600067139, + "rewards/rejected": 1.6546528339385986, + "step": 520 + }, + { + "epoch": 0.029403198291285834, + "grad_norm": 67.26824188232422, + "learning_rate": 9.978682704495114e-08, + "logits/chosen": -0.6818415522575378, + "logits/rejected": -0.7456626892089844, + "logps/chosen": -211.9142303466797, + "logps/rejected": -255.11474609375, + "loss": 1.5557, + "nll_loss": 1.2860023975372314, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.183370590209961, + "rewards/margins": 1.4452582597732544, + "rewards/rejected": 1.7381126880645752, + "step": 530 + }, + { + "epoch": 0.029957975617536513, + "grad_norm": 56.54665756225586, + "learning_rate": 9.977871288805541e-08, + "logits/chosen": -0.5180370211601257, + "logits/rejected": -0.6200538873672485, + "logps/chosen": -194.35459899902344, + "logps/rejected": -227.3801727294922, + "loss": 1.5135, + "nll_loss": 1.081203579902649, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 2.9965686798095703, + "rewards/margins": 1.2237999439239502, + "rewards/rejected": 1.7727687358856201, + "step": 540 + }, + { + "epoch": 0.030512752943787188, + "grad_norm": 56.38474655151367, + "learning_rate": 9.977044751689857e-08, + "logits/chosen": -0.3500790297985077, + "logits/rejected": -0.46968212723731995, + "logps/chosen": -147.5152587890625, + "logps/rejected": -186.73446655273438, + "loss": 1.5259, + "nll_loss": 0.9268163442611694, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 2.580355167388916, + "rewards/margins": 1.3773891925811768, + "rewards/rejected": 1.2029658555984497, + "step": 550 + }, + { + "epoch": 0.031067530270037863, + "grad_norm": 67.37399291992188, + "learning_rate": 9.976203095658858e-08, + "logits/chosen": -0.5463757514953613, + "logits/rejected": -0.6025634407997131, + "logps/chosen": -173.28712463378906, + "logps/rejected": -226.4482421875, + "loss": 1.541, + "nll_loss": 1.0410239696502686, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 2.802887201309204, + "rewards/margins": 1.544614553451538, + "rewards/rejected": 1.2582728862762451, + "step": 560 + }, + { + "epoch": 0.03162230759628854, + "grad_norm": 49.75868606567383, + "learning_rate": 9.975346323269267e-08, + "logits/chosen": -0.4543730616569519, + "logits/rejected": -0.5487458109855652, + "logps/chosen": -211.029541015625, + "logps/rejected": -250.74288940429688, + "loss": 1.498, + "nll_loss": 1.0694835186004639, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 3.212559938430786, + "rewards/margins": 1.8369373083114624, + "rewards/rejected": 1.3756221532821655, + "step": 570 + }, + { + "epoch": 0.03217708492253921, + "grad_norm": 61.089996337890625, + "learning_rate": 9.974474437123729e-08, + "logits/chosen": -0.540164053440094, + "logits/rejected": -0.6340337991714478, + "logps/chosen": -191.64480590820312, + "logps/rejected": -229.88601684570312, + "loss": 1.5814, + "nll_loss": 1.1106784343719482, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 2.882720947265625, + "rewards/margins": 1.5230586528778076, + "rewards/rejected": 1.3596618175506592, + "step": 580 + }, + { + "epoch": 0.03273186224878989, + "grad_norm": 104.65751647949219, + "learning_rate": 9.973587439870794e-08, + "logits/chosen": -0.4297700822353363, + "logits/rejected": -0.4823095202445984, + "logps/chosen": -164.86154174804688, + "logps/rejected": -193.41534423828125, + "loss": 1.5013, + "nll_loss": 1.0106995105743408, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 2.8032305240631104, + "rewards/margins": 1.1382739543914795, + "rewards/rejected": 1.6649566888809204, + "step": 590 + }, + { + "epoch": 0.03328663957504057, + "grad_norm": 71.44564819335938, + "learning_rate": 9.972685334204924e-08, + "logits/chosen": -0.46265238523483276, + "logits/rejected": -0.5998337864875793, + "logps/chosen": -156.38107299804688, + "logps/rejected": -203.08433532714844, + "loss": 1.5262, + "nll_loss": 0.9892654418945312, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 2.995356559753418, + "rewards/margins": 1.4686530828475952, + "rewards/rejected": 1.5267035961151123, + "step": 600 + }, + { + "epoch": 0.03384141690129124, + "grad_norm": 56.26408767700195, + "learning_rate": 9.97176812286647e-08, + "logits/chosen": -0.5253828763961792, + "logits/rejected": -0.6142227649688721, + "logps/chosen": -191.7180938720703, + "logps/rejected": -222.5738067626953, + "loss": 1.5112, + "nll_loss": 1.0993685722351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 3.0786499977111816, + "rewards/margins": 1.657843828201294, + "rewards/rejected": 1.4208059310913086, + "step": 610 + }, + { + "epoch": 0.03439619422754192, + "grad_norm": 63.693519592285156, + "learning_rate": 9.970835808641671e-08, + "logits/chosen": -0.49338769912719727, + "logits/rejected": -0.6104284524917603, + "logps/chosen": -183.81399536132812, + "logps/rejected": -230.7450408935547, + "loss": 1.5753, + "nll_loss": 1.096469521522522, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.137953042984009, + "rewards/margins": 2.151078939437866, + "rewards/rejected": 0.9868742227554321, + "step": 620 + }, + { + "epoch": 0.0349509715537926, + "grad_norm": 88.63583374023438, + "learning_rate": 9.969888394362646e-08, + "logits/chosen": -0.509103536605835, + "logits/rejected": -0.5807837843894958, + "logps/chosen": -160.57984924316406, + "logps/rejected": -197.91236877441406, + "loss": 1.5844, + "nll_loss": 1.0496104955673218, + "rewards/accuracies": 0.75, + "rewards/chosen": 2.8470611572265625, + "rewards/margins": 1.0343971252441406, + "rewards/rejected": 1.8126637935638428, + "step": 630 + }, + { + "epoch": 0.03550574888004327, + "grad_norm": 86.9113540649414, + "learning_rate": 9.968925882907385e-08, + "logits/chosen": -0.5038835406303406, + "logits/rejected": -0.5882635116577148, + "logps/chosen": -213.25167846679688, + "logps/rejected": -233.34841918945312, + "loss": 1.4754, + "nll_loss": 1.0951595306396484, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 3.2811102867126465, + "rewards/margins": 1.6264727115631104, + "rewards/rejected": 1.654637336730957, + "step": 640 + }, + { + "epoch": 0.03606052620629395, + "grad_norm": 68.81783294677734, + "learning_rate": 9.967948277199735e-08, + "logits/chosen": -0.537903368473053, + "logits/rejected": -0.5941354036331177, + "logps/chosen": -180.93685913085938, + "logps/rejected": -201.2696075439453, + "loss": 1.5086, + "nll_loss": 1.1625057458877563, + "rewards/accuracies": 0.625, + "rewards/chosen": 3.2426884174346924, + "rewards/margins": 1.281341314315796, + "rewards/rejected": 1.9613468647003174, + "step": 650 + }, + { + "epoch": 0.03661530353254463, + "grad_norm": 46.247196197509766, + "learning_rate": 9.966955580209398e-08, + "logits/chosen": -0.536319375038147, + "logits/rejected": -0.6314636468887329, + "logps/chosen": -193.7196807861328, + "logps/rejected": -213.14944458007812, + "loss": 1.4659, + "nll_loss": 1.107458472251892, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.1198184490203857, + "rewards/margins": 1.1047157049179077, + "rewards/rejected": 2.0151028633117676, + "step": 660 + }, + { + "epoch": 0.0371700808587953, + "grad_norm": 46.85151672363281, + "learning_rate": 9.96594779495192e-08, + "logits/chosen": -0.43557968735694885, + "logits/rejected": -0.531055748462677, + "logps/chosen": -155.8391571044922, + "logps/rejected": -196.74227905273438, + "loss": 1.5626, + "nll_loss": 1.0155750513076782, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 3.0035159587860107, + "rewards/margins": 1.622287392616272, + "rewards/rejected": 1.3812288045883179, + "step": 670 + }, + { + "epoch": 0.03772485818504598, + "grad_norm": 174.26632690429688, + "learning_rate": 9.964924924488679e-08, + "logits/chosen": -0.6362596750259399, + "logits/rejected": -0.7269699573516846, + "logps/chosen": -206.3799285888672, + "logps/rejected": -230.740966796875, + "loss": 1.4886, + "nll_loss": 1.1488772630691528, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 3.098313093185425, + "rewards/margins": 1.6047836542129517, + "rewards/rejected": 1.4935296773910522, + "step": 680 + }, + { + "epoch": 0.03827963551129665, + "grad_norm": 80.29759216308594, + "learning_rate": 9.963886971926878e-08, + "logits/chosen": -0.6532067060470581, + "logits/rejected": -0.7521528005599976, + "logps/chosen": -216.44223022460938, + "logps/rejected": -262.30413818359375, + "loss": 1.5963, + "nll_loss": 1.176645040512085, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.6967339515686035, + "rewards/margins": 1.538691520690918, + "rewards/rejected": 2.1580424308776855, + "step": 690 + }, + { + "epoch": 0.03883441283754733, + "grad_norm": 84.83824157714844, + "learning_rate": 9.96283394041954e-08, + "logits/chosen": -0.5008292198181152, + "logits/rejected": -0.6104961633682251, + "logps/chosen": -166.3019561767578, + "logps/rejected": -190.43441772460938, + "loss": 1.5288, + "nll_loss": 1.0193572044372559, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 2.90037202835083, + "rewards/margins": 1.5122241973876953, + "rewards/rejected": 1.3881480693817139, + "step": 700 + }, + { + "epoch": 0.03938919016379801, + "grad_norm": 57.603782653808594, + "learning_rate": 9.961765833165484e-08, + "logits/chosen": -0.7234429121017456, + "logits/rejected": -0.7425190210342407, + "logps/chosen": -206.9298553466797, + "logps/rejected": -241.0787811279297, + "loss": 1.5898, + "nll_loss": 1.2404186725616455, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.3879165649414062, + "rewards/margins": 1.2144347429275513, + "rewards/rejected": 2.1734814643859863, + "step": 710 + }, + { + "epoch": 0.03994396749004868, + "grad_norm": 55.41166687011719, + "learning_rate": 9.960682653409335e-08, + "logits/chosen": -0.4222160279750824, + "logits/rejected": -0.5175357460975647, + "logps/chosen": -190.08583068847656, + "logps/rejected": -220.3984375, + "loss": 1.5289, + "nll_loss": 1.068023920059204, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.291684627532959, + "rewards/margins": 1.6787996292114258, + "rewards/rejected": 1.6128854751586914, + "step": 720 + }, + { + "epoch": 0.04049874481629936, + "grad_norm": 54.48349380493164, + "learning_rate": 9.959584404441498e-08, + "logits/chosen": -0.5538616180419922, + "logits/rejected": -0.6187587380409241, + "logps/chosen": -182.77786254882812, + "logps/rejected": -196.11972045898438, + "loss": 1.4448, + "nll_loss": 1.0926969051361084, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 3.199425220489502, + "rewards/margins": 0.9773725271224976, + "rewards/rejected": 2.222052812576294, + "step": 730 + }, + { + "epoch": 0.041053522142550036, + "grad_norm": 114.1539077758789, + "learning_rate": 9.958471089598157e-08, + "logits/chosen": -0.4567710757255554, + "logits/rejected": -0.5534912943840027, + "logps/chosen": -155.90713500976562, + "logps/rejected": -199.89883422851562, + "loss": 1.4889, + "nll_loss": 0.9103935360908508, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 3.1568973064422607, + "rewards/margins": 1.3936717510223389, + "rewards/rejected": 1.7632255554199219, + "step": 740 + }, + { + "epoch": 0.04160829946880071, + "grad_norm": 62.2696533203125, + "learning_rate": 9.957342712261261e-08, + "logits/chosen": -0.6232119798660278, + "logits/rejected": -0.699394166469574, + "logps/chosen": -175.486083984375, + "logps/rejected": -237.1534881591797, + "loss": 1.5239, + "nll_loss": 1.146842122077942, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 3.41463041305542, + "rewards/margins": 1.6498804092407227, + "rewards/rejected": 1.7647498846054077, + "step": 750 + }, + { + "epoch": 0.04216307679505139, + "grad_norm": 67.86518859863281, + "learning_rate": 9.956199275858517e-08, + "logits/chosen": -0.5560430288314819, + "logits/rejected": -0.6693638563156128, + "logps/chosen": -180.52297973632812, + "logps/rejected": -226.023681640625, + "loss": 1.5402, + "nll_loss": 1.0728282928466797, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.674917697906494, + "rewards/margins": 1.7028071880340576, + "rewards/rejected": 1.9721105098724365, + "step": 760 + }, + { + "epoch": 0.042717854121302065, + "grad_norm": 62.7828254699707, + "learning_rate": 9.955040783863372e-08, + "logits/chosen": -0.6319522857666016, + "logits/rejected": -0.7485511302947998, + "logps/chosen": -214.3317108154297, + "logps/rejected": -242.33926391601562, + "loss": 1.4777, + "nll_loss": 1.2144583463668823, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.5419113636016846, + "rewards/margins": 1.8491764068603516, + "rewards/rejected": 1.692734956741333, + "step": 770 + }, + { + "epoch": 0.04327263144755274, + "grad_norm": 53.742855072021484, + "learning_rate": 9.953867239795012e-08, + "logits/chosen": -0.6601709127426147, + "logits/rejected": -0.734355092048645, + "logps/chosen": -194.30819702148438, + "logps/rejected": -248.2646026611328, + "loss": 1.4185, + "nll_loss": 1.2157113552093506, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.3003029823303223, + "rewards/margins": 1.761346459388733, + "rewards/rejected": 1.5389564037322998, + "step": 780 + }, + { + "epoch": 0.043827408773803415, + "grad_norm": 61.616336822509766, + "learning_rate": 9.95267864721835e-08, + "logits/chosen": -0.5370885729789734, + "logits/rejected": -0.6669055223464966, + "logps/chosen": -195.1478271484375, + "logps/rejected": -231.2130889892578, + "loss": 1.5329, + "nll_loss": 1.0364364385604858, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.594130754470825, + "rewards/margins": 2.225724220275879, + "rewards/rejected": 1.368406057357788, + "step": 790 + }, + { + "epoch": 0.044382186100054094, + "grad_norm": 75.67598724365234, + "learning_rate": 9.951475009744003e-08, + "logits/chosen": -0.7143956422805786, + "logits/rejected": -0.8007022142410278, + "logps/chosen": -206.11630249023438, + "logps/rejected": -242.289794921875, + "loss": 1.5044, + "nll_loss": 1.1931127309799194, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.010062217712402, + "rewards/margins": 1.7878021001815796, + "rewards/rejected": 2.222259998321533, + "step": 800 + }, + { + "epoch": 0.044936963426304766, + "grad_norm": 61.18656539916992, + "learning_rate": 9.950256331028302e-08, + "logits/chosen": -0.5474633574485779, + "logits/rejected": -0.6423521041870117, + "logps/chosen": -190.60850524902344, + "logps/rejected": -223.872802734375, + "loss": 1.5677, + "nll_loss": 1.1300718784332275, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.4836227893829346, + "rewards/margins": 1.1142834424972534, + "rewards/rejected": 2.3693394660949707, + "step": 810 + }, + { + "epoch": 0.045491740752555444, + "grad_norm": 47.220394134521484, + "learning_rate": 9.949022614773256e-08, + "logits/chosen": -0.3815156817436218, + "logits/rejected": -0.5080620050430298, + "logps/chosen": -151.33038330078125, + "logps/rejected": -183.33843994140625, + "loss": 1.4676, + "nll_loss": 0.9675423502922058, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.125697612762451, + "rewards/margins": 1.7089370489120483, + "rewards/rejected": 1.4167604446411133, + "step": 820 + }, + { + "epoch": 0.046046518078806116, + "grad_norm": 57.33856964111328, + "learning_rate": 9.94777386472657e-08, + "logits/chosen": -0.702529788017273, + "logits/rejected": -0.7468611598014832, + "logps/chosen": -195.8870391845703, + "logps/rejected": -253.25631713867188, + "loss": 1.4441, + "nll_loss": 1.1202274560928345, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.815953016281128, + "rewards/margins": 1.9469791650772095, + "rewards/rejected": 1.8689740896224976, + "step": 830 + }, + { + "epoch": 0.046601295405056795, + "grad_norm": 70.29081726074219, + "learning_rate": 9.9465100846816e-08, + "logits/chosen": -0.3773537278175354, + "logits/rejected": -0.48507270216941833, + "logps/chosen": -152.4845428466797, + "logps/rejected": -187.5793914794922, + "loss": 1.4435, + "nll_loss": 0.8839551210403442, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.077239513397217, + "rewards/margins": 1.1600372791290283, + "rewards/rejected": 1.9172027111053467, + "step": 840 + }, + { + "epoch": 0.04715607273130747, + "grad_norm": 62.970558166503906, + "learning_rate": 9.945231278477374e-08, + "logits/chosen": -0.4544038772583008, + "logits/rejected": -0.5889581441879272, + "logps/chosen": -163.62313842773438, + "logps/rejected": -207.5544891357422, + "loss": 1.4982, + "nll_loss": 1.0126638412475586, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 3.214735507965088, + "rewards/margins": 2.0006070137023926, + "rewards/rejected": 1.2141282558441162, + "step": 850 + }, + { + "epoch": 0.047710850057558145, + "grad_norm": 136.8060760498047, + "learning_rate": 9.943937449998556e-08, + "logits/chosen": -0.697186291217804, + "logits/rejected": -0.7817809581756592, + "logps/chosen": -224.75277709960938, + "logps/rejected": -257.46600341796875, + "loss": 1.6197, + "nll_loss": 1.2137398719787598, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 3.9525508880615234, + "rewards/margins": 1.7223342657089233, + "rewards/rejected": 2.2302165031433105, + "step": 860 + }, + { + "epoch": 0.04826562738380882, + "grad_norm": 75.36298370361328, + "learning_rate": 9.94262860317545e-08, + "logits/chosen": -0.5289721488952637, + "logits/rejected": -0.5983772873878479, + "logps/chosen": -175.97885131835938, + "logps/rejected": -205.8876953125, + "loss": 1.4627, + "nll_loss": 1.0002416372299194, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.4056830406188965, + "rewards/margins": 1.640223503112793, + "rewards/rejected": 1.765459418296814, + "step": 870 + }, + { + "epoch": 0.0488204047100595, + "grad_norm": 63.62454605102539, + "learning_rate": 9.941304741983973e-08, + "logits/chosen": -0.38287869095802307, + "logits/rejected": -0.5381209254264832, + "logps/chosen": -142.0276641845703, + "logps/rejected": -179.9833984375, + "loss": 1.4147, + "nll_loss": 0.9833440780639648, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 3.339392900466919, + "rewards/margins": 1.7146053314208984, + "rewards/rejected": 1.6247879266738892, + "step": 880 + }, + { + "epoch": 0.049375182036310174, + "grad_norm": 61.91400146484375, + "learning_rate": 9.939965870445664e-08, + "logits/chosen": -0.40123963356018066, + "logits/rejected": -0.5148253440856934, + "logps/chosen": -160.2271270751953, + "logps/rejected": -185.10690307617188, + "loss": 1.5126, + "nll_loss": 0.9827170372009277, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 3.2213611602783203, + "rewards/margins": 1.576289415359497, + "rewards/rejected": 1.6450717449188232, + "step": 890 + }, + { + "epoch": 0.04992995936256085, + "grad_norm": 69.13755798339844, + "learning_rate": 9.938611992627646e-08, + "logits/chosen": -0.552926242351532, + "logits/rejected": -0.6027761697769165, + "logps/chosen": -182.66445922851562, + "logps/rejected": -219.7320556640625, + "loss": 1.5206, + "nll_loss": 1.1046699285507202, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 3.8532652854919434, + "rewards/margins": 1.7462717294692993, + "rewards/rejected": 2.1069931983947754, + "step": 900 + }, + { + "epoch": 0.05048473668881153, + "grad_norm": 54.19386291503906, + "learning_rate": 9.937243112642638e-08, + "logits/chosen": -0.29705414175987244, + "logits/rejected": -0.4054291248321533, + "logps/chosen": -164.974609375, + "logps/rejected": -170.09359741210938, + "loss": 1.5039, + "nll_loss": 0.947147011756897, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.277088165283203, + "rewards/margins": 1.8471357822418213, + "rewards/rejected": 1.4299525022506714, + "step": 910 + }, + { + "epoch": 0.0510395140150622, + "grad_norm": 56.5627555847168, + "learning_rate": 9.935859234648924e-08, + "logits/chosen": -0.27827510237693787, + "logits/rejected": -0.43911415338516235, + "logps/chosen": -122.29353332519531, + "logps/rejected": -162.04446411132812, + "loss": 1.3927, + "nll_loss": 0.7657750844955444, + "rewards/accuracies": 0.75, + "rewards/chosen": 2.9163410663604736, + "rewards/margins": 1.802353858947754, + "rewards/rejected": 1.1139872074127197, + "step": 920 + }, + { + "epoch": 0.05159429134131288, + "grad_norm": 77.0843505859375, + "learning_rate": 9.934460362850354e-08, + "logits/chosen": -0.515708327293396, + "logits/rejected": -0.5827298164367676, + "logps/chosen": -187.35801696777344, + "logps/rejected": -232.73379516601562, + "loss": 1.5256, + "nll_loss": 1.2366583347320557, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.5616250038146973, + "rewards/margins": 1.882982611656189, + "rewards/rejected": 1.6786420345306396, + "step": 930 + }, + { + "epoch": 0.05214906866756356, + "grad_norm": 49.424930572509766, + "learning_rate": 9.93304650149632e-08, + "logits/chosen": -0.4492836892604828, + "logits/rejected": -0.5456808805465698, + "logps/chosen": -197.4065399169922, + "logps/rejected": -237.48648071289062, + "loss": 1.532, + "nll_loss": 1.0610148906707764, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.585148572921753, + "rewards/margins": 2.1268129348754883, + "rewards/rejected": 1.4583359956741333, + "step": 940 + }, + { + "epoch": 0.05270384599381423, + "grad_norm": 85.44376373291016, + "learning_rate": 9.931617654881752e-08, + "logits/chosen": -0.3419482111930847, + "logits/rejected": -0.45933622121810913, + "logps/chosen": -134.19366455078125, + "logps/rejected": -165.301025390625, + "loss": 1.4692, + "nll_loss": 0.9292080998420715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 3.1901347637176514, + "rewards/margins": 1.2426807880401611, + "rewards/rejected": 1.9474540948867798, + "step": 950 + }, + { + "epoch": 0.05325862332006491, + "grad_norm": 58.297786712646484, + "learning_rate": 9.930173827347097e-08, + "logits/chosen": -0.36291202902793884, + "logits/rejected": -0.4757510721683502, + "logps/chosen": -183.56942749023438, + "logps/rejected": -212.82138061523438, + "loss": 1.4557, + "nll_loss": 0.9521719217300415, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 3.624375820159912, + "rewards/margins": 1.6794826984405518, + "rewards/rejected": 1.94489324092865, + "step": 960 + }, + { + "epoch": 0.05381340064631558, + "grad_norm": 93.24034118652344, + "learning_rate": 9.928715023278314e-08, + "logits/chosen": -0.3988448977470398, + "logits/rejected": -0.4805443286895752, + "logps/chosen": -154.73455810546875, + "logps/rejected": -181.87069702148438, + "loss": 1.4925, + "nll_loss": 1.0295617580413818, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.4048991203308105, + "rewards/margins": 1.5122768878936768, + "rewards/rejected": 1.8926219940185547, + "step": 970 + }, + { + "epoch": 0.05436817797256626, + "grad_norm": 74.3322982788086, + "learning_rate": 9.927241247106855e-08, + "logits/chosen": -0.5332263708114624, + "logits/rejected": -0.6788077354431152, + "logps/chosen": -206.145751953125, + "logps/rejected": -250.1248016357422, + "loss": 1.4918, + "nll_loss": 1.1099421977996826, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.051599025726318, + "rewards/margins": 2.0857737064361572, + "rewards/rejected": 1.9658254384994507, + "step": 980 + }, + { + "epoch": 0.05492295529881694, + "grad_norm": 59.86268997192383, + "learning_rate": 9.92575250330965e-08, + "logits/chosen": -0.4802762567996979, + "logits/rejected": -0.593323826789856, + "logps/chosen": -199.94229125976562, + "logps/rejected": -239.09506225585938, + "loss": 1.4978, + "nll_loss": 1.1220160722732544, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.9692130088806152, + "rewards/margins": 1.8433116674423218, + "rewards/rejected": 2.125900983810425, + "step": 990 + }, + { + "epoch": 0.05547773262506761, + "grad_norm": 75.09992980957031, + "learning_rate": 9.924248796409105e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -167.0904998779297, + "logps/rejected": -193.38198852539062, + "loss": 1.4847, + "nll_loss": NaN, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 3.5240886211395264, + "rewards/margins": 1.3706116676330566, + "rewards/rejected": 2.1534767150878906, + "step": 1000 + }, + { + "epoch": 0.05547773262506761, + "eval_logits/chosen": -0.5849028825759888, + "eval_logits/rejected": -0.6528286933898926, + "eval_logps/chosen": -216.97853088378906, + "eval_logps/rejected": -263.5906066894531, + "eval_loss": 1.4300326108932495, + "eval_nll_loss": 1.1286273002624512, + "eval_rewards/accuracies": 0.84375, + "eval_rewards/chosen": 4.0982666015625, + "eval_rewards/margins": 2.5494768619537354, + "eval_rewards/rejected": 1.5487897396087646, + "eval_runtime": 17.0359, + "eval_samples_per_second": 15.027, + "eval_steps_per_second": 1.878, + "step": 1000 + }, + { + "epoch": 0.05603250995131829, + "grad_norm": 63.52638244628906, + "learning_rate": 9.922730130973071e-08, + "logits/chosen": -0.5006999969482422, + "logits/rejected": -0.6237480044364929, + "logps/chosen": -202.09140014648438, + "logps/rejected": -257.4411926269531, + "loss": 1.5155, + "nll_loss": 1.095794439315796, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.9617226123809814, + "rewards/margins": 2.351763963699341, + "rewards/rejected": 1.6099590063095093, + "step": 1010 + }, + { + "epoch": 0.05658728727756897, + "grad_norm": 60.034427642822266, + "learning_rate": 9.921196511614846e-08, + "logits/chosen": -0.42120856046676636, + "logits/rejected": -0.4760383665561676, + "logps/chosen": -141.9215087890625, + "logps/rejected": -157.95162963867188, + "loss": 1.4648, + "nll_loss": 0.9661208391189575, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.336350679397583, + "rewards/margins": 1.4146296977996826, + "rewards/rejected": 1.92172110080719, + "step": 1020 + }, + { + "epoch": 0.05714206460381964, + "grad_norm": 72.62284088134766, + "learning_rate": 9.919647942993148e-08, + "logits/chosen": -0.5252307057380676, + "logits/rejected": -0.6019777655601501, + "logps/chosen": -192.70838928222656, + "logps/rejected": -247.6120147705078, + "loss": 1.4889, + "nll_loss": 1.0725551843643188, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.920849561691284, + "rewards/margins": 1.793335199356079, + "rewards/rejected": 2.127514600753784, + "step": 1030 + }, + { + "epoch": 0.05769684193007032, + "grad_norm": 65.8388671875, + "learning_rate": 9.91808442981211e-08, + "logits/chosen": -0.41704192757606506, + "logits/rejected": -0.5714241862297058, + "logps/chosen": -186.66038513183594, + "logps/rejected": -229.0773468017578, + "loss": 1.3864, + "nll_loss": 0.9837188720703125, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.7575690746307373, + "rewards/margins": 2.2268686294555664, + "rewards/rejected": 1.5307005643844604, + "step": 1040 + }, + { + "epoch": 0.058251619256321, + "grad_norm": 77.91301727294922, + "learning_rate": 9.91650597682126e-08, + "logits/chosen": -0.7347007989883423, + "logits/rejected": -0.7379493713378906, + "logps/chosen": -224.37423706054688, + "logps/rejected": -263.5487976074219, + "loss": 1.4952, + "nll_loss": 1.2833170890808105, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.219830513000488, + "rewards/margins": 1.8176199197769165, + "rewards/rejected": 2.4022104740142822, + "step": 1050 + }, + { + "epoch": 0.05880639658257167, + "grad_norm": 50.89873123168945, + "learning_rate": 9.914912588815517e-08, + "logits/chosen": -0.4994504451751709, + "logits/rejected": -0.6095007658004761, + "logps/chosen": -169.8141326904297, + "logps/rejected": -214.57492065429688, + "loss": 1.4384, + "nll_loss": 1.0103446245193481, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.483236789703369, + "rewards/margins": 1.6239086389541626, + "rewards/rejected": 1.859328031539917, + "step": 1060 + }, + { + "epoch": 0.05936117390882235, + "grad_norm": 75.92971801757812, + "learning_rate": 9.913304270635156e-08, + "logits/chosen": -0.3685445785522461, + "logits/rejected": -0.4870659410953522, + "logps/chosen": -164.15875244140625, + "logps/rejected": -191.3291778564453, + "loss": 1.5313, + "nll_loss": 0.9337444305419922, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 3.4293224811553955, + "rewards/margins": 1.4811770915985107, + "rewards/rejected": 1.9481449127197266, + "step": 1070 + }, + { + "epoch": 0.059915951235073026, + "grad_norm": 71.02056121826172, + "learning_rate": 9.911681027165818e-08, + "logits/chosen": -0.39040133357048035, + "logits/rejected": -0.48705339431762695, + "logps/chosen": -138.97422790527344, + "logps/rejected": -162.4997100830078, + "loss": 1.4756, + "nll_loss": 0.9448060989379883, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.3441505432128906, + "rewards/margins": 1.700945258140564, + "rewards/rejected": 1.643204927444458, + "step": 1080 + }, + { + "epoch": 0.0604707285613237, + "grad_norm": 67.55992126464844, + "learning_rate": 9.910042863338474e-08, + "logits/chosen": -0.3325367569923401, + "logits/rejected": -0.46114760637283325, + "logps/chosen": -151.6166534423828, + "logps/rejected": -196.2194366455078, + "loss": 1.4461, + "nll_loss": 0.9018818140029907, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 3.460953950881958, + "rewards/margins": 2.2122249603271484, + "rewards/rejected": 1.2487289905548096, + "step": 1090 + }, + { + "epoch": 0.061025505887574376, + "grad_norm": 81.95460510253906, + "learning_rate": 9.908389784129423e-08, + "logits/chosen": -0.5383685231208801, + "logits/rejected": -0.6315664052963257, + "logps/chosen": -189.26133728027344, + "logps/rejected": -218.469970703125, + "loss": 1.4756, + "nll_loss": 1.1009684801101685, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.154654502868652, + "rewards/margins": 1.9512214660644531, + "rewards/rejected": 2.203433036804199, + "step": 1100 + }, + { + "epoch": 0.061580283213825054, + "grad_norm": 89.14315795898438, + "learning_rate": 9.906721794560272e-08, + "logits/chosen": -0.6819087862968445, + "logits/rejected": -0.7490144968032837, + "logps/chosen": -181.35693359375, + "logps/rejected": -242.1012420654297, + "loss": 1.4939, + "nll_loss": 1.1502264738082886, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.75810170173645, + "rewards/margins": 1.9803187847137451, + "rewards/rejected": 1.7777824401855469, + "step": 1110 + }, + { + "epoch": 0.062135060540075726, + "grad_norm": 47.63044738769531, + "learning_rate": 9.905038899697923e-08, + "logits/chosen": -0.4637017846107483, + "logits/rejected": -0.5964235067367554, + "logps/chosen": -166.28646850585938, + "logps/rejected": -200.87290954589844, + "loss": 1.3853, + "nll_loss": 0.9769280552864075, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.4817116260528564, + "rewards/margins": 1.9392492771148682, + "rewards/rejected": 1.542462706565857, + "step": 1120 + }, + { + "epoch": 0.0626898378663264, + "grad_norm": 115.41996765136719, + "learning_rate": 9.903341104654555e-08, + "logits/chosen": -0.6566618084907532, + "logits/rejected": -0.7295863628387451, + "logps/chosen": -207.2266082763672, + "logps/rejected": -257.237548828125, + "loss": 1.4687, + "nll_loss": 1.2293752431869507, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.08721923828125, + "rewards/margins": 1.7286536693572998, + "rewards/rejected": 2.358565330505371, + "step": 1130 + }, + { + "epoch": 0.06324461519257708, + "grad_norm": 101.11539459228516, + "learning_rate": 9.901628414587611e-08, + "logits/chosen": -0.4132419228553772, + "logits/rejected": -0.5075428485870361, + "logps/chosen": -157.56349182128906, + "logps/rejected": -176.73516845703125, + "loss": 1.4391, + "nll_loss": 1.0173633098602295, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.75007700920105, + "rewards/margins": 1.989717721939087, + "rewards/rejected": 1.760359525680542, + "step": 1140 + }, + { + "epoch": 0.06379939251882775, + "grad_norm": 74.02896118164062, + "learning_rate": 9.899900834699777e-08, + "logits/chosen": -0.4107815623283386, + "logits/rejected": -0.5122383236885071, + "logps/chosen": -172.81393432617188, + "logps/rejected": -206.0386199951172, + "loss": 1.5624, + "nll_loss": 0.9882047772407532, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.779421329498291, + "rewards/margins": 2.0062572956085205, + "rewards/rejected": 1.7731640338897705, + "step": 1150 + }, + { + "epoch": 0.06435416984507843, + "grad_norm": 46.51359558105469, + "learning_rate": 9.898158370238976e-08, + "logits/chosen": -0.7199384570121765, + "logits/rejected": -0.7897688746452332, + "logps/chosen": -253.076171875, + "logps/rejected": -301.18597412109375, + "loss": 1.4927, + "nll_loss": 1.3301750421524048, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.715771675109863, + "rewards/margins": 2.7596588134765625, + "rewards/rejected": 1.9561126232147217, + "step": 1160 + }, + { + "epoch": 0.06490894717132911, + "grad_norm": 70.0849609375, + "learning_rate": 9.896401026498343e-08, + "logits/chosen": -0.5783329010009766, + "logits/rejected": -0.6710189580917358, + "logps/chosen": -198.61477661132812, + "logps/rejected": -258.7814025878906, + "loss": 1.5312, + "nll_loss": 1.1829686164855957, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.90598726272583, + "rewards/margins": 2.0684738159179688, + "rewards/rejected": 1.8375133275985718, + "step": 1170 + }, + { + "epoch": 0.06546372449757978, + "grad_norm": 166.74185180664062, + "learning_rate": 9.894628808816212e-08, + "logits/chosen": -0.43889349699020386, + "logits/rejected": -0.5390881299972534, + "logps/chosen": -200.86856079101562, + "logps/rejected": -249.177734375, + "loss": 1.4699, + "nll_loss": 1.1773768663406372, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 3.9578583240509033, + "rewards/margins": 1.9102592468261719, + "rewards/rejected": 2.0475986003875732, + "step": 1180 + }, + { + "epoch": 0.06601850182383046, + "grad_norm": 67.43829345703125, + "learning_rate": 9.892841722576102e-08, + "logits/chosen": -0.4639360308647156, + "logits/rejected": -0.5482727885246277, + "logps/chosen": -206.2231903076172, + "logps/rejected": -238.5017547607422, + "loss": 1.4611, + "nll_loss": 1.0525023937225342, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.9667439460754395, + "rewards/margins": 1.8209073543548584, + "rewards/rejected": 2.145836353302002, + "step": 1190 + }, + { + "epoch": 0.06657327915008114, + "grad_norm": 52.553775787353516, + "learning_rate": 9.891039773206698e-08, + "logits/chosen": -0.26780468225479126, + "logits/rejected": -0.4361448287963867, + "logps/chosen": -144.7643585205078, + "logps/rejected": -160.23165893554688, + "loss": 1.4146, + "nll_loss": 0.84807288646698, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.4257991313934326, + "rewards/margins": 2.3991568088531494, + "rewards/rejected": 1.026642084121704, + "step": 1200 + }, + { + "epoch": 0.06712805647633181, + "grad_norm": 67.21038818359375, + "learning_rate": 9.889222966181832e-08, + "logits/chosen": -0.3794083595275879, + "logits/rejected": -0.4984433650970459, + "logps/chosen": -149.71102905273438, + "logps/rejected": -184.6141357421875, + "loss": 1.4207, + "nll_loss": 0.8969683647155762, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 3.4109930992126465, + "rewards/margins": 2.001525640487671, + "rewards/rejected": 1.4094676971435547, + "step": 1210 + }, + { + "epoch": 0.06768283380258248, + "grad_norm": 52.498809814453125, + "learning_rate": 9.887391307020474e-08, + "logits/chosen": -0.5022028684616089, + "logits/rejected": -0.6018491983413696, + "logps/chosen": -176.34487915039062, + "logps/rejected": -228.7765350341797, + "loss": 1.4615, + "nll_loss": 1.0426609516143799, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.007895469665527, + "rewards/margins": 2.2430477142333984, + "rewards/rejected": 1.7648475170135498, + "step": 1220 + }, + { + "epoch": 0.06823761112883317, + "grad_norm": 87.10646057128906, + "learning_rate": 9.885544801286707e-08, + "logits/chosen": -0.5170990228652954, + "logits/rejected": -0.6354082226753235, + "logps/chosen": -222.16311645507812, + "logps/rejected": -261.0590515136719, + "loss": 1.4259, + "nll_loss": 1.1672570705413818, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.265625476837158, + "rewards/margins": 1.9461997747421265, + "rewards/rejected": 2.319425106048584, + "step": 1230 + }, + { + "epoch": 0.06879238845508384, + "grad_norm": 51.09988784790039, + "learning_rate": 9.883683454589719e-08, + "logits/chosen": -0.5030714273452759, + "logits/rejected": -0.515740156173706, + "logps/chosen": -187.69720458984375, + "logps/rejected": -203.89950561523438, + "loss": 1.45, + "nll_loss": 1.1443597078323364, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.7398152351379395, + "rewards/margins": 1.2381267547607422, + "rewards/rejected": 2.5016884803771973, + "step": 1240 + }, + { + "epoch": 0.06934716578133451, + "grad_norm": 55.36213302612305, + "learning_rate": 9.881807272583775e-08, + "logits/chosen": -0.504234790802002, + "logits/rejected": -0.6037534475326538, + "logps/chosen": -199.28404235839844, + "logps/rejected": -232.842529296875, + "loss": 1.5279, + "nll_loss": 1.1022770404815674, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.129483699798584, + "rewards/margins": 2.379916191101074, + "rewards/rejected": 1.7495676279067993, + "step": 1250 + }, + { + "epoch": 0.0699019431075852, + "grad_norm": 81.34074401855469, + "learning_rate": 9.87991626096821e-08, + "logits/chosen": -0.39803606271743774, + "logits/rejected": -0.5363016724586487, + "logps/chosen": -169.8374786376953, + "logps/rejected": -200.1742706298828, + "loss": 1.4199, + "nll_loss": 1.0287504196166992, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.8434112071990967, + "rewards/margins": 2.135047435760498, + "rewards/rejected": 1.7083642482757568, + "step": 1260 + }, + { + "epoch": 0.07045672043383587, + "grad_norm": 58.16438674926758, + "learning_rate": 9.878010425487406e-08, + "logits/chosen": -0.3032079339027405, + "logits/rejected": -0.4739023745059967, + "logps/chosen": -171.28402709960938, + "logps/rejected": -232.79196166992188, + "loss": 1.4122, + "nll_loss": 0.9198330044746399, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.7980828285217285, + "rewards/margins": 2.5010132789611816, + "rewards/rejected": 1.2970690727233887, + "step": 1270 + }, + { + "epoch": 0.07101149776008654, + "grad_norm": 136.7597198486328, + "learning_rate": 9.876089771930773e-08, + "logits/chosen": -0.3859093189239502, + "logits/rejected": -0.480471670627594, + "logps/chosen": -202.7171173095703, + "logps/rejected": -246.14987182617188, + "loss": 1.4481, + "nll_loss": 1.058734655380249, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.078368186950684, + "rewards/margins": 2.5813488960266113, + "rewards/rejected": 1.497018814086914, + "step": 1280 + }, + { + "epoch": 0.07156627508633723, + "grad_norm": 72.67900848388672, + "learning_rate": 9.87415430613274e-08, + "logits/chosen": -0.6447448134422302, + "logits/rejected": -0.6883346438407898, + "logps/chosen": -196.11752319335938, + "logps/rejected": -229.57937622070312, + "loss": 1.5414, + "nll_loss": 1.1916755437850952, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 4.364915370941162, + "rewards/margins": 1.8597400188446045, + "rewards/rejected": 2.505175828933716, + "step": 1290 + }, + { + "epoch": 0.0721210524125879, + "grad_norm": 87.22559356689453, + "learning_rate": 9.872204033972725e-08, + "logits/chosen": -0.48257774114608765, + "logits/rejected": -0.5261390209197998, + "logps/chosen": -162.76255798339844, + "logps/rejected": -173.9418182373047, + "loss": 1.4177, + "nll_loss": 1.1087771654129028, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 3.952287197113037, + "rewards/margins": 0.9866729974746704, + "rewards/rejected": 2.9656143188476562, + "step": 1300 + }, + { + "epoch": 0.07267582973883857, + "grad_norm": 58.74521255493164, + "learning_rate": 9.87023896137513e-08, + "logits/chosen": -0.5969017744064331, + "logits/rejected": -0.6603950262069702, + "logps/chosen": -196.26272583007812, + "logps/rejected": -233.4521484375, + "loss": 1.4637, + "nll_loss": 1.133876085281372, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.516307830810547, + "rewards/margins": 2.2669475078582764, + "rewards/rejected": 2.2493605613708496, + "step": 1310 + }, + { + "epoch": 0.07323060706508926, + "grad_norm": 71.52519226074219, + "learning_rate": 9.868259094309312e-08, + "logits/chosen": -0.4480765461921692, + "logits/rejected": -0.5158835649490356, + "logps/chosen": -180.5970916748047, + "logps/rejected": -208.57308959960938, + "loss": 1.4415, + "nll_loss": 0.9754490852355957, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.8777382373809814, + "rewards/margins": 2.0324997901916504, + "rewards/rejected": 1.8452380895614624, + "step": 1320 + }, + { + "epoch": 0.07378538439133993, + "grad_norm": 80.70097351074219, + "learning_rate": 9.866264438789573e-08, + "logits/chosen": -0.3521033823490143, + "logits/rejected": -0.5131471157073975, + "logps/chosen": -164.20703125, + "logps/rejected": -206.5201873779297, + "loss": 1.4401, + "nll_loss": 0.8772087097167969, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.5664222240448, + "rewards/margins": 2.3574984073638916, + "rewards/rejected": 1.2089238166809082, + "step": 1330 + }, + { + "epoch": 0.0743401617175906, + "grad_norm": 82.3136215209961, + "learning_rate": 9.864255000875135e-08, + "logits/chosen": -0.6259538531303406, + "logits/rejected": -0.6996973156929016, + "logps/chosen": -176.74862670898438, + "logps/rejected": -215.646728515625, + "loss": 1.342, + "nll_loss": 1.1308798789978027, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.244176864624023, + "rewards/margins": 2.25408935546875, + "rewards/rejected": 1.9900877475738525, + "step": 1340 + }, + { + "epoch": 0.07489493904384127, + "grad_norm": 53.10133361816406, + "learning_rate": 9.862230786670127e-08, + "logits/chosen": -0.4824862480163574, + "logits/rejected": -0.5932101607322693, + "logps/chosen": -173.16281127929688, + "logps/rejected": -226.42526245117188, + "loss": 1.4382, + "nll_loss": 1.0056755542755127, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.18387508392334, + "rewards/margins": 2.5786943435668945, + "rewards/rejected": 1.6051805019378662, + "step": 1350 + }, + { + "epoch": 0.07544971637009196, + "grad_norm": 64.7579574584961, + "learning_rate": 9.860191802323567e-08, + "logits/chosen": -0.46298331022262573, + "logits/rejected": -0.5535684823989868, + "logps/chosen": -187.52774047851562, + "logps/rejected": -206.24996948242188, + "loss": 1.4767, + "nll_loss": 1.0748491287231445, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.873023271560669, + "rewards/margins": 1.7396652698516846, + "rewards/rejected": 2.1333580017089844, + "step": 1360 + }, + { + "epoch": 0.07600449369634263, + "grad_norm": 70.06917572021484, + "learning_rate": 9.858138054029334e-08, + "logits/chosen": -0.4316721558570862, + "logits/rejected": -0.5568591356277466, + "logps/chosen": -165.09683227539062, + "logps/rejected": -199.37698364257812, + "loss": 1.5074, + "nll_loss": 0.9594496488571167, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 3.6436493396759033, + "rewards/margins": 1.799843430519104, + "rewards/rejected": 1.8438060283660889, + "step": 1370 + }, + { + "epoch": 0.0765592710225933, + "grad_norm": 64.8796157836914, + "learning_rate": 9.85606954802616e-08, + "logits/chosen": -0.49656182527542114, + "logits/rejected": -0.6436377763748169, + "logps/chosen": -179.42794799804688, + "logps/rejected": -237.13687133789062, + "loss": 1.4397, + "nll_loss": 1.0689128637313843, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.391759872436523, + "rewards/margins": 2.9239375591278076, + "rewards/rejected": 1.4678226709365845, + "step": 1380 + }, + { + "epoch": 0.07711404834884399, + "grad_norm": 64.41740417480469, + "learning_rate": 9.85398629059761e-08, + "logits/chosen": -0.4562680125236511, + "logits/rejected": -0.5946453809738159, + "logps/chosen": -196.96487426757812, + "logps/rejected": -232.92086791992188, + "loss": 1.5122, + "nll_loss": 1.0467199087142944, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.238462924957275, + "rewards/margins": 2.175060987472534, + "rewards/rejected": 2.063401937484741, + "step": 1390 + }, + { + "epoch": 0.07766882567509466, + "grad_norm": 75.7577133178711, + "learning_rate": 9.851888288072053e-08, + "logits/chosen": -0.49670663475990295, + "logits/rejected": -0.5817729234695435, + "logps/chosen": -221.2942352294922, + "logps/rejected": -270.5111999511719, + "loss": 1.5238, + "nll_loss": 1.1094176769256592, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.342289924621582, + "rewards/margins": 1.7921016216278076, + "rewards/rejected": 2.5501883029937744, + "step": 1400 + }, + { + "epoch": 0.07822360300134533, + "grad_norm": 52.14347839355469, + "learning_rate": 9.849775546822654e-08, + "logits/chosen": -0.3518516719341278, + "logits/rejected": -0.47065719962120056, + "logps/chosen": -170.92910766601562, + "logps/rejected": -220.5255584716797, + "loss": 1.4916, + "nll_loss": 1.0167186260223389, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.730181932449341, + "rewards/margins": 2.0523428916931152, + "rewards/rejected": 1.6778392791748047, + "step": 1410 + }, + { + "epoch": 0.07877838032759601, + "grad_norm": 91.87435150146484, + "learning_rate": 9.847648073267349e-08, + "logits/chosen": -0.35065048933029175, + "logits/rejected": -0.44443875551223755, + "logps/chosen": -166.6290740966797, + "logps/rejected": -195.34881591796875, + "loss": 1.4514, + "nll_loss": 1.0776584148406982, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 3.8567519187927246, + "rewards/margins": 1.3364388942718506, + "rewards/rejected": 2.5203135013580322, + "step": 1420 + }, + { + "epoch": 0.07933315765384669, + "grad_norm": 70.29676055908203, + "learning_rate": 9.845505873868828e-08, + "logits/chosen": -0.5298658609390259, + "logits/rejected": -0.5880465507507324, + "logps/chosen": -199.88330078125, + "logps/rejected": -245.65884399414062, + "loss": 1.4259, + "nll_loss": 1.1225529909133911, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.7313690185546875, + "rewards/margins": 2.4238762855529785, + "rewards/rejected": 2.307492733001709, + "step": 1430 + }, + { + "epoch": 0.07988793498009736, + "grad_norm": 48.4738883972168, + "learning_rate": 9.843348955134512e-08, + "logits/chosen": -0.24719564616680145, + "logits/rejected": -0.36896735429763794, + "logps/chosen": -125.46437072753906, + "logps/rejected": -174.4749755859375, + "loss": 1.3892, + "nll_loss": 0.8810212016105652, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.6172351837158203, + "rewards/margins": 1.618438482284546, + "rewards/rejected": 1.9987967014312744, + "step": 1440 + }, + { + "epoch": 0.08044271230634804, + "grad_norm": 53.75647735595703, + "learning_rate": 9.841177323616539e-08, + "logits/chosen": -0.38867291808128357, + "logits/rejected": -0.45642417669296265, + "logps/chosen": -171.5618896484375, + "logps/rejected": -219.5745391845703, + "loss": 1.4276, + "nll_loss": 1.084578514099121, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.974804639816284, + "rewards/margins": 1.9698415994644165, + "rewards/rejected": 2.0049631595611572, + "step": 1450 + }, + { + "epoch": 0.08099748963259872, + "grad_norm": 77.83335876464844, + "learning_rate": 9.838990985911733e-08, + "logits/chosen": -0.2802060842514038, + "logits/rejected": -0.42082786560058594, + "logps/chosen": -182.0515594482422, + "logps/rejected": -234.0809783935547, + "loss": 1.36, + "nll_loss": 0.9822225570678711, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.118023872375488, + "rewards/margins": 2.575249671936035, + "rewards/rejected": 1.5427742004394531, + "step": 1460 + }, + { + "epoch": 0.08155226695884939, + "grad_norm": 102.21620178222656, + "learning_rate": 9.836789948661601e-08, + "logits/chosen": -0.525530993938446, + "logits/rejected": -0.6287695169448853, + "logps/chosen": -194.15786743164062, + "logps/rejected": -247.2244873046875, + "loss": 1.4288, + "nll_loss": 1.122293472290039, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.391050338745117, + "rewards/margins": 3.0546398162841797, + "rewards/rejected": 1.3364105224609375, + "step": 1470 + }, + { + "epoch": 0.08210704428510007, + "grad_norm": 45.071205139160156, + "learning_rate": 9.834574218552296e-08, + "logits/chosen": -0.47796911001205444, + "logits/rejected": -0.5628957748413086, + "logps/chosen": -219.58468627929688, + "logps/rejected": -254.8559112548828, + "loss": 1.4685, + "nll_loss": 1.1712572574615479, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.881595611572266, + "rewards/margins": 2.209044933319092, + "rewards/rejected": 2.6725502014160156, + "step": 1480 + }, + { + "epoch": 0.08266182161135074, + "grad_norm": 55.145233154296875, + "learning_rate": 9.832343802314609e-08, + "logits/chosen": -0.4577816426753998, + "logits/rejected": -0.5360755920410156, + "logps/chosen": -166.1474151611328, + "logps/rejected": -212.7266082763672, + "loss": 1.5458, + "nll_loss": 1.0354721546173096, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.9975013732910156, + "rewards/margins": 1.7976630926132202, + "rewards/rejected": 2.1998379230499268, + "step": 1490 + }, + { + "epoch": 0.08321659893760142, + "grad_norm": 60.92216491699219, + "learning_rate": 9.830098706723939e-08, + "logits/chosen": -0.4298805296421051, + "logits/rejected": -0.5215967893600464, + "logps/chosen": -168.5172576904297, + "logps/rejected": -216.61776733398438, + "loss": 1.4519, + "nll_loss": 1.0098575353622437, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 3.8528778553009033, + "rewards/margins": 2.0149474143981934, + "rewards/rejected": 1.837930679321289, + "step": 1500 + }, + { + "epoch": 0.08321659893760142, + "eval_logits/chosen": -0.5136753916740417, + "eval_logits/rejected": -0.5830292105674744, + "eval_logps/chosen": -210.95797729492188, + "eval_logps/rejected": -261.8099060058594, + "eval_loss": 1.3702356815338135, + "eval_nll_loss": 1.0946283340454102, + "eval_rewards/accuracies": 0.8125, + "eval_rewards/chosen": 4.700324535369873, + "eval_rewards/margins": 2.973465919494629, + "eval_rewards/rejected": 1.7268586158752441, + "eval_runtime": 17.0904, + "eval_samples_per_second": 14.979, + "eval_steps_per_second": 1.872, + "step": 1500 + }, + { + "epoch": 0.0837713762638521, + "grad_norm": 53.873538970947266, + "learning_rate": 9.82783893860028e-08, + "logits/chosen": -0.5111064314842224, + "logits/rejected": -0.5315500497817993, + "logps/chosen": -219.55331420898438, + "logps/rejected": -232.83358764648438, + "loss": 1.4566, + "nll_loss": 1.1236168146133423, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.430356502532959, + "rewards/margins": 2.316408634185791, + "rewards/rejected": 2.113948345184326, + "step": 1510 + }, + { + "epoch": 0.08432615359010277, + "grad_norm": 71.6829605102539, + "learning_rate": 9.825564504808194e-08, + "logits/chosen": -0.2526777386665344, + "logits/rejected": -0.4250311851501465, + "logps/chosen": -171.2292938232422, + "logps/rejected": -220.65261840820312, + "loss": 1.3977, + "nll_loss": 1.0460208654403687, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.5906548500061035, + "rewards/margins": 2.9093871116638184, + "rewards/rejected": 0.6812671422958374, + "step": 1520 + }, + { + "epoch": 0.08488093091635344, + "grad_norm": 113.11854553222656, + "learning_rate": 9.8232754122568e-08, + "logits/chosen": -0.40277594327926636, + "logits/rejected": -0.535345733165741, + "logps/chosen": -165.22146606445312, + "logps/rejected": -204.9488525390625, + "loss": 1.3966, + "nll_loss": 0.9646211862564087, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.141239166259766, + "rewards/margins": 2.4749937057495117, + "rewards/rejected": 1.6662452220916748, + "step": 1530 + }, + { + "epoch": 0.08543570824260413, + "grad_norm": 52.08089065551758, + "learning_rate": 9.820971667899738e-08, + "logits/chosen": -0.31119513511657715, + "logits/rejected": -0.412311851978302, + "logps/chosen": -157.13917541503906, + "logps/rejected": -171.68382263183594, + "loss": 1.4096, + "nll_loss": 0.9384014010429382, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.6781508922576904, + "rewards/margins": 1.3841872215270996, + "rewards/rejected": 2.29396390914917, + "step": 1540 + }, + { + "epoch": 0.0859904855688548, + "grad_norm": 47.32821273803711, + "learning_rate": 9.81865327873516e-08, + "logits/chosen": -0.38009652495384216, + "logits/rejected": -0.46700453758239746, + "logps/chosen": -166.48367309570312, + "logps/rejected": -226.55978393554688, + "loss": 1.4395, + "nll_loss": 1.0256550312042236, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.013241291046143, + "rewards/margins": 2.4705066680908203, + "rewards/rejected": 1.5427347421646118, + "step": 1550 + }, + { + "epoch": 0.08654526289510547, + "grad_norm": 62.39463806152344, + "learning_rate": 9.816320251805707e-08, + "logits/chosen": -0.42687439918518066, + "logits/rejected": -0.5247625708580017, + "logps/chosen": -184.75782775878906, + "logps/rejected": -218.777099609375, + "loss": 1.3588, + "nll_loss": 1.0533530712127686, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.659199237823486, + "rewards/margins": 2.8975088596343994, + "rewards/rejected": 1.7616904973983765, + "step": 1560 + }, + { + "epoch": 0.08710004022135616, + "grad_norm": 61.17539978027344, + "learning_rate": 9.813972594198482e-08, + "logits/chosen": -0.1231955885887146, + "logits/rejected": -0.18562906980514526, + "logps/chosen": -125.1419448852539, + "logps/rejected": -158.606201171875, + "loss": 1.52, + "nll_loss": 0.7582105398178101, + "rewards/accuracies": 0.625, + "rewards/chosen": 3.6675257682800293, + "rewards/margins": 1.8140029907226562, + "rewards/rejected": 1.8535226583480835, + "step": 1570 + }, + { + "epoch": 0.08765481754760683, + "grad_norm": 67.13497161865234, + "learning_rate": 9.811610313045036e-08, + "logits/chosen": -0.40413790941238403, + "logits/rejected": -0.5055649280548096, + "logps/chosen": -157.7107696533203, + "logps/rejected": -210.97189331054688, + "loss": 1.4775, + "nll_loss": 0.965429425239563, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.296413898468018, + "rewards/margins": 2.041860342025757, + "rewards/rejected": 2.2545535564422607, + "step": 1580 + }, + { + "epoch": 0.0882095948738575, + "grad_norm": 56.057708740234375, + "learning_rate": 9.809233415521336e-08, + "logits/chosen": -0.5187186598777771, + "logits/rejected": -0.649011492729187, + "logps/chosen": -191.6517791748047, + "logps/rejected": -231.47738647460938, + "loss": 1.5193, + "nll_loss": 1.0975863933563232, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.417916297912598, + "rewards/margins": 2.629579544067383, + "rewards/rejected": 1.7883371114730835, + "step": 1590 + }, + { + "epoch": 0.08876437220010819, + "grad_norm": 42.684879302978516, + "learning_rate": 9.806841908847757e-08, + "logits/chosen": -0.4185276925563812, + "logits/rejected": -0.5338040590286255, + "logps/chosen": -170.87448120117188, + "logps/rejected": -228.2291259765625, + "loss": 1.3534, + "nll_loss": 0.9347308278083801, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.035393238067627, + "rewards/margins": 2.006787061691284, + "rewards/rejected": 2.0286059379577637, + "step": 1600 + }, + { + "epoch": 0.08931914952635886, + "grad_norm": 44.60700607299805, + "learning_rate": 9.804435800289046e-08, + "logits/chosen": -0.4805383086204529, + "logits/rejected": -0.5604225397109985, + "logps/chosen": -190.8730926513672, + "logps/rejected": -237.6107940673828, + "loss": 1.3941, + "nll_loss": 1.0892784595489502, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.903960227966309, + "rewards/margins": 2.260164499282837, + "rewards/rejected": 2.6437954902648926, + "step": 1610 + }, + { + "epoch": 0.08987392685260953, + "grad_norm": 77.23152160644531, + "learning_rate": 9.802015097154314e-08, + "logits/chosen": -0.2898111641407013, + "logits/rejected": -0.43970757722854614, + "logps/chosen": -159.96951293945312, + "logps/rejected": -189.67385864257812, + "loss": 1.4362, + "nll_loss": 0.9260392189025879, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 3.9348278045654297, + "rewards/margins": 2.105313301086426, + "rewards/rejected": 1.8295142650604248, + "step": 1620 + }, + { + "epoch": 0.0904287041788602, + "grad_norm": 158.89340209960938, + "learning_rate": 9.799579806796998e-08, + "logits/chosen": -0.40225309133529663, + "logits/rejected": -0.5593348741531372, + "logps/chosen": -159.14244079589844, + "logps/rejected": -219.98593139648438, + "loss": 1.3974, + "nll_loss": 0.9277356863021851, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.9359307289123535, + "rewards/margins": 2.5688395500183105, + "rewards/rejected": 1.3670909404754639, + "step": 1630 + }, + { + "epoch": 0.09098348150511089, + "grad_norm": 59.240806579589844, + "learning_rate": 9.797129936614854e-08, + "logits/chosen": -0.3756170868873596, + "logits/rejected": -0.5068528056144714, + "logps/chosen": -181.37208557128906, + "logps/rejected": -230.23306274414062, + "loss": 1.3775, + "nll_loss": 1.0865579843521118, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.105896472930908, + "rewards/margins": 2.750957489013672, + "rewards/rejected": 1.3549387454986572, + "step": 1640 + }, + { + "epoch": 0.09153825883136156, + "grad_norm": 55.147117614746094, + "learning_rate": 9.794665494049925e-08, + "logits/chosen": -0.4884285032749176, + "logits/rejected": -0.5975539088249207, + "logps/chosen": -193.12168884277344, + "logps/rejected": -249.9346160888672, + "loss": 1.4176, + "nll_loss": 1.0450865030288696, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.538626670837402, + "rewards/margins": 2.036604404449463, + "rewards/rejected": 2.5020222663879395, + "step": 1650 + }, + { + "epoch": 0.09209303615761223, + "grad_norm": 81.85306549072266, + "learning_rate": 9.792186486588518e-08, + "logits/chosen": -0.4586809277534485, + "logits/rejected": -0.5848284959793091, + "logps/chosen": -202.53909301757812, + "logps/rejected": -270.3190002441406, + "loss": 1.4877, + "nll_loss": 1.1120998859405518, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.986132621765137, + "rewards/margins": 2.992452383041382, + "rewards/rejected": 1.9936797618865967, + "step": 1660 + }, + { + "epoch": 0.09264781348386292, + "grad_norm": 55.15419387817383, + "learning_rate": 9.789692921761188e-08, + "logits/chosen": -0.4707298278808594, + "logits/rejected": -0.6037745475769043, + "logps/chosen": -176.55191040039062, + "logps/rejected": -206.2583465576172, + "loss": 1.5395, + "nll_loss": 1.0647156238555908, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.59691858291626, + "rewards/margins": 2.427427291870117, + "rewards/rejected": 2.1694915294647217, + "step": 1670 + }, + { + "epoch": 0.09320259081011359, + "grad_norm": 55.7188720703125, + "learning_rate": 9.787184807142712e-08, + "logits/chosen": -0.3414674699306488, + "logits/rejected": -0.4763232171535492, + "logps/chosen": -168.2011260986328, + "logps/rejected": -199.79759216308594, + "loss": 1.4015, + "nll_loss": 0.9019485712051392, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.1074419021606445, + "rewards/margins": 2.4492523670196533, + "rewards/rejected": 1.6581900119781494, + "step": 1680 + }, + { + "epoch": 0.09375736813636426, + "grad_norm": 109.35962677001953, + "learning_rate": 9.784662150352062e-08, + "logits/chosen": -0.4362913966178894, + "logits/rejected": -0.4770258963108063, + "logps/chosen": -187.99073791503906, + "logps/rejected": -217.7333984375, + "loss": 1.4942, + "nll_loss": 1.0999925136566162, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.209378242492676, + "rewards/margins": 1.955529808998108, + "rewards/rejected": 2.2538483142852783, + "step": 1690 + }, + { + "epoch": 0.09431214546261495, + "grad_norm": 59.52936935424805, + "learning_rate": 9.782124959052387e-08, + "logits/chosen": -0.31050777435302734, + "logits/rejected": -0.4834163784980774, + "logps/chosen": -151.91183471679688, + "logps/rejected": -206.20889282226562, + "loss": 1.4293, + "nll_loss": 1.0492300987243652, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 3.9366631507873535, + "rewards/margins": 2.777130365371704, + "rewards/rejected": 1.1595325469970703, + "step": 1700 + }, + { + "epoch": 0.09486692278886562, + "grad_norm": 54.30078125, + "learning_rate": 9.779573240950986e-08, + "logits/chosen": -0.3345809876918793, + "logits/rejected": -0.49536198377609253, + "logps/chosen": -164.2360076904297, + "logps/rejected": -208.17587280273438, + "loss": 1.4578, + "nll_loss": 0.9239109754562378, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.0734710693359375, + "rewards/margins": 2.3916687965393066, + "rewards/rejected": 1.6818021535873413, + "step": 1710 + }, + { + "epoch": 0.09542170011511629, + "grad_norm": 70.32289123535156, + "learning_rate": 9.777007003799293e-08, + "logits/chosen": -0.3356882631778717, + "logits/rejected": -0.44931039214134216, + "logps/chosen": -144.88253784179688, + "logps/rejected": -189.18826293945312, + "loss": 1.3894, + "nll_loss": 0.9175260663032532, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.052679538726807, + "rewards/margins": 2.4650509357452393, + "rewards/rejected": 1.5876284837722778, + "step": 1720 + }, + { + "epoch": 0.09597647744136698, + "grad_norm": 63.81721878051758, + "learning_rate": 9.774426255392838e-08, + "logits/chosen": -0.4146268367767334, + "logits/rejected": -0.5499147772789001, + "logps/chosen": -183.33480834960938, + "logps/rejected": -236.8433837890625, + "loss": 1.4114, + "nll_loss": 1.0112346410751343, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.3223090171813965, + "rewards/margins": 3.125511646270752, + "rewards/rejected": 1.196797251701355, + "step": 1730 + }, + { + "epoch": 0.09653125476761765, + "grad_norm": 62.25202560424805, + "learning_rate": 9.771831003571235e-08, + "logits/chosen": -0.38659048080444336, + "logits/rejected": -0.5219605565071106, + "logps/chosen": -150.69058227539062, + "logps/rejected": -203.98416137695312, + "loss": 1.4122, + "nll_loss": 0.9478418231010437, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.085387229919434, + "rewards/margins": 2.139543056488037, + "rewards/rejected": 1.9458439350128174, + "step": 1740 + }, + { + "epoch": 0.09708603209386832, + "grad_norm": 59.93496322631836, + "learning_rate": 9.769221256218163e-08, + "logits/chosen": -0.3459341526031494, + "logits/rejected": -0.4226457476615906, + "logps/chosen": -146.584228515625, + "logps/rejected": -199.90530395507812, + "loss": 1.351, + "nll_loss": 1.013200283050537, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.9782562255859375, + "rewards/margins": 1.7799739837646484, + "rewards/rejected": 2.198282241821289, + "step": 1750 + }, + { + "epoch": 0.097640809420119, + "grad_norm": 176.6165313720703, + "learning_rate": 9.766597021261323e-08, + "logits/chosen": -0.4375430643558502, + "logits/rejected": -0.5497900247573853, + "logps/chosen": -189.30075073242188, + "logps/rejected": -244.5822296142578, + "loss": 1.3991, + "nll_loss": 1.0267442464828491, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.295384407043457, + "rewards/margins": 2.189558744430542, + "rewards/rejected": 2.105825185775757, + "step": 1760 + }, + { + "epoch": 0.09819558674636968, + "grad_norm": 69.3628158569336, + "learning_rate": 9.763958306672433e-08, + "logits/chosen": -0.3780445456504822, + "logits/rejected": -0.46087446808815, + "logps/chosen": -152.63388061523438, + "logps/rejected": -201.66612243652344, + "loss": 1.4051, + "nll_loss": 0.9626606702804565, + "rewards/accuracies": 0.625, + "rewards/chosen": 3.8893821239471436, + "rewards/margins": 1.7870652675628662, + "rewards/rejected": 2.1023168563842773, + "step": 1770 + }, + { + "epoch": 0.09875036407262035, + "grad_norm": 75.25344848632812, + "learning_rate": 9.761305120467192e-08, + "logits/chosen": -0.5166088342666626, + "logits/rejected": -0.6094520688056946, + "logps/chosen": -200.65896606445312, + "logps/rejected": -249.94650268554688, + "loss": 1.4433, + "nll_loss": 1.131493330001831, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.6476335525512695, + "rewards/margins": 2.0360946655273438, + "rewards/rejected": 2.6115384101867676, + "step": 1780 + }, + { + "epoch": 0.09930514139887103, + "grad_norm": 152.2681884765625, + "learning_rate": 9.758637470705263e-08, + "logits/chosen": -0.4471518397331238, + "logits/rejected": -0.5774090886116028, + "logps/chosen": -179.9422149658203, + "logps/rejected": -255.3101348876953, + "loss": 1.4773, + "nll_loss": 1.0336309671401978, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.0185651779174805, + "rewards/margins": 3.1911580562591553, + "rewards/rejected": 1.827406883239746, + "step": 1790 + }, + { + "epoch": 0.0998599187251217, + "grad_norm": 46.491920471191406, + "learning_rate": 9.755955365490245e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -175.13861083984375, + "logps/rejected": -212.89462280273438, + "loss": 1.3892, + "nll_loss": NaN, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 4.287023067474365, + "rewards/margins": 2.0191829204559326, + "rewards/rejected": 2.26784086227417, + "step": 1800 + }, + { + "epoch": 0.10041469605137238, + "grad_norm": 48.82244873046875, + "learning_rate": 9.753258812969647e-08, + "logits/chosen": -0.3557376265525818, + "logits/rejected": -0.5209270715713501, + "logps/chosen": -165.89688110351562, + "logps/rejected": -213.8446502685547, + "loss": 1.4122, + "nll_loss": 0.9721792340278625, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.09226655960083, + "rewards/margins": 2.568732976913452, + "rewards/rejected": 1.5235334634780884, + "step": 1810 + }, + { + "epoch": 0.10096947337762306, + "grad_norm": 55.496524810791016, + "learning_rate": 9.750547821334867e-08, + "logits/chosen": -0.4283338487148285, + "logits/rejected": -0.5281612873077393, + "logps/chosen": -168.81820678710938, + "logps/rejected": -198.29931640625, + "loss": 1.3929, + "nll_loss": 1.0313835144042969, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.235574245452881, + "rewards/margins": 1.6748672723770142, + "rewards/rejected": 2.5607073307037354, + "step": 1820 + }, + { + "epoch": 0.10152425070387373, + "grad_norm": 97.6830825805664, + "learning_rate": 9.747822398821163e-08, + "logits/chosen": -0.2920827269554138, + "logits/rejected": -0.38235941529273987, + "logps/chosen": -158.22317504882812, + "logps/rejected": -206.8228302001953, + "loss": 1.47, + "nll_loss": 0.9028658866882324, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.040580749511719, + "rewards/margins": 2.828991413116455, + "rewards/rejected": 1.2115892171859741, + "step": 1830 + }, + { + "epoch": 0.1020790280301244, + "grad_norm": 58.198036193847656, + "learning_rate": 9.74508255370763e-08, + "logits/chosen": -0.32580018043518066, + "logits/rejected": -0.4605080187320709, + "logps/chosen": -179.208984375, + "logps/rejected": -224.35549926757812, + "loss": 1.4179, + "nll_loss": 0.9562959671020508, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 4.129389762878418, + "rewards/margins": 2.2468628883361816, + "rewards/rejected": 1.8825271129608154, + "step": 1840 + }, + { + "epoch": 0.10263380535637509, + "grad_norm": 52.22254180908203, + "learning_rate": 9.74232829431718e-08, + "logits/chosen": -0.33645763993263245, + "logits/rejected": -0.40842223167419434, + "logps/chosen": -180.7502899169922, + "logps/rejected": -201.20211791992188, + "loss": 1.4376, + "nll_loss": 0.9569438099861145, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.07291316986084, + "rewards/margins": 2.01356840133667, + "rewards/rejected": 2.05934476852417, + "step": 1850 + }, + { + "epoch": 0.10318858268262576, + "grad_norm": 71.80465698242188, + "learning_rate": 9.739559629016504e-08, + "logits/chosen": -0.40867680311203003, + "logits/rejected": -0.5316632986068726, + "logps/chosen": -166.32406616210938, + "logps/rejected": -198.76620483398438, + "loss": 1.3739, + "nll_loss": 0.9971723556518555, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.112570285797119, + "rewards/margins": 2.156506299972534, + "rewards/rejected": 1.9560636281967163, + "step": 1860 + }, + { + "epoch": 0.10374336000887643, + "grad_norm": 81.6459732055664, + "learning_rate": 9.73677656621606e-08, + "logits/chosen": -0.2995353639125824, + "logits/rejected": -0.47169438004493713, + "logps/chosen": -158.6389923095703, + "logps/rejected": -209.0341033935547, + "loss": 1.3715, + "nll_loss": 0.9142085313796997, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.959867000579834, + "rewards/margins": 2.00348162651062, + "rewards/rejected": 1.9563853740692139, + "step": 1870 + }, + { + "epoch": 0.10429813733512712, + "grad_norm": 129.5898895263672, + "learning_rate": 9.733979114370039e-08, + "logits/chosen": -0.36188822984695435, + "logits/rejected": -0.49326688051223755, + "logps/chosen": -160.85301208496094, + "logps/rejected": -207.2004852294922, + "loss": 1.4365, + "nll_loss": 1.1323637962341309, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.231746196746826, + "rewards/margins": 2.284331798553467, + "rewards/rejected": 1.9474146366119385, + "step": 1880 + }, + { + "epoch": 0.10485291466137779, + "grad_norm": 35.927913665771484, + "learning_rate": 9.731167281976343e-08, + "logits/chosen": -0.20025630295276642, + "logits/rejected": -0.40075206756591797, + "logps/chosen": -136.97433471679688, + "logps/rejected": -193.73257446289062, + "loss": 1.3541, + "nll_loss": 0.7926868200302124, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 3.765598773956299, + "rewards/margins": 2.8811652660369873, + "rewards/rejected": 0.8844332695007324, + "step": 1890 + }, + { + "epoch": 0.10540769198762846, + "grad_norm": 52.5135498046875, + "learning_rate": 9.728341077576558e-08, + "logits/chosen": -0.3085986077785492, + "logits/rejected": -0.5054864883422852, + "logps/chosen": -144.28353881835938, + "logps/rejected": -197.0537567138672, + "loss": 1.4769, + "nll_loss": 0.9166660308837891, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.9618003368377686, + "rewards/margins": 1.8736820220947266, + "rewards/rejected": 2.088118553161621, + "step": 1900 + }, + { + "epoch": 0.10596246931387915, + "grad_norm": 67.79092407226562, + "learning_rate": 9.725500509755928e-08, + "logits/chosen": -0.5144957304000854, + "logits/rejected": -0.6106340885162354, + "logps/chosen": -197.23281860351562, + "logps/rejected": -215.8070068359375, + "loss": 1.4379, + "nll_loss": 1.1905429363250732, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.12790060043335, + "rewards/margins": 1.6955938339233398, + "rewards/rejected": 2.432307004928589, + "step": 1910 + }, + { + "epoch": 0.10651724664012982, + "grad_norm": 51.10321807861328, + "learning_rate": 9.722645587143332e-08, + "logits/chosen": -0.4561639726161957, + "logits/rejected": -0.5225346088409424, + "logps/chosen": -188.32070922851562, + "logps/rejected": -241.41073608398438, + "loss": 1.3968, + "nll_loss": 1.073650598526001, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.139203071594238, + "rewards/margins": 2.8594624996185303, + "rewards/rejected": 2.279740571975708, + "step": 1920 + }, + { + "epoch": 0.10707202396638049, + "grad_norm": 44.254398345947266, + "learning_rate": 9.719776318411248e-08, + "logits/chosen": -0.4358777105808258, + "logits/rejected": -0.5370916724205017, + "logps/chosen": -185.11911010742188, + "logps/rejected": -222.16281127929688, + "loss": 1.3303, + "nll_loss": 1.0807971954345703, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.67964506149292, + "rewards/margins": 2.1169772148132324, + "rewards/rejected": 2.5626680850982666, + "step": 1930 + }, + { + "epoch": 0.10762680129263116, + "grad_norm": 37.96709060668945, + "learning_rate": 9.716892712275742e-08, + "logits/chosen": -0.28701865673065186, + "logits/rejected": -0.4266432821750641, + "logps/chosen": -169.7532958984375, + "logps/rejected": -221.8658905029297, + "loss": 1.3887, + "nll_loss": 1.0122158527374268, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.1084699630737305, + "rewards/margins": 2.203993558883667, + "rewards/rejected": 1.9044767618179321, + "step": 1940 + }, + { + "epoch": 0.10818157861888185, + "grad_norm": 74.52854919433594, + "learning_rate": 9.713994777496426e-08, + "logits/chosen": -0.35312619805336, + "logits/rejected": -0.44773387908935547, + "logps/chosen": -211.1968536376953, + "logps/rejected": -245.0993194580078, + "loss": 1.3873, + "nll_loss": 1.113231897354126, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.581801414489746, + "rewards/margins": 1.4854530096054077, + "rewards/rejected": 3.096348285675049, + "step": 1950 + }, + { + "epoch": 0.10873635594513252, + "grad_norm": 54.876041412353516, + "learning_rate": 9.711082522876444e-08, + "logits/chosen": -0.32861536741256714, + "logits/rejected": -0.44481348991394043, + "logps/chosen": -170.75796508789062, + "logps/rejected": -215.32333374023438, + "loss": 1.431, + "nll_loss": 0.9936912655830383, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 4.210268020629883, + "rewards/margins": 1.985047698020935, + "rewards/rejected": 2.2252204418182373, + "step": 1960 + }, + { + "epoch": 0.10929113327138319, + "grad_norm": 57.80482864379883, + "learning_rate": 9.708155957262437e-08, + "logits/chosen": -0.2759546935558319, + "logits/rejected": -0.3380012512207031, + "logps/chosen": -157.31033325195312, + "logps/rejected": -198.11666870117188, + "loss": 1.3951, + "nll_loss": 1.0163196325302124, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.121042251586914, + "rewards/margins": 1.612929105758667, + "rewards/rejected": 2.508113145828247, + "step": 1970 + }, + { + "epoch": 0.10984591059763388, + "grad_norm": 86.91264343261719, + "learning_rate": 9.705215089544518e-08, + "logits/chosen": -0.3782724142074585, + "logits/rejected": -0.5245341062545776, + "logps/chosen": -195.6573944091797, + "logps/rejected": -244.4193878173828, + "loss": 1.4257, + "nll_loss": 1.0299656391143799, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.9395599365234375, + "rewards/margins": 2.814535140991211, + "rewards/rejected": 2.1250247955322266, + "step": 1980 + }, + { + "epoch": 0.11040068792388455, + "grad_norm": 43.226234436035156, + "learning_rate": 9.702259928656249e-08, + "logits/chosen": -0.10544047504663467, + "logits/rejected": -0.2132861167192459, + "logps/chosen": -164.14208984375, + "logps/rejected": -190.37362670898438, + "loss": 1.427, + "nll_loss": 1.0020391941070557, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.172114372253418, + "rewards/margins": 2.6576035022735596, + "rewards/rejected": 1.5145113468170166, + "step": 1990 + }, + { + "epoch": 0.11095546525013522, + "grad_norm": 64.444091796875, + "learning_rate": 9.69929048357461e-08, + "logits/chosen": -0.26225289702415466, + "logits/rejected": -0.3790452182292938, + "logps/chosen": -168.84194946289062, + "logps/rejected": -215.24838256835938, + "loss": 1.3836, + "nll_loss": 1.0095182657241821, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.499636650085449, + "rewards/margins": 2.150195598602295, + "rewards/rejected": 2.349440813064575, + "step": 2000 + }, + { + "epoch": 0.11095546525013522, + "eval_logits/chosen": -0.4270898401737213, + "eval_logits/rejected": -0.5021917819976807, + "eval_logps/chosen": -208.1876983642578, + "eval_logps/rejected": -262.03790283203125, + "eval_loss": 1.341249704360962, + "eval_nll_loss": 1.0813319683074951, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 4.977352142333984, + "eval_rewards/margins": 3.27329158782959, + "eval_rewards/rejected": 1.7040609121322632, + "eval_runtime": 16.7677, + "eval_samples_per_second": 15.267, + "eval_steps_per_second": 1.908, + "step": 2000 + }, + { + "epoch": 0.1115102425763859, + "grad_norm": 63.21308135986328, + "learning_rate": 9.69630676331997e-08, + "logits/chosen": -0.20330052077770233, + "logits/rejected": -0.34869131445884705, + "logps/chosen": -144.7039031982422, + "logps/rejected": -180.5189666748047, + "loss": 1.3936, + "nll_loss": 0.895270049571991, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.967087984085083, + "rewards/margins": 2.0332229137420654, + "rewards/rejected": 1.933864951133728, + "step": 2010 + }, + { + "epoch": 0.11206501990263658, + "grad_norm": 45.90394973754883, + "learning_rate": 9.693308776956066e-08, + "logits/chosen": -0.31239813566207886, + "logits/rejected": -0.39418482780456543, + "logps/chosen": -167.0019989013672, + "logps/rejected": -191.33401489257812, + "loss": 1.4304, + "nll_loss": 1.0243754386901855, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.358006954193115, + "rewards/margins": 1.9015105962753296, + "rewards/rejected": 2.456496238708496, + "step": 2020 + }, + { + "epoch": 0.11261979722888725, + "grad_norm": 104.78388214111328, + "learning_rate": 9.690296533589967e-08, + "logits/chosen": -0.40280312299728394, + "logits/rejected": -0.535481333732605, + "logps/chosen": -194.77020263671875, + "logps/rejected": -264.6238708496094, + "loss": 1.3595, + "nll_loss": 1.0842351913452148, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.004550933837891, + "rewards/margins": 3.121086597442627, + "rewards/rejected": 1.8834642171859741, + "step": 2030 + }, + { + "epoch": 0.11317457455513794, + "grad_norm": 76.62211608886719, + "learning_rate": 9.687270042372054e-08, + "logits/chosen": -0.4506068825721741, + "logits/rejected": -0.540243923664093, + "logps/chosen": -207.814208984375, + "logps/rejected": -275.18365478515625, + "loss": 1.4491, + "nll_loss": 1.1151775121688843, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.382890224456787, + "rewards/margins": 3.2836947441101074, + "rewards/rejected": 2.0991952419281006, + "step": 2040 + }, + { + "epoch": 0.11372935188138861, + "grad_norm": 56.325111389160156, + "learning_rate": 9.684229312495988e-08, + "logits/chosen": -0.37510785460472107, + "logits/rejected": -0.4385475516319275, + "logps/chosen": -180.5703582763672, + "logps/rejected": -232.40567016601562, + "loss": 1.4121, + "nll_loss": 1.07108473777771, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.931944847106934, + "rewards/margins": 2.992361307144165, + "rewards/rejected": 1.939583420753479, + "step": 2050 + }, + { + "epoch": 0.11428412920763928, + "grad_norm": 70.96062469482422, + "learning_rate": 9.681174353198685e-08, + "logits/chosen": -0.28505033254623413, + "logits/rejected": -0.40620937943458557, + "logps/chosen": -187.11529541015625, + "logps/rejected": -233.93344116210938, + "loss": 1.4701, + "nll_loss": 1.0873926877975464, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.375863075256348, + "rewards/margins": 2.9457461833953857, + "rewards/rejected": 1.4301164150238037, + "step": 2060 + }, + { + "epoch": 0.11483890653388996, + "grad_norm": 68.80590057373047, + "learning_rate": 9.678105173760285e-08, + "logits/chosen": -0.18282446265220642, + "logits/rejected": -0.39009958505630493, + "logps/chosen": -157.3737030029297, + "logps/rejected": -227.16940307617188, + "loss": 1.4138, + "nll_loss": 0.8834668397903442, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.3074188232421875, + "rewards/margins": 3.3912670612335205, + "rewards/rejected": 0.9161517024040222, + "step": 2070 + }, + { + "epoch": 0.11539368386014064, + "grad_norm": 70.03636932373047, + "learning_rate": 9.675021783504122e-08, + "logits/chosen": -0.30556461215019226, + "logits/rejected": -0.35940033197402954, + "logps/chosen": -158.53201293945312, + "logps/rejected": -205.63998413085938, + "loss": 1.3905, + "nll_loss": 1.0669022798538208, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.223614692687988, + "rewards/margins": 2.5228731632232666, + "rewards/rejected": 1.7007417678833008, + "step": 2080 + }, + { + "epoch": 0.11594846118639131, + "grad_norm": 55.57160949707031, + "learning_rate": 9.671924191796705e-08, + "logits/chosen": -0.2807254195213318, + "logits/rejected": -0.4453394412994385, + "logps/chosen": -152.5956268310547, + "logps/rejected": -212.7981719970703, + "loss": 1.4375, + "nll_loss": 0.9396727681159973, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.076318740844727, + "rewards/margins": 2.426535129547119, + "rewards/rejected": 1.649783730506897, + "step": 2090 + }, + { + "epoch": 0.116503238512642, + "grad_norm": 69.33473205566406, + "learning_rate": 9.668812408047677e-08, + "logits/chosen": -0.3224617540836334, + "logits/rejected": -0.4488009810447693, + "logps/chosen": -158.0413818359375, + "logps/rejected": -199.71896362304688, + "loss": 1.3829, + "nll_loss": 1.0075881481170654, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.9751713275909424, + "rewards/margins": 2.181872844696045, + "rewards/rejected": 1.7932977676391602, + "step": 2100 + }, + { + "epoch": 0.11705801583889267, + "grad_norm": 60.37345504760742, + "learning_rate": 9.665686441709795e-08, + "logits/chosen": -0.2691134512424469, + "logits/rejected": -0.3475096821784973, + "logps/chosen": -143.7865753173828, + "logps/rejected": -180.9481658935547, + "loss": 1.3225, + "nll_loss": 0.8925089836120605, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.228122711181641, + "rewards/margins": 1.9724922180175781, + "rewards/rejected": 2.2556302547454834, + "step": 2110 + }, + { + "epoch": 0.11761279316514334, + "grad_norm": 75.00153350830078, + "learning_rate": 9.6625463022789e-08, + "logits/chosen": -0.5172563195228577, + "logits/rejected": -0.5626317858695984, + "logps/chosen": -240.8362274169922, + "logps/rejected": -280.37213134765625, + "loss": 1.4174, + "nll_loss": 1.210435390472412, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.2368035316467285, + "rewards/margins": 2.980569839477539, + "rewards/rejected": 2.2562336921691895, + "step": 2120 + }, + { + "epoch": 0.11816757049139402, + "grad_norm": 92.12297058105469, + "learning_rate": 9.659391999293887e-08, + "logits/chosen": -0.3115905225276947, + "logits/rejected": -0.41539478302001953, + "logps/chosen": -164.57894897460938, + "logps/rejected": -210.8973846435547, + "loss": 1.425, + "nll_loss": 1.0213747024536133, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.291318893432617, + "rewards/margins": 2.2107698917388916, + "rewards/rejected": 2.0805487632751465, + "step": 2130 + }, + { + "epoch": 0.1187223478176447, + "grad_norm": 83.99625396728516, + "learning_rate": 9.656223542336671e-08, + "logits/chosen": -0.4168204367160797, + "logits/rejected": -0.5383001565933228, + "logps/chosen": -192.26571655273438, + "logps/rejected": -239.5349578857422, + "loss": 1.4688, + "nll_loss": 1.14029860496521, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.050605297088623, + "rewards/margins": 3.2355990409851074, + "rewards/rejected": 1.8150064945220947, + "step": 2140 + }, + { + "epoch": 0.11927712514389537, + "grad_norm": 30.861711502075195, + "learning_rate": 9.65304094103217e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -168.80422973632812, + "logps/rejected": -230.22152709960938, + "loss": 1.4595, + "nll_loss": NaN, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 4.314932346343994, + "rewards/margins": 2.1812479496002197, + "rewards/rejected": 2.1336848735809326, + "step": 2150 + }, + { + "epoch": 0.11983190247014605, + "grad_norm": 57.356346130371094, + "learning_rate": 9.649844205048267e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -189.3910369873047, + "logps/rejected": -230.20474243164062, + "loss": 1.4426, + "nll_loss": NaN, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.695038795471191, + "rewards/margins": 2.3562328815460205, + "rewards/rejected": 2.338805913925171, + "step": 2160 + }, + { + "epoch": 0.12038667979639672, + "grad_norm": 87.51946258544922, + "learning_rate": 9.646633344095778e-08, + "logits/chosen": -0.19644995033740997, + "logits/rejected": -0.37703800201416016, + "logps/chosen": -167.8843231201172, + "logps/rejected": -216.3540802001953, + "loss": 1.4262, + "nll_loss": 0.916726291179657, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.303994178771973, + "rewards/margins": 2.7248735427856445, + "rewards/rejected": 1.57912015914917, + "step": 2170 + }, + { + "epoch": 0.1209414571226474, + "grad_norm": 49.67451858520508, + "learning_rate": 9.643408367928432e-08, + "logits/chosen": -0.2518579959869385, + "logits/rejected": -0.43059998750686646, + "logps/chosen": -168.8131103515625, + "logps/rejected": -232.3626708984375, + "loss": 1.353, + "nll_loss": 0.9257919192314148, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 4.668224811553955, + "rewards/margins": 3.628369092941284, + "rewards/rejected": 1.0398554801940918, + "step": 2180 + }, + { + "epoch": 0.12149623444889808, + "grad_norm": 34.286373138427734, + "learning_rate": 9.640169286342832e-08, + "logits/chosen": -0.2528410851955414, + "logits/rejected": -0.3437945246696472, + "logps/chosen": -153.04017639160156, + "logps/rejected": -205.0538787841797, + "loss": 1.389, + "nll_loss": 1.110828161239624, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.003229141235352, + "rewards/margins": 1.8116796016693115, + "rewards/rejected": 2.191549777984619, + "step": 2190 + }, + { + "epoch": 0.12205101177514875, + "grad_norm": 55.41353988647461, + "learning_rate": 9.636916109178433e-08, + "logits/chosen": -0.251595675945282, + "logits/rejected": -0.3192422389984131, + "logps/chosen": -196.3855438232422, + "logps/rejected": -246.07162475585938, + "loss": 1.3982, + "nll_loss": 1.0199682712554932, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.79991340637207, + "rewards/margins": 2.171217441558838, + "rewards/rejected": 2.6286959648132324, + "step": 2200 + }, + { + "epoch": 0.12260578910139942, + "grad_norm": 102.95879364013672, + "learning_rate": 9.633648846317505e-08, + "logits/chosen": -0.2962803542613983, + "logits/rejected": -0.3776516020298004, + "logps/chosen": -202.29464721679688, + "logps/rejected": -251.30032348632812, + "loss": 1.4217, + "nll_loss": 1.0513694286346436, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.9984965324401855, + "rewards/margins": 2.980966091156006, + "rewards/rejected": 2.017530679702759, + "step": 2210 + }, + { + "epoch": 0.12316056642765011, + "grad_norm": 114.79225158691406, + "learning_rate": 9.630367507685111e-08, + "logits/chosen": -0.10262326151132584, + "logits/rejected": -0.22282084822654724, + "logps/chosen": -125.6513900756836, + "logps/rejected": -162.16390991210938, + "loss": 1.396, + "nll_loss": 0.80219566822052, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.7719345092773438, + "rewards/margins": 2.6604316234588623, + "rewards/rejected": 1.1115028858184814, + "step": 2220 + }, + { + "epoch": 0.12371534375390078, + "grad_norm": 68.96896362304688, + "learning_rate": 9.627072103249068e-08, + "logits/chosen": -0.30825161933898926, + "logits/rejected": -0.41360312700271606, + "logps/chosen": -183.67562866210938, + "logps/rejected": -223.6616668701172, + "loss": 1.4085, + "nll_loss": 1.0462400913238525, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.440855979919434, + "rewards/margins": 2.040837049484253, + "rewards/rejected": 2.4000189304351807, + "step": 2230 + }, + { + "epoch": 0.12427012108015145, + "grad_norm": 57.11476516723633, + "learning_rate": 9.623762643019926e-08, + "logits/chosen": -0.11846674978733063, + "logits/rejected": -0.26219338178634644, + "logps/chosen": -150.19265747070312, + "logps/rejected": -192.1765899658203, + "loss": 1.3587, + "nll_loss": 0.9004217982292175, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.165079593658447, + "rewards/margins": 2.5247955322265625, + "rewards/rejected": 1.6402838230133057, + "step": 2240 + }, + { + "epoch": 0.12482489840640212, + "grad_norm": 69.78447723388672, + "learning_rate": 9.620439137050927e-08, + "logits/chosen": -0.19403323531150818, + "logits/rejected": -0.30210986733436584, + "logps/chosen": -170.3800506591797, + "logps/rejected": -195.099609375, + "loss": 1.3113, + "nll_loss": 0.9437860250473022, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.235895156860352, + "rewards/margins": 2.4345595836639404, + "rewards/rejected": 1.801335334777832, + "step": 2250 + }, + { + "epoch": 0.1253796757326528, + "grad_norm": 77.17230987548828, + "learning_rate": 9.617101595437982e-08, + "logits/chosen": -0.05190909653902054, + "logits/rejected": -0.2363159954547882, + "logps/chosen": -140.75799560546875, + "logps/rejected": -211.8653106689453, + "loss": 1.3641, + "nll_loss": 0.8239955902099609, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.866058826446533, + "rewards/margins": 2.844568967819214, + "rewards/rejected": 1.0214897394180298, + "step": 2260 + }, + { + "epoch": 0.1259344530589035, + "grad_norm": 49.746421813964844, + "learning_rate": 9.613750028319642e-08, + "logits/chosen": -0.047062940895557404, + "logits/rejected": -0.15455859899520874, + "logps/chosen": -143.74386596679688, + "logps/rejected": -195.1613311767578, + "loss": 1.4324, + "nll_loss": 0.9071475863456726, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.077420234680176, + "rewards/margins": 2.103487730026245, + "rewards/rejected": 1.9739322662353516, + "step": 2270 + }, + { + "epoch": 0.12648923038515417, + "grad_norm": 51.56296920776367, + "learning_rate": 9.61038444587706e-08, + "logits/chosen": -0.13705101609230042, + "logits/rejected": -0.2227870225906372, + "logps/chosen": -129.62725830078125, + "logps/rejected": -178.81573486328125, + "loss": 1.3893, + "nll_loss": 0.9397897720336914, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.89752197265625, + "rewards/margins": 1.2923376560211182, + "rewards/rejected": 2.6051838397979736, + "step": 2280 + }, + { + "epoch": 0.12704400771140484, + "grad_norm": 42.245574951171875, + "learning_rate": 9.607004858333964e-08, + "logits/chosen": -0.44759708642959595, + "logits/rejected": -0.49922627210617065, + "logps/chosen": -196.9328155517578, + "logps/rejected": -245.5009307861328, + "loss": 1.4152, + "nll_loss": 1.1975692510604858, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.185031890869141, + "rewards/margins": 2.9058709144592285, + "rewards/rejected": 2.279160976409912, + "step": 2290 + }, + { + "epoch": 0.1275987850376555, + "grad_norm": 67.2354736328125, + "learning_rate": 9.60361127595663e-08, + "logits/chosen": -0.320967435836792, + "logits/rejected": -0.4030250012874603, + "logps/chosen": -155.604736328125, + "logps/rejected": -204.4038848876953, + "loss": 1.4695, + "nll_loss": 0.9363700747489929, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.52934455871582, + "rewards/margins": 2.116450548171997, + "rewards/rejected": 2.4128942489624023, + "step": 2300 + }, + { + "epoch": 0.12815356236390618, + "grad_norm": 63.65040969848633, + "learning_rate": 9.600203709053839e-08, + "logits/chosen": -0.3212184011936188, + "logits/rejected": -0.4064360558986664, + "logps/chosen": -176.45187377929688, + "logps/rejected": -233.3936309814453, + "loss": 1.4012, + "nll_loss": 1.0256242752075195, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.018517971038818, + "rewards/margins": 2.8526389598846436, + "rewards/rejected": 2.1658787727355957, + "step": 2310 + }, + { + "epoch": 0.12870833969015685, + "grad_norm": 63.4724235534668, + "learning_rate": 9.596782167976859e-08, + "logits/chosen": -0.11651863902807236, + "logits/rejected": -0.23722949624061584, + "logps/chosen": -128.25538635253906, + "logps/rejected": -159.47409057617188, + "loss": 1.5035, + "nll_loss": 0.8557698130607605, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 3.848564624786377, + "rewards/margins": 1.983266830444336, + "rewards/rejected": 1.8652980327606201, + "step": 2320 + }, + { + "epoch": 0.12926311701640755, + "grad_norm": 92.61750030517578, + "learning_rate": 9.593346663119406e-08, + "logits/chosen": -0.2534486651420593, + "logits/rejected": -0.34855300188064575, + "logps/chosen": -171.09487915039062, + "logps/rejected": -218.22903442382812, + "loss": 1.3338, + "nll_loss": 0.996772289276123, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.49443244934082, + "rewards/margins": 2.554107666015625, + "rewards/rejected": 1.9403250217437744, + "step": 2330 + }, + { + "epoch": 0.12981789434265822, + "grad_norm": 55.44472122192383, + "learning_rate": 9.589897204917612e-08, + "logits/chosen": -0.21271423995494843, + "logits/rejected": -0.30972006916999817, + "logps/chosen": -167.2717742919922, + "logps/rejected": -234.3817596435547, + "loss": 1.4084, + "nll_loss": 0.9619334936141968, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.373074531555176, + "rewards/margins": 2.7067365646362305, + "rewards/rejected": 1.6663382053375244, + "step": 2340 + }, + { + "epoch": 0.1303726716689089, + "grad_norm": 47.895484924316406, + "learning_rate": 9.586433803850002e-08, + "logits/chosen": -0.3097013235092163, + "logits/rejected": -0.4161096215248108, + "logps/chosen": -190.05905151367188, + "logps/rejected": -239.8685302734375, + "loss": 1.3589, + "nll_loss": 1.0496143102645874, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.816398620605469, + "rewards/margins": 2.6777281761169434, + "rewards/rejected": 2.1386706829071045, + "step": 2350 + }, + { + "epoch": 0.13092744899515957, + "grad_norm": 78.9901351928711, + "learning_rate": 9.582956470437448e-08, + "logits/chosen": -0.18011830747127533, + "logits/rejected": -0.3116056025028229, + "logps/chosen": -165.65756225585938, + "logps/rejected": -230.0876007080078, + "loss": 1.3762, + "nll_loss": 0.921899676322937, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.897641658782959, + "rewards/margins": 3.6547799110412598, + "rewards/rejected": 1.2428618669509888, + "step": 2360 + }, + { + "epoch": 0.13148222632141024, + "grad_norm": 59.10150909423828, + "learning_rate": 9.57946521524315e-08, + "logits/chosen": -0.18872778117656708, + "logits/rejected": -0.2746146023273468, + "logps/chosen": -181.9303436279297, + "logps/rejected": -245.42684936523438, + "loss": 1.4089, + "nll_loss": 0.9592266082763672, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.487532615661621, + "rewards/margins": 2.240269899368286, + "rewards/rejected": 2.247262477874756, + "step": 2370 + }, + { + "epoch": 0.1320370036476609, + "grad_norm": 83.45726776123047, + "learning_rate": 9.575960048872594e-08, + "logits/chosen": -0.18271943926811218, + "logits/rejected": -0.3004222512245178, + "logps/chosen": -165.93307495117188, + "logps/rejected": -194.65318298339844, + "loss": 1.3263, + "nll_loss": 0.9600454568862915, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.347784996032715, + "rewards/margins": 2.3133761882781982, + "rewards/rejected": 2.0344088077545166, + "step": 2380 + }, + { + "epoch": 0.13259178097391158, + "grad_norm": 63.885406494140625, + "learning_rate": 9.572440981973529e-08, + "logits/chosen": -0.0731525644659996, + "logits/rejected": -0.19846148788928986, + "logps/chosen": -142.23365783691406, + "logps/rejected": -195.64901733398438, + "loss": 1.2997, + "nll_loss": 0.7700817584991455, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.073976039886475, + "rewards/margins": 2.837172508239746, + "rewards/rejected": 1.2368037700653076, + "step": 2390 + }, + { + "epoch": 0.13314655830016228, + "grad_norm": 73.3870849609375, + "learning_rate": 9.56890802523593e-08, + "logits/chosen": -0.3486558198928833, + "logits/rejected": -0.46232134103775024, + "logps/chosen": -193.32313537597656, + "logps/rejected": -259.9707336425781, + "loss": 1.4372, + "nll_loss": 1.0756404399871826, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.954958915710449, + "rewards/margins": 3.1835172176361084, + "rewards/rejected": 1.7714424133300781, + "step": 2400 + }, + { + "epoch": 0.13370133562641295, + "grad_norm": 63.42995834350586, + "learning_rate": 9.565361189391958e-08, + "logits/chosen": -0.26387467980384827, + "logits/rejected": -0.3511542081832886, + "logps/chosen": -138.95855712890625, + "logps/rejected": -180.8175048828125, + "loss": 1.4044, + "nll_loss": 0.8946741819381714, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.0163798332214355, + "rewards/margins": 2.472100257873535, + "rewards/rejected": 1.54427969455719, + "step": 2410 + }, + { + "epoch": 0.13425611295266363, + "grad_norm": 60.89449691772461, + "learning_rate": 9.561800485215947e-08, + "logits/chosen": -0.24570202827453613, + "logits/rejected": -0.354546457529068, + "logps/chosen": -169.95260620117188, + "logps/rejected": -213.67324829101562, + "loss": 1.4563, + "nll_loss": 0.9607990980148315, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.3260698318481445, + "rewards/margins": 2.637070655822754, + "rewards/rejected": 1.6889995336532593, + "step": 2420 + }, + { + "epoch": 0.1348108902789143, + "grad_norm": 66.38797760009766, + "learning_rate": 9.55822592352435e-08, + "logits/chosen": -0.406474769115448, + "logits/rejected": -0.4907301962375641, + "logps/chosen": -184.1668701171875, + "logps/rejected": -223.18600463867188, + "loss": 1.3542, + "nll_loss": 1.0416371822357178, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.934833526611328, + "rewards/margins": 3.2224936485290527, + "rewards/rejected": 1.7123390436172485, + "step": 2430 + }, + { + "epoch": 0.13536566760516497, + "grad_norm": 76.15042877197266, + "learning_rate": 9.554637515175716e-08, + "logits/chosen": -0.3425951600074768, + "logits/rejected": -0.4833051264286041, + "logps/chosen": -144.9541778564453, + "logps/rejected": -199.95538330078125, + "loss": 1.323, + "nll_loss": 0.9206892251968384, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.348945617675781, + "rewards/margins": 2.3510971069335938, + "rewards/rejected": 1.9978487491607666, + "step": 2440 + }, + { + "epoch": 0.13592044493141564, + "grad_norm": 71.86099243164062, + "learning_rate": 9.551035271070663e-08, + "logits/chosen": -0.40942057967185974, + "logits/rejected": -0.505861759185791, + "logps/chosen": -166.58743286132812, + "logps/rejected": -208.3878173828125, + "loss": 1.3985, + "nll_loss": 1.0548110008239746, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.600960731506348, + "rewards/margins": 2.512728214263916, + "rewards/rejected": 2.0882325172424316, + "step": 2450 + }, + { + "epoch": 0.13647522225766634, + "grad_norm": 53.405128479003906, + "learning_rate": 9.547419202151832e-08, + "logits/chosen": -0.3199925720691681, + "logits/rejected": -0.42514413595199585, + "logps/chosen": -169.04751586914062, + "logps/rejected": -202.2387237548828, + "loss": 1.331, + "nll_loss": 0.8953672647476196, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.623941898345947, + "rewards/margins": 3.305065155029297, + "rewards/rejected": 1.3188765048980713, + "step": 2460 + }, + { + "epoch": 0.137029999583917, + "grad_norm": 49.56265640258789, + "learning_rate": 9.54378931940386e-08, + "logits/chosen": -0.2953924536705017, + "logits/rejected": -0.3510599434375763, + "logps/chosen": -169.89207458496094, + "logps/rejected": -211.8363800048828, + "loss": 1.3748, + "nll_loss": 0.9912246465682983, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.64087438583374, + "rewards/margins": 2.8930931091308594, + "rewards/rejected": 1.7477811574935913, + "step": 2470 + }, + { + "epoch": 0.13758477691016768, + "grad_norm": 119.32328033447266, + "learning_rate": 9.540145633853352e-08, + "logits/chosen": -0.2507792115211487, + "logits/rejected": -0.3617081940174103, + "logps/chosen": -166.5975799560547, + "logps/rejected": -203.4107666015625, + "loss": 1.4113, + "nll_loss": 1.057375192642212, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.210103511810303, + "rewards/margins": 2.3813328742980957, + "rewards/rejected": 1.8287712335586548, + "step": 2480 + }, + { + "epoch": 0.13813955423641835, + "grad_norm": 49.55260467529297, + "learning_rate": 9.536488156568836e-08, + "logits/chosen": -0.25834280252456665, + "logits/rejected": -0.3647618591785431, + "logps/chosen": -181.38433837890625, + "logps/rejected": -227.53884887695312, + "loss": 1.3538, + "nll_loss": 1.0067346096038818, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.781300067901611, + "rewards/margins": 3.069465160369873, + "rewards/rejected": 1.7118346691131592, + "step": 2490 + }, + { + "epoch": 0.13869433156266903, + "grad_norm": 72.44939422607422, + "learning_rate": 9.53281689866074e-08, + "logits/chosen": -0.3534695506095886, + "logits/rejected": -0.46817415952682495, + "logps/chosen": -177.7882843017578, + "logps/rejected": -227.4732666015625, + "loss": 1.3581, + "nll_loss": 1.0279648303985596, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.046257495880127, + "rewards/margins": 3.195817232131958, + "rewards/rejected": 1.850440263748169, + "step": 2500 + }, + { + "epoch": 0.13869433156266903, + "eval_logits/chosen": -0.41355088353157043, + "eval_logits/rejected": -0.4752618968486786, + "eval_logps/chosen": -205.35128784179688, + "eval_logps/rejected": -262.29425048828125, + "eval_loss": 1.344786286354065, + "eval_nll_loss": 1.066452980041504, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 5.260993003845215, + "eval_rewards/margins": 3.582566976547241, + "eval_rewards/rejected": 1.6784261465072632, + "eval_runtime": 16.7354, + "eval_samples_per_second": 15.297, + "eval_steps_per_second": 1.912, + "step": 2500 + }, + { + "epoch": 0.1392491088889197, + "grad_norm": 80.55164337158203, + "learning_rate": 9.529131871281351e-08, + "logits/chosen": -0.3231019079685211, + "logits/rejected": -0.38222265243530273, + "logps/chosen": -167.98793029785156, + "logps/rejected": -213.79104614257812, + "loss": 1.4011, + "nll_loss": 1.0517648458480835, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.999415397644043, + "rewards/margins": 2.8201375007629395, + "rewards/rejected": 2.1792776584625244, + "step": 2510 + }, + { + "epoch": 0.1398038862151704, + "grad_norm": 69.63285827636719, + "learning_rate": 9.525433085624788e-08, + "logits/chosen": -0.24561266601085663, + "logits/rejected": -0.31572413444519043, + "logps/chosen": -184.41099548339844, + "logps/rejected": -204.45999145507812, + "loss": 1.3704, + "nll_loss": 0.9846397638320923, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.188200950622559, + "rewards/margins": 2.166051149368286, + "rewards/rejected": 2.0221495628356934, + "step": 2520 + }, + { + "epoch": 0.14035866354142107, + "grad_norm": 53.4434700012207, + "learning_rate": 9.521720552926957e-08, + "logits/chosen": -0.4215124249458313, + "logits/rejected": -0.4378494322299957, + "logps/chosen": -199.27281188964844, + "logps/rejected": -233.3812713623047, + "loss": 1.43, + "nll_loss": 1.1478370428085327, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.725975036621094, + "rewards/margins": 1.5603116750717163, + "rewards/rejected": 3.165663242340088, + "step": 2530 + }, + { + "epoch": 0.14091344086767174, + "grad_norm": 73.6417236328125, + "learning_rate": 9.517994284465531e-08, + "logits/chosen": -0.2891234755516052, + "logits/rejected": -0.38554567098617554, + "logps/chosen": -153.97988891601562, + "logps/rejected": -211.9022674560547, + "loss": 1.3697, + "nll_loss": 0.9445359110832214, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.174930572509766, + "rewards/margins": 2.6157615184783936, + "rewards/rejected": 1.559168815612793, + "step": 2540 + }, + { + "epoch": 0.1414682181939224, + "grad_norm": 59.96479034423828, + "learning_rate": 9.514254291559905e-08, + "logits/chosen": -0.1881057322025299, + "logits/rejected": -0.2699630856513977, + "logps/chosen": -128.26663208007812, + "logps/rejected": -177.18075561523438, + "loss": 1.3287, + "nll_loss": 0.8441031575202942, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 4.137763977050781, + "rewards/margins": 2.495912551879883, + "rewards/rejected": 1.6418521404266357, + "step": 2550 + }, + { + "epoch": 0.14202299552017308, + "grad_norm": 65.00444030761719, + "learning_rate": 9.510500585571164e-08, + "logits/chosen": -0.4077147841453552, + "logits/rejected": -0.5147227048873901, + "logps/chosen": -179.1188201904297, + "logps/rejected": -270.15191650390625, + "loss": 1.4109, + "nll_loss": 1.0775953531265259, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.214983940124512, + "rewards/margins": 3.057213306427002, + "rewards/rejected": 2.157771110534668, + "step": 2560 + }, + { + "epoch": 0.14257777284642376, + "grad_norm": 60.63315200805664, + "learning_rate": 9.506733177902051e-08, + "logits/chosen": -0.3077200949192047, + "logits/rejected": -0.4166482985019684, + "logps/chosen": -184.0797119140625, + "logps/rejected": -217.2776641845703, + "loss": 1.4804, + "nll_loss": 1.0299670696258545, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.7556843757629395, + "rewards/margins": 2.782696485519409, + "rewards/rejected": 1.9729883670806885, + "step": 2570 + }, + { + "epoch": 0.14313255017267446, + "grad_norm": 68.85614013671875, + "learning_rate": 9.502952079996933e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -166.62692260742188, + "logps/rejected": -220.95968627929688, + "loss": 1.3492, + "nll_loss": NaN, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.6029486656188965, + "rewards/margins": 2.756474494934082, + "rewards/rejected": 1.8464739322662354, + "step": 2580 + }, + { + "epoch": 0.14368732749892513, + "grad_norm": 37.320533752441406, + "learning_rate": 9.49915730334176e-08, + "logits/chosen": -0.3359619677066803, + "logits/rejected": -0.41889262199401855, + "logps/chosen": -158.69314575195312, + "logps/rejected": -187.04347229003906, + "loss": 1.3012, + "nll_loss": 0.8397552371025085, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.5668864250183105, + "rewards/margins": 2.7107009887695312, + "rewards/rejected": 1.8561855554580688, + "step": 2590 + }, + { + "epoch": 0.1442421048251758, + "grad_norm": 81.52698516845703, + "learning_rate": 9.495348859464041e-08, + "logits/chosen": -0.29530245065689087, + "logits/rejected": -0.4065031111240387, + "logps/chosen": -196.07713317871094, + "logps/rejected": -223.0460662841797, + "loss": 1.4478, + "nll_loss": 1.1197636127471924, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.687514305114746, + "rewards/margins": 2.8510119915008545, + "rewards/rejected": 1.8365027904510498, + "step": 2600 + }, + { + "epoch": 0.14479688215142647, + "grad_norm": 67.42103576660156, + "learning_rate": 9.491526759932793e-08, + "logits/chosen": -0.3822721838951111, + "logits/rejected": -0.4680250287055969, + "logps/chosen": -198.443359375, + "logps/rejected": -236.1912384033203, + "loss": 1.4023, + "nll_loss": 1.0499932765960693, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.903171539306641, + "rewards/margins": 1.9136698246002197, + "rewards/rejected": 2.989501714706421, + "step": 2610 + }, + { + "epoch": 0.14535165947767714, + "grad_norm": 62.30340576171875, + "learning_rate": 9.487691016358524e-08, + "logits/chosen": -0.26125961542129517, + "logits/rejected": -0.42635488510131836, + "logps/chosen": -184.85794067382812, + "logps/rejected": -258.67816162109375, + "loss": 1.3805, + "nll_loss": 1.0284109115600586, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 4.900014400482178, + "rewards/margins": 4.008078575134277, + "rewards/rejected": 0.8919361233711243, + "step": 2620 + }, + { + "epoch": 0.1459064368039278, + "grad_norm": 103.89203643798828, + "learning_rate": 9.483841640393181e-08, + "logits/chosen": -0.33772093057632446, + "logits/rejected": -0.45589035749435425, + "logps/chosen": -168.879150390625, + "logps/rejected": -214.2537078857422, + "loss": 1.4352, + "nll_loss": 0.9708501100540161, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.847733497619629, + "rewards/margins": 2.6163687705993652, + "rewards/rejected": 2.2313640117645264, + "step": 2630 + }, + { + "epoch": 0.1464612141301785, + "grad_norm": 50.89051055908203, + "learning_rate": 9.47997864373013e-08, + "logits/chosen": -0.2704085409641266, + "logits/rejected": -0.32763293385505676, + "logps/chosen": -138.16543579101562, + "logps/rejected": -175.11236572265625, + "loss": 1.3185, + "nll_loss": 0.8956116437911987, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.168116092681885, + "rewards/margins": 2.5568606853485107, + "rewards/rejected": 1.6112550497055054, + "step": 2640 + }, + { + "epoch": 0.14701599145642918, + "grad_norm": 48.47789001464844, + "learning_rate": 9.47610203810411e-08, + "logits/chosen": -0.35066038370132446, + "logits/rejected": -0.4566461145877838, + "logps/chosen": -175.8914031982422, + "logps/rejected": -235.3515625, + "loss": 1.3196, + "nll_loss": 0.9624403119087219, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.729374408721924, + "rewards/margins": 2.690577983856201, + "rewards/rejected": 2.038796901702881, + "step": 2650 + }, + { + "epoch": 0.14757076878267986, + "grad_norm": 43.42479705810547, + "learning_rate": 9.472211835291199e-08, + "logits/chosen": -0.4881797432899475, + "logits/rejected": -0.5933297872543335, + "logps/chosen": -187.93209838867188, + "logps/rejected": -237.4537353515625, + "loss": 1.3203, + "nll_loss": 1.1305028200149536, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.392927169799805, + "rewards/margins": 2.4824376106262207, + "rewards/rejected": 2.910489559173584, + "step": 2660 + }, + { + "epoch": 0.14812554610893053, + "grad_norm": 65.60295104980469, + "learning_rate": 9.468308047108779e-08, + "logits/chosen": -0.43689948320388794, + "logits/rejected": -0.4873555302619934, + "logps/chosen": -210.8523406982422, + "logps/rejected": -237.1154327392578, + "loss": 1.4299, + "nll_loss": 1.1426368951797485, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.3748369216918945, + "rewards/margins": 2.182204484939575, + "rewards/rejected": 3.1926326751708984, + "step": 2670 + }, + { + "epoch": 0.1486803234351812, + "grad_norm": 91.42697143554688, + "learning_rate": 9.464390685415504e-08, + "logits/chosen": -0.3554970324039459, + "logits/rejected": -0.3768675923347473, + "logps/chosen": -195.17538452148438, + "logps/rejected": -215.8704071044922, + "loss": 1.4193, + "nll_loss": 1.110939860343933, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.654454231262207, + "rewards/margins": 1.4705301523208618, + "rewards/rejected": 3.183924436569214, + "step": 2680 + }, + { + "epoch": 0.14923510076143187, + "grad_norm": 68.16394805908203, + "learning_rate": 9.46045976211126e-08, + "logits/chosen": -0.4355560839176178, + "logits/rejected": -0.5083562135696411, + "logps/chosen": -196.14529418945312, + "logps/rejected": -241.79342651367188, + "loss": 1.5208, + "nll_loss": 1.1575534343719482, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.816002368927002, + "rewards/margins": 2.4721813201904297, + "rewards/rejected": 2.343820333480835, + "step": 2690 + }, + { + "epoch": 0.14978987808768254, + "grad_norm": 43.191001892089844, + "learning_rate": 9.456515289137125e-08, + "logits/chosen": -0.2441895306110382, + "logits/rejected": -0.3482380509376526, + "logps/chosen": -182.13026428222656, + "logps/rejected": -232.80166625976562, + "loss": 1.3585, + "nll_loss": 0.9620084762573242, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.937588214874268, + "rewards/margins": 2.732166290283203, + "rewards/rejected": 2.2054219245910645, + "step": 2700 + }, + { + "epoch": 0.15034465541393324, + "grad_norm": 72.89177703857422, + "learning_rate": 9.452557278475344e-08, + "logits/chosen": -0.42393484711647034, + "logits/rejected": -0.5233877301216125, + "logps/chosen": -169.06707763671875, + "logps/rejected": -248.55252075195312, + "loss": 1.3353, + "nll_loss": 1.0183719396591187, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.008650779724121, + "rewards/margins": 2.579192638397217, + "rewards/rejected": 2.4294581413269043, + "step": 2710 + }, + { + "epoch": 0.15089943274018391, + "grad_norm": 61.85404968261719, + "learning_rate": 9.448585742149279e-08, + "logits/chosen": -0.3834911584854126, + "logits/rejected": -0.516968846321106, + "logps/chosen": -186.55929565429688, + "logps/rejected": -254.18295288085938, + "loss": 1.4313, + "nll_loss": 1.0471513271331787, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.193806171417236, + "rewards/margins": 2.868375301361084, + "rewards/rejected": 2.3254313468933105, + "step": 2720 + }, + { + "epoch": 0.15145421006643459, + "grad_norm": 58.406558990478516, + "learning_rate": 9.444600692223388e-08, + "logits/chosen": -0.41122984886169434, + "logits/rejected": -0.5016804337501526, + "logps/chosen": -190.3726348876953, + "logps/rejected": -236.7998046875, + "loss": 1.3765, + "nll_loss": 1.0485427379608154, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.190579891204834, + "rewards/margins": 2.7871081829071045, + "rewards/rejected": 2.4034721851348877, + "step": 2730 + }, + { + "epoch": 0.15200898739268526, + "grad_norm": 55.32042694091797, + "learning_rate": 9.44060214080317e-08, + "logits/chosen": -0.19496165215969086, + "logits/rejected": -0.30803734064102173, + "logps/chosen": -174.32630920410156, + "logps/rejected": -217.2548370361328, + "loss": 1.4081, + "nll_loss": 0.9257704615592957, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.517314910888672, + "rewards/margins": 2.4943456649780273, + "rewards/rejected": 2.0229687690734863, + "step": 2740 + }, + { + "epoch": 0.15256376471893593, + "grad_norm": 50.69704818725586, + "learning_rate": 9.436590100035144e-08, + "logits/chosen": -0.4139935076236725, + "logits/rejected": -0.48557084798812866, + "logps/chosen": -186.61923217773438, + "logps/rejected": -255.1016387939453, + "loss": 1.3999, + "nll_loss": 1.157854676246643, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.923837661743164, + "rewards/margins": 2.315887928009033, + "rewards/rejected": 2.607950210571289, + "step": 2750 + }, + { + "epoch": 0.1531185420451866, + "grad_norm": 71.0564956665039, + "learning_rate": 9.432564582106803e-08, + "logits/chosen": -0.34736424684524536, + "logits/rejected": -0.397489994764328, + "logps/chosen": -162.69871520996094, + "logps/rejected": -206.42636108398438, + "loss": 1.287, + "nll_loss": 1.0692551136016846, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.872149467468262, + "rewards/margins": 2.6135616302490234, + "rewards/rejected": 2.2585878372192383, + "step": 2760 + }, + { + "epoch": 0.1536733193714373, + "grad_norm": 119.2959213256836, + "learning_rate": 9.428525599246582e-08, + "logits/chosen": -0.4050524830818176, + "logits/rejected": -0.4646620750427246, + "logps/chosen": -180.15858459472656, + "logps/rejected": -206.5132293701172, + "loss": 1.3877, + "nll_loss": 1.1630154848098755, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.824166297912598, + "rewards/margins": 2.219174385070801, + "rewards/rejected": 2.6049914360046387, + "step": 2770 + }, + { + "epoch": 0.15422809669768797, + "grad_norm": 66.43024444580078, + "learning_rate": 9.424473163723818e-08, + "logits/chosen": -0.2653568387031555, + "logits/rejected": -0.3475129008293152, + "logps/chosen": -180.5072021484375, + "logps/rejected": -226.7493896484375, + "loss": 1.3441, + "nll_loss": 0.967044472694397, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.633328437805176, + "rewards/margins": 3.564223527908325, + "rewards/rejected": 1.0691044330596924, + "step": 2780 + }, + { + "epoch": 0.15478287402393864, + "grad_norm": 64.4342269897461, + "learning_rate": 9.420407287848716e-08, + "logits/chosen": -0.30602386593818665, + "logits/rejected": -0.35876408219337463, + "logps/chosen": -184.25668334960938, + "logps/rejected": -224.2547607421875, + "loss": 1.3739, + "nll_loss": 0.9765238761901855, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.992084980010986, + "rewards/margins": 2.586191177368164, + "rewards/rejected": 2.4058938026428223, + "step": 2790 + }, + { + "epoch": 0.15533765135018932, + "grad_norm": 84.9169921875, + "learning_rate": 9.416327983972303e-08, + "logits/chosen": -0.45902055501937866, + "logits/rejected": -0.5061752200126648, + "logps/chosen": -180.96231079101562, + "logps/rejected": -223.2859344482422, + "loss": 1.4444, + "nll_loss": 1.0957945585250854, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.965345859527588, + "rewards/margins": 2.9676547050476074, + "rewards/rejected": 1.9976913928985596, + "step": 2800 + }, + { + "epoch": 0.15589242867644, + "grad_norm": 67.10375213623047, + "learning_rate": 9.412235264486403e-08, + "logits/chosen": -0.36773133277893066, + "logits/rejected": -0.44952455163002014, + "logps/chosen": -169.8708953857422, + "logps/rejected": -204.29342651367188, + "loss": 1.3693, + "nll_loss": 1.1890907287597656, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.539219379425049, + "rewards/margins": 2.4898459911346436, + "rewards/rejected": 2.049373149871826, + "step": 2810 + }, + { + "epoch": 0.15644720600269066, + "grad_norm": 137.09971618652344, + "learning_rate": 9.40812914182359e-08, + "logits/chosen": -0.5243615508079529, + "logits/rejected": -0.5689690709114075, + "logps/chosen": -236.2031707763672, + "logps/rejected": -265.97564697265625, + "loss": 1.3745, + "nll_loss": 1.4004731178283691, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.410025596618652, + "rewards/margins": 2.4597747325897217, + "rewards/rejected": 2.9502501487731934, + "step": 2820 + }, + { + "epoch": 0.15700198332894136, + "grad_norm": 76.55516052246094, + "learning_rate": 9.404009628457152e-08, + "logits/chosen": -0.2582315504550934, + "logits/rejected": -0.3453051447868347, + "logps/chosen": -173.9943084716797, + "logps/rejected": -205.97421264648438, + "loss": 1.3869, + "nll_loss": 1.0055551528930664, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.79890775680542, + "rewards/margins": 2.4663453102111816, + "rewards/rejected": 2.3325629234313965, + "step": 2830 + }, + { + "epoch": 0.15755676065519203, + "grad_norm": 105.99337768554688, + "learning_rate": 9.399876736901059e-08, + "logits/chosen": -0.1436496376991272, + "logits/rejected": -0.26093825697898865, + "logps/chosen": -134.3251495361328, + "logps/rejected": -169.06387329101562, + "loss": 1.272, + "nll_loss": 0.8987034559249878, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 3.7388052940368652, + "rewards/margins": 2.1922247409820557, + "rewards/rejected": 1.5465809106826782, + "step": 2840 + }, + { + "epoch": 0.1581115379814427, + "grad_norm": 73.1646957397461, + "learning_rate": 9.395730479709914e-08, + "logits/chosen": -0.2593434154987335, + "logits/rejected": -0.3618290424346924, + "logps/chosen": -166.7423095703125, + "logps/rejected": -228.65707397460938, + "loss": 1.3584, + "nll_loss": 0.9439231157302856, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.7212347984313965, + "rewards/margins": 3.2248616218566895, + "rewards/rejected": 1.496372938156128, + "step": 2850 + }, + { + "epoch": 0.15866631530769337, + "grad_norm": 53.906005859375, + "learning_rate": 9.391570869478928e-08, + "logits/chosen": -0.3020009398460388, + "logits/rejected": -0.40985146164894104, + "logps/chosen": -196.04159545898438, + "logps/rejected": -255.9525146484375, + "loss": 1.4542, + "nll_loss": 1.0601094961166382, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.943997383117676, + "rewards/margins": 3.0306007862091064, + "rewards/rejected": 1.9133962392807007, + "step": 2860 + }, + { + "epoch": 0.15922109263394404, + "grad_norm": 50.61529541015625, + "learning_rate": 9.387397918843868e-08, + "logits/chosen": -0.387464702129364, + "logits/rejected": -0.4730927050113678, + "logps/chosen": -171.99435424804688, + "logps/rejected": -221.2834930419922, + "loss": 1.3183, + "nll_loss": 1.0262539386749268, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.058302402496338, + "rewards/margins": 2.2185654640197754, + "rewards/rejected": 2.8397369384765625, + "step": 2870 + }, + { + "epoch": 0.15977586996019472, + "grad_norm": 54.42586135864258, + "learning_rate": 9.383211640481031e-08, + "logits/chosen": -0.3841005563735962, + "logits/rejected": -0.4482545852661133, + "logps/chosen": -210.54629516601562, + "logps/rejected": -233.26083374023438, + "loss": 1.4826, + "nll_loss": 1.2217636108398438, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.09761905670166, + "rewards/margins": 2.4193615913391113, + "rewards/rejected": 2.678257703781128, + "step": 2880 + }, + { + "epoch": 0.16033064728644542, + "grad_norm": 71.05906677246094, + "learning_rate": 9.379012047107198e-08, + "logits/chosen": -0.25542598962783813, + "logits/rejected": -0.3733716905117035, + "logps/chosen": -196.41189575195312, + "logps/rejected": -223.8795928955078, + "loss": 1.3641, + "nll_loss": 0.9989587068557739, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.955661773681641, + "rewards/margins": 2.755467176437378, + "rewards/rejected": 2.2001938819885254, + "step": 2890 + }, + { + "epoch": 0.1608854246126961, + "grad_norm": 64.01283264160156, + "learning_rate": 9.374799151479595e-08, + "logits/chosen": -0.16815884411334991, + "logits/rejected": -0.23455043137073517, + "logps/chosen": -159.39224243164062, + "logps/rejected": -203.01136779785156, + "loss": 1.3999, + "nll_loss": 0.9491283297538757, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.669705390930176, + "rewards/margins": 1.8462598323822021, + "rewards/rejected": 2.8234455585479736, + "step": 2900 + }, + { + "epoch": 0.16144020193894676, + "grad_norm": 36.37623596191406, + "learning_rate": 9.370572966395862e-08, + "logits/chosen": -0.4131261706352234, + "logits/rejected": -0.46238717436790466, + "logps/chosen": -209.1028289794922, + "logps/rejected": -246.9708251953125, + "loss": 1.3781, + "nll_loss": 1.1953654289245605, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.529759407043457, + "rewards/margins": 2.8865723609924316, + "rewards/rejected": 2.6431870460510254, + "step": 2910 + }, + { + "epoch": 0.16199497926519743, + "grad_norm": 104.28368377685547, + "learning_rate": 9.366333504694005e-08, + "logits/chosen": -0.3713774085044861, + "logits/rejected": -0.426577627658844, + "logps/chosen": -194.2244415283203, + "logps/rejected": -248.76950073242188, + "loss": 1.406, + "nll_loss": 1.1306129693984985, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.217923641204834, + "rewards/margins": 2.2736878395080566, + "rewards/rejected": 2.9442355632781982, + "step": 2920 + }, + { + "epoch": 0.1625497565914481, + "grad_norm": 71.3147964477539, + "learning_rate": 9.36208077925236e-08, + "logits/chosen": -0.2785489559173584, + "logits/rejected": -0.3307238221168518, + "logps/chosen": -167.78414916992188, + "logps/rejected": -202.63185119628906, + "loss": 1.3369, + "nll_loss": 1.0394691228866577, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.59388542175293, + "rewards/margins": 2.529740810394287, + "rewards/rejected": 2.0641446113586426, + "step": 2930 + }, + { + "epoch": 0.16310453391769877, + "grad_norm": 59.345767974853516, + "learning_rate": 9.357814802989559e-08, + "logits/chosen": -0.2544178366661072, + "logits/rejected": -0.3156990706920624, + "logps/chosen": -164.45431518554688, + "logps/rejected": -215.3196258544922, + "loss": 1.2965, + "nll_loss": 0.9578148722648621, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.471480369567871, + "rewards/margins": 2.459791898727417, + "rewards/rejected": 2.011688232421875, + "step": 2940 + }, + { + "epoch": 0.16365931124394947, + "grad_norm": 44.61174011230469, + "learning_rate": 9.35353558886448e-08, + "logits/chosen": -0.203196719288826, + "logits/rejected": -0.27118998765945435, + "logps/chosen": -191.95758056640625, + "logps/rejected": -233.2113037109375, + "loss": 1.3525, + "nll_loss": 0.9398837089538574, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.863980770111084, + "rewards/margins": 2.6459555625915527, + "rewards/rejected": 2.2180254459381104, + "step": 2950 + }, + { + "epoch": 0.16421408857020015, + "grad_norm": 116.35255432128906, + "learning_rate": 9.349243149876222e-08, + "logits/chosen": -0.12396907806396484, + "logits/rejected": -0.23714527487754822, + "logps/chosen": -148.67955017089844, + "logps/rejected": -216.0026397705078, + "loss": 1.4087, + "nll_loss": 0.8972294926643372, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.648340702056885, + "rewards/margins": 3.5599422454833984, + "rewards/rejected": 1.0883982181549072, + "step": 2960 + }, + { + "epoch": 0.16476886589645082, + "grad_norm": 80.23884582519531, + "learning_rate": 9.34493749906405e-08, + "logits/chosen": -0.23079581558704376, + "logits/rejected": -0.3237621486186981, + "logps/chosen": -154.03265380859375, + "logps/rejected": -206.5592498779297, + "loss": 1.2691, + "nll_loss": 0.9200903177261353, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.6948981285095215, + "rewards/margins": 3.045192241668701, + "rewards/rejected": 1.6497061252593994, + "step": 2970 + }, + { + "epoch": 0.1653236432227015, + "grad_norm": 71.23548889160156, + "learning_rate": 9.340618649507368e-08, + "logits/chosen": -0.23892824351787567, + "logits/rejected": -0.3380570411682129, + "logps/chosen": -201.36761474609375, + "logps/rejected": -257.79241943359375, + "loss": 1.3058, + "nll_loss": 1.0435470342636108, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.308444023132324, + "rewards/margins": 3.2355926036834717, + "rewards/rejected": 2.0728511810302734, + "step": 2980 + }, + { + "epoch": 0.16587842054895216, + "grad_norm": 57.883033752441406, + "learning_rate": 9.33628661432567e-08, + "logits/chosen": -0.18109621107578278, + "logits/rejected": -0.27738335728645325, + "logps/chosen": -195.41867065429688, + "logps/rejected": -238.0283966064453, + "loss": 1.2382, + "nll_loss": 1.0464454889297485, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.014590263366699, + "rewards/margins": 3.5181357860565186, + "rewards/rejected": 1.4964540004730225, + "step": 2990 + }, + { + "epoch": 0.16643319787520283, + "grad_norm": 51.56818771362305, + "learning_rate": 9.331941406678509e-08, + "logits/chosen": -0.23022404313087463, + "logits/rejected": -0.3125895857810974, + "logps/chosen": -166.96844482421875, + "logps/rejected": -210.93698120117188, + "loss": 1.375, + "nll_loss": 0.9681900143623352, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.88405179977417, + "rewards/margins": 3.1794562339782715, + "rewards/rejected": 1.7045953273773193, + "step": 3000 + }, + { + "epoch": 0.16643319787520283, + "eval_logits/chosen": -0.3941521942615509, + "eval_logits/rejected": -0.45799243450164795, + "eval_logps/chosen": -203.202392578125, + "eval_logps/rejected": -262.8951110839844, + "eval_loss": 1.3165993690490723, + "eval_nll_loss": 1.0564641952514648, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 5.475882530212402, + "eval_rewards/margins": 3.857545852661133, + "eval_rewards/rejected": 1.6183371543884277, + "eval_runtime": 16.8723, + "eval_samples_per_second": 15.173, + "eval_steps_per_second": 1.897, + "step": 3000 + }, + { + "epoch": 0.1669879752014535, + "grad_norm": 56.567138671875, + "learning_rate": 9.327583039765452e-08, + "logits/chosen": -0.05765485763549805, + "logits/rejected": -0.15203312039375305, + "logps/chosen": -147.2929229736328, + "logps/rejected": -186.84970092773438, + "loss": 1.3847, + "nll_loss": 0.8762642741203308, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.501176357269287, + "rewards/margins": 2.618312358856201, + "rewards/rejected": 1.882863998413086, + "step": 3010 + }, + { + "epoch": 0.1675427525277042, + "grad_norm": 134.11328125, + "learning_rate": 9.323211526826034e-08, + "logits/chosen": -0.33124592900276184, + "logits/rejected": -0.46882420778274536, + "logps/chosen": -182.77406311035156, + "logps/rejected": -228.29013061523438, + "loss": 1.3804, + "nll_loss": 1.0590949058532715, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.268399715423584, + "rewards/margins": 3.4461803436279297, + "rewards/rejected": 1.8222192525863647, + "step": 3020 + }, + { + "epoch": 0.16809752985395487, + "grad_norm": 43.63470458984375, + "learning_rate": 9.31882688113973e-08, + "logits/chosen": -0.4445907175540924, + "logits/rejected": -0.5267969369888306, + "logps/chosen": -192.04876708984375, + "logps/rejected": -261.2143249511719, + "loss": 1.2969, + "nll_loss": 1.1113297939300537, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.295589447021484, + "rewards/margins": 3.152954339981079, + "rewards/rejected": 2.1426353454589844, + "step": 3030 + }, + { + "epoch": 0.16865230718020555, + "grad_norm": 65.57992553710938, + "learning_rate": 9.314429116025908e-08, + "logits/chosen": -0.30112895369529724, + "logits/rejected": -0.3968796730041504, + "logps/chosen": -181.86544799804688, + "logps/rejected": -220.15896606445312, + "loss": 1.3056, + "nll_loss": 0.9839954376220703, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.820133209228516, + "rewards/margins": 3.0142996311187744, + "rewards/rejected": 1.8058335781097412, + "step": 3040 + }, + { + "epoch": 0.16920708450645622, + "grad_norm": 95.29837799072266, + "learning_rate": 9.310018244843788e-08, + "logits/chosen": -0.2917076051235199, + "logits/rejected": -0.39179855585098267, + "logps/chosen": -170.2184295654297, + "logps/rejected": -213.0507049560547, + "loss": 1.4588, + "nll_loss": 0.9763672947883606, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.969861030578613, + "rewards/margins": 2.7968153953552246, + "rewards/rejected": 2.1730453968048096, + "step": 3050 + }, + { + "epoch": 0.1697618618327069, + "grad_norm": 125.24748992919922, + "learning_rate": 9.305594280992403e-08, + "logits/chosen": -0.3027943968772888, + "logits/rejected": -0.40738552808761597, + "logps/chosen": -187.3314971923828, + "logps/rejected": -250.4483642578125, + "loss": 1.3594, + "nll_loss": 1.0154049396514893, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.080492973327637, + "rewards/margins": 2.5206997394561768, + "rewards/rejected": 2.559792995452881, + "step": 3060 + }, + { + "epoch": 0.17031663915895756, + "grad_norm": 45.90873718261719, + "learning_rate": 9.301157237910559e-08, + "logits/chosen": -0.3818047046661377, + "logits/rejected": -0.46516746282577515, + "logps/chosen": -188.83514404296875, + "logps/rejected": -254.16354370117188, + "loss": 1.3877, + "nll_loss": 1.0332419872283936, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.420754432678223, + "rewards/margins": 2.7936758995056152, + "rewards/rejected": 2.6270785331726074, + "step": 3070 + }, + { + "epoch": 0.17087141648520826, + "grad_norm": 90.6324691772461, + "learning_rate": 9.296707129076793e-08, + "logits/chosen": -0.3408610224723816, + "logits/rejected": -0.459175169467926, + "logps/chosen": -175.30142211914062, + "logps/rejected": -227.79067993164062, + "loss": 1.4235, + "nll_loss": 1.0021774768829346, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.705409049987793, + "rewards/margins": 2.9445652961730957, + "rewards/rejected": 1.7608436346054077, + "step": 3080 + }, + { + "epoch": 0.17142619381145893, + "grad_norm": 53.0914306640625, + "learning_rate": 9.29224396800933e-08, + "logits/chosen": -0.36489245295524597, + "logits/rejected": -0.425426721572876, + "logps/chosen": -183.64111328125, + "logps/rejected": -202.63961791992188, + "loss": 1.3559, + "nll_loss": 1.0612441301345825, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.426742076873779, + "rewards/margins": 2.255833387374878, + "rewards/rejected": 2.1709086894989014, + "step": 3090 + }, + { + "epoch": 0.1719809711377096, + "grad_norm": 58.5355224609375, + "learning_rate": 9.287767768266046e-08, + "logits/chosen": -0.35046714544296265, + "logits/rejected": -0.4076654314994812, + "logps/chosen": -153.88473510742188, + "logps/rejected": -202.56646728515625, + "loss": 1.4322, + "nll_loss": 1.020686388015747, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.608212471008301, + "rewards/margins": 2.0267574787139893, + "rewards/rejected": 2.5814545154571533, + "step": 3100 + }, + { + "epoch": 0.17253574846396028, + "grad_norm": 60.846561431884766, + "learning_rate": 9.283278543444427e-08, + "logits/chosen": -0.29373809695243835, + "logits/rejected": -0.3485686182975769, + "logps/chosen": -175.6277618408203, + "logps/rejected": -191.9043426513672, + "loss": 1.3505, + "nll_loss": 1.0474942922592163, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 4.512275695800781, + "rewards/margins": 1.951250672340393, + "rewards/rejected": 2.5610251426696777, + "step": 3110 + }, + { + "epoch": 0.17309052579021095, + "grad_norm": 63.5445671081543, + "learning_rate": 9.278776307181517e-08, + "logits/chosen": -0.4287452697753906, + "logits/rejected": -0.4757808744907379, + "logps/chosen": -212.8755340576172, + "logps/rejected": -244.24636840820312, + "loss": 1.4416, + "nll_loss": 1.128204107284546, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.158940315246582, + "rewards/margins": 2.630156993865967, + "rewards/rejected": 2.528783082962036, + "step": 3120 + }, + { + "epoch": 0.17364530311646162, + "grad_norm": 51.99097442626953, + "learning_rate": 9.2742610731539e-08, + "logits/chosen": -0.32245302200317383, + "logits/rejected": -0.4503195285797119, + "logps/chosen": -162.48129272460938, + "logps/rejected": -205.0572052001953, + "loss": 1.42, + "nll_loss": 0.9195488691329956, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.636639595031738, + "rewards/margins": 2.9686594009399414, + "rewards/rejected": 1.6679799556732178, + "step": 3130 + }, + { + "epoch": 0.17420008044271232, + "grad_norm": 53.674537658691406, + "learning_rate": 9.269732855077628e-08, + "logits/chosen": -0.36281704902648926, + "logits/rejected": -0.44908076524734497, + "logps/chosen": -148.302734375, + "logps/rejected": -188.53761291503906, + "loss": 1.4262, + "nll_loss": 0.9345152974128723, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.75624418258667, + "rewards/margins": 2.426478862762451, + "rewards/rejected": 2.3297653198242188, + "step": 3140 + }, + { + "epoch": 0.174754857768963, + "grad_norm": 32.641780853271484, + "learning_rate": 9.265191666708207e-08, + "logits/chosen": -0.27269551157951355, + "logits/rejected": -0.3571647107601166, + "logps/chosen": -205.01992797851562, + "logps/rejected": -225.32373046875, + "loss": 1.3487, + "nll_loss": 1.0292689800262451, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.078030586242676, + "rewards/margins": 2.4737343788146973, + "rewards/rejected": 2.6042959690093994, + "step": 3150 + }, + { + "epoch": 0.17530963509521366, + "grad_norm": 55.32010269165039, + "learning_rate": 9.260637521840538e-08, + "logits/chosen": -0.3078479766845703, + "logits/rejected": -0.4543367028236389, + "logps/chosen": -163.51068115234375, + "logps/rejected": -225.3749542236328, + "loss": 1.3688, + "nll_loss": 0.9280654788017273, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.966848373413086, + "rewards/margins": 2.6937406063079834, + "rewards/rejected": 2.2731080055236816, + "step": 3160 + }, + { + "epoch": 0.17586441242146433, + "grad_norm": 95.10186767578125, + "learning_rate": 9.256070434308878e-08, + "logits/chosen": -0.25554990768432617, + "logits/rejected": -0.3518657684326172, + "logps/chosen": -154.96678161621094, + "logps/rejected": -189.62200927734375, + "loss": 1.3324, + "nll_loss": 0.9391428232192993, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.641543388366699, + "rewards/margins": 2.5734002590179443, + "rewards/rejected": 2.068143367767334, + "step": 3170 + }, + { + "epoch": 0.176419189747715, + "grad_norm": 79.28388977050781, + "learning_rate": 9.251490417986807e-08, + "logits/chosen": -0.13116374611854553, + "logits/rejected": -0.24308066070079803, + "logps/chosen": -138.0506591796875, + "logps/rejected": -191.6248016357422, + "loss": 1.4005, + "nll_loss": 0.8182266354560852, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.467595100402832, + "rewards/margins": 3.2676169872283936, + "rewards/rejected": 1.1999778747558594, + "step": 3180 + }, + { + "epoch": 0.17697396707396568, + "grad_norm": 62.85856628417969, + "learning_rate": 9.246897486787172e-08, + "logits/chosen": -0.15434524416923523, + "logits/rejected": -0.3066304326057434, + "logps/chosen": -148.60952758789062, + "logps/rejected": -212.33450317382812, + "loss": 1.3648, + "nll_loss": 0.9511687159538269, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.542972087860107, + "rewards/margins": 3.7544562816619873, + "rewards/rejected": 0.7885159254074097, + "step": 3190 + }, + { + "epoch": 0.17752874440021638, + "grad_norm": 69.8587417602539, + "learning_rate": 9.242291654662058e-08, + "logits/chosen": -0.28641340136528015, + "logits/rejected": -0.39799395203590393, + "logps/chosen": -153.35104370117188, + "logps/rejected": -213.3860321044922, + "loss": 1.361, + "nll_loss": 0.9252685308456421, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.663729667663574, + "rewards/margins": 3.158186435699463, + "rewards/rejected": 1.5055434703826904, + "step": 3200 + }, + { + "epoch": 0.17808352172646705, + "grad_norm": 65.60191345214844, + "learning_rate": 9.237672935602734e-08, + "logits/chosen": -0.3066278100013733, + "logits/rejected": -0.3982074558734894, + "logps/chosen": -216.09536743164062, + "logps/rejected": -270.0486145019531, + "loss": 1.3723, + "nll_loss": 1.1032354831695557, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.352534294128418, + "rewards/margins": 3.3085875511169434, + "rewards/rejected": 2.0439465045928955, + "step": 3210 + }, + { + "epoch": 0.17863829905271772, + "grad_norm": 111.08596801757812, + "learning_rate": 9.233041343639621e-08, + "logits/chosen": -0.43451136350631714, + "logits/rejected": -0.5363648533821106, + "logps/chosen": -206.8416290283203, + "logps/rejected": -263.03863525390625, + "loss": 1.2986, + "nll_loss": 1.0895191431045532, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.949709892272949, + "rewards/margins": 3.783278226852417, + "rewards/rejected": 2.166430950164795, + "step": 3220 + }, + { + "epoch": 0.1791930763789684, + "grad_norm": 85.79315948486328, + "learning_rate": 9.228396892842243e-08, + "logits/chosen": -0.3698079288005829, + "logits/rejected": -0.5223512649536133, + "logps/chosen": -154.75775146484375, + "logps/rejected": -210.3184051513672, + "loss": 1.4173, + "nll_loss": 0.984754741191864, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.734168529510498, + "rewards/margins": 2.8217694759368896, + "rewards/rejected": 1.9123990535736084, + "step": 3230 + }, + { + "epoch": 0.17974785370521906, + "grad_norm": 55.84793472290039, + "learning_rate": 9.223739597319182e-08, + "logits/chosen": -0.3412432074546814, + "logits/rejected": -0.42043352127075195, + "logps/chosen": -166.50973510742188, + "logps/rejected": -232.7230224609375, + "loss": 1.3652, + "nll_loss": 1.0250871181488037, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.225703716278076, + "rewards/margins": 3.070754051208496, + "rewards/rejected": 2.1549501419067383, + "step": 3240 + }, + { + "epoch": 0.18030263103146973, + "grad_norm": 53.27821731567383, + "learning_rate": 9.219069471218044e-08, + "logits/chosen": -0.3862474858760834, + "logits/rejected": -0.47613492608070374, + "logps/chosen": -147.55599975585938, + "logps/rejected": -225.82376098632812, + "loss": 1.2634, + "nll_loss": 1.085354208946228, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.058647632598877, + "rewards/margins": 2.990551471710205, + "rewards/rejected": 2.0680959224700928, + "step": 3250 + }, + { + "epoch": 0.1808574083577204, + "grad_norm": 74.8572998046875, + "learning_rate": 9.214386528725407e-08, + "logits/chosen": -0.43352779746055603, + "logits/rejected": -0.5267191529273987, + "logps/chosen": -222.47268676757812, + "logps/rejected": -283.19281005859375, + "loss": 1.4565, + "nll_loss": 1.1316627264022827, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.734960079193115, + "rewards/margins": 3.585192918777466, + "rewards/rejected": 2.1497673988342285, + "step": 3260 + }, + { + "epoch": 0.1814121856839711, + "grad_norm": 56.274078369140625, + "learning_rate": 9.209690784066784e-08, + "logits/chosen": -0.13440726697444916, + "logits/rejected": -0.24160249531269073, + "logps/chosen": -160.8746795654297, + "logps/rejected": -216.3030242919922, + "loss": 1.3005, + "nll_loss": 0.9158796072006226, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.995485305786133, + "rewards/margins": 3.5055174827575684, + "rewards/rejected": 1.489967942237854, + "step": 3270 + }, + { + "epoch": 0.18196696301022178, + "grad_norm": 40.22864532470703, + "learning_rate": 9.204982251506576e-08, + "logits/chosen": -0.1497855931520462, + "logits/rejected": -0.2931815981864929, + "logps/chosen": -152.04678344726562, + "logps/rejected": -208.2039337158203, + "loss": 1.4019, + "nll_loss": 0.9629203081130981, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.667003154754639, + "rewards/margins": 3.2013118267059326, + "rewards/rejected": 1.465691328048706, + "step": 3280 + }, + { + "epoch": 0.18252174033647245, + "grad_norm": 70.88353729248047, + "learning_rate": 9.200260945348033e-08, + "logits/chosen": -0.14935025572776794, + "logits/rejected": -0.2693983018398285, + "logps/chosen": -145.81430053710938, + "logps/rejected": -183.57470703125, + "loss": 1.3995, + "nll_loss": 0.9631720781326294, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.340367317199707, + "rewards/margins": 1.9793999195098877, + "rewards/rejected": 2.3609676361083984, + "step": 3290 + }, + { + "epoch": 0.18307651766272312, + "grad_norm": 52.66188430786133, + "learning_rate": 9.195526879933205e-08, + "logits/chosen": -0.2618695795536041, + "logits/rejected": -0.33439141511917114, + "logps/chosen": -146.67202758789062, + "logps/rejected": -185.0167236328125, + "loss": 1.4054, + "nll_loss": 1.035718560218811, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.421456336975098, + "rewards/margins": 1.8323252201080322, + "rewards/rejected": 2.5891308784484863, + "step": 3300 + }, + { + "epoch": 0.1836312949889738, + "grad_norm": 62.6783561706543, + "learning_rate": 9.190780069642899e-08, + "logits/chosen": -0.4468691349029541, + "logits/rejected": -0.48922890424728394, + "logps/chosen": -185.74270629882812, + "logps/rejected": -238.2687530517578, + "loss": 1.3825, + "nll_loss": 1.0476751327514648, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.556589126586914, + "rewards/margins": 3.2610023021698, + "rewards/rejected": 2.295586585998535, + "step": 3310 + }, + { + "epoch": 0.18418607231522446, + "grad_norm": 119.20405578613281, + "learning_rate": 9.186020528896643e-08, + "logits/chosen": -0.27735182642936707, + "logits/rejected": -0.3652539551258087, + "logps/chosen": -179.13577270507812, + "logps/rejected": -219.6503143310547, + "loss": 1.351, + "nll_loss": 1.0589101314544678, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.890892505645752, + "rewards/margins": 2.9163360595703125, + "rewards/rejected": 1.9745559692382812, + "step": 3320 + }, + { + "epoch": 0.18474084964147516, + "grad_norm": 69.34968566894531, + "learning_rate": 9.181248272152633e-08, + "logits/chosen": -0.3122369647026062, + "logits/rejected": -0.3962582051753998, + "logps/chosen": -191.41275024414062, + "logps/rejected": -223.7455291748047, + "loss": 1.3463, + "nll_loss": 1.0294417142868042, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.048547744750977, + "rewards/margins": 2.3484997749328613, + "rewards/rejected": 2.7000479698181152, + "step": 3330 + }, + { + "epoch": 0.18529562696772583, + "grad_norm": 51.15849685668945, + "learning_rate": 9.176463313907694e-08, + "logits/chosen": -0.27559852600097656, + "logits/rejected": -0.44314026832580566, + "logps/chosen": -193.53561401367188, + "logps/rejected": -257.9790344238281, + "loss": 1.4233, + "nll_loss": 0.9996274709701538, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.9851603507995605, + "rewards/margins": 3.9858479499816895, + "rewards/rejected": 0.9993122220039368, + "step": 3340 + }, + { + "epoch": 0.1858504042939765, + "grad_norm": 65.05023193359375, + "learning_rate": 9.171665668697234e-08, + "logits/chosen": -0.14803513884544373, + "logits/rejected": -0.20512041449546814, + "logps/chosen": -157.99771118164062, + "logps/rejected": -202.19908142089844, + "loss": 1.3478, + "nll_loss": 0.8801227807998657, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.644710063934326, + "rewards/margins": 3.1068365573883057, + "rewards/rejected": 1.5378737449645996, + "step": 3350 + }, + { + "epoch": 0.18640518162022718, + "grad_norm": 72.04387664794922, + "learning_rate": 9.166855351095204e-08, + "logits/chosen": -0.28203243017196655, + "logits/rejected": -0.4075242578983307, + "logps/chosen": -157.84893798828125, + "logps/rejected": -190.1215057373047, + "loss": 1.3512, + "nll_loss": 0.9680745005607605, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.681795597076416, + "rewards/margins": 2.9883322715759277, + "rewards/rejected": 1.6934630870819092, + "step": 3360 + }, + { + "epoch": 0.18695995894647785, + "grad_norm": 135.1023406982422, + "learning_rate": 9.162032375714044e-08, + "logits/chosen": -0.27743062376976013, + "logits/rejected": -0.4327467978000641, + "logps/chosen": -176.78150939941406, + "logps/rejected": -229.76913452148438, + "loss": 1.4111, + "nll_loss": 0.9727862477302551, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.038991928100586, + "rewards/margins": 4.594063758850098, + "rewards/rejected": 0.4449283480644226, + "step": 3370 + }, + { + "epoch": 0.18751473627272852, + "grad_norm": 94.94296264648438, + "learning_rate": 9.157196757204649e-08, + "logits/chosen": -0.2864229679107666, + "logits/rejected": -0.4441998600959778, + "logps/chosen": -181.42529296875, + "logps/rejected": -223.7584228515625, + "loss": 1.3273, + "nll_loss": 1.0347633361816406, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.656604766845703, + "rewards/margins": 3.1743931770324707, + "rewards/rejected": 1.4822113513946533, + "step": 3380 + }, + { + "epoch": 0.18806951359897922, + "grad_norm": 63.17582702636719, + "learning_rate": 9.152348510256319e-08, + "logits/chosen": -0.25547483563423157, + "logits/rejected": -0.4117124080657959, + "logps/chosen": -160.04031372070312, + "logps/rejected": -231.7164764404297, + "loss": 1.3238, + "nll_loss": 0.9673099517822266, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.919541835784912, + "rewards/margins": 4.370184898376465, + "rewards/rejected": 0.5493569374084473, + "step": 3390 + }, + { + "epoch": 0.1886242909252299, + "grad_norm": 54.76870346069336, + "learning_rate": 9.147487649596719e-08, + "logits/chosen": -0.4662798047065735, + "logits/rejected": -0.5804362297058105, + "logps/chosen": -205.5302276611328, + "logps/rejected": -257.89093017578125, + "loss": 1.5078, + "nll_loss": 1.1512181758880615, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.4895339012146, + "rewards/margins": 3.2194790840148926, + "rewards/rejected": 2.270055055618286, + "step": 3400 + }, + { + "epoch": 0.18917906825148056, + "grad_norm": 52.00660705566406, + "learning_rate": 9.142614189991827e-08, + "logits/chosen": -0.3087933361530304, + "logits/rejected": -0.44009774923324585, + "logps/chosen": -206.9576416015625, + "logps/rejected": -280.1715087890625, + "loss": 1.3352, + "nll_loss": 1.108897089958191, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.316677570343018, + "rewards/margins": 3.676166534423828, + "rewards/rejected": 1.6405115127563477, + "step": 3410 + }, + { + "epoch": 0.18973384557773124, + "grad_norm": 62.3161735534668, + "learning_rate": 9.13772814624589e-08, + "logits/chosen": -0.30704936385154724, + "logits/rejected": -0.38141196966171265, + "logps/chosen": -157.9688262939453, + "logps/rejected": -221.52737426757812, + "loss": 1.4271, + "nll_loss": 0.9901927709579468, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.816000938415527, + "rewards/margins": 3.078101634979248, + "rewards/rejected": 1.7378990650177002, + "step": 3420 + }, + { + "epoch": 0.1902886229039819, + "grad_norm": 83.45936584472656, + "learning_rate": 9.132829533201396e-08, + "logits/chosen": -0.44340506196022034, + "logits/rejected": -0.5546245574951172, + "logps/chosen": -176.84097290039062, + "logps/rejected": -237.9415283203125, + "loss": 1.4669, + "nll_loss": 1.084262728691101, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.157680511474609, + "rewards/margins": 2.991424083709717, + "rewards/rejected": 2.1662566661834717, + "step": 3430 + }, + { + "epoch": 0.19084340023023258, + "grad_norm": 88.83804321289062, + "learning_rate": 9.127918365739001e-08, + "logits/chosen": -0.3826572299003601, + "logits/rejected": -0.505754828453064, + "logps/chosen": -201.6626434326172, + "logps/rejected": -257.3487243652344, + "loss": 1.3895, + "nll_loss": 1.059676170349121, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.6069536209106445, + "rewards/margins": 2.890880584716797, + "rewards/rejected": 2.716073751449585, + "step": 3440 + }, + { + "epoch": 0.19139817755648328, + "grad_norm": 56.37425994873047, + "learning_rate": 9.122994658777503e-08, + "logits/chosen": -0.2930208742618561, + "logits/rejected": -0.45751482248306274, + "logps/chosen": -163.3193817138672, + "logps/rejected": -212.1604766845703, + "loss": 1.3403, + "nll_loss": 0.9310756921768188, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 4.8973612785339355, + "rewards/margins": 3.659257173538208, + "rewards/rejected": 1.2381041049957275, + "step": 3450 + }, + { + "epoch": 0.19195295488273395, + "grad_norm": 41.779510498046875, + "learning_rate": 9.118058427273791e-08, + "logits/chosen": -0.34141066670417786, + "logits/rejected": -0.4689091742038727, + "logps/chosen": -173.6635284423828, + "logps/rejected": -234.62466430664062, + "loss": 1.2819, + "nll_loss": 0.981812596321106, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.877215385437012, + "rewards/margins": 3.6084609031677246, + "rewards/rejected": 1.2687546014785767, + "step": 3460 + }, + { + "epoch": 0.19250773220898462, + "grad_norm": 30.11418914794922, + "learning_rate": 9.113109686222802e-08, + "logits/chosen": -0.4162047803401947, + "logits/rejected": -0.5399635434150696, + "logps/chosen": -196.7699737548828, + "logps/rejected": -259.7813720703125, + "loss": 1.2991, + "nll_loss": 1.053601861000061, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.753113746643066, + "rewards/margins": 4.110931396484375, + "rewards/rejected": 1.6421819925308228, + "step": 3470 + }, + { + "epoch": 0.1930625095352353, + "grad_norm": 68.62322998046875, + "learning_rate": 9.108148450657471e-08, + "logits/chosen": -0.24979765713214874, + "logits/rejected": -0.37531179189682007, + "logps/chosen": -177.31985473632812, + "logps/rejected": -224.9247283935547, + "loss": 1.3996, + "nll_loss": 1.179163932800293, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.5680108070373535, + "rewards/margins": 2.4868268966674805, + "rewards/rejected": 2.081183910369873, + "step": 3480 + }, + { + "epoch": 0.19361728686148597, + "grad_norm": 65.12769317626953, + "learning_rate": 9.103174735648692e-08, + "logits/chosen": -0.34614425897598267, + "logits/rejected": -0.49961429834365845, + "logps/chosen": -150.73887634277344, + "logps/rejected": -206.9713897705078, + "loss": 1.341, + "nll_loss": 0.9850081205368042, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.518560886383057, + "rewards/margins": 2.364466905593872, + "rewards/rejected": 2.1540937423706055, + "step": 3490 + }, + { + "epoch": 0.19417206418773664, + "grad_norm": 59.67082214355469, + "learning_rate": 9.098188556305261e-08, + "logits/chosen": -0.2790587544441223, + "logits/rejected": -0.46283188462257385, + "logps/chosen": -146.38931274414062, + "logps/rejected": -194.45590209960938, + "loss": 1.2778, + "nll_loss": 0.9563590884208679, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.381845951080322, + "rewards/margins": 2.9258816242218018, + "rewards/rejected": 1.4559640884399414, + "step": 3500 + }, + { + "epoch": 0.19417206418773664, + "eval_logits/chosen": -0.4424753189086914, + "eval_logits/rejected": -0.5204493403434753, + "eval_logps/chosen": -201.9140167236328, + "eval_logps/rejected": -256.9493103027344, + "eval_loss": 1.3134431838989258, + "eval_nll_loss": 1.0490245819091797, + "eval_rewards/accuracies": 0.84375, + "eval_rewards/chosen": 5.6047210693359375, + "eval_rewards/margins": 3.391801118850708, + "eval_rewards/rejected": 2.2129199504852295, + "eval_runtime": 17.2468, + "eval_samples_per_second": 14.843, + "eval_steps_per_second": 1.855, + "step": 3500 + }, + { + "epoch": 0.19472684151398734, + "grad_norm": 74.80335998535156, + "learning_rate": 9.093189927773848e-08, + "logits/chosen": -0.3249863088130951, + "logits/rejected": -0.47237634658813477, + "logps/chosen": -183.12828063964844, + "logps/rejected": -219.3392333984375, + "loss": 1.3372, + "nll_loss": 0.9999750852584839, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.970829486846924, + "rewards/margins": 2.9720358848571777, + "rewards/rejected": 1.998793601989746, + "step": 3510 + }, + { + "epoch": 0.195281618840238, + "grad_norm": 54.24885177612305, + "learning_rate": 9.088178865238928e-08, + "logits/chosen": -0.23620446026325226, + "logits/rejected": -0.33106979727745056, + "logps/chosen": -144.73150634765625, + "logps/rejected": -200.6682586669922, + "loss": 1.2637, + "nll_loss": 0.8744996786117554, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.980032444000244, + "rewards/margins": 2.8522746562957764, + "rewards/rejected": 2.127758264541626, + "step": 3520 + }, + { + "epoch": 0.19583639616648868, + "grad_norm": 88.81790924072266, + "learning_rate": 9.083155383922756e-08, + "logits/chosen": -0.24368822574615479, + "logits/rejected": -0.37356775999069214, + "logps/chosen": -169.39254760742188, + "logps/rejected": -211.821044921875, + "loss": 1.4507, + "nll_loss": 0.9627988934516907, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.646332740783691, + "rewards/margins": 2.7353675365448, + "rewards/rejected": 1.9109646081924438, + "step": 3530 + }, + { + "epoch": 0.19639117349273935, + "grad_norm": 71.2747573852539, + "learning_rate": 9.078119499085308e-08, + "logits/chosen": -0.418663889169693, + "logits/rejected": -0.5115020275115967, + "logps/chosen": -181.2946319580078, + "logps/rejected": -225.5801544189453, + "loss": 1.3974, + "nll_loss": 1.0265872478485107, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.286177635192871, + "rewards/margins": 2.8992366790771484, + "rewards/rejected": 2.3869409561157227, + "step": 3540 + }, + { + "epoch": 0.19694595081899002, + "grad_norm": 39.39947509765625, + "learning_rate": 9.07307122602424e-08, + "logits/chosen": -0.3985862135887146, + "logits/rejected": -0.46568727493286133, + "logps/chosen": -187.3225860595703, + "logps/rejected": -242.97030639648438, + "loss": 1.3301, + "nll_loss": 1.0382740497589111, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.31097936630249, + "rewards/margins": 3.2747280597686768, + "rewards/rejected": 2.0362515449523926, + "step": 3550 + }, + { + "epoch": 0.1975007281452407, + "grad_norm": 29.353214263916016, + "learning_rate": 9.06801058007484e-08, + "logits/chosen": -0.16209930181503296, + "logits/rejected": -0.29742684960365295, + "logps/chosen": -153.34727478027344, + "logps/rejected": -165.13827514648438, + "loss": 1.3441, + "nll_loss": 0.8343960642814636, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.275020599365234, + "rewards/margins": 2.413543462753296, + "rewards/rejected": 1.8614771366119385, + "step": 3560 + }, + { + "epoch": 0.19805550547149137, + "grad_norm": 69.83041381835938, + "learning_rate": 9.062937576609982e-08, + "logits/chosen": -0.4704197347164154, + "logits/rejected": -0.5258275270462036, + "logps/chosen": -200.0384063720703, + "logps/rejected": -245.9759063720703, + "loss": 1.5084, + "nll_loss": 1.0869133472442627, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 5.382255554199219, + "rewards/margins": 2.419602632522583, + "rewards/rejected": 2.9626529216766357, + "step": 3570 + }, + { + "epoch": 0.19861028279774207, + "grad_norm": 56.682098388671875, + "learning_rate": 9.057852231040075e-08, + "logits/chosen": -0.2369583398103714, + "logits/rejected": -0.3535544276237488, + "logps/chosen": -155.8750457763672, + "logps/rejected": -205.74368286132812, + "loss": 1.3495, + "nll_loss": 0.940041184425354, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.6919403076171875, + "rewards/margins": 2.4483542442321777, + "rewards/rejected": 2.2435860633850098, + "step": 3580 + }, + { + "epoch": 0.19916506012399274, + "grad_norm": 79.86531829833984, + "learning_rate": 9.052754558813028e-08, + "logits/chosen": -0.38561543822288513, + "logits/rejected": -0.5354014039039612, + "logps/chosen": -171.94937133789062, + "logps/rejected": -215.95858764648438, + "loss": 1.3566, + "nll_loss": 1.0209826231002808, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.939810276031494, + "rewards/margins": 2.798379898071289, + "rewards/rejected": 2.141430616378784, + "step": 3590 + }, + { + "epoch": 0.1997198374502434, + "grad_norm": 65.32847595214844, + "learning_rate": 9.047644575414183e-08, + "logits/chosen": -0.2097257375717163, + "logits/rejected": -0.3981267809867859, + "logps/chosen": -149.56321716308594, + "logps/rejected": -175.35711669921875, + "loss": 1.3656, + "nll_loss": 0.8642207980155945, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.44227409362793, + "rewards/margins": 2.8110909461975098, + "rewards/rejected": 1.6311830282211304, + "step": 3600 + }, + { + "epoch": 0.20027461477649408, + "grad_norm": 49.38480758666992, + "learning_rate": 9.042522296366291e-08, + "logits/chosen": -0.3963824212551117, + "logits/rejected": -0.49302539229393005, + "logps/chosen": -176.93246459960938, + "logps/rejected": -230.24087524414062, + "loss": 1.3291, + "nll_loss": 1.04789137840271, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.288444519042969, + "rewards/margins": 3.1669602394104004, + "rewards/rejected": 2.1214840412139893, + "step": 3610 + }, + { + "epoch": 0.20082939210274475, + "grad_norm": 61.247657775878906, + "learning_rate": 9.037387737229451e-08, + "logits/chosen": -0.2520487308502197, + "logits/rejected": -0.4036117494106293, + "logps/chosen": -172.23992919921875, + "logps/rejected": -202.6747283935547, + "loss": 1.3291, + "nll_loss": 0.9279230833053589, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.3425374031066895, + "rewards/margins": 2.726191759109497, + "rewards/rejected": 1.616346001625061, + "step": 3620 + }, + { + "epoch": 0.20138416942899542, + "grad_norm": 61.63615798950195, + "learning_rate": 9.032240913601062e-08, + "logits/chosen": -0.27439185976982117, + "logits/rejected": -0.3957839012145996, + "logps/chosen": -159.75448608398438, + "logps/rejected": -222.67611694335938, + "loss": 1.4667, + "nll_loss": 0.9911657571792603, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.459640979766846, + "rewards/margins": 2.8293843269348145, + "rewards/rejected": 1.6302568912506104, + "step": 3630 + }, + { + "epoch": 0.20193894675524612, + "grad_norm": 76.18391418457031, + "learning_rate": 9.027081841115783e-08, + "logits/chosen": -0.25864553451538086, + "logits/rejected": -0.45542287826538086, + "logps/chosen": -158.814453125, + "logps/rejected": -234.5258331298828, + "loss": 1.3452, + "nll_loss": 0.8534186482429504, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.526062488555908, + "rewards/margins": 3.695394992828369, + "rewards/rejected": 0.8306673169136047, + "step": 3640 + }, + { + "epoch": 0.2024937240814968, + "grad_norm": 69.41482543945312, + "learning_rate": 9.021910535445479e-08, + "logits/chosen": -0.3268418312072754, + "logits/rejected": -0.42958277463912964, + "logps/chosen": -173.4744110107422, + "logps/rejected": -223.49221801757812, + "loss": 1.2794, + "nll_loss": 0.9873861074447632, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.12941837310791, + "rewards/margins": 3.557917356491089, + "rewards/rejected": 1.57150137424469, + "step": 3650 + }, + { + "epoch": 0.20304850140774747, + "grad_norm": 75.36470794677734, + "learning_rate": 9.01672701229918e-08, + "logits/chosen": -0.23507532477378845, + "logits/rejected": -0.40880221128463745, + "logps/chosen": -144.60598754882812, + "logps/rejected": -190.6197967529297, + "loss": 1.3332, + "nll_loss": 0.8716999292373657, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.289609432220459, + "rewards/margins": 3.0599446296691895, + "rewards/rejected": 1.229664921760559, + "step": 3660 + }, + { + "epoch": 0.20360327873399814, + "grad_norm": 58.159019470214844, + "learning_rate": 9.011531287423023e-08, + "logits/chosen": -0.057216621935367584, + "logits/rejected": -0.18780682981014252, + "logps/chosen": -105.15031433105469, + "logps/rejected": -145.9329833984375, + "loss": 1.3099, + "nll_loss": 0.7050063610076904, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 3.8075637817382812, + "rewards/margins": 2.8922011852264404, + "rewards/rejected": 0.9153624773025513, + "step": 3670 + }, + { + "epoch": 0.2041580560602488, + "grad_norm": 36.62731170654297, + "learning_rate": 9.006323376600215e-08, + "logits/chosen": -0.4482879042625427, + "logits/rejected": -0.5565906763076782, + "logps/chosen": -187.14883422851562, + "logps/rejected": -242.4229278564453, + "loss": 1.3601, + "nll_loss": 1.094943881034851, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.3773579597473145, + "rewards/margins": 3.112643003463745, + "rewards/rejected": 2.2647151947021484, + "step": 3680 + }, + { + "epoch": 0.20471283338649948, + "grad_norm": 91.01419067382812, + "learning_rate": 9.001103295650985e-08, + "logits/chosen": -0.3312477171421051, + "logits/rejected": -0.4301799237728119, + "logps/chosen": -177.8890380859375, + "logps/rejected": -210.2823944091797, + "loss": 1.2762, + "nll_loss": 1.0129368305206299, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.19062614440918, + "rewards/margins": 2.9621591567993164, + "rewards/rejected": 2.228466510772705, + "step": 3690 + }, + { + "epoch": 0.20526761071275018, + "grad_norm": 66.5330810546875, + "learning_rate": 8.99587106043252e-08, + "logits/chosen": -0.2117491215467453, + "logits/rejected": -0.3421010971069336, + "logps/chosen": -177.11669921875, + "logps/rejected": -205.1708221435547, + "loss": 1.3927, + "nll_loss": 0.9429581761360168, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.606531143188477, + "rewards/margins": 2.5099503993988037, + "rewards/rejected": 2.0965805053710938, + "step": 3700 + }, + { + "epoch": 0.20582238803900085, + "grad_norm": 35.68384552001953, + "learning_rate": 8.990626686838938e-08, + "logits/chosen": -0.3598509430885315, + "logits/rejected": -0.45173701643943787, + "logps/chosen": -162.70602416992188, + "logps/rejected": -220.9865264892578, + "loss": 1.2909, + "nll_loss": 0.9705358743667603, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.9746623039245605, + "rewards/margins": 3.077357053756714, + "rewards/rejected": 1.8973052501678467, + "step": 3710 + }, + { + "epoch": 0.20637716536525152, + "grad_norm": 72.9140396118164, + "learning_rate": 8.985370190801227e-08, + "logits/chosen": -0.24999204277992249, + "logits/rejected": -0.35079583525657654, + "logps/chosen": -172.55189514160156, + "logps/rejected": -207.9408416748047, + "loss": 1.3975, + "nll_loss": 0.9223276972770691, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.864678382873535, + "rewards/margins": 3.5197761058807373, + "rewards/rejected": 1.3449018001556396, + "step": 3720 + }, + { + "epoch": 0.2069319426915022, + "grad_norm": 58.62187576293945, + "learning_rate": 8.980101588287201e-08, + "logits/chosen": -0.39123472571372986, + "logits/rejected": -0.48360252380371094, + "logps/chosen": -181.18020629882812, + "logps/rejected": -233.19320678710938, + "loss": 1.375, + "nll_loss": 1.0049530267715454, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.3949456214904785, + "rewards/margins": 3.387439012527466, + "rewards/rejected": 2.0075063705444336, + "step": 3730 + }, + { + "epoch": 0.20748672001775287, + "grad_norm": 104.83113861083984, + "learning_rate": 8.974820895301444e-08, + "logits/chosen": -0.40971869230270386, + "logits/rejected": -0.5111854076385498, + "logps/chosen": -171.76803588867188, + "logps/rejected": -222.0377197265625, + "loss": 1.3896, + "nll_loss": 1.0767524242401123, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.4347333908081055, + "rewards/margins": 3.326315402984619, + "rewards/rejected": 2.1084187030792236, + "step": 3740 + }, + { + "epoch": 0.20804149734400354, + "grad_norm": 131.49130249023438, + "learning_rate": 8.96952812788528e-08, + "logits/chosen": -0.4055160582065582, + "logits/rejected": -0.5088625550270081, + "logps/chosen": -180.02684020996094, + "logps/rejected": -240.64077758789062, + "loss": 1.3895, + "nll_loss": 1.0220630168914795, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.497774124145508, + "rewards/margins": 2.485865831375122, + "rewards/rejected": 3.0119082927703857, + "step": 3750 + }, + { + "epoch": 0.20859627467025424, + "grad_norm": 75.92974853515625, + "learning_rate": 8.964223302116698e-08, + "logits/chosen": -0.3788098692893982, + "logits/rejected": -0.4966840147972107, + "logps/chosen": -190.69357299804688, + "logps/rejected": -241.46841430664062, + "loss": 1.3045, + "nll_loss": 1.0417449474334717, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.564336776733398, + "rewards/margins": 3.94038462638855, + "rewards/rejected": 1.6239522695541382, + "step": 3760 + }, + { + "epoch": 0.2091510519965049, + "grad_norm": 112.6208267211914, + "learning_rate": 8.958906434110325e-08, + "logits/chosen": -0.27260252833366394, + "logits/rejected": -0.4252198338508606, + "logps/chosen": -157.2131805419922, + "logps/rejected": -219.95974731445312, + "loss": 1.3116, + "nll_loss": 0.8680321574211121, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.7769927978515625, + "rewards/margins": 3.1072120666503906, + "rewards/rejected": 1.6697801351547241, + "step": 3770 + }, + { + "epoch": 0.20970582932275558, + "grad_norm": 94.54066467285156, + "learning_rate": 8.95357754001737e-08, + "logits/chosen": -0.26051098108291626, + "logits/rejected": -0.41779977083206177, + "logps/chosen": -149.70997619628906, + "logps/rejected": -197.0052032470703, + "loss": 1.284, + "nll_loss": 0.8986402750015259, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.675868034362793, + "rewards/margins": 3.015355348587036, + "rewards/rejected": 1.660513162612915, + "step": 3780 + }, + { + "epoch": 0.21026060664900625, + "grad_norm": 100.28746032714844, + "learning_rate": 8.948236636025568e-08, + "logits/chosen": -0.36036959290504456, + "logits/rejected": -0.47816309332847595, + "logps/chosen": -153.33084106445312, + "logps/rejected": -204.0556640625, + "loss": 1.454, + "nll_loss": 0.9947422742843628, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.761590957641602, + "rewards/margins": 2.528191089630127, + "rewards/rejected": 2.2333998680114746, + "step": 3790 + }, + { + "epoch": 0.21081538397525693, + "grad_norm": 67.10070037841797, + "learning_rate": 8.942883738359142e-08, + "logits/chosen": -0.3197064697742462, + "logits/rejected": -0.43918901681900024, + "logps/chosen": -173.57681274414062, + "logps/rejected": -223.16213989257812, + "loss": 1.2664, + "nll_loss": 0.9543741941452026, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.900728702545166, + "rewards/margins": 3.069725513458252, + "rewards/rejected": 1.831003189086914, + "step": 3800 + }, + { + "epoch": 0.2113701613015076, + "grad_norm": 49.583927154541016, + "learning_rate": 8.937518863278746e-08, + "logits/chosen": -0.43831509351730347, + "logits/rejected": -0.5418807864189148, + "logps/chosen": -170.5087432861328, + "logps/rejected": -228.23348999023438, + "loss": 1.3477, + "nll_loss": 1.0029704570770264, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.487241744995117, + "rewards/margins": 2.9966530799865723, + "rewards/rejected": 2.490588665008545, + "step": 3810 + }, + { + "epoch": 0.2119249386277583, + "grad_norm": 105.17793273925781, + "learning_rate": 8.932142027081419e-08, + "logits/chosen": -0.3503844141960144, + "logits/rejected": -0.5124867558479309, + "logps/chosen": -184.93325805664062, + "logps/rejected": -245.14987182617188, + "loss": 1.3364, + "nll_loss": 0.9998448491096497, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.2992753982543945, + "rewards/margins": 3.449542284011841, + "rewards/rejected": 1.8497333526611328, + "step": 3820 + }, + { + "epoch": 0.21247971595400897, + "grad_norm": 69.32069396972656, + "learning_rate": 8.926753246100536e-08, + "logits/chosen": -0.30155476927757263, + "logits/rejected": -0.35403114557266235, + "logps/chosen": -171.98782348632812, + "logps/rejected": -201.35647583007812, + "loss": 1.3318, + "nll_loss": 1.062892198562622, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 4.722691059112549, + "rewards/margins": 2.3339271545410156, + "rewards/rejected": 2.388763666152954, + "step": 3830 + }, + { + "epoch": 0.21303449328025964, + "grad_norm": 33.47819900512695, + "learning_rate": 8.921352536705752e-08, + "logits/chosen": -0.3153464198112488, + "logits/rejected": -0.4027198851108551, + "logps/chosen": -159.6675567626953, + "logps/rejected": -198.66439819335938, + "loss": 1.2908, + "nll_loss": 0.9326319694519043, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.927337646484375, + "rewards/margins": 2.171002149581909, + "rewards/rejected": 2.7563347816467285, + "step": 3840 + }, + { + "epoch": 0.2135892706065103, + "grad_norm": 73.93592834472656, + "learning_rate": 8.915939915302967e-08, + "logits/chosen": -0.20850825309753418, + "logits/rejected": -0.29826563596725464, + "logps/chosen": -170.40194702148438, + "logps/rejected": -181.62571716308594, + "loss": 1.3736, + "nll_loss": 0.9485648274421692, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.793632507324219, + "rewards/margins": 3.0252745151519775, + "rewards/rejected": 1.768358588218689, + "step": 3850 + }, + { + "epoch": 0.21414404793276098, + "grad_norm": 73.56593322753906, + "learning_rate": 8.910515398334255e-08, + "logits/chosen": -0.4447970390319824, + "logits/rejected": -0.5151744484901428, + "logps/chosen": -174.51461791992188, + "logps/rejected": -230.3745574951172, + "loss": 1.3978, + "nll_loss": 1.098456621170044, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.449948310852051, + "rewards/margins": 3.0486416816711426, + "rewards/rejected": 2.4013073444366455, + "step": 3860 + }, + { + "epoch": 0.21469882525901166, + "grad_norm": 48.50322341918945, + "learning_rate": 8.905079002277832e-08, + "logits/chosen": -0.22557875514030457, + "logits/rejected": -0.3795395493507385, + "logps/chosen": -138.54998779296875, + "logps/rejected": -195.5319366455078, + "loss": 1.2399, + "nll_loss": 0.8102123141288757, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.490899085998535, + "rewards/margins": 3.248749256134033, + "rewards/rejected": 1.2421494722366333, + "step": 3870 + }, + { + "epoch": 0.21525360258526233, + "grad_norm": 55.75164794921875, + "learning_rate": 8.899630743648e-08, + "logits/chosen": -0.31376057863235474, + "logits/rejected": -0.43367958068847656, + "logps/chosen": -150.63986206054688, + "logps/rejected": -201.40696716308594, + "loss": 1.2756, + "nll_loss": 0.9280077815055847, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.763642311096191, + "rewards/margins": 3.3326945304870605, + "rewards/rejected": 1.4309481382369995, + "step": 3880 + }, + { + "epoch": 0.21580837991151303, + "grad_norm": 82.2542495727539, + "learning_rate": 8.894170638995092e-08, + "logits/chosen": -0.2537023425102234, + "logits/rejected": -0.35121041536331177, + "logps/chosen": -159.0094757080078, + "logps/rejected": -208.6914825439453, + "loss": 1.4474, + "nll_loss": 0.9805082082748413, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.797659873962402, + "rewards/margins": 2.736910581588745, + "rewards/rejected": 2.0607495307922363, + "step": 3890 + }, + { + "epoch": 0.2163631572377637, + "grad_norm": 43.822174072265625, + "learning_rate": 8.888698704905431e-08, + "logits/chosen": -0.21386781334877014, + "logits/rejected": -0.3521370589733124, + "logps/chosen": -144.4310302734375, + "logps/rejected": -178.11569213867188, + "loss": 1.2435, + "nll_loss": 0.872231662273407, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.516562461853027, + "rewards/margins": 2.044229030609131, + "rewards/rejected": 2.4723331928253174, + "step": 3900 + }, + { + "epoch": 0.21691793456401437, + "grad_norm": 64.06157684326172, + "learning_rate": 8.88321495800127e-08, + "logits/chosen": -0.38649290800094604, + "logits/rejected": -0.4504520893096924, + "logps/chosen": -201.0390167236328, + "logps/rejected": -220.2386932373047, + "loss": 1.5477, + "nll_loss": 1.1234185695648193, + "rewards/accuracies": 0.625, + "rewards/chosen": 5.286801815032959, + "rewards/margins": 2.4049549102783203, + "rewards/rejected": 2.8818471431732178, + "step": 3910 + }, + { + "epoch": 0.21747271189026504, + "grad_norm": 59.72712707519531, + "learning_rate": 8.87771941494075e-08, + "logits/chosen": -0.24501697719097137, + "logits/rejected": -0.3806004822254181, + "logps/chosen": -153.45704650878906, + "logps/rejected": -185.648681640625, + "loss": 1.3693, + "nll_loss": 0.9162791967391968, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.498444557189941, + "rewards/margins": 2.5761585235595703, + "rewards/rejected": 1.9222854375839233, + "step": 3920 + }, + { + "epoch": 0.2180274892165157, + "grad_norm": 74.50149536132812, + "learning_rate": 8.872212092417844e-08, + "logits/chosen": -0.2650856375694275, + "logits/rejected": -0.34951329231262207, + "logps/chosen": -147.10775756835938, + "logps/rejected": -179.2303466796875, + "loss": 1.3909, + "nll_loss": 0.9994792938232422, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.621931076049805, + "rewards/margins": 2.5670480728149414, + "rewards/rejected": 2.0548832416534424, + "step": 3930 + }, + { + "epoch": 0.21858226654276638, + "grad_norm": 34.133785247802734, + "learning_rate": 8.866693007162307e-08, + "logits/chosen": -0.29337453842163086, + "logits/rejected": -0.40382012724876404, + "logps/chosen": -188.85833740234375, + "logps/rejected": -240.5803680419922, + "loss": 1.3901, + "nll_loss": 1.0012753009796143, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.997965335845947, + "rewards/margins": 2.5151822566986084, + "rewards/rejected": 2.4827828407287598, + "step": 3940 + }, + { + "epoch": 0.21913704386901708, + "grad_norm": 33.943721771240234, + "learning_rate": 8.861162175939625e-08, + "logits/chosen": -0.2020225077867508, + "logits/rejected": -0.330327570438385, + "logps/chosen": -167.04249572753906, + "logps/rejected": -215.63134765625, + "loss": 1.3655, + "nll_loss": 1.047996997833252, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.789084434509277, + "rewards/margins": 2.9170351028442383, + "rewards/rejected": 1.87204909324646, + "step": 3950 + }, + { + "epoch": 0.21969182119526776, + "grad_norm": 72.16616821289062, + "learning_rate": 8.855619615550972e-08, + "logits/chosen": -0.30495089292526245, + "logits/rejected": -0.4407684803009033, + "logps/chosen": -179.2265167236328, + "logps/rejected": -260.6602478027344, + "loss": 1.2582, + "nll_loss": 0.9948042035102844, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.850760459899902, + "rewards/margins": 4.7629876136779785, + "rewards/rejected": 1.087773084640503, + "step": 3960 + }, + { + "epoch": 0.22024659852151843, + "grad_norm": 62.64510726928711, + "learning_rate": 8.850065342833141e-08, + "logits/chosen": -0.1904325932264328, + "logits/rejected": -0.4101056158542633, + "logps/chosen": -133.64218139648438, + "logps/rejected": -197.58729553222656, + "loss": 1.3394, + "nll_loss": 0.8108884692192078, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.4841461181640625, + "rewards/margins": 3.7502377033233643, + "rewards/rejected": 0.7339082956314087, + "step": 3970 + }, + { + "epoch": 0.2208013758477691, + "grad_norm": 55.91902160644531, + "learning_rate": 8.844499374658512e-08, + "logits/chosen": -0.4355524182319641, + "logits/rejected": -0.5665684938430786, + "logps/chosen": -178.23260498046875, + "logps/rejected": -245.59194946289062, + "loss": 1.413, + "nll_loss": 1.092327356338501, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.46936559677124, + "rewards/margins": 3.4504482746124268, + "rewards/rejected": 2.018918037414551, + "step": 3980 + }, + { + "epoch": 0.22135615317401977, + "grad_norm": 46.83743667602539, + "learning_rate": 8.838921727934991e-08, + "logits/chosen": -0.3046635389328003, + "logits/rejected": -0.3972231447696686, + "logps/chosen": -161.92007446289062, + "logps/rejected": -226.1973876953125, + "loss": 1.3331, + "nll_loss": 1.0980288982391357, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.024444580078125, + "rewards/margins": 3.8393218517303467, + "rewards/rejected": 1.1851226091384888, + "step": 3990 + }, + { + "epoch": 0.22191093050027044, + "grad_norm": 68.99779510498047, + "learning_rate": 8.833332419605959e-08, + "logits/chosen": -0.33155789971351624, + "logits/rejected": -0.38299810886383057, + "logps/chosen": -166.82423400878906, + "logps/rejected": -200.9204864501953, + "loss": 1.3213, + "nll_loss": 1.010746955871582, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.797917366027832, + "rewards/margins": 2.3995375633239746, + "rewards/rejected": 2.3983798027038574, + "step": 4000 + }, + { + "epoch": 0.22191093050027044, + "eval_logits/chosen": -0.3887759745121002, + "eval_logits/rejected": -0.4639728367328644, + "eval_logps/chosen": -200.67454528808594, + "eval_logps/rejected": -261.816162109375, + "eval_loss": 1.2828993797302246, + "eval_nll_loss": 1.0431652069091797, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 5.728669166564941, + "eval_rewards/margins": 4.002435684204102, + "eval_rewards/rejected": 1.726233720779419, + "eval_runtime": 17.2188, + "eval_samples_per_second": 14.867, + "eval_steps_per_second": 1.858, + "step": 4000 + }, + { + "epoch": 0.22246570782652114, + "grad_norm": 140.62405395507812, + "learning_rate": 8.827731466650223e-08, + "logits/chosen": -0.20445005595684052, + "logits/rejected": -0.3240208625793457, + "logps/chosen": -153.79769897460938, + "logps/rejected": -195.5605926513672, + "loss": 1.35, + "nll_loss": 1.039541482925415, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.536130905151367, + "rewards/margins": 2.774941921234131, + "rewards/rejected": 1.7611888647079468, + "step": 4010 + }, + { + "epoch": 0.2230204851527718, + "grad_norm": 75.44985961914062, + "learning_rate": 8.822118886081961e-08, + "logits/chosen": -0.16724136471748352, + "logits/rejected": -0.3808498978614807, + "logps/chosen": -160.11105346679688, + "logps/rejected": -210.70327758789062, + "loss": 1.3192, + "nll_loss": 0.8742419481277466, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.984594345092773, + "rewards/margins": 3.297489643096924, + "rewards/rejected": 1.6871049404144287, + "step": 4020 + }, + { + "epoch": 0.22357526247902249, + "grad_norm": 60.77824401855469, + "learning_rate": 8.816494694950675e-08, + "logits/chosen": -0.24743108451366425, + "logits/rejected": -0.40931805968284607, + "logps/chosen": -162.1366729736328, + "logps/rejected": -207.17422485351562, + "loss": 1.3964, + "nll_loss": 0.8974650502204895, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.02827262878418, + "rewards/margins": 3.0538411140441895, + "rewards/rejected": 1.9744312763214111, + "step": 4030 + }, + { + "epoch": 0.22413003980527316, + "grad_norm": 85.68519592285156, + "learning_rate": 8.810858910341137e-08, + "logits/chosen": -0.4329708218574524, + "logits/rejected": -0.5223310589790344, + "logps/chosen": -189.80859375, + "logps/rejected": -210.829833984375, + "loss": 1.3957, + "nll_loss": 1.077449917793274, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.409401893615723, + "rewards/margins": 2.2836270332336426, + "rewards/rejected": 3.1257758140563965, + "step": 4040 + }, + { + "epoch": 0.22468481713152383, + "grad_norm": 55.49538040161133, + "learning_rate": 8.805211549373334e-08, + "logits/chosen": -0.25056955218315125, + "logits/rejected": -0.3673659861087799, + "logps/chosen": -178.02268981933594, + "logps/rejected": -230.04541015625, + "loss": 1.3563, + "nll_loss": 0.9556955099105835, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.048086166381836, + "rewards/margins": 2.6971967220306396, + "rewards/rejected": 2.350889205932617, + "step": 4050 + }, + { + "epoch": 0.2252395944577745, + "grad_norm": 68.45481872558594, + "learning_rate": 8.799552629202423e-08, + "logits/chosen": -0.33979806303977966, + "logits/rejected": -0.4661482274532318, + "logps/chosen": -175.78135681152344, + "logps/rejected": -208.96316528320312, + "loss": 1.3225, + "nll_loss": 0.9899007678031921, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.1168084144592285, + "rewards/margins": 3.1300911903381348, + "rewards/rejected": 1.9867169857025146, + "step": 4060 + }, + { + "epoch": 0.2257943717840252, + "grad_norm": 36.38805389404297, + "learning_rate": 8.793882167018671e-08, + "logits/chosen": -0.33051663637161255, + "logits/rejected": -0.37100648880004883, + "logps/chosen": -189.0030517578125, + "logps/rejected": -244.38217163085938, + "loss": 1.2831, + "nll_loss": 1.0483386516571045, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.193713188171387, + "rewards/margins": 2.911698818206787, + "rewards/rejected": 2.282015085220337, + "step": 4070 + }, + { + "epoch": 0.22634914911027587, + "grad_norm": 108.84422302246094, + "learning_rate": 8.788200180047407e-08, + "logits/chosen": -0.20089511573314667, + "logits/rejected": -0.32828986644744873, + "logps/chosen": -148.81640625, + "logps/rejected": -205.16873168945312, + "loss": 1.3197, + "nll_loss": 0.8446512222290039, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.87085485458374, + "rewards/margins": 3.210014820098877, + "rewards/rejected": 1.660840630531311, + "step": 4080 + }, + { + "epoch": 0.22690392643652654, + "grad_norm": 129.64999389648438, + "learning_rate": 8.78250668554897e-08, + "logits/chosen": -0.2841266989707947, + "logits/rejected": -0.37601885199546814, + "logps/chosen": -163.07171630859375, + "logps/rejected": -204.03134155273438, + "loss": 1.296, + "nll_loss": 0.9791350364685059, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.949339866638184, + "rewards/margins": 2.7364394664764404, + "rewards/rejected": 2.2129006385803223, + "step": 4090 + }, + { + "epoch": 0.22745870376277721, + "grad_norm": 68.07540130615234, + "learning_rate": 8.776801700818656e-08, + "logits/chosen": -0.3015419542789459, + "logits/rejected": -0.38536086678504944, + "logps/chosen": -148.27731323242188, + "logps/rejected": -204.63931274414062, + "loss": 1.3958, + "nll_loss": 1.005378007888794, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.849169731140137, + "rewards/margins": 3.0641539096832275, + "rewards/rejected": 1.7850162982940674, + "step": 4100 + }, + { + "epoch": 0.2280134810890279, + "grad_norm": 73.26811981201172, + "learning_rate": 8.771085243186669e-08, + "logits/chosen": -0.12376414239406586, + "logits/rejected": -0.23722615838050842, + "logps/chosen": -138.64968872070312, + "logps/rejected": -163.23028564453125, + "loss": 1.3529, + "nll_loss": 0.7532340288162231, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.229341506958008, + "rewards/margins": 2.931063175201416, + "rewards/rejected": 1.2982782125473022, + "step": 4110 + }, + { + "epoch": 0.22856825841527856, + "grad_norm": 97.30023956298828, + "learning_rate": 8.765357330018055e-08, + "logits/chosen": 0.011545022949576378, + "logits/rejected": -0.09808467328548431, + "logps/chosen": -129.2597198486328, + "logps/rejected": -153.56124877929688, + "loss": 1.3334, + "nll_loss": 0.8299420475959778, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.067888259887695, + "rewards/margins": 2.3037543296813965, + "rewards/rejected": 1.7641338109970093, + "step": 4120 + }, + { + "epoch": 0.22912303574152926, + "grad_norm": 89.39717102050781, + "learning_rate": 8.759617978712666e-08, + "logits/chosen": -0.40456628799438477, + "logits/rejected": -0.4786996841430664, + "logps/chosen": -192.72161865234375, + "logps/rejected": -263.7474670410156, + "loss": 1.375, + "nll_loss": 1.0599608421325684, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.777060031890869, + "rewards/margins": 4.0243730545043945, + "rewards/rejected": 1.7526872158050537, + "step": 4130 + }, + { + "epoch": 0.22967781306777993, + "grad_norm": 55.39466094970703, + "learning_rate": 8.753867206705098e-08, + "logits/chosen": -0.3558950424194336, + "logits/rejected": -0.42491593956947327, + "logps/chosen": -179.45462036132812, + "logps/rejected": -226.12252807617188, + "loss": 1.3064, + "nll_loss": 1.0498034954071045, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.1217498779296875, + "rewards/margins": 2.216548204421997, + "rewards/rejected": 2.9052016735076904, + "step": 4140 + }, + { + "epoch": 0.2302325903940306, + "grad_norm": 108.61188507080078, + "learning_rate": 8.748105031464643e-08, + "logits/chosen": -0.4373705983161926, + "logits/rejected": -0.5036669969558716, + "logps/chosen": -169.29861450195312, + "logps/rejected": -204.67160034179688, + "loss": 1.3576, + "nll_loss": 1.0841724872589111, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.15322208404541, + "rewards/margins": 2.883450984954834, + "rewards/rejected": 2.269770622253418, + "step": 4150 + }, + { + "epoch": 0.23078736772028127, + "grad_norm": 63.595794677734375, + "learning_rate": 8.74233147049523e-08, + "logits/chosen": -0.15530423820018768, + "logits/rejected": -0.24546948075294495, + "logps/chosen": -165.0889892578125, + "logps/rejected": -188.34654235839844, + "loss": 1.3574, + "nll_loss": 0.9425103068351746, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.631152153015137, + "rewards/margins": 2.0830862522125244, + "rewards/rejected": 2.5480661392211914, + "step": 4160 + }, + { + "epoch": 0.23134214504653194, + "grad_norm": 119.82685852050781, + "learning_rate": 8.736546541335371e-08, + "logits/chosen": -0.40301522612571716, + "logits/rejected": -0.4933186173439026, + "logps/chosen": -208.2997589111328, + "logps/rejected": -272.7257385253906, + "loss": 1.371, + "nll_loss": 1.113821268081665, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.932578086853027, + "rewards/margins": 3.926304578781128, + "rewards/rejected": 2.006272554397583, + "step": 4170 + }, + { + "epoch": 0.23189692237278262, + "grad_norm": 63.66457748413086, + "learning_rate": 8.730750261558119e-08, + "logits/chosen": -0.30639415979385376, + "logits/rejected": -0.3842052221298218, + "logps/chosen": -189.58531188964844, + "logps/rejected": -267.81903076171875, + "loss": 1.4157, + "nll_loss": 1.135546326637268, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 5.254769802093506, + "rewards/margins": 2.687643527984619, + "rewards/rejected": 2.5671257972717285, + "step": 4180 + }, + { + "epoch": 0.2324516996990333, + "grad_norm": 67.39790344238281, + "learning_rate": 8.724942648771003e-08, + "logits/chosen": -0.3820488154888153, + "logits/rejected": -0.5085964202880859, + "logps/chosen": -182.63333129882812, + "logps/rejected": -258.7193908691406, + "loss": 1.3241, + "nll_loss": 1.1056026220321655, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.469270706176758, + "rewards/margins": 3.692648410797119, + "rewards/rejected": 1.7766224145889282, + "step": 4190 + }, + { + "epoch": 0.233006477025284, + "grad_norm": 31.938941955566406, + "learning_rate": 8.71912372061598e-08, + "logits/chosen": -0.3462616801261902, + "logits/rejected": -0.4664136469364166, + "logps/chosen": -193.03187561035156, + "logps/rejected": -243.20504760742188, + "loss": 1.3471, + "nll_loss": 1.0250604152679443, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.235898017883301, + "rewards/margins": 3.276707410812378, + "rewards/rejected": 1.9591907262802124, + "step": 4200 + }, + { + "epoch": 0.23356125435153466, + "grad_norm": 43.32523727416992, + "learning_rate": 8.713293494769378e-08, + "logits/chosen": -0.2849578261375427, + "logits/rejected": -0.42158952355384827, + "logps/chosen": -177.0216522216797, + "logps/rejected": -234.6879425048828, + "loss": 1.3577, + "nll_loss": 1.0065664052963257, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.119265079498291, + "rewards/margins": 2.2945590019226074, + "rewards/rejected": 2.8247063159942627, + "step": 4210 + }, + { + "epoch": 0.23411603167778533, + "grad_norm": 81.4455337524414, + "learning_rate": 8.707451988941846e-08, + "logits/chosen": -0.23719966411590576, + "logits/rejected": -0.3504584729671478, + "logps/chosen": -195.0302276611328, + "logps/rejected": -249.18551635742188, + "loss": 1.3832, + "nll_loss": 1.0661619901657104, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.4248480796813965, + "rewards/margins": 3.603041172027588, + "rewards/rejected": 1.8218071460723877, + "step": 4220 + }, + { + "epoch": 0.234670809004036, + "grad_norm": 60.50730895996094, + "learning_rate": 8.701599220878297e-08, + "logits/chosen": -0.27704206109046936, + "logits/rejected": -0.41974037885665894, + "logps/chosen": -190.57310485839844, + "logps/rejected": -243.5283966064453, + "loss": 1.4058, + "nll_loss": 1.0585263967514038, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 5.695257663726807, + "rewards/margins": 2.6654000282287598, + "rewards/rejected": 3.0298571586608887, + "step": 4230 + }, + { + "epoch": 0.23522558633028667, + "grad_norm": 129.30686950683594, + "learning_rate": 8.695735208357859e-08, + "logits/chosen": -0.37667936086654663, + "logits/rejected": -0.4965333938598633, + "logps/chosen": -202.59652709960938, + "logps/rejected": -249.3206787109375, + "loss": 1.3356, + "nll_loss": 1.1458075046539307, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.792067527770996, + "rewards/margins": 2.907867431640625, + "rewards/rejected": 2.8842005729675293, + "step": 4240 + }, + { + "epoch": 0.23578036365653735, + "grad_norm": 80.4828109741211, + "learning_rate": 8.689859969193816e-08, + "logits/chosen": -0.2171669751405716, + "logits/rejected": -0.33526545763015747, + "logps/chosen": -186.0350341796875, + "logps/rejected": -253.03451538085938, + "loss": 1.3282, + "nll_loss": 0.947732150554657, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.438222885131836, + "rewards/margins": 3.2459194660186768, + "rewards/rejected": 2.192303419113159, + "step": 4250 + }, + { + "epoch": 0.23633514098278804, + "grad_norm": 47.65826416015625, + "learning_rate": 8.683973521233552e-08, + "logits/chosen": -0.22899992763996124, + "logits/rejected": -0.3872426152229309, + "logps/chosen": -183.62936401367188, + "logps/rejected": -225.7091827392578, + "loss": 1.2983, + "nll_loss": 0.975047767162323, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.47422456741333, + "rewards/margins": 3.2760257720947266, + "rewards/rejected": 2.1981987953186035, + "step": 4260 + }, + { + "epoch": 0.23688991830903872, + "grad_norm": 166.94833374023438, + "learning_rate": 8.678075882358505e-08, + "logits/chosen": -0.3284236788749695, + "logits/rejected": -0.40388602018356323, + "logps/chosen": -186.22012329101562, + "logps/rejected": -234.9075927734375, + "loss": 1.4147, + "nll_loss": 1.1497867107391357, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.253143310546875, + "rewards/margins": 2.696260452270508, + "rewards/rejected": 2.5568830966949463, + "step": 4270 + }, + { + "epoch": 0.2374446956352894, + "grad_norm": 53.439842224121094, + "learning_rate": 8.672167070484104e-08, + "logits/chosen": -0.0332237184047699, + "logits/rejected": -0.19702832400798798, + "logps/chosen": -143.75985717773438, + "logps/rejected": -175.3542938232422, + "loss": 1.2939, + "nll_loss": 0.7661317586898804, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.622331619262695, + "rewards/margins": 2.994150161743164, + "rewards/rejected": 1.6281816959381104, + "step": 4280 + }, + { + "epoch": 0.23799947296154006, + "grad_norm": 62.4766731262207, + "learning_rate": 8.666247103559725e-08, + "logits/chosen": -0.17678096890449524, + "logits/rejected": -0.2632465958595276, + "logps/chosen": -160.861083984375, + "logps/rejected": -194.93165588378906, + "loss": 1.3603, + "nll_loss": 0.9933874011039734, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.4631524085998535, + "rewards/margins": 1.8023935556411743, + "rewards/rejected": 2.6607584953308105, + "step": 4290 + }, + { + "epoch": 0.23855425028779073, + "grad_norm": 146.61288452148438, + "learning_rate": 8.660315999568622e-08, + "logits/chosen": 0.062420736998319626, + "logits/rejected": -0.1173282116651535, + "logps/chosen": -104.2728500366211, + "logps/rejected": -138.85101318359375, + "loss": 1.2621, + "nll_loss": 0.6970239877700806, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.7959773540496826, + "rewards/margins": 2.769336223602295, + "rewards/rejected": 1.0266412496566772, + "step": 4300 + }, + { + "epoch": 0.2391090276140414, + "grad_norm": 86.5093994140625, + "learning_rate": 8.654373776527886e-08, + "logits/chosen": -0.4773966372013092, + "logits/rejected": -0.5677574276924133, + "logps/chosen": -207.92684936523438, + "logps/rejected": -268.27423095703125, + "loss": 1.3202, + "nll_loss": 1.1020228862762451, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.070623397827148, + "rewards/margins": 3.890537977218628, + "rewards/rejected": 2.1800854206085205, + "step": 4310 + }, + { + "epoch": 0.2396638049402921, + "grad_norm": 75.80883026123047, + "learning_rate": 8.648420452488381e-08, + "logits/chosen": -0.03923141211271286, + "logits/rejected": -0.20453393459320068, + "logps/chosen": -129.60968017578125, + "logps/rejected": -183.7969207763672, + "loss": 1.3065, + "nll_loss": 0.7038072943687439, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.458006381988525, + "rewards/margins": 2.861825466156006, + "rewards/rejected": 1.59618079662323, + "step": 4320 + }, + { + "epoch": 0.24021858226654277, + "grad_norm": 61.33818054199219, + "learning_rate": 8.642456045534697e-08, + "logits/chosen": -0.09462814033031464, + "logits/rejected": -0.273030549287796, + "logps/chosen": -146.52047729492188, + "logps/rejected": -194.67807006835938, + "loss": 1.3122, + "nll_loss": 0.88775235414505, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.361196994781494, + "rewards/margins": 2.2622735500335693, + "rewards/rejected": 2.0989232063293457, + "step": 4330 + }, + { + "epoch": 0.24077335959279345, + "grad_norm": 95.40966033935547, + "learning_rate": 8.636480573785088e-08, + "logits/chosen": -0.06831637769937515, + "logits/rejected": -0.19084826111793518, + "logps/chosen": -132.2844696044922, + "logps/rejected": -149.97573852539062, + "loss": 1.2389, + "nll_loss": 0.7682362794876099, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.953458070755005, + "rewards/margins": 2.6358141899108887, + "rewards/rejected": 1.3176437616348267, + "step": 4340 + }, + { + "epoch": 0.24132813691904412, + "grad_norm": 66.08728790283203, + "learning_rate": 8.630494055391418e-08, + "logits/chosen": -0.22335462272167206, + "logits/rejected": -0.3700319826602936, + "logps/chosen": -151.97804260253906, + "logps/rejected": -237.0194549560547, + "loss": 1.3026, + "nll_loss": 0.9709617495536804, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.424733638763428, + "rewards/margins": 3.8076565265655518, + "rewards/rejected": 1.6170778274536133, + "step": 4350 + }, + { + "epoch": 0.2418829142452948, + "grad_norm": 84.6449966430664, + "learning_rate": 8.624496508539112e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -157.78152465820312, + "logps/rejected": -205.28628540039062, + "loss": 1.4204, + "nll_loss": NaN, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.4751176834106445, + "rewards/margins": 4.087596416473389, + "rewards/rejected": 1.387520670890808, + "step": 4360 + }, + { + "epoch": 0.24243769157154546, + "grad_norm": 94.63330078125, + "learning_rate": 8.618487951447095e-08, + "logits/chosen": -0.4150986075401306, + "logits/rejected": -0.5466437339782715, + "logps/chosen": -187.41586303710938, + "logps/rejected": -238.2434539794922, + "loss": 1.2492, + "nll_loss": 1.0808436870574951, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.215986251831055, + "rewards/margins": 3.6832032203674316, + "rewards/rejected": 1.5327831506729126, + "step": 4370 + }, + { + "epoch": 0.24299246889779616, + "grad_norm": 55.334938049316406, + "learning_rate": 8.612468402367738e-08, + "logits/chosen": -0.20146456360816956, + "logits/rejected": -0.3176548182964325, + "logps/chosen": -161.24691772460938, + "logps/rejected": -197.61886596679688, + "loss": 1.356, + "nll_loss": 0.9338601231575012, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.561044216156006, + "rewards/margins": 3.121908664703369, + "rewards/rejected": 1.4391355514526367, + "step": 4380 + }, + { + "epoch": 0.24354724622404683, + "grad_norm": 69.12451171875, + "learning_rate": 8.606437879586799e-08, + "logits/chosen": -0.22908329963684082, + "logits/rejected": -0.352277934551239, + "logps/chosen": -165.62339782714844, + "logps/rejected": -209.05032348632812, + "loss": 1.3846, + "nll_loss": 0.9394363164901733, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.0261735916137695, + "rewards/margins": 3.0195279121398926, + "rewards/rejected": 2.006645679473877, + "step": 4390 + }, + { + "epoch": 0.2441020235502975, + "grad_norm": 102.31427764892578, + "learning_rate": 8.60039640142338e-08, + "logits/chosen": -0.2176387757062912, + "logits/rejected": -0.31441378593444824, + "logps/chosen": -168.29946899414062, + "logps/rejected": -223.59280395507812, + "loss": 1.4105, + "nll_loss": 0.9233170747756958, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.863080024719238, + "rewards/margins": 2.7318460941314697, + "rewards/rejected": 2.1312336921691895, + "step": 4400 + }, + { + "epoch": 0.24465680087654817, + "grad_norm": 68.1030502319336, + "learning_rate": 8.594343986229853e-08, + "logits/chosen": -0.14959707856178284, + "logits/rejected": -0.2593737244606018, + "logps/chosen": -169.5933074951172, + "logps/rejected": -197.62954711914062, + "loss": 1.2849, + "nll_loss": 0.8743448257446289, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.89533805847168, + "rewards/margins": 2.8288931846618652, + "rewards/rejected": 2.0664451122283936, + "step": 4410 + }, + { + "epoch": 0.24521157820279885, + "grad_norm": 99.41896057128906, + "learning_rate": 8.588280652391819e-08, + "logits/chosen": -0.2931186556816101, + "logits/rejected": -0.4342547357082367, + "logps/chosen": -183.0137939453125, + "logps/rejected": -246.53598022460938, + "loss": 1.3999, + "nll_loss": 0.9624223709106445, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 5.3488383293151855, + "rewards/margins": 4.081454277038574, + "rewards/rejected": 1.2673838138580322, + "step": 4420 + }, + { + "epoch": 0.24576635552904952, + "grad_norm": 67.750732421875, + "learning_rate": 8.582206418328044e-08, + "logits/chosen": -0.3752570152282715, + "logits/rejected": -0.4712037146091461, + "logps/chosen": -182.9212646484375, + "logps/rejected": -234.5699005126953, + "loss": 1.3834, + "nll_loss": 1.0797786712646484, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.707915782928467, + "rewards/margins": 3.22369122505188, + "rewards/rejected": 2.484224796295166, + "step": 4430 + }, + { + "epoch": 0.24632113285530022, + "grad_norm": 47.93496322631836, + "learning_rate": 8.57612130249041e-08, + "logits/chosen": -0.3381286859512329, + "logits/rejected": -0.43061742186546326, + "logps/chosen": -175.91787719726562, + "logps/rejected": -221.03335571289062, + "loss": 1.2475, + "nll_loss": 0.9814583659172058, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.677148342132568, + "rewards/margins": 3.701653242111206, + "rewards/rejected": 1.9754953384399414, + "step": 4440 + }, + { + "epoch": 0.2468759101815509, + "grad_norm": 56.9894905090332, + "learning_rate": 8.570025323363852e-08, + "logits/chosen": -0.03819179907441139, + "logits/rejected": -0.20645050704479218, + "logps/chosen": -150.8126983642578, + "logps/rejected": -158.15496826171875, + "loss": 1.3705, + "nll_loss": 0.8135994076728821, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.05548620223999, + "rewards/margins": 1.940616250038147, + "rewards/rejected": 2.1148698329925537, + "step": 4450 + }, + { + "epoch": 0.24743068750780156, + "grad_norm": 65.15169525146484, + "learning_rate": 8.563918499466304e-08, + "logits/chosen": -0.3688794672489166, + "logits/rejected": -0.5100642442703247, + "logps/chosen": -182.76837158203125, + "logps/rejected": -246.40133666992188, + "loss": 1.3296, + "nll_loss": 1.0879732370376587, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.046154022216797, + "rewards/margins": 3.9471893310546875, + "rewards/rejected": 2.0989649295806885, + "step": 4460 + }, + { + "epoch": 0.24798546483405223, + "grad_norm": 58.4198112487793, + "learning_rate": 8.557800849348647e-08, + "logits/chosen": -0.42738962173461914, + "logits/rejected": -0.549644410610199, + "logps/chosen": -210.28811645507812, + "logps/rejected": -254.00711059570312, + "loss": 1.3312, + "nll_loss": 1.1513640880584717, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.145230293273926, + "rewards/margins": 3.2168631553649902, + "rewards/rejected": 2.9283671379089355, + "step": 4470 + }, + { + "epoch": 0.2485402421603029, + "grad_norm": 103.25559997558594, + "learning_rate": 8.551672391594645e-08, + "logits/chosen": -0.10428180545568466, + "logits/rejected": -0.195042222738266, + "logps/chosen": -157.71859741210938, + "logps/rejected": -180.86099243164062, + "loss": 1.3473, + "nll_loss": 0.8617512583732605, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.883713722229004, + "rewards/margins": 2.410423994064331, + "rewards/rejected": 2.473289966583252, + "step": 4480 + }, + { + "epoch": 0.24909501948655358, + "grad_norm": 52.8361701965332, + "learning_rate": 8.545533144820892e-08, + "logits/chosen": -0.20854294300079346, + "logits/rejected": -0.30969762802124023, + "logps/chosen": -180.6295166015625, + "logps/rejected": -237.1515350341797, + "loss": 1.471, + "nll_loss": 0.9408046007156372, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.4925456047058105, + "rewards/margins": 2.8565022945404053, + "rewards/rejected": 2.6360433101654053, + "step": 4490 + }, + { + "epoch": 0.24964979681280425, + "grad_norm": 72.89659881591797, + "learning_rate": 8.539383127676763e-08, + "logits/chosen": -0.23576506972312927, + "logits/rejected": -0.3891776502132416, + "logps/chosen": -212.4176483154297, + "logps/rejected": -269.67706298828125, + "loss": 1.3893, + "nll_loss": 1.0825352668762207, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.811644077301025, + "rewards/margins": 3.8783583641052246, + "rewards/rejected": 1.9332859516143799, + "step": 4500 + }, + { + "epoch": 0.24964979681280425, + "eval_logits/chosen": -0.3306578993797302, + "eval_logits/rejected": -0.40762603282928467, + "eval_logps/chosen": -199.13613891601562, + "eval_logps/rejected": -259.206298828125, + "eval_loss": 1.2888281345367432, + "eval_nll_loss": 1.0351033210754395, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 5.882508277893066, + "eval_rewards/margins": 3.895287036895752, + "eval_rewards/rejected": 1.9872204065322876, + "eval_runtime": 16.8043, + "eval_samples_per_second": 15.234, + "eval_steps_per_second": 1.904, + "step": 4500 + }, + { + "epoch": 0.25020457413905495, + "grad_norm": 72.03968811035156, + "learning_rate": 8.533222358844345e-08, + "logits/chosen": -0.38020601868629456, + "logits/rejected": -0.4878564774990082, + "logps/chosen": -176.05238342285156, + "logps/rejected": -223.0900115966797, + "loss": 1.3742, + "nll_loss": 1.063004970550537, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.97015905380249, + "rewards/margins": 3.1073145866394043, + "rewards/rejected": 1.8628448247909546, + "step": 4510 + }, + { + "epoch": 0.2507593514653056, + "grad_norm": 25.4820613861084, + "learning_rate": 8.527050857038385e-08, + "logits/chosen": -0.037890512496232986, + "logits/rejected": -0.22132185101509094, + "logps/chosen": -138.64907836914062, + "logps/rejected": -177.23446655273438, + "loss": 1.2826, + "nll_loss": 0.7740511298179626, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.439934253692627, + "rewards/margins": 3.037219285964966, + "rewards/rejected": 1.4027149677276611, + "step": 4520 + }, + { + "epoch": 0.2513141287915563, + "grad_norm": 50.93412399291992, + "learning_rate": 8.520868641006238e-08, + "logits/chosen": -0.1365940272808075, + "logits/rejected": -0.30282798409461975, + "logps/chosen": -132.58665466308594, + "logps/rejected": -169.67105102539062, + "loss": 1.3555, + "nll_loss": 0.7856622338294983, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.3856024742126465, + "rewards/margins": 2.379370927810669, + "rewards/rejected": 2.0062317848205566, + "step": 4530 + }, + { + "epoch": 0.251868906117807, + "grad_norm": 46.51472091674805, + "learning_rate": 8.514675729527801e-08, + "logits/chosen": -0.34577757120132446, + "logits/rejected": -0.4794695973396301, + "logps/chosen": -205.43167114257812, + "logps/rejected": -267.23724365234375, + "loss": 1.2885, + "nll_loss": 1.064664602279663, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.145432472229004, + "rewards/margins": 3.6067302227020264, + "rewards/rejected": 2.5387020111083984, + "step": 4540 + }, + { + "epoch": 0.25242368344405763, + "grad_norm": 62.636844635009766, + "learning_rate": 8.508472141415466e-08, + "logits/chosen": -0.18032360076904297, + "logits/rejected": -0.2991257905960083, + "logps/chosen": -165.84439086914062, + "logps/rejected": -233.81494140625, + "loss": 1.3154, + "nll_loss": 0.9324311017990112, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.935610771179199, + "rewards/margins": 3.13999080657959, + "rewards/rejected": 1.7956199645996094, + "step": 4550 + }, + { + "epoch": 0.25297846077030833, + "grad_norm": 73.33432006835938, + "learning_rate": 8.502257895514053e-08, + "logits/chosen": -0.30248206853866577, + "logits/rejected": -0.442868709564209, + "logps/chosen": -171.95774841308594, + "logps/rejected": -244.6792449951172, + "loss": 1.2763, + "nll_loss": 0.9759060144424438, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.5328264236450195, + "rewards/margins": 4.3905439376831055, + "rewards/rejected": 1.1422834396362305, + "step": 4560 + }, + { + "epoch": 0.253533238096559, + "grad_norm": 36.83837127685547, + "learning_rate": 8.496033010700761e-08, + "logits/chosen": -0.2894170880317688, + "logits/rejected": -0.44485601782798767, + "logps/chosen": -197.3072509765625, + "logps/rejected": -252.8368377685547, + "loss": 1.379, + "nll_loss": 1.0642839670181274, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.660626411437988, + "rewards/margins": 4.005428314208984, + "rewards/rejected": 1.6551977396011353, + "step": 4570 + }, + { + "epoch": 0.2540880154228097, + "grad_norm": 65.84944915771484, + "learning_rate": 8.489797505885105e-08, + "logits/chosen": -0.18601150810718536, + "logits/rejected": -0.3102690577507019, + "logps/chosen": -180.2449493408203, + "logps/rejected": -254.8947296142578, + "loss": 1.2849, + "nll_loss": 0.957818329334259, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.565386772155762, + "rewards/margins": 3.5227177143096924, + "rewards/rejected": 2.0426688194274902, + "step": 4580 + }, + { + "epoch": 0.2546427927490603, + "grad_norm": 127.11788177490234, + "learning_rate": 8.483551400008864e-08, + "logits/chosen": -0.0906374454498291, + "logits/rejected": -0.26230689883232117, + "logps/chosen": -161.72662353515625, + "logps/rejected": -198.145263671875, + "loss": 1.3407, + "nll_loss": 0.9006088972091675, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.747382164001465, + "rewards/margins": 2.776177167892456, + "rewards/rejected": 1.9712049961090088, + "step": 4590 + }, + { + "epoch": 0.255197570075311, + "grad_norm": 45.21589279174805, + "learning_rate": 8.477294712046014e-08, + "logits/chosen": -0.10336129367351532, + "logits/rejected": -0.31331485509872437, + "logps/chosen": -147.0296630859375, + "logps/rejected": -198.47779846191406, + "loss": 1.293, + "nll_loss": 0.8451549410820007, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.42185640335083, + "rewards/margins": 2.895052433013916, + "rewards/rejected": 1.5268040895462036, + "step": 4600 + }, + { + "epoch": 0.2557523474015617, + "grad_norm": 189.2617950439453, + "learning_rate": 8.471027461002683e-08, + "logits/chosen": -0.19419385492801666, + "logits/rejected": -0.3464725613594055, + "logps/chosen": -148.30079650878906, + "logps/rejected": -208.98556518554688, + "loss": 1.313, + "nll_loss": 0.8720955848693848, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.144837379455566, + "rewards/margins": 3.403660297393799, + "rewards/rejected": 1.7411772012710571, + "step": 4610 + }, + { + "epoch": 0.25630712472781236, + "grad_norm": 66.42888641357422, + "learning_rate": 8.46474966591708e-08, + "logits/chosen": -0.25781363248825073, + "logits/rejected": -0.35114097595214844, + "logps/chosen": -172.5027618408203, + "logps/rejected": -205.27633666992188, + "loss": 1.3957, + "nll_loss": 0.9941679835319519, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 4.990196704864502, + "rewards/margins": 2.257720470428467, + "rewards/rejected": 2.732475757598877, + "step": 4620 + }, + { + "epoch": 0.25686190205406306, + "grad_norm": 56.031105041503906, + "learning_rate": 8.458461345859453e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -166.62411499023438, + "logps/rejected": -231.1585693359375, + "loss": 1.2733, + "nll_loss": NaN, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.820073127746582, + "rewards/margins": 3.484143018722534, + "rewards/rejected": 1.3359302282333374, + "step": 4630 + }, + { + "epoch": 0.2574166793803137, + "grad_norm": 89.39034271240234, + "learning_rate": 8.452162519932012e-08, + "logits/chosen": -0.3336629867553711, + "logits/rejected": -0.41933974623680115, + "logps/chosen": -194.59652709960938, + "logps/rejected": -244.4859161376953, + "loss": 1.4353, + "nll_loss": 1.1331466436386108, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.238390922546387, + "rewards/margins": 1.9931983947753906, + "rewards/rejected": 3.245192766189575, + "step": 4640 + }, + { + "epoch": 0.2579714567065644, + "grad_norm": 59.741416931152344, + "learning_rate": 8.44585320726889e-08, + "logits/chosen": -0.29769474267959595, + "logits/rejected": -0.38778576254844666, + "logps/chosen": -181.58316040039062, + "logps/rejected": -249.23178100585938, + "loss": 1.317, + "nll_loss": 1.0540294647216797, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.46547794342041, + "rewards/margins": 3.398808717727661, + "rewards/rejected": 2.06666898727417, + "step": 4650 + }, + { + "epoch": 0.2585262340328151, + "grad_norm": 90.04450225830078, + "learning_rate": 8.43953342703607e-08, + "logits/chosen": -0.04572884738445282, + "logits/rejected": -0.17261159420013428, + "logps/chosen": -130.46591186523438, + "logps/rejected": -192.3133087158203, + "loss": 1.2683, + "nll_loss": 0.7979989647865295, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.13479471206665, + "rewards/margins": 3.162766456604004, + "rewards/rejected": 0.9720277786254883, + "step": 4660 + }, + { + "epoch": 0.25908101135906575, + "grad_norm": 51.916786193847656, + "learning_rate": 8.433203198431336e-08, + "logits/chosen": -0.10603030771017075, + "logits/rejected": -0.25791341066360474, + "logps/chosen": -147.1509246826172, + "logps/rejected": -206.18197631835938, + "loss": 1.4653, + "nll_loss": 0.8920075297355652, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.549837589263916, + "rewards/margins": 2.8990442752838135, + "rewards/rejected": 1.6507936716079712, + "step": 4670 + }, + { + "epoch": 0.25963578868531645, + "grad_norm": 55.086280822753906, + "learning_rate": 8.426862540684206e-08, + "logits/chosen": -0.2217932492494583, + "logits/rejected": -0.3490327298641205, + "logps/chosen": -168.48159790039062, + "logps/rejected": -217.4625244140625, + "loss": 1.29, + "nll_loss": 1.0062181949615479, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.952488422393799, + "rewards/margins": 2.8383209705352783, + "rewards/rejected": 2.1141676902770996, + "step": 4680 + }, + { + "epoch": 0.2601905660115671, + "grad_norm": 64.567626953125, + "learning_rate": 8.420511473055886e-08, + "logits/chosen": -0.15327800810337067, + "logits/rejected": -0.2819003164768219, + "logps/chosen": -166.82540893554688, + "logps/rejected": -217.75234985351562, + "loss": 1.2892, + "nll_loss": 0.9549553990364075, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.273451328277588, + "rewards/margins": 3.76080322265625, + "rewards/rejected": 1.5126473903656006, + "step": 4690 + }, + { + "epoch": 0.2607453433378178, + "grad_norm": 49.292476654052734, + "learning_rate": 8.414150014839199e-08, + "logits/chosen": -0.2889734208583832, + "logits/rejected": -0.4110495448112488, + "logps/chosen": -206.3968505859375, + "logps/rejected": -283.59552001953125, + "loss": 1.3333, + "nll_loss": 1.0700557231903076, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.977264404296875, + "rewards/margins": 3.681428909301758, + "rewards/rejected": 2.2958357334136963, + "step": 4700 + }, + { + "epoch": 0.26130012066406844, + "grad_norm": 83.20051574707031, + "learning_rate": 8.407778185358536e-08, + "logits/chosen": -0.498958021402359, + "logits/rejected": -0.5524585247039795, + "logps/chosen": -214.4412078857422, + "logps/rejected": -242.309814453125, + "loss": 1.3835, + "nll_loss": 1.2496415376663208, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.918186664581299, + "rewards/margins": 2.482825517654419, + "rewards/rejected": 3.4353606700897217, + "step": 4710 + }, + { + "epoch": 0.26185489799031914, + "grad_norm": 60.543052673339844, + "learning_rate": 8.40139600396979e-08, + "logits/chosen": -0.2590022683143616, + "logits/rejected": -0.3653254508972168, + "logps/chosen": -154.97506713867188, + "logps/rejected": -201.74185180664062, + "loss": 1.3559, + "nll_loss": 0.9882938265800476, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.30222749710083, + "rewards/margins": 3.420788288116455, + "rewards/rejected": 1.8814388513565063, + "step": 4720 + }, + { + "epoch": 0.26240967531656983, + "grad_norm": 37.433589935302734, + "learning_rate": 8.3950034900603e-08, + "logits/chosen": -0.23119473457336426, + "logits/rejected": -0.32892414927482605, + "logps/chosen": -158.8278350830078, + "logps/rejected": -200.85879516601562, + "loss": 1.3303, + "nll_loss": 0.9745529890060425, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.2692413330078125, + "rewards/margins": 3.4871573448181152, + "rewards/rejected": 1.7820838689804077, + "step": 4730 + }, + { + "epoch": 0.2629644526428205, + "grad_norm": 150.04627990722656, + "learning_rate": 8.388600663048794e-08, + "logits/chosen": -0.1670699119567871, + "logits/rejected": -0.3385860025882721, + "logps/chosen": -179.1607208251953, + "logps/rejected": -281.8492126464844, + "loss": 1.4205, + "nll_loss": 0.9550544619560242, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.709284782409668, + "rewards/margins": 4.198940277099609, + "rewards/rejected": 1.510345220565796, + "step": 4740 + }, + { + "epoch": 0.2635192299690712, + "grad_norm": 52.15652847290039, + "learning_rate": 8.382187542385328e-08, + "logits/chosen": -0.2738000452518463, + "logits/rejected": -0.349651962518692, + "logps/chosen": -177.93954467773438, + "logps/rejected": -226.75778198242188, + "loss": 1.2965, + "nll_loss": 1.01934814453125, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.248774528503418, + "rewards/margins": 3.257829189300537, + "rewards/rejected": 1.9909454584121704, + "step": 4750 + }, + { + "epoch": 0.2640740072953218, + "grad_norm": 49.90021896362305, + "learning_rate": 8.37576414755123e-08, + "logits/chosen": -0.35484129190444946, + "logits/rejected": -0.41778916120529175, + "logps/chosen": -189.77159118652344, + "logps/rejected": -231.64547729492188, + "loss": 1.341, + "nll_loss": 1.0807373523712158, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.559969425201416, + "rewards/margins": 2.28197979927063, + "rewards/rejected": 3.277989625930786, + "step": 4760 + }, + { + "epoch": 0.2646287846215725, + "grad_norm": 48.15873336791992, + "learning_rate": 8.369330498059033e-08, + "logits/chosen": -0.16155406832695007, + "logits/rejected": -0.2664300501346588, + "logps/chosen": -164.81192016601562, + "logps/rejected": -231.3193817138672, + "loss": 1.3654, + "nll_loss": 1.0048094987869263, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.006320953369141, + "rewards/margins": 3.9034602642059326, + "rewards/rejected": 1.1028602123260498, + "step": 4770 + }, + { + "epoch": 0.26518356194782317, + "grad_norm": 229.7599639892578, + "learning_rate": 8.362886613452423e-08, + "logits/chosen": -0.15005961060523987, + "logits/rejected": -0.23518244922161102, + "logps/chosen": -163.5731658935547, + "logps/rejected": -202.78944396972656, + "loss": 1.3263, + "nll_loss": 0.8745111227035522, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.994362831115723, + "rewards/margins": 2.4524505138397217, + "rewards/rejected": 2.541912317276001, + "step": 4780 + }, + { + "epoch": 0.26573833927407386, + "grad_norm": 70.23784637451172, + "learning_rate": 8.35643251330618e-08, + "logits/chosen": -0.2799859046936035, + "logits/rejected": -0.37818005681037903, + "logps/chosen": -196.68984985351562, + "logps/rejected": -237.1707763671875, + "loss": 1.2948, + "nll_loss": 1.0815504789352417, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.128037929534912, + "rewards/margins": 3.218574047088623, + "rewards/rejected": 1.909463882446289, + "step": 4790 + }, + { + "epoch": 0.26629311660032456, + "grad_norm": 57.96487808227539, + "learning_rate": 8.349968217226113e-08, + "logits/chosen": -0.31223830580711365, + "logits/rejected": -0.4266796112060547, + "logps/chosen": -175.681640625, + "logps/rejected": -212.2870635986328, + "loss": 1.2977, + "nll_loss": 1.0572354793548584, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.238940238952637, + "rewards/margins": 2.6529510021209717, + "rewards/rejected": 2.585989475250244, + "step": 4800 + }, + { + "epoch": 0.2668478939265752, + "grad_norm": 43.18544387817383, + "learning_rate": 8.343493744849001e-08, + "logits/chosen": -0.2462203949689865, + "logits/rejected": -0.3878275752067566, + "logps/chosen": -180.4509735107422, + "logps/rejected": -250.7042236328125, + "loss": 1.3514, + "nll_loss": 1.009977102279663, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.467653751373291, + "rewards/margins": 3.171955108642578, + "rewards/rejected": 2.295698642730713, + "step": 4810 + }, + { + "epoch": 0.2674026712528259, + "grad_norm": 58.05904006958008, + "learning_rate": 8.337009115842545e-08, + "logits/chosen": -0.1778537929058075, + "logits/rejected": -0.3089439272880554, + "logps/chosen": -165.0500946044922, + "logps/rejected": -208.13623046875, + "loss": 1.3021, + "nll_loss": 0.9191769361495972, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.851661682128906, + "rewards/margins": 2.92620587348938, + "rewards/rejected": 1.9254562854766846, + "step": 4820 + }, + { + "epoch": 0.26795744857907655, + "grad_norm": 40.6214714050293, + "learning_rate": 8.330514349905293e-08, + "logits/chosen": -0.29483598470687866, + "logits/rejected": -0.43914732336997986, + "logps/chosen": -169.65567016601562, + "logps/rejected": -205.42495727539062, + "loss": 1.3685, + "nll_loss": 0.9740726351737976, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.574697971343994, + "rewards/margins": 3.5862374305725098, + "rewards/rejected": 1.988459587097168, + "step": 4830 + }, + { + "epoch": 0.26851222590532725, + "grad_norm": 68.7054443359375, + "learning_rate": 8.324009466766581e-08, + "logits/chosen": -0.30889657139778137, + "logits/rejected": -0.3688036799430847, + "logps/chosen": -150.3580780029297, + "logps/rejected": -210.03738403320312, + "loss": 1.3273, + "nll_loss": 0.9760260581970215, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.090402603149414, + "rewards/margins": 2.458418846130371, + "rewards/rejected": 2.631983757019043, + "step": 4840 + }, + { + "epoch": 0.26906700323157795, + "grad_norm": 60.75359344482422, + "learning_rate": 8.317494486186489e-08, + "logits/chosen": -0.32037153840065, + "logits/rejected": -0.43114009499549866, + "logps/chosen": -177.12033081054688, + "logps/rejected": -215.9650421142578, + "loss": 1.4469, + "nll_loss": 1.0228039026260376, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.64739990234375, + "rewards/margins": 2.7197928428649902, + "rewards/rejected": 2.9276070594787598, + "step": 4850 + }, + { + "epoch": 0.2696217805578286, + "grad_norm": 88.06070709228516, + "learning_rate": 8.310969427955765e-08, + "logits/chosen": -0.2338133305311203, + "logits/rejected": -0.3076700270175934, + "logps/chosen": -172.38645935058594, + "logps/rejected": -207.7739715576172, + "loss": 1.4337, + "nll_loss": 0.9386089444160461, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.505553722381592, + "rewards/margins": 2.553183078765869, + "rewards/rejected": 2.9523708820343018, + "step": 4860 + }, + { + "epoch": 0.2701765578840793, + "grad_norm": 58.89626693725586, + "learning_rate": 8.304434311895768e-08, + "logits/chosen": -0.05045692250132561, + "logits/rejected": -0.20222020149230957, + "logps/chosen": -127.2392578125, + "logps/rejected": -165.931640625, + "loss": 1.3058, + "nll_loss": 0.7586954832077026, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.104668617248535, + "rewards/margins": 2.188541889190674, + "rewards/rejected": 1.9161268472671509, + "step": 4870 + }, + { + "epoch": 0.27073133521032994, + "grad_norm": 72.38153076171875, + "learning_rate": 8.297889157858413e-08, + "logits/chosen": -0.3325496315956116, + "logits/rejected": -0.424597829580307, + "logps/chosen": -190.23590087890625, + "logps/rejected": -260.87847900390625, + "loss": 1.3395, + "nll_loss": 1.0346920490264893, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.8594069480896, + "rewards/margins": 4.145157814025879, + "rewards/rejected": 1.7142490148544312, + "step": 4880 + }, + { + "epoch": 0.27128611253658064, + "grad_norm": 128.99923706054688, + "learning_rate": 8.291333985726106e-08, + "logits/chosen": -0.03347639739513397, + "logits/rejected": -0.145706906914711, + "logps/chosen": -136.19009399414062, + "logps/rejected": -180.11630249023438, + "loss": 1.301, + "nll_loss": 0.8283224105834961, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.277609825134277, + "rewards/margins": 2.1975202560424805, + "rewards/rejected": 2.080089569091797, + "step": 4890 + }, + { + "epoch": 0.2718408898628313, + "grad_norm": 69.27930450439453, + "learning_rate": 8.284768815411691e-08, + "logits/chosen": -0.16743507981300354, + "logits/rejected": -0.26962658762931824, + "logps/chosen": -178.30587768554688, + "logps/rejected": -240.0220947265625, + "loss": 1.3253, + "nll_loss": 0.9475281834602356, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.275069236755371, + "rewards/margins": 2.969179391860962, + "rewards/rejected": 2.30588960647583, + "step": 4900 + }, + { + "epoch": 0.272395667189082, + "grad_norm": 62.62528610229492, + "learning_rate": 8.278193666858374e-08, + "logits/chosen": -0.20674102008342743, + "logits/rejected": -0.3136216998100281, + "logps/chosen": -142.73001098632812, + "logps/rejected": -173.5380096435547, + "loss": 1.3661, + "nll_loss": 0.9335249066352844, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.942017555236816, + "rewards/margins": 2.9952781200408936, + "rewards/rejected": 1.9467391967773438, + "step": 4910 + }, + { + "epoch": 0.2729504445153327, + "grad_norm": 47.427555084228516, + "learning_rate": 8.271608560039681e-08, + "logits/chosen": -0.28811416029930115, + "logits/rejected": -0.3628733158111572, + "logps/chosen": -187.3347930908203, + "logps/rejected": -237.3675994873047, + "loss": 1.3381, + "nll_loss": 1.1174544095993042, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.298020362854004, + "rewards/margins": 2.991478443145752, + "rewards/rejected": 2.3065414428710938, + "step": 4920 + }, + { + "epoch": 0.2735052218415833, + "grad_norm": 172.0816192626953, + "learning_rate": 8.26501351495938e-08, + "logits/chosen": -0.23228800296783447, + "logits/rejected": -0.30110445618629456, + "logps/chosen": -188.532470703125, + "logps/rejected": -249.3604736328125, + "loss": 1.3669, + "nll_loss": 1.0762465000152588, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.646094799041748, + "rewards/margins": 3.560511350631714, + "rewards/rejected": 2.085583448410034, + "step": 4930 + }, + { + "epoch": 0.274059999167834, + "grad_norm": 54.714622497558594, + "learning_rate": 8.258408551651438e-08, + "logits/chosen": -0.3558768928050995, + "logits/rejected": -0.42641109228134155, + "logps/chosen": -194.4613494873047, + "logps/rejected": -249.61874389648438, + "loss": 1.3133, + "nll_loss": 1.0652390718460083, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.462862491607666, + "rewards/margins": 3.3691318035125732, + "rewards/rejected": 2.093731164932251, + "step": 4940 + }, + { + "epoch": 0.27461477649408467, + "grad_norm": 70.50579833984375, + "learning_rate": 8.251793690179945e-08, + "logits/chosen": -0.171391561627388, + "logits/rejected": -0.29056456685066223, + "logps/chosen": -167.0868682861328, + "logps/rejected": -223.43124389648438, + "loss": 1.3015, + "nll_loss": 0.9458906054496765, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.318848133087158, + "rewards/margins": 3.345581531524658, + "rewards/rejected": 1.973266363143921, + "step": 4950 + }, + { + "epoch": 0.27516955382033537, + "grad_norm": 57.20966339111328, + "learning_rate": 8.245168950639061e-08, + "logits/chosen": 0.025897592306137085, + "logits/rejected": -0.12947197258472443, + "logps/chosen": -122.27632904052734, + "logps/rejected": -155.8192596435547, + "loss": 1.3076, + "nll_loss": 0.7416020035743713, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.2231645584106445, + "rewards/margins": 2.9529929161071777, + "rewards/rejected": 1.270171880722046, + "step": 4960 + }, + { + "epoch": 0.27572433114658607, + "grad_norm": 38.52761459350586, + "learning_rate": 8.23853435315295e-08, + "logits/chosen": -0.2517802119255066, + "logits/rejected": -0.3721666634082794, + "logps/chosen": -145.8294219970703, + "logps/rejected": -208.90017700195312, + "loss": 1.1446, + "nll_loss": 0.9143912196159363, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.211873531341553, + "rewards/margins": 3.9393725395202637, + "rewards/rejected": 1.2725012302398682, + "step": 4970 + }, + { + "epoch": 0.2762791084728367, + "grad_norm": 43.90656661987305, + "learning_rate": 8.231889917875728e-08, + "logits/chosen": -0.36203673481941223, + "logits/rejected": -0.500227153301239, + "logps/chosen": -173.27207946777344, + "logps/rejected": -255.75564575195312, + "loss": 1.2673, + "nll_loss": 1.028189778327942, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.356875419616699, + "rewards/margins": 4.0634074211120605, + "rewards/rejected": 1.2934677600860596, + "step": 4980 + }, + { + "epoch": 0.2768338857990874, + "grad_norm": 71.87471008300781, + "learning_rate": 8.225235664991386e-08, + "logits/chosen": -0.2676452100276947, + "logits/rejected": -0.3847208321094513, + "logps/chosen": -193.01084899902344, + "logps/rejected": -229.0294189453125, + "loss": 1.301, + "nll_loss": 1.0524907112121582, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.651148796081543, + "rewards/margins": 3.842705488204956, + "rewards/rejected": 1.808443307876587, + "step": 4990 + }, + { + "epoch": 0.27738866312533805, + "grad_norm": 37.92792892456055, + "learning_rate": 8.218571614713749e-08, + "logits/chosen": -0.24387606978416443, + "logits/rejected": -0.41826462745666504, + "logps/chosen": -145.3743133544922, + "logps/rejected": -180.5663604736328, + "loss": 1.2525, + "nll_loss": 0.9066478610038757, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.747946262359619, + "rewards/margins": 3.3187623023986816, + "rewards/rejected": 1.4291837215423584, + "step": 5000 + }, + { + "epoch": 0.27738866312533805, + "eval_logits/chosen": -0.3629521131515503, + "eval_logits/rejected": -0.4396223723888397, + "eval_logps/chosen": -198.44891357421875, + "eval_logps/rejected": -261.6014404296875, + "eval_loss": 1.2831330299377441, + "eval_nll_loss": 1.0325713157653809, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 5.951231479644775, + "eval_rewards/margins": 4.203526020050049, + "eval_rewards/rejected": 1.7477052211761475, + "eval_runtime": 17.0807, + "eval_samples_per_second": 14.988, + "eval_steps_per_second": 1.873, + "step": 5000 + }, + { + "epoch": 0.27794344045158875, + "grad_norm": 56.25941467285156, + "learning_rate": 8.211897787286396e-08, + "logits/chosen": -0.1583927869796753, + "logits/rejected": -0.3076084554195404, + "logps/chosen": -151.69210815429688, + "logps/rejected": -194.51449584960938, + "loss": 1.2783, + "nll_loss": 0.8849924206733704, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.681919574737549, + "rewards/margins": 2.732123851776123, + "rewards/rejected": 1.9497959613800049, + "step": 5010 + }, + { + "epoch": 0.2784982177778394, + "grad_norm": 55.836021423339844, + "learning_rate": 8.205214202982609e-08, + "logits/chosen": -0.25684595108032227, + "logits/rejected": -0.35995739698410034, + "logps/chosen": -166.514404296875, + "logps/rejected": -221.72021484375, + "loss": 1.3165, + "nll_loss": 0.9287391901016235, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.375771522521973, + "rewards/margins": 3.230828046798706, + "rewards/rejected": 2.1449429988861084, + "step": 5020 + }, + { + "epoch": 0.2790529951040901, + "grad_norm": 79.99015045166016, + "learning_rate": 8.198520882105311e-08, + "logits/chosen": -0.38538604974746704, + "logits/rejected": -0.4918065071105957, + "logps/chosen": -172.71304321289062, + "logps/rejected": -217.1675567626953, + "loss": 1.2925, + "nll_loss": 0.9629594087600708, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.571718692779541, + "rewards/margins": 3.5471160411834717, + "rewards/rejected": 2.024602174758911, + "step": 5030 + }, + { + "epoch": 0.2796077724303408, + "grad_norm": 62.02948760986328, + "learning_rate": 8.191817844986996e-08, + "logits/chosen": -0.3658435046672821, + "logits/rejected": -0.5107392072677612, + "logps/chosen": -177.09124755859375, + "logps/rejected": -239.96658325195312, + "loss": 1.2561, + "nll_loss": 0.9874979257583618, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.4753217697143555, + "rewards/margins": 3.384706974029541, + "rewards/rejected": 2.0906152725219727, + "step": 5040 + }, + { + "epoch": 0.28016254975659144, + "grad_norm": 58.71518325805664, + "learning_rate": 8.185105111989682e-08, + "logits/chosen": -0.20356634259223938, + "logits/rejected": -0.2974574863910675, + "logps/chosen": -140.64859008789062, + "logps/rejected": -194.08328247070312, + "loss": 1.3252, + "nll_loss": 0.8530987501144409, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.67659854888916, + "rewards/margins": 3.028804302215576, + "rewards/rejected": 1.6477943658828735, + "step": 5050 + }, + { + "epoch": 0.28071732708284214, + "grad_norm": 84.55443572998047, + "learning_rate": 8.178382703504831e-08, + "logits/chosen": -0.5204389095306396, + "logits/rejected": -0.5782557725906372, + "logps/chosen": -219.82516479492188, + "logps/rejected": -287.87445068359375, + "loss": 1.4168, + "nll_loss": 1.1804500818252563, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.438179016113281, + "rewards/margins": 3.6439692974090576, + "rewards/rejected": 2.794210195541382, + "step": 5060 + }, + { + "epoch": 0.2812721044090928, + "grad_norm": 55.49232864379883, + "learning_rate": 8.171650639953305e-08, + "logits/chosen": -0.3548034131526947, + "logits/rejected": -0.49223846197128296, + "logps/chosen": -180.63319396972656, + "logps/rejected": -264.86114501953125, + "loss": 1.3345, + "nll_loss": 0.9848377108573914, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.830276966094971, + "rewards/margins": 4.735908508300781, + "rewards/rejected": 1.0943679809570312, + "step": 5070 + }, + { + "epoch": 0.2818268817353435, + "grad_norm": 59.67625427246094, + "learning_rate": 8.164908941785286e-08, + "logits/chosen": -0.19392921030521393, + "logits/rejected": -0.337412029504776, + "logps/chosen": -152.64126586914062, + "logps/rejected": -227.7115936279297, + "loss": 1.2606, + "nll_loss": 0.9056864976882935, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.199661731719971, + "rewards/margins": 3.7226932048797607, + "rewards/rejected": 1.476968765258789, + "step": 5080 + }, + { + "epoch": 0.2823816590615941, + "grad_norm": 48.38815689086914, + "learning_rate": 8.158157629480236e-08, + "logits/chosen": -0.2246595323085785, + "logits/rejected": -0.43237733840942383, + "logps/chosen": -147.0095672607422, + "logps/rejected": -226.11062622070312, + "loss": 1.2936, + "nll_loss": 0.8035075068473816, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 4.727481842041016, + "rewards/margins": 3.7552146911621094, + "rewards/rejected": 0.9722667932510376, + "step": 5090 + }, + { + "epoch": 0.2829364363878448, + "grad_norm": 79.66407012939453, + "learning_rate": 8.151396723546809e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -133.1691131591797, + "logps/rejected": -176.14329528808594, + "loss": 1.247, + "nll_loss": NaN, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.611157417297363, + "rewards/margins": 2.2311840057373047, + "rewards/rejected": 2.3799734115600586, + "step": 5100 + }, + { + "epoch": 0.2834912137140955, + "grad_norm": 38.1684455871582, + "learning_rate": 8.144626244522812e-08, + "logits/chosen": -0.2781577706336975, + "logits/rejected": -0.35782188177108765, + "logps/chosen": -173.4530792236328, + "logps/rejected": -204.86978149414062, + "loss": 1.4075, + "nll_loss": 0.9582212567329407, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.3481879234313965, + "rewards/margins": 2.9867639541625977, + "rewards/rejected": 2.361424446105957, + "step": 5110 + }, + { + "epoch": 0.28404599104034617, + "grad_norm": 38.051395416259766, + "learning_rate": 8.137846212975126e-08, + "logits/chosen": -0.39350074529647827, + "logits/rejected": -0.45697134733200073, + "logps/chosen": -186.7138214111328, + "logps/rejected": -244.53775024414062, + "loss": 1.2772, + "nll_loss": 1.0653064250946045, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.167884349822998, + "rewards/margins": 3.2606379985809326, + "rewards/rejected": 2.9072463512420654, + "step": 5120 + }, + { + "epoch": 0.28460076836659687, + "grad_norm": 73.94496154785156, + "learning_rate": 8.131056649499653e-08, + "logits/chosen": -0.25749891996383667, + "logits/rejected": -0.38721853494644165, + "logps/chosen": -185.4833221435547, + "logps/rejected": -205.6341094970703, + "loss": 1.2753, + "nll_loss": 0.8648307919502258, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.020583152770996, + "rewards/margins": 2.860358238220215, + "rewards/rejected": 2.1602249145507812, + "step": 5130 + }, + { + "epoch": 0.2851555456928475, + "grad_norm": 103.15850830078125, + "learning_rate": 8.12425757472125e-08, + "logits/chosen": -0.34344482421875, + "logits/rejected": -0.43374189734458923, + "logps/chosen": -163.52737426757812, + "logps/rejected": -212.4480743408203, + "loss": 1.2976, + "nll_loss": 1.0665273666381836, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.501097679138184, + "rewards/margins": 3.606482744216919, + "rewards/rejected": 1.8946149349212646, + "step": 5140 + }, + { + "epoch": 0.2857103230190982, + "grad_norm": 64.51885223388672, + "learning_rate": 8.117449009293668e-08, + "logits/chosen": -0.31838101148605347, + "logits/rejected": -0.4258570671081543, + "logps/chosen": -150.1742706298828, + "logps/rejected": -219.8562469482422, + "loss": 1.3037, + "nll_loss": 0.9224601984024048, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.4263763427734375, + "rewards/margins": 2.586714267730713, + "rewards/rejected": 2.8396620750427246, + "step": 5150 + }, + { + "epoch": 0.2862651003453489, + "grad_norm": 85.3049545288086, + "learning_rate": 8.110630973899484e-08, + "logits/chosen": -0.39840513467788696, + "logits/rejected": -0.5297509431838989, + "logps/chosen": -180.4063720703125, + "logps/rejected": -234.35665893554688, + "loss": 1.2411, + "nll_loss": 1.0500776767730713, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.590461254119873, + "rewards/margins": 3.995450258255005, + "rewards/rejected": 1.5950109958648682, + "step": 5160 + }, + { + "epoch": 0.28681987767159955, + "grad_norm": 65.11550903320312, + "learning_rate": 8.103803489250045e-08, + "logits/chosen": -0.23196351528167725, + "logits/rejected": -0.39111918210983276, + "logps/chosen": -154.30833435058594, + "logps/rejected": -203.83786010742188, + "loss": 1.3235, + "nll_loss": 0.9584856033325195, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.514666557312012, + "rewards/margins": 3.025212287902832, + "rewards/rejected": 1.489454746246338, + "step": 5170 + }, + { + "epoch": 0.28737465499785025, + "grad_norm": 67.98075866699219, + "learning_rate": 8.096966576085405e-08, + "logits/chosen": -0.47045016288757324, + "logits/rejected": -0.5754284262657166, + "logps/chosen": -190.63046264648438, + "logps/rejected": -263.2176513671875, + "loss": 1.3354, + "nll_loss": 1.1000330448150635, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.919167518615723, + "rewards/margins": 3.543025493621826, + "rewards/rejected": 2.3761425018310547, + "step": 5180 + }, + { + "epoch": 0.2879294323241009, + "grad_norm": 83.0150375366211, + "learning_rate": 8.090120255174253e-08, + "logits/chosen": -0.24219787120819092, + "logits/rejected": -0.3379160463809967, + "logps/chosen": -164.17941284179688, + "logps/rejected": -207.69076538085938, + "loss": 1.3324, + "nll_loss": 0.9007360339164734, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.087338447570801, + "rewards/margins": 3.1830947399139404, + "rewards/rejected": 1.9042432308197021, + "step": 5190 + }, + { + "epoch": 0.2884842096503516, + "grad_norm": 51.171478271484375, + "learning_rate": 8.083264547313862e-08, + "logits/chosen": -0.21256570518016815, + "logits/rejected": -0.3457311689853668, + "logps/chosen": -143.1696319580078, + "logps/rejected": -185.2307586669922, + "loss": 1.2872, + "nll_loss": 0.8276432156562805, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.949501991271973, + "rewards/margins": 3.206437587738037, + "rewards/rejected": 1.7430641651153564, + "step": 5200 + }, + { + "epoch": 0.28903898697660224, + "grad_norm": 55.23284912109375, + "learning_rate": 8.076399473330014e-08, + "logits/chosen": -0.4154040813446045, + "logits/rejected": -0.5505325198173523, + "logps/chosen": -184.9228057861328, + "logps/rejected": -258.3560485839844, + "loss": 1.431, + "nll_loss": 1.021315097808838, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.933206558227539, + "rewards/margins": 4.160671234130859, + "rewards/rejected": 1.772534966468811, + "step": 5210 + }, + { + "epoch": 0.28959376430285294, + "grad_norm": 81.54315185546875, + "learning_rate": 8.06952505407695e-08, + "logits/chosen": -0.31606870889663696, + "logits/rejected": -0.43339866399765015, + "logps/chosen": -158.47915649414062, + "logps/rejected": -229.88601684570312, + "loss": 1.3092, + "nll_loss": 0.9658956527709961, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.462421417236328, + "rewards/margins": 3.690382480621338, + "rewards/rejected": 1.7720390558242798, + "step": 5220 + }, + { + "epoch": 0.29014854162910364, + "grad_norm": 62.62627029418945, + "learning_rate": 8.062641310437293e-08, + "logits/chosen": -0.2253562957048416, + "logits/rejected": -0.42890849709510803, + "logps/chosen": -169.52810668945312, + "logps/rejected": -232.82861328125, + "loss": 1.3288, + "nll_loss": 1.0389689207077026, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.488898754119873, + "rewards/margins": 3.7329201698303223, + "rewards/rejected": 1.7559791803359985, + "step": 5230 + }, + { + "epoch": 0.2907033189553543, + "grad_norm": 77.60379791259766, + "learning_rate": 8.055748263321998e-08, + "logits/chosen": -0.3747365474700928, + "logits/rejected": -0.4889054298400879, + "logps/chosen": -154.60496520996094, + "logps/rejected": -223.3097686767578, + "loss": 1.3717, + "nll_loss": 0.9673991203308105, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.296849250793457, + "rewards/margins": 2.987121105194092, + "rewards/rejected": 2.309727668762207, + "step": 5240 + }, + { + "epoch": 0.291258096281605, + "grad_norm": 45.01763916015625, + "learning_rate": 8.048845933670271e-08, + "logits/chosen": -0.4019540250301361, + "logits/rejected": -0.5339347720146179, + "logps/chosen": -198.4036102294922, + "logps/rejected": -255.08529663085938, + "loss": 1.3284, + "nll_loss": 1.1002283096313477, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.304580211639404, + "rewards/margins": 2.89150071144104, + "rewards/rejected": 2.4130795001983643, + "step": 5250 + }, + { + "epoch": 0.2918128736078556, + "grad_norm": 58.75382995605469, + "learning_rate": 8.041934342449526e-08, + "logits/chosen": -0.3500133156776428, + "logits/rejected": -0.5009504556655884, + "logps/chosen": -188.2596893310547, + "logps/rejected": -240.044921875, + "loss": 1.3187, + "nll_loss": 1.071189284324646, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.830065727233887, + "rewards/margins": 2.8829729557037354, + "rewards/rejected": 2.9470925331115723, + "step": 5260 + }, + { + "epoch": 0.2923676509341063, + "grad_norm": 40.570159912109375, + "learning_rate": 8.035013510655307e-08, + "logits/chosen": -0.33516281843185425, + "logits/rejected": -0.47703152894973755, + "logps/chosen": -156.31471252441406, + "logps/rejected": -210.299072265625, + "loss": 1.263, + "nll_loss": 0.8956218957901001, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.275986671447754, + "rewards/margins": 2.6534581184387207, + "rewards/rejected": 2.622528553009033, + "step": 5270 + }, + { + "epoch": 0.292922428260357, + "grad_norm": 72.06739044189453, + "learning_rate": 8.028083459311225e-08, + "logits/chosen": -0.431640088558197, + "logits/rejected": -0.5278843641281128, + "logps/chosen": -206.6097412109375, + "logps/rejected": -259.64697265625, + "loss": 1.3747, + "nll_loss": 1.1126128435134888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.9721574783325195, + "rewards/margins": 2.6364171504974365, + "rewards/rejected": 3.335740566253662, + "step": 5280 + }, + { + "epoch": 0.29347720558660767, + "grad_norm": 43.916831970214844, + "learning_rate": 8.021144209468904e-08, + "logits/chosen": -0.37015438079833984, + "logits/rejected": -0.44294947385787964, + "logps/chosen": -180.5640106201172, + "logps/rejected": -225.8361053466797, + "loss": 1.3638, + "nll_loss": 1.0486891269683838, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.0682053565979, + "rewards/margins": 2.2644426822662354, + "rewards/rejected": 2.803762912750244, + "step": 5290 + }, + { + "epoch": 0.29403198291285837, + "grad_norm": 64.19252014160156, + "learning_rate": 8.014195782207909e-08, + "logits/chosen": -0.20861633121967316, + "logits/rejected": -0.38095623254776, + "logps/chosen": -168.60348510742188, + "logps/rejected": -208.478271484375, + "loss": 1.3344, + "nll_loss": 0.8773584365844727, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.338396072387695, + "rewards/margins": 3.4838390350341797, + "rewards/rejected": 1.8545570373535156, + "step": 5300 + }, + { + "epoch": 0.294586760239109, + "grad_norm": 47.61259841918945, + "learning_rate": 8.007238198635677e-08, + "logits/chosen": -0.39370396733283997, + "logits/rejected": -0.4605169892311096, + "logps/chosen": -189.17117309570312, + "logps/rejected": -248.403564453125, + "loss": 1.3185, + "nll_loss": 1.0556788444519043, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.049463748931885, + "rewards/margins": 3.6159896850585938, + "rewards/rejected": 2.43347430229187, + "step": 5310 + }, + { + "epoch": 0.2951415375653597, + "grad_norm": 47.16741943359375, + "learning_rate": 8.000271479887468e-08, + "logits/chosen": -0.25164592266082764, + "logits/rejected": -0.37962883710861206, + "logps/chosen": -146.64503479003906, + "logps/rejected": -178.7125701904297, + "loss": 1.3137, + "nll_loss": 0.9622234106063843, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.87002420425415, + "rewards/margins": 2.9429609775543213, + "rewards/rejected": 1.927063226699829, + "step": 5320 + }, + { + "epoch": 0.29569631489161036, + "grad_norm": 50.34662628173828, + "learning_rate": 7.993295647126288e-08, + "logits/chosen": -0.12188470363616943, + "logits/rejected": -0.28352001309394836, + "logps/chosen": -144.2179412841797, + "logps/rejected": -193.8548583984375, + "loss": 1.3383, + "nll_loss": 0.823199450969696, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.984102249145508, + "rewards/margins": 3.198787212371826, + "rewards/rejected": 1.7853147983551025, + "step": 5330 + }, + { + "epoch": 0.29625109221786106, + "grad_norm": 46.27009582519531, + "learning_rate": 7.986310721542828e-08, + "logits/chosen": -0.23408794403076172, + "logits/rejected": -0.35068511962890625, + "logps/chosen": -190.06494140625, + "logps/rejected": -255.37680053710938, + "loss": 1.3908, + "nll_loss": 0.9979566335678101, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.786375999450684, + "rewards/margins": 3.3670754432678223, + "rewards/rejected": 2.4193005561828613, + "step": 5340 + }, + { + "epoch": 0.29680586954411176, + "grad_norm": 56.96269607543945, + "learning_rate": 7.979316724355406e-08, + "logits/chosen": -0.292167603969574, + "logits/rejected": -0.4041009843349457, + "logps/chosen": -128.87632751464844, + "logps/rejected": -187.7605438232422, + "loss": 1.3735, + "nll_loss": 0.8670805096626282, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.034547328948975, + "rewards/margins": 3.0269715785980225, + "rewards/rejected": 2.0075759887695312, + "step": 5350 + }, + { + "epoch": 0.2973606468703624, + "grad_norm": 54.19409942626953, + "learning_rate": 7.972313676809887e-08, + "logits/chosen": -0.31982582807540894, + "logits/rejected": -0.40687140822410583, + "logps/chosen": -194.898193359375, + "logps/rejected": -229.9585723876953, + "loss": 1.4132, + "nll_loss": 1.0126712322235107, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.524321556091309, + "rewards/margins": 3.129094123840332, + "rewards/rejected": 2.3952271938323975, + "step": 5360 + }, + { + "epoch": 0.2979154241966131, + "grad_norm": 41.81538391113281, + "learning_rate": 7.96530160017964e-08, + "logits/chosen": -0.23952436447143555, + "logits/rejected": -0.395558625459671, + "logps/chosen": -172.92860412597656, + "logps/rejected": -210.91049194335938, + "loss": 1.2618, + "nll_loss": 0.9645511507987976, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.2552056312561035, + "rewards/margins": 2.6192994117736816, + "rewards/rejected": 2.635906457901001, + "step": 5370 + }, + { + "epoch": 0.29847020152286374, + "grad_norm": 43.95526885986328, + "learning_rate": 7.958280515765454e-08, + "logits/chosen": -0.34653595089912415, + "logits/rejected": -0.4484923481941223, + "logps/chosen": -188.808349609375, + "logps/rejected": -257.86285400390625, + "loss": 1.3475, + "nll_loss": 1.0703051090240479, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.61892032623291, + "rewards/margins": 3.484872817993164, + "rewards/rejected": 2.1340479850769043, + "step": 5380 + }, + { + "epoch": 0.29902497884911444, + "grad_norm": 64.28650665283203, + "learning_rate": 7.951250444895484e-08, + "logits/chosen": -0.14369474351406097, + "logits/rejected": -0.2900177538394928, + "logps/chosen": -147.00808715820312, + "logps/rejected": -188.2275390625, + "loss": 1.341, + "nll_loss": 0.9086960554122925, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.827338218688965, + "rewards/margins": 2.7463221549987793, + "rewards/rejected": 2.0810163021087646, + "step": 5390 + }, + { + "epoch": 0.2995797561753651, + "grad_norm": 49.227413177490234, + "learning_rate": 7.944211408925183e-08, + "logits/chosen": -0.43552136421203613, + "logits/rejected": -0.5203756093978882, + "logps/chosen": -205.6902618408203, + "logps/rejected": -263.37103271484375, + "loss": 1.2393, + "nll_loss": 1.137157678604126, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.868722438812256, + "rewards/margins": 3.6083991527557373, + "rewards/rejected": 2.2603237628936768, + "step": 5400 + }, + { + "epoch": 0.3001345335016158, + "grad_norm": 39.7113151550293, + "learning_rate": 7.937163429237237e-08, + "logits/chosen": -0.2219894379377365, + "logits/rejected": -0.3694431781768799, + "logps/chosen": -159.9281768798828, + "logps/rejected": -204.74143981933594, + "loss": 1.3106, + "nll_loss": 0.9053483009338379, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.833624839782715, + "rewards/margins": 3.136951208114624, + "rewards/rejected": 1.6966737508773804, + "step": 5410 + }, + { + "epoch": 0.3006893108278665, + "grad_norm": 117.45632934570312, + "learning_rate": 7.930106527241505e-08, + "logits/chosen": -0.23928344249725342, + "logits/rejected": -0.3349587023258209, + "logps/chosen": -139.458984375, + "logps/rejected": -189.7228546142578, + "loss": 1.316, + "nll_loss": 0.8406341671943665, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.879055500030518, + "rewards/margins": 2.8671770095825195, + "rewards/rejected": 2.011878728866577, + "step": 5420 + }, + { + "epoch": 0.30124408815411713, + "grad_norm": 29.80186653137207, + "learning_rate": 7.923040724374941e-08, + "logits/chosen": -0.30750900506973267, + "logits/rejected": -0.4656451344490051, + "logps/chosen": -169.89950561523438, + "logps/rejected": -207.82431030273438, + "loss": 1.2527, + "nll_loss": 0.9901590347290039, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.296086311340332, + "rewards/margins": 3.4520249366760254, + "rewards/rejected": 1.8440611362457275, + "step": 5430 + }, + { + "epoch": 0.30179886548036783, + "grad_norm": 61.57847213745117, + "learning_rate": 7.915966042101546e-08, + "logits/chosen": -0.43216371536254883, + "logits/rejected": -0.5349610447883606, + "logps/chosen": -205.59500122070312, + "logps/rejected": -233.59249877929688, + "loss": 1.3843, + "nll_loss": 1.1686880588531494, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.404385566711426, + "rewards/margins": 2.747410297393799, + "rewards/rejected": 2.656975507736206, + "step": 5440 + }, + { + "epoch": 0.3023536428066185, + "grad_norm": 55.9067268371582, + "learning_rate": 7.908882501912288e-08, + "logits/chosen": -0.35131892561912537, + "logits/rejected": -0.5348880290985107, + "logps/chosen": -167.07186889648438, + "logps/rejected": -208.3030242919922, + "loss": 1.3274, + "nll_loss": 1.009548306465149, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.066190719604492, + "rewards/margins": 2.7632575035095215, + "rewards/rejected": 2.3029332160949707, + "step": 5450 + }, + { + "epoch": 0.30290842013286917, + "grad_norm": 48.4643669128418, + "learning_rate": 7.901790125325047e-08, + "logits/chosen": -0.2430580109357834, + "logits/rejected": -0.38270917534828186, + "logps/chosen": -196.25991821289062, + "logps/rejected": -240.02505493164062, + "loss": 1.341, + "nll_loss": 1.0512897968292236, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.403705596923828, + "rewards/margins": 3.0696892738342285, + "rewards/rejected": 2.3340163230895996, + "step": 5460 + }, + { + "epoch": 0.30346319745911987, + "grad_norm": 26.129262924194336, + "learning_rate": 7.894688933884545e-08, + "logits/chosen": -0.43403467535972595, + "logits/rejected": -0.5458344221115112, + "logps/chosen": -206.26931762695312, + "logps/rejected": -286.28814697265625, + "loss": 1.2033, + "nll_loss": 1.093693494796753, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.592001438140869, + "rewards/margins": 3.990468978881836, + "rewards/rejected": 2.601532459259033, + "step": 5470 + }, + { + "epoch": 0.3040179747853705, + "grad_norm": 55.46004104614258, + "learning_rate": 7.887578949162278e-08, + "logits/chosen": -0.3617546856403351, + "logits/rejected": -0.43136462569236755, + "logps/chosen": -183.30076599121094, + "logps/rejected": -205.0895233154297, + "loss": 1.4718, + "nll_loss": 1.0770082473754883, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.315445899963379, + "rewards/margins": 2.013780117034912, + "rewards/rejected": 3.301665782928467, + "step": 5480 + }, + { + "epoch": 0.3045727521116212, + "grad_norm": 63.63032913208008, + "learning_rate": 7.880460192756457e-08, + "logits/chosen": -0.45876961946487427, + "logits/rejected": -0.5710569620132446, + "logps/chosen": -186.36093139648438, + "logps/rejected": -239.4054718017578, + "loss": 1.3289, + "nll_loss": 1.0660300254821777, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.134969234466553, + "rewards/margins": 3.396899461746216, + "rewards/rejected": 2.738069772720337, + "step": 5490 + }, + { + "epoch": 0.30512752943787186, + "grad_norm": 83.45735931396484, + "learning_rate": 7.873332686291938e-08, + "logits/chosen": -0.1018824353814125, + "logits/rejected": -0.3132437765598297, + "logps/chosen": -124.9186019897461, + "logps/rejected": -162.782470703125, + "loss": 1.3713, + "nll_loss": 0.772121787071228, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.188645839691162, + "rewards/margins": 2.7185497283935547, + "rewards/rejected": 1.4700956344604492, + "step": 5500 + }, + { + "epoch": 0.30512752943787186, + "eval_logits/chosen": -0.40301600098609924, + "eval_logits/rejected": -0.4954206645488739, + "eval_logps/chosen": -197.5357208251953, + "eval_logps/rejected": -259.53399658203125, + "eval_loss": 1.2622016668319702, + "eval_nll_loss": 1.0264045000076294, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 6.042550086975098, + "eval_rewards/margins": 4.08809757232666, + "eval_rewards/rejected": 1.9544516801834106, + "eval_runtime": 16.9134, + "eval_samples_per_second": 15.136, + "eval_steps_per_second": 1.892, + "step": 5500 + }, + { + "epoch": 0.30568230676412256, + "grad_norm": 72.64631652832031, + "learning_rate": 7.866196451420155e-08, + "logits/chosen": -0.1749780923128128, + "logits/rejected": -0.3413892686367035, + "logps/chosen": -123.80096435546875, + "logps/rejected": -170.3175048828125, + "loss": 1.2754, + "nll_loss": 0.8201160430908203, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.313254356384277, + "rewards/margins": 2.8019752502441406, + "rewards/rejected": 1.511278510093689, + "step": 5510 + }, + { + "epoch": 0.3062370840903732, + "grad_norm": 69.69657897949219, + "learning_rate": 7.859051509819062e-08, + "logits/chosen": -0.3276621103286743, + "logits/rejected": -0.44854670763015747, + "logps/chosen": -178.0767822265625, + "logps/rejected": -224.83853149414062, + "loss": 1.2687, + "nll_loss": 0.9359772801399231, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.529772758483887, + "rewards/margins": 3.243734359741211, + "rewards/rejected": 2.2860379219055176, + "step": 5520 + }, + { + "epoch": 0.3067918614166239, + "grad_norm": 35.46376419067383, + "learning_rate": 7.851897883193056e-08, + "logits/chosen": -0.20039483904838562, + "logits/rejected": -0.34632977843284607, + "logps/chosen": -147.12570190429688, + "logps/rejected": -192.63580322265625, + "loss": 1.1467, + "nll_loss": 0.8703436851501465, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.039349555969238, + "rewards/margins": 3.207817792892456, + "rewards/rejected": 1.831531286239624, + "step": 5530 + }, + { + "epoch": 0.3073466387428746, + "grad_norm": 105.12731170654297, + "learning_rate": 7.84473559327292e-08, + "logits/chosen": -0.22078721225261688, + "logits/rejected": -0.41360822319984436, + "logps/chosen": -154.78302001953125, + "logps/rejected": -225.735107421875, + "loss": 1.3309, + "nll_loss": 0.8465398550033569, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.447340488433838, + "rewards/margins": 3.7463760375976562, + "rewards/rejected": 1.7009642124176025, + "step": 5540 + }, + { + "epoch": 0.30790141606912524, + "grad_norm": 83.25653076171875, + "learning_rate": 7.837564661815754e-08, + "logits/chosen": -0.36427661776542664, + "logits/rejected": -0.4735857844352722, + "logps/chosen": -184.8861083984375, + "logps/rejected": -257.52734375, + "loss": 1.4227, + "nll_loss": 1.0542347431182861, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.679947376251221, + "rewards/margins": 3.7396247386932373, + "rewards/rejected": 1.9403222799301147, + "step": 5550 + }, + { + "epoch": 0.30845619339537594, + "grad_norm": 97.9723892211914, + "learning_rate": 7.830385110604904e-08, + "logits/chosen": -0.3181317150592804, + "logits/rejected": -0.4537307322025299, + "logps/chosen": -123.86064147949219, + "logps/rejected": -191.56643676757812, + "loss": 1.4407, + "nll_loss": 0.8734070658683777, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.979821681976318, + "rewards/margins": 2.7552473545074463, + "rewards/rejected": 2.224574565887451, + "step": 5560 + }, + { + "epoch": 0.3090109707216266, + "grad_norm": 95.3475570678711, + "learning_rate": 7.82319696144991e-08, + "logits/chosen": -0.24439339339733124, + "logits/rejected": -0.37864407896995544, + "logps/chosen": -133.63058471679688, + "logps/rejected": -181.42242431640625, + "loss": 1.3581, + "nll_loss": 0.8765896558761597, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.396186828613281, + "rewards/margins": 2.396735429763794, + "rewards/rejected": 1.9994512796401978, + "step": 5570 + }, + { + "epoch": 0.3095657480478773, + "grad_norm": 103.33505249023438, + "learning_rate": 7.816000236186418e-08, + "logits/chosen": -0.28526854515075684, + "logits/rejected": -0.44429856538772583, + "logps/chosen": -170.72952270507812, + "logps/rejected": -233.9705047607422, + "loss": 1.2686, + "nll_loss": 0.9443384408950806, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.931466102600098, + "rewards/margins": 3.274135112762451, + "rewards/rejected": 1.6573302745819092, + "step": 5580 + }, + { + "epoch": 0.310120525374128, + "grad_norm": 51.87260437011719, + "learning_rate": 7.808794956676134e-08, + "logits/chosen": -0.307265043258667, + "logits/rejected": -0.49012789130210876, + "logps/chosen": -169.12460327148438, + "logps/rejected": -198.01025390625, + "loss": 1.2568, + "nll_loss": 0.9683774709701538, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.167562961578369, + "rewards/margins": 3.205660343170166, + "rewards/rejected": 1.9619033336639404, + "step": 5590 + }, + { + "epoch": 0.31067530270037863, + "grad_norm": 50.249149322509766, + "learning_rate": 7.80158114480675e-08, + "logits/chosen": -0.2516246438026428, + "logits/rejected": -0.3341858685016632, + "logps/chosen": -165.31979370117188, + "logps/rejected": -207.95602416992188, + "loss": 1.2628, + "nll_loss": 1.0670020580291748, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.323300838470459, + "rewards/margins": 2.661064386367798, + "rewards/rejected": 2.6622369289398193, + "step": 5600 + }, + { + "epoch": 0.31123008002662933, + "grad_norm": 54.5699348449707, + "learning_rate": 7.794358822491871e-08, + "logits/chosen": -0.11392636597156525, + "logits/rejected": -0.2459922581911087, + "logps/chosen": -128.5890350341797, + "logps/rejected": -170.20652770996094, + "loss": 1.213, + "nll_loss": 0.8366823196411133, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.278365135192871, + "rewards/margins": 3.0675265789031982, + "rewards/rejected": 1.2108380794525146, + "step": 5610 + }, + { + "epoch": 0.31178485735288, + "grad_norm": 45.721458435058594, + "learning_rate": 7.787128011670963e-08, + "logits/chosen": -0.19624245166778564, + "logits/rejected": -0.34312087297439575, + "logps/chosen": -169.8636932373047, + "logps/rejected": -233.64169311523438, + "loss": 1.1976, + "nll_loss": 0.9160765409469604, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.577919006347656, + "rewards/margins": 4.408540725708008, + "rewards/rejected": 1.1693775653839111, + "step": 5620 + }, + { + "epoch": 0.3123396346791307, + "grad_norm": 46.51015853881836, + "learning_rate": 7.779888734309266e-08, + "logits/chosen": -0.35512202978134155, + "logits/rejected": -0.48626452684402466, + "logps/chosen": -187.6857147216797, + "logps/rejected": -256.5793151855469, + "loss": 1.2573, + "nll_loss": 1.1082860231399536, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.167753219604492, + "rewards/margins": 4.530683517456055, + "rewards/rejected": 1.6370693445205688, + "step": 5630 + }, + { + "epoch": 0.3128944120053813, + "grad_norm": 50.111907958984375, + "learning_rate": 7.772641012397753e-08, + "logits/chosen": -0.3077355921268463, + "logits/rejected": -0.44563180208206177, + "logps/chosen": -190.90426635742188, + "logps/rejected": -258.58197021484375, + "loss": 1.294, + "nll_loss": 1.0387169122695923, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.6006927490234375, + "rewards/margins": 3.1984353065490723, + "rewards/rejected": 2.402257204055786, + "step": 5640 + }, + { + "epoch": 0.313449189331632, + "grad_norm": 60.936222076416016, + "learning_rate": 7.765384867953037e-08, + "logits/chosen": -0.30347099900245667, + "logits/rejected": -0.45295968651771545, + "logps/chosen": -183.19973754882812, + "logps/rejected": -263.80865478515625, + "loss": 1.2964, + "nll_loss": 1.023938775062561, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.2091240882873535, + "rewards/margins": 3.616779327392578, + "rewards/rejected": 2.5923449993133545, + "step": 5650 + }, + { + "epoch": 0.3140039666578827, + "grad_norm": 57.22261047363281, + "learning_rate": 7.758120323017326e-08, + "logits/chosen": -0.12532678246498108, + "logits/rejected": -0.24024459719657898, + "logps/chosen": -146.21481323242188, + "logps/rejected": -204.45602416992188, + "loss": 1.3377, + "nll_loss": 0.853805661201477, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.998294830322266, + "rewards/margins": 2.6482481956481934, + "rewards/rejected": 2.3500466346740723, + "step": 5660 + }, + { + "epoch": 0.31455874398413336, + "grad_norm": 42.02796936035156, + "learning_rate": 7.750847399658335e-08, + "logits/chosen": -0.05001025274395943, + "logits/rejected": -0.1884036660194397, + "logps/chosen": -116.2176742553711, + "logps/rejected": -169.38729858398438, + "loss": 1.2722, + "nll_loss": 0.7996511459350586, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.614314079284668, + "rewards/margins": 3.169663906097412, + "rewards/rejected": 1.4446502923965454, + "step": 5670 + }, + { + "epoch": 0.31511352131038406, + "grad_norm": 65.07894897460938, + "learning_rate": 7.743566119969244e-08, + "logits/chosen": -0.21025721728801727, + "logits/rejected": -0.35305628180503845, + "logps/chosen": -165.0023193359375, + "logps/rejected": -235.9938507080078, + "loss": 1.2099, + "nll_loss": 0.9207907915115356, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.444872856140137, + "rewards/margins": 3.8752048015594482, + "rewards/rejected": 1.5696674585342407, + "step": 5680 + }, + { + "epoch": 0.3156682986366347, + "grad_norm": 53.95488739013672, + "learning_rate": 7.73627650606861e-08, + "logits/chosen": -0.3568010628223419, + "logits/rejected": -0.44880276918411255, + "logps/chosen": -210.515625, + "logps/rejected": -255.9861297607422, + "loss": 1.3298, + "nll_loss": 1.187220811843872, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.920581340789795, + "rewards/margins": 2.686098098754883, + "rewards/rejected": 3.234483003616333, + "step": 5690 + }, + { + "epoch": 0.3162230759628854, + "grad_norm": 45.283164978027344, + "learning_rate": 7.728978580100303e-08, + "logits/chosen": -0.29234832525253296, + "logits/rejected": -0.41959208250045776, + "logps/chosen": -172.02313232421875, + "logps/rejected": -223.96923828125, + "loss": 1.2833, + "nll_loss": 1.0394980907440186, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.348699569702148, + "rewards/margins": 3.0042288303375244, + "rewards/rejected": 2.344470500946045, + "step": 5700 + }, + { + "epoch": 0.31677785328913605, + "grad_norm": 77.68114471435547, + "learning_rate": 7.721672364233453e-08, + "logits/chosen": -0.31327614188194275, + "logits/rejected": -0.4360221028327942, + "logps/chosen": -181.8870391845703, + "logps/rejected": -235.1174774169922, + "loss": 1.2906, + "nll_loss": 0.9546974301338196, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.674635887145996, + "rewards/margins": 4.261141300201416, + "rewards/rejected": 1.4134950637817383, + "step": 5710 + }, + { + "epoch": 0.31733263061538675, + "grad_norm": 29.960615158081055, + "learning_rate": 7.714357880662364e-08, + "logits/chosen": 0.09260416030883789, + "logits/rejected": -0.08293385803699493, + "logps/chosen": -101.0294418334961, + "logps/rejected": -150.16123962402344, + "loss": 1.2613, + "nll_loss": 0.6419572234153748, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 3.6931400299072266, + "rewards/margins": 2.5476291179656982, + "rewards/rejected": 1.1455105543136597, + "step": 5720 + }, + { + "epoch": 0.31788740794163745, + "grad_norm": 55.809993743896484, + "learning_rate": 7.707035151606455e-08, + "logits/chosen": -0.2473038136959076, + "logits/rejected": -0.39639773964881897, + "logps/chosen": -135.81277465820312, + "logps/rejected": -187.45169067382812, + "loss": 1.2082, + "nll_loss": 0.8615878820419312, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.824166774749756, + "rewards/margins": 3.423949718475342, + "rewards/rejected": 1.4002166986465454, + "step": 5730 + }, + { + "epoch": 0.3184421852678881, + "grad_norm": 70.8182373046875, + "learning_rate": 7.699704199310203e-08, + "logits/chosen": -0.3030581772327423, + "logits/rejected": -0.42742496728897095, + "logps/chosen": -160.92726135253906, + "logps/rejected": -204.8309326171875, + "loss": 1.3201, + "nll_loss": 1.1673412322998047, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.198462009429932, + "rewards/margins": 3.667888641357422, + "rewards/rejected": 1.5305726528167725, + "step": 5740 + }, + { + "epoch": 0.3189969625941388, + "grad_norm": 75.41050720214844, + "learning_rate": 7.692365046043051e-08, + "logits/chosen": -0.3121715784072876, + "logits/rejected": -0.4054687023162842, + "logps/chosen": -174.28924560546875, + "logps/rejected": -240.83523559570312, + "loss": 1.2978, + "nll_loss": 1.0281198024749756, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.9810380935668945, + "rewards/margins": 4.004823207855225, + "rewards/rejected": 1.9762141704559326, + "step": 5750 + }, + { + "epoch": 0.31955173992038943, + "grad_norm": 56.148040771484375, + "learning_rate": 7.685017714099365e-08, + "logits/chosen": -0.1841479241847992, + "logits/rejected": -0.38559359312057495, + "logps/chosen": -153.3293914794922, + "logps/rejected": -228.2798309326172, + "loss": 1.2664, + "nll_loss": 0.9011721611022949, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.384884834289551, + "rewards/margins": 5.131956100463867, + "rewards/rejected": 0.2529294788837433, + "step": 5760 + }, + { + "epoch": 0.32010651724664013, + "grad_norm": 46.79436492919922, + "learning_rate": 7.677662225798349e-08, + "logits/chosen": -0.2041047066450119, + "logits/rejected": -0.3556447923183441, + "logps/chosen": -161.56558227539062, + "logps/rejected": -232.87765502929688, + "loss": 1.3588, + "nll_loss": 0.9337288737297058, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 5.395378589630127, + "rewards/margins": 2.9714081287384033, + "rewards/rejected": 2.4239706993103027, + "step": 5770 + }, + { + "epoch": 0.32066129457289083, + "grad_norm": 63.75236892700195, + "learning_rate": 7.670298603483987e-08, + "logits/chosen": -0.26754432916641235, + "logits/rejected": -0.31302887201309204, + "logps/chosen": -171.03587341308594, + "logps/rejected": -225.9440460205078, + "loss": 1.2878, + "nll_loss": 1.025702953338623, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.298377990722656, + "rewards/margins": 3.062826633453369, + "rewards/rejected": 2.235551595687866, + "step": 5780 + }, + { + "epoch": 0.3212160718991415, + "grad_norm": 49.006954193115234, + "learning_rate": 7.662926869524971e-08, + "logits/chosen": -0.3983635902404785, + "logits/rejected": -0.5497706532478333, + "logps/chosen": -210.3919219970703, + "logps/rejected": -255.5663604736328, + "loss": 1.2036, + "nll_loss": 1.1042499542236328, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.7940168380737305, + "rewards/margins": 3.4843997955322266, + "rewards/rejected": 2.3096179962158203, + "step": 5790 + }, + { + "epoch": 0.3217708492253922, + "grad_norm": 60.399696350097656, + "learning_rate": 7.655547046314634e-08, + "logits/chosen": -0.3618773818016052, + "logits/rejected": -0.4993468225002289, + "logps/chosen": -168.93533325195312, + "logps/rejected": -230.4624786376953, + "loss": 1.2193, + "nll_loss": 0.9717389345169067, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.369915008544922, + "rewards/margins": 4.094571590423584, + "rewards/rejected": 1.2753427028656006, + "step": 5800 + }, + { + "epoch": 0.3223256265516428, + "grad_norm": 42.58092498779297, + "learning_rate": 7.648159156270884e-08, + "logits/chosen": -0.23907014727592468, + "logits/rejected": -0.4223829209804535, + "logps/chosen": -161.8720703125, + "logps/rejected": -214.8459014892578, + "loss": 1.3719, + "nll_loss": 0.9058443903923035, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.540342807769775, + "rewards/margins": 3.800814390182495, + "rewards/rejected": 1.739527702331543, + "step": 5810 + }, + { + "epoch": 0.3228804038778935, + "grad_norm": 70.84566497802734, + "learning_rate": 7.64076322183613e-08, + "logits/chosen": -0.3701106607913971, + "logits/rejected": -0.4296882152557373, + "logps/chosen": -183.67086791992188, + "logps/rejected": -228.63644409179688, + "loss": 1.3083, + "nll_loss": 1.0598195791244507, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.584314823150635, + "rewards/margins": 3.2408530712127686, + "rewards/rejected": 2.3434619903564453, + "step": 5820 + }, + { + "epoch": 0.32343518120414416, + "grad_norm": 37.22153091430664, + "learning_rate": 7.633359265477222e-08, + "logits/chosen": -0.2679436206817627, + "logits/rejected": -0.4335872530937195, + "logps/chosen": -164.10592651367188, + "logps/rejected": -201.74392700195312, + "loss": 1.2779, + "nll_loss": 0.9599549174308777, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.297649383544922, + "rewards/margins": 3.396411180496216, + "rewards/rejected": 1.9012380838394165, + "step": 5830 + }, + { + "epoch": 0.32398995853039486, + "grad_norm": 97.0199203491211, + "learning_rate": 7.625947309685372e-08, + "logits/chosen": -0.2867860794067383, + "logits/rejected": -0.3901570439338684, + "logps/chosen": -176.51451110839844, + "logps/rejected": -227.54580688476562, + "loss": 1.1868, + "nll_loss": 0.9550184011459351, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.572012424468994, + "rewards/margins": 3.2614574432373047, + "rewards/rejected": 2.3105552196502686, + "step": 5840 + }, + { + "epoch": 0.32454473585664556, + "grad_norm": 129.2616424560547, + "learning_rate": 7.6185273769761e-08, + "logits/chosen": -0.3186994194984436, + "logits/rejected": -0.4096761643886566, + "logps/chosen": -181.9349822998047, + "logps/rejected": -229.8415985107422, + "loss": 1.2713, + "nll_loss": 1.0548815727233887, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.698060035705566, + "rewards/margins": 4.21523380279541, + "rewards/rejected": 1.4828267097473145, + "step": 5850 + }, + { + "epoch": 0.3250995131828962, + "grad_norm": 57.745906829833984, + "learning_rate": 7.611099489889152e-08, + "logits/chosen": -0.4499587416648865, + "logits/rejected": -0.4957137107849121, + "logps/chosen": -208.1059112548828, + "logps/rejected": -243.59033203125, + "loss": 1.4432, + "nll_loss": 1.3240139484405518, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.761335849761963, + "rewards/margins": 2.7761924266815186, + "rewards/rejected": 2.9851431846618652, + "step": 5860 + }, + { + "epoch": 0.3256542905091469, + "grad_norm": 51.654205322265625, + "learning_rate": 7.60366367098844e-08, + "logits/chosen": -0.1490587294101715, + "logits/rejected": -0.34141451120376587, + "logps/chosen": -151.16256713867188, + "logps/rejected": -187.86569213867188, + "loss": 1.2655, + "nll_loss": 0.8017619252204895, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.672058582305908, + "rewards/margins": 3.017301082611084, + "rewards/rejected": 1.6547574996948242, + "step": 5870 + }, + { + "epoch": 0.32620906783539755, + "grad_norm": 70.94071960449219, + "learning_rate": 7.59621994286197e-08, + "logits/chosen": -0.25258737802505493, + "logits/rejected": -0.3760277330875397, + "logps/chosen": -159.57235717773438, + "logps/rejected": -209.1165008544922, + "loss": 1.2939, + "nll_loss": 0.9444242715835571, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.442964553833008, + "rewards/margins": 2.996890068054199, + "rewards/rejected": 2.446074962615967, + "step": 5880 + }, + { + "epoch": 0.32676384516164825, + "grad_norm": 49.107723236083984, + "learning_rate": 7.588768328121776e-08, + "logits/chosen": -0.36431506276130676, + "logits/rejected": -0.44438228011131287, + "logps/chosen": -172.80581665039062, + "logps/rejected": -219.6732940673828, + "loss": 1.3315, + "nll_loss": 1.0387299060821533, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.738280296325684, + "rewards/margins": 3.2641844749450684, + "rewards/rejected": 2.474095106124878, + "step": 5890 + }, + { + "epoch": 0.32731862248789895, + "grad_norm": 64.77669525146484, + "learning_rate": 7.581308849403842e-08, + "logits/chosen": -0.3097997307777405, + "logits/rejected": -0.5356290340423584, + "logps/chosen": -154.3521270751953, + "logps/rejected": -218.3408660888672, + "loss": 1.2495, + "nll_loss": 0.8772909045219421, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.518646717071533, + "rewards/margins": 4.598464012145996, + "rewards/rejected": 0.9201822280883789, + "step": 5900 + }, + { + "epoch": 0.3278733998141496, + "grad_norm": 87.04254150390625, + "learning_rate": 7.573841529368051e-08, + "logits/chosen": -0.42871952056884766, + "logits/rejected": -0.5346238017082214, + "logps/chosen": -185.951171875, + "logps/rejected": -256.2978820800781, + "loss": 1.244, + "nll_loss": 1.0659410953521729, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.769937038421631, + "rewards/margins": 4.028483867645264, + "rewards/rejected": 1.7414535284042358, + "step": 5910 + }, + { + "epoch": 0.3284281771404003, + "grad_norm": 73.32978820800781, + "learning_rate": 7.566366390698098e-08, + "logits/chosen": -0.2544618248939514, + "logits/rejected": -0.4156056344509125, + "logps/chosen": -204.7109375, + "logps/rejected": -284.37750244140625, + "loss": 1.2187, + "nll_loss": 0.9991675615310669, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.162306785583496, + "rewards/margins": 4.747017860412598, + "rewards/rejected": 1.4152885675430298, + "step": 5920 + }, + { + "epoch": 0.32898295446665093, + "grad_norm": 59.712581634521484, + "learning_rate": 7.558883456101432e-08, + "logits/chosen": -0.2760107219219208, + "logits/rejected": -0.48132261633872986, + "logps/chosen": -177.5280303955078, + "logps/rejected": -234.1001739501953, + "loss": 1.2621, + "nll_loss": 0.9008985757827759, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.312003135681152, + "rewards/margins": 3.9453353881835938, + "rewards/rejected": 1.3666683435440063, + "step": 5930 + }, + { + "epoch": 0.32953773179290163, + "grad_norm": 59.13243103027344, + "learning_rate": 7.551392748309187e-08, + "logits/chosen": -0.16078224778175354, + "logits/rejected": -0.3124062418937683, + "logps/chosen": -140.71485900878906, + "logps/rejected": -176.59397888183594, + "loss": 1.3002, + "nll_loss": 0.9051309823989868, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.805662631988525, + "rewards/margins": 2.5614771842956543, + "rewards/rejected": 2.244184970855713, + "step": 5940 + }, + { + "epoch": 0.3300925091191523, + "grad_norm": 87.19740295410156, + "learning_rate": 7.543894290076102e-08, + "logits/chosen": -0.13854815065860748, + "logits/rejected": -0.27854079008102417, + "logps/chosen": -123.906005859375, + "logps/rejected": -191.5620880126953, + "loss": 1.3135, + "nll_loss": 0.7680860757827759, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.787467002868652, + "rewards/margins": 3.6352336406707764, + "rewards/rejected": 1.1522338390350342, + "step": 5950 + }, + { + "epoch": 0.330647286445403, + "grad_norm": 81.57664489746094, + "learning_rate": 7.536388104180467e-08, + "logits/chosen": -0.2890421450138092, + "logits/rejected": -0.39421191811561584, + "logps/chosen": -133.77871704101562, + "logps/rejected": -190.2563018798828, + "loss": 1.3539, + "nll_loss": 0.8960251808166504, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.03255033493042, + "rewards/margins": 2.8215224742889404, + "rewards/rejected": 2.2110276222229004, + "step": 5960 + }, + { + "epoch": 0.3312020637716537, + "grad_norm": 43.42544937133789, + "learning_rate": 7.528874213424044e-08, + "logits/chosen": -0.2997869551181793, + "logits/rejected": -0.46176987886428833, + "logps/chosen": -162.94911193847656, + "logps/rejected": -232.91073608398438, + "loss": 1.279, + "nll_loss": 0.9380319714546204, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.328009605407715, + "rewards/margins": 3.1782174110412598, + "rewards/rejected": 2.149792432785034, + "step": 5970 + }, + { + "epoch": 0.3317568410979043, + "grad_norm": 56.400909423828125, + "learning_rate": 7.521352640631997e-08, + "logits/chosen": -0.39626017212867737, + "logits/rejected": -0.47180286049842834, + "logps/chosen": -176.3207244873047, + "logps/rejected": -240.38919067382812, + "loss": 1.3374, + "nll_loss": 1.0063221454620361, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.927728652954102, + "rewards/margins": 2.973665237426758, + "rewards/rejected": 2.9540631771087646, + "step": 5980 + }, + { + "epoch": 0.332311618424155, + "grad_norm": 52.270530700683594, + "learning_rate": 7.513823408652833e-08, + "logits/chosen": -0.31507277488708496, + "logits/rejected": -0.46405959129333496, + "logps/chosen": -192.87774658203125, + "logps/rejected": -255.1183624267578, + "loss": 1.2586, + "nll_loss": 0.9924084544181824, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.717407703399658, + "rewards/margins": 3.2618069648742676, + "rewards/rejected": 2.4556007385253906, + "step": 5990 + }, + { + "epoch": 0.33286639575040566, + "grad_norm": 41.6818733215332, + "learning_rate": 7.506286540358317e-08, + "logits/chosen": -0.2737719416618347, + "logits/rejected": -0.37862199544906616, + "logps/chosen": -185.80844116210938, + "logps/rejected": -265.85015869140625, + "loss": 1.2765, + "nll_loss": 1.0262348651885986, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.482663154602051, + "rewards/margins": 3.4432480335235596, + "rewards/rejected": 2.039414882659912, + "step": 6000 + }, + { + "epoch": 0.33286639575040566, + "eval_logits/chosen": -0.3862064480781555, + "eval_logits/rejected": -0.4756089448928833, + "eval_logps/chosen": -196.4144287109375, + "eval_logps/rejected": -260.7208557128906, + "eval_loss": 1.2678471803665161, + "eval_nll_loss": 1.0233957767486572, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 6.154678821563721, + "eval_rewards/margins": 4.318915843963623, + "eval_rewards/rejected": 1.8357634544372559, + "eval_runtime": 16.8909, + "eval_samples_per_second": 15.156, + "eval_steps_per_second": 1.895, + "step": 6000 + }, + { + "epoch": 0.33342117307665636, + "grad_norm": 87.84968566894531, + "learning_rate": 7.49874205864342e-08, + "logits/chosen": -0.21110494434833527, + "logits/rejected": -0.38663357496261597, + "logps/chosen": -159.3927764892578, + "logps/rejected": -252.47006225585938, + "loss": 1.3007, + "nll_loss": 0.8897081613540649, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.111419677734375, + "rewards/margins": 4.202252388000488, + "rewards/rejected": 1.9091672897338867, + "step": 6010 + }, + { + "epoch": 0.333975950402907, + "grad_norm": 55.64853286743164, + "learning_rate": 7.491189986426235e-08, + "logits/chosen": -0.45345860719680786, + "logits/rejected": -0.519438624382019, + "logps/chosen": -216.1964874267578, + "logps/rejected": -305.46832275390625, + "loss": 1.4051, + "nll_loss": 1.1889097690582275, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.22586727142334, + "rewards/margins": 4.845824241638184, + "rewards/rejected": 1.3800431489944458, + "step": 6020 + }, + { + "epoch": 0.3345307277291577, + "grad_norm": 87.58065795898438, + "learning_rate": 7.48363034664791e-08, + "logits/chosen": -0.22561879456043243, + "logits/rejected": -0.41260385513305664, + "logps/chosen": -136.2498779296875, + "logps/rejected": -181.90325927734375, + "loss": 1.2827, + "nll_loss": 0.8572355508804321, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.760739803314209, + "rewards/margins": 2.4254050254821777, + "rewards/rejected": 2.3353352546691895, + "step": 6030 + }, + { + "epoch": 0.3350855050554084, + "grad_norm": 84.19505310058594, + "learning_rate": 7.476063162272593e-08, + "logits/chosen": -0.3665398061275482, + "logits/rejected": -0.5166418552398682, + "logps/chosen": -181.16824340820312, + "logps/rejected": -262.6795349121094, + "loss": 1.3055, + "nll_loss": 1.013683557510376, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.933252811431885, + "rewards/margins": 4.092905521392822, + "rewards/rejected": 1.8403470516204834, + "step": 6040 + }, + { + "epoch": 0.33564028238165905, + "grad_norm": 39.95737838745117, + "learning_rate": 7.468488456287336e-08, + "logits/chosen": -0.37900015711784363, + "logits/rejected": -0.4724903106689453, + "logps/chosen": -173.86795043945312, + "logps/rejected": -231.43423461914062, + "loss": 1.2916, + "nll_loss": 0.9488736987113953, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.916155815124512, + "rewards/margins": 4.1103668212890625, + "rewards/rejected": 1.8057887554168701, + "step": 6050 + }, + { + "epoch": 0.33619505970790975, + "grad_norm": 63.77442169189453, + "learning_rate": 7.460906251702051e-08, + "logits/chosen": -0.21798260509967804, + "logits/rejected": -0.40063363313674927, + "logps/chosen": -165.91812133789062, + "logps/rejected": -195.58705139160156, + "loss": 1.2618, + "nll_loss": 0.9206756353378296, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.39694881439209, + "rewards/margins": 2.942946195602417, + "rewards/rejected": 2.4540023803710938, + "step": 6060 + }, + { + "epoch": 0.3367498370341604, + "grad_norm": 50.75809860229492, + "learning_rate": 7.45331657154942e-08, + "logits/chosen": -0.32556071877479553, + "logits/rejected": -0.42382025718688965, + "logps/chosen": -152.8153839111328, + "logps/rejected": -189.4619140625, + "loss": 1.2941, + "nll_loss": 0.9182936549186707, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.209329605102539, + "rewards/margins": 2.8177552223205566, + "rewards/rejected": 2.3915748596191406, + "step": 6070 + }, + { + "epoch": 0.3373046143604111, + "grad_norm": 94.52537536621094, + "learning_rate": 7.445719438884839e-08, + "logits/chosen": -0.18494704365730286, + "logits/rejected": -0.2840834856033325, + "logps/chosen": -163.761962890625, + "logps/rejected": -203.1731414794922, + "loss": 1.175, + "nll_loss": 0.9018437266349792, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.398464679718018, + "rewards/margins": 3.006654739379883, + "rewards/rejected": 2.3918099403381348, + "step": 6080 + }, + { + "epoch": 0.3378593916866618, + "grad_norm": 53.11320877075195, + "learning_rate": 7.438114876786343e-08, + "logits/chosen": -0.18565864861011505, + "logits/rejected": -0.31152859330177307, + "logps/chosen": -140.01583862304688, + "logps/rejected": -183.9373321533203, + "loss": 1.2597, + "nll_loss": 0.8148931264877319, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.700402736663818, + "rewards/margins": 2.360290288925171, + "rewards/rejected": 2.3401126861572266, + "step": 6090 + }, + { + "epoch": 0.33841416901291244, + "grad_norm": 148.61221313476562, + "learning_rate": 7.430502908354531e-08, + "logits/chosen": -0.3025711178779602, + "logits/rejected": -0.4514090120792389, + "logps/chosen": -179.38238525390625, + "logps/rejected": -237.6366424560547, + "loss": 1.3181, + "nll_loss": 1.0456587076187134, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.581388473510742, + "rewards/margins": 4.103987216949463, + "rewards/rejected": 1.4774014949798584, + "step": 6100 + }, + { + "epoch": 0.33896894633916314, + "grad_norm": 74.82305145263672, + "learning_rate": 7.422883556712507e-08, + "logits/chosen": -0.30521565675735474, + "logits/rejected": -0.4374857544898987, + "logps/chosen": -171.0484619140625, + "logps/rejected": -224.58627319335938, + "loss": 1.2929, + "nll_loss": 0.9364229440689087, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.537566184997559, + "rewards/margins": 3.221212387084961, + "rewards/rejected": 2.3163530826568604, + "step": 6110 + }, + { + "epoch": 0.3395237236654138, + "grad_norm": 97.04814147949219, + "learning_rate": 7.415256845005797e-08, + "logits/chosen": -0.2522074282169342, + "logits/rejected": -0.42201119661331177, + "logps/chosen": -184.75643920898438, + "logps/rejected": -244.8933868408203, + "loss": 1.3599, + "nll_loss": 0.9097296595573425, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.552473068237305, + "rewards/margins": 3.796072006225586, + "rewards/rejected": 1.7564010620117188, + "step": 6120 + }, + { + "epoch": 0.3400785009916645, + "grad_norm": 60.06885528564453, + "learning_rate": 7.407622796402291e-08, + "logits/chosen": -0.3063388764858246, + "logits/rejected": -0.4394128918647766, + "logps/chosen": -221.8954620361328, + "logps/rejected": -280.248291015625, + "loss": 1.4306, + "nll_loss": 1.0352271795272827, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.173859596252441, + "rewards/margins": 4.336389541625977, + "rewards/rejected": 1.837469458580017, + "step": 6130 + }, + { + "epoch": 0.3406332783179151, + "grad_norm": 54.11576461791992, + "learning_rate": 7.399981434092159e-08, + "logits/chosen": -0.3295516073703766, + "logits/rejected": -0.48471516370773315, + "logps/chosen": -211.0443572998047, + "logps/rejected": -267.0463562011719, + "loss": 1.2962, + "nll_loss": 1.0900715589523315, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.180323600769043, + "rewards/margins": 4.42136812210083, + "rewards/rejected": 1.7589561939239502, + "step": 6140 + }, + { + "epoch": 0.3411880556441658, + "grad_norm": 73.50775909423828, + "learning_rate": 7.392332781287797e-08, + "logits/chosen": -0.18400311470031738, + "logits/rejected": -0.301456481218338, + "logps/chosen": -131.61074829101562, + "logps/rejected": -181.8965301513672, + "loss": 1.2867, + "nll_loss": 0.8391935229301453, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.861627101898193, + "rewards/margins": 2.8276915550231934, + "rewards/rejected": 2.033935546875, + "step": 6150 + }, + { + "epoch": 0.3417428329704165, + "grad_norm": 55.874847412109375, + "learning_rate": 7.384676861223738e-08, + "logits/chosen": -0.42141732573509216, + "logits/rejected": -0.4614016115665436, + "logps/chosen": -205.42623901367188, + "logps/rejected": -246.082275390625, + "loss": 1.288, + "nll_loss": 1.1639835834503174, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.899552822113037, + "rewards/margins": 2.474811315536499, + "rewards/rejected": 3.424741268157959, + "step": 6160 + }, + { + "epoch": 0.34229761029666717, + "grad_norm": 58.53584671020508, + "learning_rate": 7.377013697156595e-08, + "logits/chosen": -0.3395301401615143, + "logits/rejected": -0.43350130319595337, + "logps/chosen": -198.6548614501953, + "logps/rejected": -271.24078369140625, + "loss": 1.4002, + "nll_loss": 1.0972192287445068, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.351205348968506, + "rewards/margins": 3.5542285442352295, + "rewards/rejected": 2.7969765663146973, + "step": 6170 + }, + { + "epoch": 0.34285238762291786, + "grad_norm": 93.97845458984375, + "learning_rate": 7.369343312364993e-08, + "logits/chosen": -0.11968524754047394, + "logits/rejected": -0.27802419662475586, + "logps/chosen": -145.43263244628906, + "logps/rejected": -194.38095092773438, + "loss": 1.3325, + "nll_loss": 0.8231142163276672, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.9595794677734375, + "rewards/margins": 3.4239859580993652, + "rewards/rejected": 1.5355936288833618, + "step": 6180 + }, + { + "epoch": 0.3434071649491685, + "grad_norm": 41.60972595214844, + "learning_rate": 7.361665730149482e-08, + "logits/chosen": -0.27187636494636536, + "logits/rejected": -0.3486558794975281, + "logps/chosen": -160.63827514648438, + "logps/rejected": -214.30783081054688, + "loss": 1.2974, + "nll_loss": 0.9356172680854797, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.591213703155518, + "rewards/margins": 2.9364173412323, + "rewards/rejected": 2.6547961235046387, + "step": 6190 + }, + { + "epoch": 0.3439619422754192, + "grad_norm": 108.99187469482422, + "learning_rate": 7.353980973832478e-08, + "logits/chosen": -0.39349788427352905, + "logits/rejected": -0.4973227381706238, + "logps/chosen": -183.8045196533203, + "logps/rejected": -229.7374725341797, + "loss": 1.3744, + "nll_loss": 1.0677894353866577, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.8697943687438965, + "rewards/margins": 3.1557323932647705, + "rewards/rejected": 2.714062213897705, + "step": 6200 + }, + { + "epoch": 0.3445167196016699, + "grad_norm": 43.160152435302734, + "learning_rate": 7.346289066758194e-08, + "logits/chosen": -0.31956276297569275, + "logits/rejected": -0.4108821749687195, + "logps/chosen": -156.1692352294922, + "logps/rejected": -214.0746612548828, + "loss": 1.3614, + "nll_loss": 1.099229097366333, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.477211952209473, + "rewards/margins": 3.1682682037353516, + "rewards/rejected": 2.308943510055542, + "step": 6210 + }, + { + "epoch": 0.34507149692792055, + "grad_norm": 114.94517517089844, + "learning_rate": 7.338590032292561e-08, + "logits/chosen": -0.4515753388404846, + "logits/rejected": -0.5310046672821045, + "logps/chosen": -192.64334106445312, + "logps/rejected": -232.1820068359375, + "loss": 1.3761, + "nll_loss": 1.1607623100280762, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.136222839355469, + "rewards/margins": 3.0856223106384277, + "rewards/rejected": 3.05060076713562, + "step": 6220 + }, + { + "epoch": 0.34562627425417125, + "grad_norm": 55.0934944152832, + "learning_rate": 7.330883893823163e-08, + "logits/chosen": -0.14323115348815918, + "logits/rejected": -0.328891783952713, + "logps/chosen": -136.67495727539062, + "logps/rejected": -173.80918884277344, + "loss": 1.3251, + "nll_loss": 0.8461788892745972, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.520416736602783, + "rewards/margins": 2.688857316970825, + "rewards/rejected": 1.8315595388412476, + "step": 6230 + }, + { + "epoch": 0.3461810515804219, + "grad_norm": 93.47306823730469, + "learning_rate": 7.323170674759163e-08, + "logits/chosen": -0.304673433303833, + "logits/rejected": -0.43938857316970825, + "logps/chosen": -200.05377197265625, + "logps/rejected": -229.02261352539062, + "loss": 1.3026, + "nll_loss": 0.9279875755310059, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.0606160163879395, + "rewards/margins": 4.205328464508057, + "rewards/rejected": 1.8552885055541992, + "step": 6240 + }, + { + "epoch": 0.3467358289066726, + "grad_norm": 74.74398040771484, + "learning_rate": 7.315450398531235e-08, + "logits/chosen": -0.10720958560705185, + "logits/rejected": -0.25590941309928894, + "logps/chosen": -126.95005798339844, + "logps/rejected": -181.28817749023438, + "loss": 1.2535, + "nll_loss": 0.7301589846611023, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.625161170959473, + "rewards/margins": 3.0095062255859375, + "rewards/rejected": 1.615654706954956, + "step": 6250 + }, + { + "epoch": 0.34729060623292324, + "grad_norm": 63.662574768066406, + "learning_rate": 7.307723088591488e-08, + "logits/chosen": -0.17334458231925964, + "logits/rejected": -0.30409249663352966, + "logps/chosen": -142.11819458007812, + "logps/rejected": -182.3699493408203, + "loss": 1.2557, + "nll_loss": 0.8528593182563782, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.944436073303223, + "rewards/margins": 2.245910167694092, + "rewards/rejected": 2.69852614402771, + "step": 6260 + }, + { + "epoch": 0.34784538355917394, + "grad_norm": 82.51528930664062, + "learning_rate": 7.299988768413401e-08, + "logits/chosen": -0.3210276961326599, + "logits/rejected": -0.43619388341903687, + "logps/chosen": -189.78506469726562, + "logps/rejected": -248.33517456054688, + "loss": 1.2835, + "nll_loss": 1.0168853998184204, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.723790645599365, + "rewards/margins": 3.3456077575683594, + "rewards/rejected": 2.378182888031006, + "step": 6270 + }, + { + "epoch": 0.34840016088542464, + "grad_norm": 71.50834655761719, + "learning_rate": 7.292247461491743e-08, + "logits/chosen": -0.3468714952468872, + "logits/rejected": -0.46626749634742737, + "logps/chosen": -194.5014190673828, + "logps/rejected": -250.09317016601562, + "loss": 1.2907, + "nll_loss": 1.0468021631240845, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.295615196228027, + "rewards/margins": 4.136518955230713, + "rewards/rejected": 2.1590962409973145, + "step": 6280 + }, + { + "epoch": 0.3489549382116753, + "grad_norm": 54.759300231933594, + "learning_rate": 7.284499191342512e-08, + "logits/chosen": -0.03164532408118248, + "logits/rejected": -0.23100057244300842, + "logps/chosen": -116.4410400390625, + "logps/rejected": -183.38046264648438, + "loss": 1.239, + "nll_loss": 0.7256309390068054, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.489354610443115, + "rewards/margins": 3.0173919200897217, + "rewards/rejected": 1.471962571144104, + "step": 6290 + }, + { + "epoch": 0.349509715537926, + "grad_norm": 70.90135192871094, + "learning_rate": 7.276743981502856e-08, + "logits/chosen": -0.4476935863494873, + "logits/rejected": -0.49387326836586, + "logps/chosen": -207.6614990234375, + "logps/rejected": -271.92535400390625, + "loss": 1.346, + "nll_loss": 1.2119020223617554, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 6.015636444091797, + "rewards/margins": 2.942776918411255, + "rewards/rejected": 3.072859525680542, + "step": 6300 + }, + { + "epoch": 0.3500644928641766, + "grad_norm": 43.998878479003906, + "learning_rate": 7.268981855531002e-08, + "logits/chosen": -0.2172735184431076, + "logits/rejected": -0.3375437557697296, + "logps/chosen": -164.04248046875, + "logps/rejected": -231.4910430908203, + "loss": 1.2797, + "nll_loss": 0.9179700613021851, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 5.578200817108154, + "rewards/margins": 3.939047336578369, + "rewards/rejected": 1.6391534805297852, + "step": 6310 + }, + { + "epoch": 0.3506192701904273, + "grad_norm": 50.79728317260742, + "learning_rate": 7.261212837006191e-08, + "logits/chosen": -0.2464495599269867, + "logits/rejected": -0.3708285391330719, + "logps/chosen": -154.8589630126953, + "logps/rejected": -199.90542602539062, + "loss": 1.2937, + "nll_loss": 0.9330763816833496, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.324396133422852, + "rewards/margins": 3.398838758468628, + "rewards/rejected": 1.9255568981170654, + "step": 6320 + }, + { + "epoch": 0.35117404751667797, + "grad_norm": 47.69200897216797, + "learning_rate": 7.253436949528598e-08, + "logits/chosen": -0.2670142650604248, + "logits/rejected": -0.42380857467651367, + "logps/chosen": -136.66729736328125, + "logps/rejected": -179.9617156982422, + "loss": 1.3436, + "nll_loss": 0.8983263969421387, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.134589195251465, + "rewards/margins": 2.9455533027648926, + "rewards/rejected": 2.1890358924865723, + "step": 6330 + }, + { + "epoch": 0.35172882484292867, + "grad_norm": 35.19856643676758, + "learning_rate": 7.245654216719267e-08, + "logits/chosen": -0.32339194416999817, + "logits/rejected": -0.4838895797729492, + "logps/chosen": -182.80630493164062, + "logps/rejected": -240.4121551513672, + "loss": 1.3196, + "nll_loss": 1.1221892833709717, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.559865951538086, + "rewards/margins": 3.9628207683563232, + "rewards/rejected": 1.5970450639724731, + "step": 6340 + }, + { + "epoch": 0.35228360216917937, + "grad_norm": 39.05183792114258, + "learning_rate": 7.237864662220031e-08, + "logits/chosen": -0.34584400057792664, + "logits/rejected": -0.4345241189002991, + "logps/chosen": -180.0929718017578, + "logps/rejected": -244.9677276611328, + "loss": 1.2868, + "nll_loss": 1.0596837997436523, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.154149055480957, + "rewards/margins": 4.189995288848877, + "rewards/rejected": 1.9641540050506592, + "step": 6350 + }, + { + "epoch": 0.35283837949543, + "grad_norm": 61.75810241699219, + "learning_rate": 7.230068309693454e-08, + "logits/chosen": -0.1510430872440338, + "logits/rejected": -0.26399290561676025, + "logps/chosen": -153.47390747070312, + "logps/rejected": -201.64395141601562, + "loss": 1.3012, + "nll_loss": 0.9015968441963196, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.250309944152832, + "rewards/margins": 2.9471797943115234, + "rewards/rejected": 2.303130626678467, + "step": 6360 + }, + { + "epoch": 0.3533931568216807, + "grad_norm": 62.2823600769043, + "learning_rate": 7.222265182822739e-08, + "logits/chosen": -0.292441189289093, + "logits/rejected": -0.4160211682319641, + "logps/chosen": -167.91571044921875, + "logps/rejected": -215.4639129638672, + "loss": 1.2773, + "nll_loss": 0.963361382484436, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.473585605621338, + "rewards/margins": 3.273808240890503, + "rewards/rejected": 2.199777603149414, + "step": 6370 + }, + { + "epoch": 0.35394793414793135, + "grad_norm": 67.76326751708984, + "learning_rate": 7.21445530531168e-08, + "logits/chosen": -0.22603091597557068, + "logits/rejected": -0.35425618290901184, + "logps/chosen": -149.12693786621094, + "logps/rejected": -204.117431640625, + "loss": 1.3084, + "nll_loss": 1.0864441394805908, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.302445888519287, + "rewards/margins": 3.3712353706359863, + "rewards/rejected": 1.9312105178833008, + "step": 6380 + }, + { + "epoch": 0.35450271147418205, + "grad_norm": 98.6044692993164, + "learning_rate": 7.206638700884569e-08, + "logits/chosen": -0.33501431345939636, + "logits/rejected": -0.49409008026123047, + "logps/chosen": -177.7139129638672, + "logps/rejected": -237.3137969970703, + "loss": 1.3065, + "nll_loss": 1.0060980319976807, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.7570481300354, + "rewards/margins": 4.340456485748291, + "rewards/rejected": 1.4165910482406616, + "step": 6390 + }, + { + "epoch": 0.35505748880043275, + "grad_norm": 44.0657958984375, + "learning_rate": 7.198815393286135e-08, + "logits/chosen": -0.2693483233451843, + "logits/rejected": -0.40953415632247925, + "logps/chosen": -174.3963623046875, + "logps/rejected": -241.756591796875, + "loss": 1.249, + "nll_loss": 0.9351913332939148, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.488059043884277, + "rewards/margins": 3.997734785079956, + "rewards/rejected": 1.4903242588043213, + "step": 6400 + }, + { + "epoch": 0.3556122661266834, + "grad_norm": 45.25605392456055, + "learning_rate": 7.190985406281472e-08, + "logits/chosen": -0.08713646233081818, + "logits/rejected": -0.25797972083091736, + "logps/chosen": -117.9123764038086, + "logps/rejected": -182.8262481689453, + "loss": 1.3499, + "nll_loss": 0.7855955958366394, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.2343363761901855, + "rewards/margins": 3.5206990242004395, + "rewards/rejected": 0.7136377096176147, + "step": 6410 + }, + { + "epoch": 0.3561670434529341, + "grad_norm": 49.59719467163086, + "learning_rate": 7.183148763655959e-08, + "logits/chosen": -0.27555233240127563, + "logits/rejected": -0.36578166484832764, + "logps/chosen": -191.7743682861328, + "logps/rejected": -243.2725372314453, + "loss": 1.3099, + "nll_loss": 0.9913687705993652, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.697419166564941, + "rewards/margins": 3.9420788288116455, + "rewards/rejected": 1.7553411722183228, + "step": 6420 + }, + { + "epoch": 0.35672182077918474, + "grad_norm": 57.11975860595703, + "learning_rate": 7.175305489215199e-08, + "logits/chosen": -0.08695421367883682, + "logits/rejected": -0.2602617144584656, + "logps/chosen": -121.39566802978516, + "logps/rejected": -164.87734985351562, + "loss": 1.2562, + "nll_loss": 0.7267943620681763, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.647629261016846, + "rewards/margins": 3.4465274810791016, + "rewards/rejected": 1.2011014223098755, + "step": 6430 + }, + { + "epoch": 0.35727659810543544, + "grad_norm": 72.39891052246094, + "learning_rate": 7.167455606784934e-08, + "logits/chosen": -0.32719069719314575, + "logits/rejected": -0.43272677063941956, + "logps/chosen": -189.12789916992188, + "logps/rejected": -252.99569702148438, + "loss": 1.4202, + "nll_loss": 1.029658555984497, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.1457319259643555, + "rewards/margins": 3.5280590057373047, + "rewards/rejected": 2.6176724433898926, + "step": 6440 + }, + { + "epoch": 0.3578313754316861, + "grad_norm": 29.843097686767578, + "learning_rate": 7.159599140210986e-08, + "logits/chosen": -0.2822156548500061, + "logits/rejected": -0.42548808455467224, + "logps/chosen": -179.24459838867188, + "logps/rejected": -261.34515380859375, + "loss": 1.2763, + "nll_loss": 1.0095112323760986, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.766944885253906, + "rewards/margins": 3.254338502883911, + "rewards/rejected": 2.512606143951416, + "step": 6450 + }, + { + "epoch": 0.3583861527579368, + "grad_norm": 137.72471618652344, + "learning_rate": 7.151736113359174e-08, + "logits/chosen": -0.2707064151763916, + "logits/rejected": -0.36198943853378296, + "logps/chosen": -164.41000366210938, + "logps/rejected": -227.1502227783203, + "loss": 1.3286, + "nll_loss": 1.1287130117416382, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.302213191986084, + "rewards/margins": 2.804896593093872, + "rewards/rejected": 2.497316360473633, + "step": 6460 + }, + { + "epoch": 0.3589409300841875, + "grad_norm": 31.62238311767578, + "learning_rate": 7.143866550115245e-08, + "logits/chosen": 0.03674770146608353, + "logits/rejected": -0.16360250115394592, + "logps/chosen": -127.9668960571289, + "logps/rejected": -163.06024169921875, + "loss": 1.2334, + "nll_loss": 0.7001178860664368, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.386387825012207, + "rewards/margins": 2.4408748149871826, + "rewards/rejected": 1.945513367652893, + "step": 6470 + }, + { + "epoch": 0.3594957074104381, + "grad_norm": 56.53056335449219, + "learning_rate": 7.135990474384804e-08, + "logits/chosen": -0.3590020537376404, + "logits/rejected": -0.4130149781703949, + "logps/chosen": -167.0102996826172, + "logps/rejected": -226.590087890625, + "loss": 1.3655, + "nll_loss": 1.0488640069961548, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.666329860687256, + "rewards/margins": 3.3042640686035156, + "rewards/rejected": 2.3620657920837402, + "step": 6480 + }, + { + "epoch": 0.3600504847366888, + "grad_norm": 56.09820556640625, + "learning_rate": 7.128107910093238e-08, + "logits/chosen": -0.21703293919563293, + "logits/rejected": -0.3460865616798401, + "logps/chosen": -160.18495178222656, + "logps/rejected": -196.6850128173828, + "loss": 1.3055, + "nll_loss": 0.9067890048027039, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.332886695861816, + "rewards/margins": 2.839141607284546, + "rewards/rejected": 2.493744373321533, + "step": 6490 + }, + { + "epoch": 0.36060526206293947, + "grad_norm": 77.31085968017578, + "learning_rate": 7.120218881185642e-08, + "logits/chosen": -0.28630420565605164, + "logits/rejected": -0.40152207016944885, + "logps/chosen": -167.91921997070312, + "logps/rejected": -241.7128448486328, + "loss": 1.2308, + "nll_loss": 0.9291079640388489, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.025139331817627, + "rewards/margins": 2.8772501945495605, + "rewards/rejected": 3.147888660430908, + "step": 6500 + }, + { + "epoch": 0.36060526206293947, + "eval_logits/chosen": -0.35594862699508667, + "eval_logits/rejected": -0.4358241856098175, + "eval_logps/chosen": -194.94239807128906, + "eval_logps/rejected": -258.62860107421875, + "eval_loss": 1.255896806716919, + "eval_nll_loss": 1.0143333673477173, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 6.301882743835449, + "eval_rewards/margins": 4.256891250610352, + "eval_rewards/rejected": 2.0449914932250977, + "eval_runtime": 16.8633, + "eval_samples_per_second": 15.181, + "eval_steps_per_second": 1.898, + "step": 6500 + }, + { + "epoch": 0.36116003938919017, + "grad_norm": 55.182987213134766, + "learning_rate": 7.112323411626755e-08, + "logits/chosen": -0.3085078299045563, + "logits/rejected": -0.4444068372249603, + "logps/chosen": -163.02085876464844, + "logps/rejected": -214.41885375976562, + "loss": 1.326, + "nll_loss": 1.0153495073318481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.5913190841674805, + "rewards/margins": 2.8857014179229736, + "rewards/rejected": 2.705617904663086, + "step": 6510 + }, + { + "epoch": 0.3617148167154408, + "grad_norm": 91.24496459960938, + "learning_rate": 7.104421525400874e-08, + "logits/chosen": -0.23191985487937927, + "logits/rejected": -0.371439129114151, + "logps/chosen": -157.10472106933594, + "logps/rejected": -209.3927764892578, + "loss": 1.3215, + "nll_loss": 1.0198981761932373, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.36658239364624, + "rewards/margins": 3.5386276245117188, + "rewards/rejected": 1.827954649925232, + "step": 6520 + }, + { + "epoch": 0.3622695940416915, + "grad_norm": 49.67591094970703, + "learning_rate": 7.096513246511794e-08, + "logits/chosen": -0.20982857048511505, + "logits/rejected": -0.3763899505138397, + "logps/chosen": -142.84642028808594, + "logps/rejected": -192.2998504638672, + "loss": 1.4071, + "nll_loss": 0.9238711595535278, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.907094478607178, + "rewards/margins": 3.333144426345825, + "rewards/rejected": 1.573950171470642, + "step": 6530 + }, + { + "epoch": 0.3628243713679422, + "grad_norm": 62.50767135620117, + "learning_rate": 7.088598598982727e-08, + "logits/chosen": -0.22332048416137695, + "logits/rejected": -0.349341481924057, + "logps/chosen": -163.889404296875, + "logps/rejected": -205.19857788085938, + "loss": 1.2796, + "nll_loss": 0.9458600878715515, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.2342610359191895, + "rewards/margins": 3.4151597023010254, + "rewards/rejected": 1.819101333618164, + "step": 6540 + }, + { + "epoch": 0.36337914869419286, + "grad_norm": 88.51134490966797, + "learning_rate": 7.080677606856229e-08, + "logits/chosen": -0.1471829116344452, + "logits/rejected": -0.2903767228126526, + "logps/chosen": -142.53680419921875, + "logps/rejected": -210.1653594970703, + "loss": 1.2684, + "nll_loss": 0.8725835084915161, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.088635444641113, + "rewards/margins": 3.848362445831299, + "rewards/rejected": 1.2402734756469727, + "step": 6550 + }, + { + "epoch": 0.36393392602044355, + "grad_norm": 63.4984245300293, + "learning_rate": 7.07275029419413e-08, + "logits/chosen": -0.2597203254699707, + "logits/rejected": -0.34430503845214844, + "logps/chosen": -186.06716918945312, + "logps/rejected": -232.9912109375, + "loss": 1.3937, + "nll_loss": 1.1478632688522339, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.421383857727051, + "rewards/margins": 3.0764412879943848, + "rewards/rejected": 2.344942331314087, + "step": 6560 + }, + { + "epoch": 0.3644887033466942, + "grad_norm": 62.26931381225586, + "learning_rate": 7.064816685077461e-08, + "logits/chosen": -0.2954166829586029, + "logits/rejected": -0.39529484510421753, + "logps/chosen": -208.4893035888672, + "logps/rejected": -244.24404907226562, + "loss": 1.4052, + "nll_loss": 1.03569495677948, + "rewards/accuracies": 0.75, + "rewards/chosen": 6.040772438049316, + "rewards/margins": 3.601053237915039, + "rewards/rejected": 2.4397192001342773, + "step": 6570 + }, + { + "epoch": 0.3650434806729449, + "grad_norm": 34.1624641418457, + "learning_rate": 7.056876803606382e-08, + "logits/chosen": -0.2604549825191498, + "logits/rejected": -0.3620299696922302, + "logps/chosen": -169.99151611328125, + "logps/rejected": -234.2142791748047, + "loss": 1.2377, + "nll_loss": 0.9837854504585266, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.080147743225098, + "rewards/margins": 3.937419891357422, + "rewards/rejected": 2.142728090286255, + "step": 6580 + }, + { + "epoch": 0.3655982579991956, + "grad_norm": 57.836952209472656, + "learning_rate": 7.048930673900104e-08, + "logits/chosen": -0.09565924108028412, + "logits/rejected": -0.18147985637187958, + "logps/chosen": -140.58352661132812, + "logps/rejected": -192.8702850341797, + "loss": 1.2783, + "nll_loss": 0.8632059097290039, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.876048564910889, + "rewards/margins": 2.7102205753326416, + "rewards/rejected": 2.165827989578247, + "step": 6590 + }, + { + "epoch": 0.36615303532544624, + "grad_norm": 118.21241760253906, + "learning_rate": 7.040978320096819e-08, + "logits/chosen": -0.2354263812303543, + "logits/rejected": -0.3318116068840027, + "logps/chosen": -145.45526123046875, + "logps/rejected": -204.20809936523438, + "loss": 1.3199, + "nll_loss": 0.897225558757782, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.396674633026123, + "rewards/margins": 3.439645290374756, + "rewards/rejected": 1.9570293426513672, + "step": 6600 + }, + { + "epoch": 0.36670781265169694, + "grad_norm": 68.2356185913086, + "learning_rate": 7.033019766353625e-08, + "logits/chosen": -0.17760439217090607, + "logits/rejected": -0.2736101448535919, + "logps/chosen": -198.8605499267578, + "logps/rejected": -246.8716583251953, + "loss": 1.3336, + "nll_loss": 1.0561082363128662, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.714756965637207, + "rewards/margins": 2.732408046722412, + "rewards/rejected": 2.982348918914795, + "step": 6610 + }, + { + "epoch": 0.3672625899779476, + "grad_norm": 87.32295989990234, + "learning_rate": 7.025055036846454e-08, + "logits/chosen": -0.21067604422569275, + "logits/rejected": -0.32407501339912415, + "logps/chosen": -202.57394409179688, + "logps/rejected": -250.7236785888672, + "loss": 1.3081, + "nll_loss": 1.0166271924972534, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.010329246520996, + "rewards/margins": 3.8012759685516357, + "rewards/rejected": 2.2090530395507812, + "step": 6620 + }, + { + "epoch": 0.3678173673041983, + "grad_norm": 56.69834899902344, + "learning_rate": 7.017084155770005e-08, + "logits/chosen": -0.2860775887966156, + "logits/rejected": -0.3612442910671234, + "logps/chosen": -203.62258911132812, + "logps/rejected": -248.18661499023438, + "loss": 1.2513, + "nll_loss": 1.1245375871658325, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.074135780334473, + "rewards/margins": 3.8243346214294434, + "rewards/rejected": 2.249800682067871, + "step": 6630 + }, + { + "epoch": 0.36837214463044893, + "grad_norm": 85.24093627929688, + "learning_rate": 7.009107147337652e-08, + "logits/chosen": -0.03159003332257271, + "logits/rejected": -0.18280380964279175, + "logps/chosen": -153.42019653320312, + "logps/rejected": -214.30972290039062, + "loss": 1.27, + "nll_loss": 0.8551816940307617, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.650689125061035, + "rewards/margins": 3.2693932056427, + "rewards/rejected": 1.3812963962554932, + "step": 6640 + }, + { + "epoch": 0.3689269219566996, + "grad_norm": 57.682281494140625, + "learning_rate": 7.001124035781389e-08, + "logits/chosen": 0.09463175386190414, + "logits/rejected": -0.0054311128333210945, + "logps/chosen": -131.00662231445312, + "logps/rejected": -162.55885314941406, + "loss": 1.3126, + "nll_loss": 0.7956680059432983, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.460573196411133, + "rewards/margins": 2.7722792625427246, + "rewards/rejected": 1.6882938146591187, + "step": 6650 + }, + { + "epoch": 0.3694816992829503, + "grad_norm": 45.95630645751953, + "learning_rate": 6.993134845351752e-08, + "logits/chosen": -0.2924136221408844, + "logits/rejected": -0.3941074013710022, + "logps/chosen": -157.63040161132812, + "logps/rejected": -208.11837768554688, + "loss": 1.2819, + "nll_loss": 1.0189913511276245, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.672625541687012, + "rewards/margins": 2.997842788696289, + "rewards/rejected": 2.6747829914093018, + "step": 6660 + }, + { + "epoch": 0.37003647660920097, + "grad_norm": 115.9908218383789, + "learning_rate": 6.985139600317737e-08, + "logits/chosen": 0.06917702406644821, + "logits/rejected": -0.09741564840078354, + "logps/chosen": -116.82330322265625, + "logps/rejected": -147.33279418945312, + "loss": 1.3045, + "nll_loss": 0.704028308391571, + "rewards/accuracies": 0.75, + "rewards/chosen": 3.9377639293670654, + "rewards/margins": 2.025374412536621, + "rewards/rejected": 1.9123893976211548, + "step": 6670 + }, + { + "epoch": 0.37059125393545167, + "grad_norm": 60.326393127441406, + "learning_rate": 6.977138324966736e-08, + "logits/chosen": -0.12928955256938934, + "logits/rejected": -0.2916646897792816, + "logps/chosen": -146.9344482421875, + "logps/rejected": -200.44168090820312, + "loss": 1.2645, + "nll_loss": 0.8929560780525208, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.845291614532471, + "rewards/margins": 3.032985210418701, + "rewards/rejected": 1.8123064041137695, + "step": 6680 + }, + { + "epoch": 0.3711460312617023, + "grad_norm": 48.884490966796875, + "learning_rate": 6.969131043604459e-08, + "logits/chosen": -0.17368502914905548, + "logits/rejected": -0.3455658555030823, + "logps/chosen": -155.38258361816406, + "logps/rejected": -208.5419158935547, + "loss": 1.2361, + "nll_loss": 0.8771988153457642, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.127696514129639, + "rewards/margins": 3.4352517127990723, + "rewards/rejected": 1.692445158958435, + "step": 6690 + }, + { + "epoch": 0.371700808587953, + "grad_norm": 57.625953674316406, + "learning_rate": 6.961117780554862e-08, + "logits/chosen": -0.11713214218616486, + "logits/rejected": -0.2630918622016907, + "logps/chosen": -159.48318481445312, + "logps/rejected": -208.9144287109375, + "loss": 1.3387, + "nll_loss": 0.9308179616928101, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.009263515472412, + "rewards/margins": 3.5683231353759766, + "rewards/rejected": 1.4409408569335938, + "step": 6700 + }, + { + "epoch": 0.3722555859142037, + "grad_norm": 43.676998138427734, + "learning_rate": 6.953098560160065e-08, + "logits/chosen": -0.18010783195495605, + "logits/rejected": -0.27125436067581177, + "logps/chosen": -168.66122436523438, + "logps/rejected": -218.2757568359375, + "loss": 1.2475, + "nll_loss": 0.9375994801521301, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.377259254455566, + "rewards/margins": 2.9341747760772705, + "rewards/rejected": 2.443084478378296, + "step": 6710 + }, + { + "epoch": 0.37281036324045436, + "grad_norm": 113.7532958984375, + "learning_rate": 6.945073406780295e-08, + "logits/chosen": -0.1638331115245819, + "logits/rejected": -0.30707019567489624, + "logps/chosen": -158.09225463867188, + "logps/rejected": -197.9178009033203, + "loss": 1.3798, + "nll_loss": 0.962394118309021, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 4.8423566818237305, + "rewards/margins": 2.6290435791015625, + "rewards/rejected": 2.213313341140747, + "step": 6720 + }, + { + "epoch": 0.37336514056670506, + "grad_norm": 86.59712219238281, + "learning_rate": 6.937042344793795e-08, + "logits/chosen": -0.22376978397369385, + "logits/rejected": -0.3277062177658081, + "logps/chosen": -177.826416015625, + "logps/rejected": -240.59805297851562, + "loss": 1.3694, + "nll_loss": 1.1009342670440674, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.5532402992248535, + "rewards/margins": 3.9395835399627686, + "rewards/rejected": 1.6136566400527954, + "step": 6730 + }, + { + "epoch": 0.3739199178929557, + "grad_norm": 84.39332580566406, + "learning_rate": 6.929005398596754e-08, + "logits/chosen": -0.11341975629329681, + "logits/rejected": -0.29075413942337036, + "logps/chosen": -174.98912048339844, + "logps/rejected": -228.1398468017578, + "loss": 1.2888, + "nll_loss": 0.8445985913276672, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.468979835510254, + "rewards/margins": 3.9075286388397217, + "rewards/rejected": 1.5614511966705322, + "step": 6740 + }, + { + "epoch": 0.3744746952192064, + "grad_norm": 46.01534652709961, + "learning_rate": 6.920962592603248e-08, + "logits/chosen": -0.37522125244140625, + "logits/rejected": -0.4878465533256531, + "logps/chosen": -181.1302490234375, + "logps/rejected": -237.25146484375, + "loss": 1.3075, + "nll_loss": 1.0549627542495728, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.972900867462158, + "rewards/margins": 3.768439769744873, + "rewards/rejected": 2.204460859298706, + "step": 6750 + }, + { + "epoch": 0.37502947254545704, + "grad_norm": 68.10105895996094, + "learning_rate": 6.91291395124514e-08, + "logits/chosen": -0.23928236961364746, + "logits/rejected": -0.36333370208740234, + "logps/chosen": -166.55661010742188, + "logps/rejected": -216.7520751953125, + "loss": 1.283, + "nll_loss": 1.0072357654571533, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.385659694671631, + "rewards/margins": 3.2875118255615234, + "rewards/rejected": 2.0981478691101074, + "step": 6760 + }, + { + "epoch": 0.37558424987170774, + "grad_norm": 126.58575439453125, + "learning_rate": 6.904859498972025e-08, + "logits/chosen": -0.1630457043647766, + "logits/rejected": -0.3187350332736969, + "logps/chosen": -127.83060455322266, + "logps/rejected": -168.25228881835938, + "loss": 1.2153, + "nll_loss": 0.8524566888809204, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.729722499847412, + "rewards/margins": 2.9829154014587402, + "rewards/rejected": 1.7468068599700928, + "step": 6770 + }, + { + "epoch": 0.37613902719795844, + "grad_norm": 45.973533630371094, + "learning_rate": 6.89679926025115e-08, + "logits/chosen": -0.2023843228816986, + "logits/rejected": -0.3685084283351898, + "logps/chosen": -160.76187133789062, + "logps/rejected": -228.87161254882812, + "loss": 1.2253, + "nll_loss": 0.9022408723831177, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.037689685821533, + "rewards/margins": 4.123410224914551, + "rewards/rejected": 0.9142792820930481, + "step": 6780 + }, + { + "epoch": 0.3766938045242091, + "grad_norm": 44.571598052978516, + "learning_rate": 6.888733259567342e-08, + "logits/chosen": -0.3374364674091339, + "logits/rejected": -0.5107888579368591, + "logps/chosen": -184.53736877441406, + "logps/rejected": -266.7218322753906, + "loss": 1.2618, + "nll_loss": 0.9846833944320679, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.135041236877441, + "rewards/margins": 4.377993106842041, + "rewards/rejected": 1.7570486068725586, + "step": 6790 + }, + { + "epoch": 0.3772485818504598, + "grad_norm": 103.22528839111328, + "learning_rate": 6.880661521422927e-08, + "logits/chosen": -0.21125967800617218, + "logits/rejected": -0.28824880719184875, + "logps/chosen": -154.03274536132812, + "logps/rejected": -200.83773803710938, + "loss": 1.3361, + "nll_loss": 0.9575176239013672, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.292486667633057, + "rewards/margins": 2.889380931854248, + "rewards/rejected": 2.4031059741973877, + "step": 6800 + }, + { + "epoch": 0.37780335917671043, + "grad_norm": 61.16334915161133, + "learning_rate": 6.87258407033766e-08, + "logits/chosen": -0.16521167755126953, + "logits/rejected": -0.2563553750514984, + "logps/chosen": -161.29776000976562, + "logps/rejected": -237.66824340820312, + "loss": 1.2426, + "nll_loss": 0.944124698638916, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.560735702514648, + "rewards/margins": 3.922863721847534, + "rewards/rejected": 1.6378719806671143, + "step": 6810 + }, + { + "epoch": 0.37835813650296113, + "grad_norm": 140.47105407714844, + "learning_rate": 6.864500930848652e-08, + "logits/chosen": -0.31639528274536133, + "logits/rejected": -0.4148218035697937, + "logps/chosen": -182.83103942871094, + "logps/rejected": -220.06185913085938, + "loss": 1.2797, + "nll_loss": 0.9983618855476379, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.573078632354736, + "rewards/margins": 3.072183132171631, + "rewards/rejected": 2.5008950233459473, + "step": 6820 + }, + { + "epoch": 0.3789129138292118, + "grad_norm": 55.61164093017578, + "learning_rate": 6.856412127510297e-08, + "logits/chosen": -0.20365670323371887, + "logits/rejected": -0.36336570978164673, + "logps/chosen": -181.97103881835938, + "logps/rejected": -239.91830444335938, + "loss": 1.3521, + "nll_loss": 0.9995267987251282, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.553870677947998, + "rewards/margins": 4.438251972198486, + "rewards/rejected": 1.1156189441680908, + "step": 6830 + }, + { + "epoch": 0.37946769115546247, + "grad_norm": 30.3891544342041, + "learning_rate": 6.848317684894188e-08, + "logits/chosen": -0.2444746196269989, + "logits/rejected": -0.33190420269966125, + "logps/chosen": -174.93524169921875, + "logps/rejected": -221.8345947265625, + "loss": 1.2883, + "nll_loss": 1.066310167312622, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.140133857727051, + "rewards/margins": 3.0378527641296387, + "rewards/rejected": 2.102281093597412, + "step": 6840 + }, + { + "epoch": 0.38002246848171317, + "grad_norm": 106.51737213134766, + "learning_rate": 6.840217627589051e-08, + "logits/chosen": -0.26744550466537476, + "logits/rejected": -0.354861319065094, + "logps/chosen": -171.31802368164062, + "logps/rejected": -202.1862030029297, + "loss": 1.2587, + "nll_loss": 1.028227686882019, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.459754943847656, + "rewards/margins": 2.871218204498291, + "rewards/rejected": 2.5885369777679443, + "step": 6850 + }, + { + "epoch": 0.3805772458079638, + "grad_norm": 50.665340423583984, + "learning_rate": 6.832111980200672e-08, + "logits/chosen": -0.18003907799720764, + "logits/rejected": -0.296830415725708, + "logps/chosen": -170.67274475097656, + "logps/rejected": -216.8302764892578, + "loss": 1.2318, + "nll_loss": 0.9463116526603699, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.955447196960449, + "rewards/margins": 2.8384132385253906, + "rewards/rejected": 2.1170341968536377, + "step": 6860 + }, + { + "epoch": 0.3811320231342145, + "grad_norm": 83.80812072753906, + "learning_rate": 6.82400076735181e-08, + "logits/chosen": -0.33803707361221313, + "logits/rejected": -0.462869793176651, + "logps/chosen": -175.27798461914062, + "logps/rejected": -253.77969360351562, + "loss": 1.275, + "nll_loss": 1.0101854801177979, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.634285926818848, + "rewards/margins": 3.6914801597595215, + "rewards/rejected": 1.9428050518035889, + "step": 6870 + }, + { + "epoch": 0.38168680046046516, + "grad_norm": 60.65707778930664, + "learning_rate": 6.815884013682139e-08, + "logits/chosen": -0.14707748591899872, + "logits/rejected": -0.31664079427719116, + "logps/chosen": -151.28451538085938, + "logps/rejected": -203.17376708984375, + "loss": 1.3218, + "nll_loss": 0.8372349739074707, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.27748441696167, + "rewards/margins": 4.052838325500488, + "rewards/rejected": 1.2246456146240234, + "step": 6880 + }, + { + "epoch": 0.38224157778671586, + "grad_norm": 78.37731170654297, + "learning_rate": 6.807761743848158e-08, + "logits/chosen": -0.1943022906780243, + "logits/rejected": -0.36641693115234375, + "logps/chosen": -173.49557495117188, + "logps/rejected": -239.4379425048828, + "loss": 1.2835, + "nll_loss": 0.8731621503829956, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.856075763702393, + "rewards/margins": 3.8867263793945312, + "rewards/rejected": 1.9693495035171509, + "step": 6890 + }, + { + "epoch": 0.38279635511296656, + "grad_norm": 48.03612518310547, + "learning_rate": 6.799633982523128e-08, + "logits/chosen": -0.19575706124305725, + "logits/rejected": -0.35647979378700256, + "logps/chosen": -148.83901977539062, + "logps/rejected": -195.1193084716797, + "loss": 1.2447, + "nll_loss": 0.8911144137382507, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.207388877868652, + "rewards/margins": 3.3151965141296387, + "rewards/rejected": 1.8921921253204346, + "step": 6900 + }, + { + "epoch": 0.3833511324392172, + "grad_norm": 57.33967971801758, + "learning_rate": 6.791500754396985e-08, + "logits/chosen": -0.3484516739845276, + "logits/rejected": -0.49889832735061646, + "logps/chosen": -184.61288452148438, + "logps/rejected": -240.37765502929688, + "loss": 1.36, + "nll_loss": 1.0312130451202393, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.726377010345459, + "rewards/margins": 3.6866767406463623, + "rewards/rejected": 2.039700746536255, + "step": 6910 + }, + { + "epoch": 0.3839059097654679, + "grad_norm": 52.44933319091797, + "learning_rate": 6.783362084176276e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -145.9274444580078, + "logps/rejected": -202.3636932373047, + "loss": 1.2302, + "nll_loss": NaN, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.59585428237915, + "rewards/margins": 3.771801710128784, + "rewards/rejected": 1.824052095413208, + "step": 6920 + }, + { + "epoch": 0.38446068709171854, + "grad_norm": 75.28496551513672, + "learning_rate": 6.775217996584082e-08, + "logits/chosen": -0.2931756377220154, + "logits/rejected": -0.3632332682609558, + "logps/chosen": -188.1034698486328, + "logps/rejected": -240.4395294189453, + "loss": 1.3405, + "nll_loss": 1.1550260782241821, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.495295524597168, + "rewards/margins": 2.5328545570373535, + "rewards/rejected": 2.9624409675598145, + "step": 6930 + }, + { + "epoch": 0.38501546441796924, + "grad_norm": 82.79104614257812, + "learning_rate": 6.767068516359935e-08, + "logits/chosen": -0.3296845555305481, + "logits/rejected": -0.39916354417800903, + "logps/chosen": -171.7633819580078, + "logps/rejected": -192.93478393554688, + "loss": 1.3266, + "nll_loss": 1.0685354471206665, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.36434268951416, + "rewards/margins": 2.498317003250122, + "rewards/rejected": 2.866025447845459, + "step": 6940 + }, + { + "epoch": 0.3855702417442199, + "grad_norm": 53.79294204711914, + "learning_rate": 6.758913668259752e-08, + "logits/chosen": -0.28452345728874207, + "logits/rejected": -0.4068564474582672, + "logps/chosen": -166.2971954345703, + "logps/rejected": -252.1527862548828, + "loss": 1.239, + "nll_loss": 0.9916399717330933, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.018822193145752, + "rewards/margins": 4.4249725341796875, + "rewards/rejected": 1.5938496589660645, + "step": 6950 + }, + { + "epoch": 0.3861250190704706, + "grad_norm": 66.47594451904297, + "learning_rate": 6.750753477055755e-08, + "logits/chosen": -0.31195324659347534, + "logits/rejected": -0.3991023898124695, + "logps/chosen": -172.429931640625, + "logps/rejected": -227.45504760742188, + "loss": 1.3355, + "nll_loss": 1.016535758972168, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.862557411193848, + "rewards/margins": 2.859070301055908, + "rewards/rejected": 3.0034873485565186, + "step": 6960 + }, + { + "epoch": 0.3866797963967213, + "grad_norm": 118.2043685913086, + "learning_rate": 6.742587967536397e-08, + "logits/chosen": -0.25057241320610046, + "logits/rejected": -0.4117020070552826, + "logps/chosen": -170.09341430664062, + "logps/rejected": -239.7622528076172, + "loss": 1.2453, + "nll_loss": 0.9312776327133179, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.926008701324463, + "rewards/margins": 4.62038516998291, + "rewards/rejected": 1.3056236505508423, + "step": 6970 + }, + { + "epoch": 0.38723457372297193, + "grad_norm": 85.15673828125, + "learning_rate": 6.734417164506285e-08, + "logits/chosen": -0.015497421845793724, + "logits/rejected": -0.21804824471473694, + "logps/chosen": -149.40304565429688, + "logps/rejected": -213.0756378173828, + "loss": 1.2931, + "nll_loss": 0.7935541272163391, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.081489562988281, + "rewards/margins": 3.855226516723633, + "rewards/rejected": 1.2262629270553589, + "step": 6980 + }, + { + "epoch": 0.38778935104922263, + "grad_norm": 41.28948211669922, + "learning_rate": 6.726241092786111e-08, + "logits/chosen": -0.2652779221534729, + "logits/rejected": -0.3552890121936798, + "logps/chosen": -182.173095703125, + "logps/rejected": -232.24490356445312, + "loss": 1.2805, + "nll_loss": 1.117540955543518, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.992371082305908, + "rewards/margins": 4.001796722412109, + "rewards/rejected": 1.9905742406845093, + "step": 6990 + }, + { + "epoch": 0.3883441283754733, + "grad_norm": 61.72700119018555, + "learning_rate": 6.718059777212566e-08, + "logits/chosen": -0.27194350957870483, + "logits/rejected": -0.35817548632621765, + "logps/chosen": -174.56044006347656, + "logps/rejected": -219.2909698486328, + "loss": 1.1839, + "nll_loss": 0.9531155824661255, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 5.9334845542907715, + "rewards/margins": 3.5387001037597656, + "rewards/rejected": 2.394784927368164, + "step": 7000 + }, + { + "epoch": 0.3883441283754733, + "eval_logits/chosen": -0.353268563747406, + "eval_logits/rejected": -0.4408227503299713, + "eval_logps/chosen": -194.59059143066406, + "eval_logps/rejected": -265.7848815917969, + "eval_loss": 1.2542152404785156, + "eval_nll_loss": 1.0102587938308716, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.337061882019043, + "eval_rewards/margins": 5.007699966430664, + "eval_rewards/rejected": 1.3293613195419312, + "eval_runtime": 16.7119, + "eval_samples_per_second": 15.318, + "eval_steps_per_second": 1.915, + "step": 7000 + }, + { + "epoch": 0.388898905701724, + "grad_norm": 96.24934387207031, + "learning_rate": 6.709873242638272e-08, + "logits/chosen": -0.34589165449142456, + "logits/rejected": -0.4874038100242615, + "logps/chosen": -172.8664093017578, + "logps/rejected": -225.47140502929688, + "loss": 1.2438, + "nll_loss": 1.0253851413726807, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.837738037109375, + "rewards/margins": 3.9034225940704346, + "rewards/rejected": 1.9343160390853882, + "step": 7010 + }, + { + "epoch": 0.3894536830279747, + "grad_norm": 55.82901382446289, + "learning_rate": 6.70168151393171e-08, + "logits/chosen": -0.21283188462257385, + "logits/rejected": -0.3394726514816284, + "logps/chosen": -164.8054962158203, + "logps/rejected": -225.3345947265625, + "loss": 1.4238, + "nll_loss": 0.9743822813034058, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.448554039001465, + "rewards/margins": 3.3105034828186035, + "rewards/rejected": 2.1380505561828613, + "step": 7020 + }, + { + "epoch": 0.3900084603542253, + "grad_norm": 63.66157913208008, + "learning_rate": 6.693484615977133e-08, + "logits/chosen": -0.3501175045967102, + "logits/rejected": -0.4347058832645416, + "logps/chosen": -203.2848358154297, + "logps/rejected": -250.0037078857422, + "loss": 1.3144, + "nll_loss": 1.1313148736953735, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.2256083488464355, + "rewards/margins": 3.5999526977539062, + "rewards/rejected": 2.625655174255371, + "step": 7030 + }, + { + "epoch": 0.390563237680476, + "grad_norm": 45.28962326049805, + "learning_rate": 6.6852825736745e-08, + "logits/chosen": -0.21284322440624237, + "logits/rejected": -0.3513473868370056, + "logps/chosen": -151.68064880371094, + "logps/rejected": -213.38381958007812, + "loss": 1.2049, + "nll_loss": 0.8993641138076782, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.502038955688477, + "rewards/margins": 3.628171920776367, + "rewards/rejected": 1.8738670349121094, + "step": 7040 + }, + { + "epoch": 0.39111801500672666, + "grad_norm": 30.618072509765625, + "learning_rate": 6.677075411939394e-08, + "logits/chosen": -0.08350099623203278, + "logits/rejected": -0.31030920147895813, + "logps/chosen": -151.19979858398438, + "logps/rejected": -221.28964233398438, + "loss": 1.2577, + "nll_loss": 0.8976804614067078, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.1023359298706055, + "rewards/margins": 4.105113506317139, + "rewards/rejected": 0.9972225427627563, + "step": 7050 + }, + { + "epoch": 0.39167279233297736, + "grad_norm": 43.067962646484375, + "learning_rate": 6.668863155702955e-08, + "logits/chosen": -0.22113993763923645, + "logits/rejected": -0.336910605430603, + "logps/chosen": -177.03387451171875, + "logps/rejected": -243.845458984375, + "loss": 1.185, + "nll_loss": 0.9910527467727661, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.8803253173828125, + "rewards/margins": 4.664963722229004, + "rewards/rejected": 1.2153613567352295, + "step": 7060 + }, + { + "epoch": 0.392227569659228, + "grad_norm": 40.56425476074219, + "learning_rate": 6.660645829911793e-08, + "logits/chosen": -0.08752115815877914, + "logits/rejected": -0.25853779911994934, + "logps/chosen": -148.84317016601562, + "logps/rejected": -215.58499145507812, + "loss": 1.3292, + "nll_loss": 0.9114246368408203, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.128087997436523, + "rewards/margins": 3.934530258178711, + "rewards/rejected": 1.1935579776763916, + "step": 7070 + }, + { + "epoch": 0.3927823469854787, + "grad_norm": 66.71498107910156, + "learning_rate": 6.652423459527923e-08, + "logits/chosen": -0.21504049003124237, + "logits/rejected": -0.3345591723918915, + "logps/chosen": -161.94309997558594, + "logps/rejected": -222.70516967773438, + "loss": 1.2319, + "nll_loss": 0.9897100329399109, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.446173191070557, + "rewards/margins": 4.251335620880127, + "rewards/rejected": 1.1948375701904297, + "step": 7080 + }, + { + "epoch": 0.3933371243117294, + "grad_norm": 60.648468017578125, + "learning_rate": 6.644196069528676e-08, + "logits/chosen": -0.3080524802207947, + "logits/rejected": -0.46810024976730347, + "logps/chosen": -156.82632446289062, + "logps/rejected": -215.0540771484375, + "loss": 1.2722, + "nll_loss": 1.0197008848190308, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.787126541137695, + "rewards/margins": 3.726269483566284, + "rewards/rejected": 2.0608572959899902, + "step": 7090 + }, + { + "epoch": 0.39389190163798005, + "grad_norm": 29.4898624420166, + "learning_rate": 6.635963684906644e-08, + "logits/chosen": -0.2957158088684082, + "logits/rejected": -0.4307606816291809, + "logps/chosen": -181.65115356445312, + "logps/rejected": -263.91302490234375, + "loss": 1.2249, + "nll_loss": 1.0066194534301758, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.1734938621521, + "rewards/margins": 4.535821914672852, + "rewards/rejected": 1.6376720666885376, + "step": 7100 + }, + { + "epoch": 0.39444667896423075, + "grad_norm": 86.3984603881836, + "learning_rate": 6.62772633066958e-08, + "logits/chosen": -0.3266414403915405, + "logits/rejected": -0.47777050733566284, + "logps/chosen": -178.56004333496094, + "logps/rejected": -253.69216918945312, + "loss": 1.3158, + "nll_loss": 1.0822752714157104, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.967124938964844, + "rewards/margins": 4.293940544128418, + "rewards/rejected": 1.6731847524642944, + "step": 7110 + }, + { + "epoch": 0.3950014562904814, + "grad_norm": 45.96630096435547, + "learning_rate": 6.619484031840338e-08, + "logits/chosen": -0.31532272696495056, + "logits/rejected": -0.3624531924724579, + "logps/chosen": -205.3643035888672, + "logps/rejected": -238.4527130126953, + "loss": 1.2695, + "nll_loss": 1.106184720993042, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.021925926208496, + "rewards/margins": 3.5777347087860107, + "rewards/rejected": 2.4441912174224854, + "step": 7120 + }, + { + "epoch": 0.3955562336167321, + "grad_norm": 159.8441925048828, + "learning_rate": 6.611236813456791e-08, + "logits/chosen": -0.20600607991218567, + "logits/rejected": -0.3467629849910736, + "logps/chosen": -138.72528076171875, + "logps/rejected": -222.2270965576172, + "loss": 1.3248, + "nll_loss": 0.9142557382583618, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.90877628326416, + "rewards/margins": 2.3921499252319336, + "rewards/rejected": 2.5166258811950684, + "step": 7130 + }, + { + "epoch": 0.39611101094298273, + "grad_norm": 19.28346824645996, + "learning_rate": 6.602984700571758e-08, + "logits/chosen": -0.4229269027709961, + "logits/rejected": -0.5715084671974182, + "logps/chosen": -167.72642517089844, + "logps/rejected": -256.966796875, + "loss": 1.2952, + "nll_loss": 1.0315793752670288, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.049108505249023, + "rewards/margins": 3.8893866539001465, + "rewards/rejected": 2.1597213745117188, + "step": 7140 + }, + { + "epoch": 0.39666578826923343, + "grad_norm": 52.33828353881836, + "learning_rate": 6.594727718252925e-08, + "logits/chosen": -0.34071478247642517, + "logits/rejected": -0.4567103981971741, + "logps/chosen": -173.2262420654297, + "logps/rejected": -233.28775024414062, + "loss": 1.2967, + "nll_loss": 0.9634785652160645, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.447007179260254, + "rewards/margins": 3.2220001220703125, + "rewards/rejected": 2.2250072956085205, + "step": 7150 + }, + { + "epoch": 0.39722056559548413, + "grad_norm": 48.68062210083008, + "learning_rate": 6.586465891582768e-08, + "logits/chosen": -0.34071049094200134, + "logits/rejected": -0.45068711042404175, + "logps/chosen": -160.2462921142578, + "logps/rejected": -223.63253784179688, + "loss": 1.3201, + "nll_loss": 1.000896692276001, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.550699710845947, + "rewards/margins": 3.277545213699341, + "rewards/rejected": 2.2731540203094482, + "step": 7160 + }, + { + "epoch": 0.3977753429217348, + "grad_norm": 49.71293258666992, + "learning_rate": 6.578199245658486e-08, + "logits/chosen": -0.23354479670524597, + "logits/rejected": -0.3580577075481415, + "logps/chosen": -170.16897583007812, + "logps/rejected": -197.76730346679688, + "loss": 1.3182, + "nll_loss": 0.9331790804862976, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.230744361877441, + "rewards/margins": 2.730969190597534, + "rewards/rejected": 2.49977445602417, + "step": 7170 + }, + { + "epoch": 0.3983301202479855, + "grad_norm": 53.93007278442383, + "learning_rate": 6.569927805591908e-08, + "logits/chosen": -0.24669504165649414, + "logits/rejected": -0.37552201747894287, + "logps/chosen": -188.89300537109375, + "logps/rejected": -267.8440856933594, + "loss": 1.1627, + "nll_loss": 0.9904123544692993, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.9379072189331055, + "rewards/margins": 4.1582183837890625, + "rewards/rejected": 1.779689073562622, + "step": 7180 + }, + { + "epoch": 0.3988848975742361, + "grad_norm": 209.7054443359375, + "learning_rate": 6.561651596509432e-08, + "logits/chosen": -0.2623883783817291, + "logits/rejected": -0.4429514408111572, + "logps/chosen": -176.68624877929688, + "logps/rejected": -223.68154907226562, + "loss": 1.2484, + "nll_loss": 0.9244028925895691, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.663640022277832, + "rewards/margins": 3.5748519897460938, + "rewards/rejected": 2.08878755569458, + "step": 7190 + }, + { + "epoch": 0.3994396749004868, + "grad_norm": 38.98826217651367, + "learning_rate": 6.553370643551945e-08, + "logits/chosen": -0.2990649342536926, + "logits/rejected": -0.4146324694156647, + "logps/chosen": -183.01441955566406, + "logps/rejected": -265.43853759765625, + "loss": 1.3292, + "nll_loss": 0.9667603373527527, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.6139750480651855, + "rewards/margins": 3.454200029373169, + "rewards/rejected": 2.1597747802734375, + "step": 7200 + }, + { + "epoch": 0.3999944522267375, + "grad_norm": 54.74693298339844, + "learning_rate": 6.545084971874738e-08, + "logits/chosen": -0.25584933161735535, + "logits/rejected": -0.3960145115852356, + "logps/chosen": -148.88522338867188, + "logps/rejected": -198.43917846679688, + "loss": 1.2716, + "nll_loss": 0.9009321331977844, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.381265163421631, + "rewards/margins": 3.4116127490997314, + "rewards/rejected": 1.9696524143218994, + "step": 7210 + }, + { + "epoch": 0.40054922955298816, + "grad_norm": 58.379478454589844, + "learning_rate": 6.53679460664744e-08, + "logits/chosen": -0.34522515535354614, + "logits/rejected": -0.45888328552246094, + "logps/chosen": -163.30357360839844, + "logps/rejected": -226.4535369873047, + "loss": 1.2642, + "nll_loss": 1.0160595178604126, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.892518043518066, + "rewards/margins": 3.9560694694519043, + "rewards/rejected": 1.9364478588104248, + "step": 7220 + }, + { + "epoch": 0.40110400687923886, + "grad_norm": 89.80985260009766, + "learning_rate": 6.528499573053938e-08, + "logits/chosen": -0.30434325337409973, + "logits/rejected": -0.48230838775634766, + "logps/chosen": -167.24014282226562, + "logps/rejected": -245.2574005126953, + "loss": 1.308, + "nll_loss": 0.9712599515914917, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.582326412200928, + "rewards/margins": 4.144760608673096, + "rewards/rejected": 1.4375665187835693, + "step": 7230 + }, + { + "epoch": 0.4016587842054895, + "grad_norm": 55.74021911621094, + "learning_rate": 6.520199896292299e-08, + "logits/chosen": -0.32192081212997437, + "logits/rejected": -0.48280245065689087, + "logps/chosen": -189.1600341796875, + "logps/rejected": -241.54629516601562, + "loss": 1.3141, + "nll_loss": 0.9787147641181946, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.8414106369018555, + "rewards/margins": 4.506801128387451, + "rewards/rejected": 1.3346093893051147, + "step": 7240 + }, + { + "epoch": 0.4022135615317402, + "grad_norm": 57.21150207519531, + "learning_rate": 6.511895601574698e-08, + "logits/chosen": -0.3998798727989197, + "logits/rejected": -0.5131269693374634, + "logps/chosen": -195.90878295898438, + "logps/rejected": -243.5328369140625, + "loss": 1.2844, + "nll_loss": 1.0310008525848389, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.184939384460449, + "rewards/margins": 3.873645305633545, + "rewards/rejected": 2.3112943172454834, + "step": 7250 + }, + { + "epoch": 0.40276833885799085, + "grad_norm": 56.23503875732422, + "learning_rate": 6.503586714127331e-08, + "logits/chosen": -0.039901845157146454, + "logits/rejected": -0.25853031873703003, + "logps/chosen": -115.4613037109375, + "logps/rejected": -147.33761596679688, + "loss": 1.3694, + "nll_loss": 0.7300417423248291, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 3.834766387939453, + "rewards/margins": 2.127188205718994, + "rewards/rejected": 1.7075786590576172, + "step": 7260 + }, + { + "epoch": 0.40332311618424155, + "grad_norm": 109.07487487792969, + "learning_rate": 6.495273259190355e-08, + "logits/chosen": -0.3013862371444702, + "logits/rejected": -0.4309801161289215, + "logps/chosen": -156.0321502685547, + "logps/rejected": -198.90753173828125, + "loss": 1.4112, + "nll_loss": 1.0063666105270386, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.712536811828613, + "rewards/margins": 2.2817347049713135, + "rewards/rejected": 2.4308016300201416, + "step": 7270 + }, + { + "epoch": 0.40387789351049225, + "grad_norm": 54.24409103393555, + "learning_rate": 6.486955262017794e-08, + "logits/chosen": -0.346465528011322, + "logits/rejected": -0.39551910758018494, + "logps/chosen": -209.0965118408203, + "logps/rejected": -232.0498809814453, + "loss": 1.317, + "nll_loss": 1.1299588680267334, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.459007263183594, + "rewards/margins": 3.1379857063293457, + "rewards/rejected": 3.321021556854248, + "step": 7280 + }, + { + "epoch": 0.4044326708367429, + "grad_norm": 87.00990295410156, + "learning_rate": 6.478632747877472e-08, + "logits/chosen": -0.29052549600601196, + "logits/rejected": -0.43641576170921326, + "logps/chosen": -188.79385375976562, + "logps/rejected": -254.0786590576172, + "loss": 1.2827, + "nll_loss": 0.95270836353302, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.114146709442139, + "rewards/margins": 3.6468193531036377, + "rewards/rejected": 2.467327356338501, + "step": 7290 + }, + { + "epoch": 0.4049874481629936, + "grad_norm": 89.29615020751953, + "learning_rate": 6.470305742050936e-08, + "logits/chosen": -0.2050275355577469, + "logits/rejected": -0.3741667866706848, + "logps/chosen": -141.3899688720703, + "logps/rejected": -197.33541870117188, + "loss": 1.2951, + "nll_loss": 0.8298746943473816, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.978948593139648, + "rewards/margins": 2.2912750244140625, + "rewards/rejected": 2.687673568725586, + "step": 7300 + }, + { + "epoch": 0.40554222548924423, + "grad_norm": 63.30764389038086, + "learning_rate": 6.461974269833378e-08, + "logits/chosen": -0.22644257545471191, + "logits/rejected": -0.36334604024887085, + "logps/chosen": -170.58517456054688, + "logps/rejected": -225.7571258544922, + "loss": 1.3345, + "nll_loss": 0.973869800567627, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.511404991149902, + "rewards/margins": 3.0940215587615967, + "rewards/rejected": 2.417384386062622, + "step": 7310 + }, + { + "epoch": 0.40609700281549493, + "grad_norm": 74.49880981445312, + "learning_rate": 6.453638356533555e-08, + "logits/chosen": -0.32636964321136475, + "logits/rejected": -0.4954894185066223, + "logps/chosen": -161.60977172851562, + "logps/rejected": -225.15234375, + "loss": 1.295, + "nll_loss": 0.973361611366272, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.697045803070068, + "rewards/margins": 3.5218639373779297, + "rewards/rejected": 2.1751818656921387, + "step": 7320 + }, + { + "epoch": 0.40665178014174563, + "grad_norm": 62.1312370300293, + "learning_rate": 6.445298027473716e-08, + "logits/chosen": -0.3880705237388611, + "logits/rejected": -0.5019701719284058, + "logps/chosen": -182.28811645507812, + "logps/rejected": -240.3295440673828, + "loss": 1.249, + "nll_loss": 1.040808081626892, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.146364688873291, + "rewards/margins": 3.690261125564575, + "rewards/rejected": 2.456103563308716, + "step": 7330 + }, + { + "epoch": 0.4072065574679963, + "grad_norm": 42.314449310302734, + "learning_rate": 6.436953307989523e-08, + "logits/chosen": -0.28808385133743286, + "logits/rejected": -0.4283718466758728, + "logps/chosen": -176.02244567871094, + "logps/rejected": -221.84640502929688, + "loss": 1.266, + "nll_loss": 0.9531732797622681, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.6160783767700195, + "rewards/margins": 3.110626697540283, + "rewards/rejected": 2.5054516792297363, + "step": 7340 + }, + { + "epoch": 0.407761334794247, + "grad_norm": 77.77169036865234, + "learning_rate": 6.428604223429979e-08, + "logits/chosen": -0.3338751196861267, + "logits/rejected": -0.520000696182251, + "logps/chosen": -147.56625366210938, + "logps/rejected": -203.83847045898438, + "loss": 1.2764, + "nll_loss": 0.9144219160079956, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.272387981414795, + "rewards/margins": 3.1382250785827637, + "rewards/rejected": 2.134162664413452, + "step": 7350 + }, + { + "epoch": 0.4083161121204976, + "grad_norm": 51.88246536254883, + "learning_rate": 6.420250799157342e-08, + "logits/chosen": -0.3343006670475006, + "logits/rejected": -0.4886436462402344, + "logps/chosen": -129.22486877441406, + "logps/rejected": -158.54843139648438, + "loss": 1.3176, + "nll_loss": 0.842542290687561, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.768006801605225, + "rewards/margins": 2.8421809673309326, + "rewards/rejected": 1.9258254766464233, + "step": 7360 + }, + { + "epoch": 0.4088708894467483, + "grad_norm": 65.615966796875, + "learning_rate": 6.411893060547055e-08, + "logits/chosen": -0.10722409188747406, + "logits/rejected": -0.30478712916374207, + "logps/chosen": -141.62173461914062, + "logps/rejected": -174.9485321044922, + "loss": 1.2427, + "nll_loss": 0.8185256719589233, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.766512870788574, + "rewards/margins": 2.7977631092071533, + "rewards/rejected": 1.968750238418579, + "step": 7370 + }, + { + "epoch": 0.40942566677299896, + "grad_norm": 45.00594711303711, + "learning_rate": 6.403531032987667e-08, + "logits/chosen": -0.38103166222572327, + "logits/rejected": -0.5468229055404663, + "logps/chosen": -189.6179656982422, + "logps/rejected": -287.31390380859375, + "loss": 1.3187, + "nll_loss": 0.9494185447692871, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.6198248863220215, + "rewards/margins": 5.335195541381836, + "rewards/rejected": 1.2846286296844482, + "step": 7380 + }, + { + "epoch": 0.40998044409924966, + "grad_norm": 54.82295227050781, + "learning_rate": 6.395164741880753e-08, + "logits/chosen": -0.3098643720149994, + "logits/rejected": -0.4438135027885437, + "logps/chosen": -164.07351684570312, + "logps/rejected": -229.0188446044922, + "loss": 1.3101, + "nll_loss": 0.9140411615371704, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.3129096031188965, + "rewards/margins": 3.516838788986206, + "rewards/rejected": 1.7960714101791382, + "step": 7390 + }, + { + "epoch": 0.41053522142550036, + "grad_norm": 40.2969856262207, + "learning_rate": 6.386794212640845e-08, + "logits/chosen": -0.2711586356163025, + "logits/rejected": -0.4007849097251892, + "logps/chosen": -150.17539978027344, + "logps/rejected": -182.2269744873047, + "loss": 1.1839, + "nll_loss": 0.8594070672988892, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.0169172286987305, + "rewards/margins": 3.0875864028930664, + "rewards/rejected": 1.929330587387085, + "step": 7400 + }, + { + "epoch": 0.411089998751751, + "grad_norm": 54.541927337646484, + "learning_rate": 6.378419470695342e-08, + "logits/chosen": -0.43582743406295776, + "logits/rejected": -0.6034985184669495, + "logps/chosen": -183.70852661132812, + "logps/rejected": -258.99169921875, + "loss": 1.2531, + "nll_loss": 1.0488944053649902, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.260799407958984, + "rewards/margins": 4.284533500671387, + "rewards/rejected": 1.9762649536132812, + "step": 7410 + }, + { + "epoch": 0.4116447760780017, + "grad_norm": 51.705360412597656, + "learning_rate": 6.370040541484449e-08, + "logits/chosen": -0.24617047607898712, + "logits/rejected": -0.4318224787712097, + "logps/chosen": -178.9082489013672, + "logps/rejected": -240.6511993408203, + "loss": 1.2556, + "nll_loss": 0.9786630868911743, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.324519157409668, + "rewards/margins": 3.5243849754333496, + "rewards/rejected": 1.8001340627670288, + "step": 7420 + }, + { + "epoch": 0.41219955340425235, + "grad_norm": 40.38996505737305, + "learning_rate": 6.361657450461084e-08, + "logits/chosen": -0.2616347670555115, + "logits/rejected": -0.4238702654838562, + "logps/chosen": -124.56805419921875, + "logps/rejected": -181.3067169189453, + "loss": 1.2497, + "nll_loss": 0.8138860464096069, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.362429141998291, + "rewards/margins": 3.0529863834381104, + "rewards/rejected": 2.3094425201416016, + "step": 7430 + }, + { + "epoch": 0.41275433073050305, + "grad_norm": 44.85422134399414, + "learning_rate": 6.353270223090806e-08, + "logits/chosen": -0.32470908761024475, + "logits/rejected": -0.41462892293930054, + "logps/chosen": -198.8808135986328, + "logps/rejected": -252.1754608154297, + "loss": 1.294, + "nll_loss": 1.0325891971588135, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 6.107321262359619, + "rewards/margins": 3.250342845916748, + "rewards/rejected": 2.856978416442871, + "step": 7440 + }, + { + "epoch": 0.4133091080567537, + "grad_norm": 33.28203582763672, + "learning_rate": 6.344878884851746e-08, + "logits/chosen": -0.42162925004959106, + "logits/rejected": -0.5931268930435181, + "logps/chosen": -183.57373046875, + "logps/rejected": -248.9408416748047, + "loss": 1.3005, + "nll_loss": 1.037781000137329, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.289162635803223, + "rewards/margins": 5.122281074523926, + "rewards/rejected": 1.1668803691864014, + "step": 7450 + }, + { + "epoch": 0.4138638853830044, + "grad_norm": 75.61338806152344, + "learning_rate": 6.33648346123452e-08, + "logits/chosen": -0.3089607357978821, + "logits/rejected": -0.4007699489593506, + "logps/chosen": -183.18475341796875, + "logps/rejected": -222.86441040039062, + "loss": 1.2974, + "nll_loss": 1.0575367212295532, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.529407978057861, + "rewards/margins": 2.9048867225646973, + "rewards/rejected": 2.624520778656006, + "step": 7460 + }, + { + "epoch": 0.4144186627092551, + "grad_norm": 41.54254913330078, + "learning_rate": 6.32808397774215e-08, + "logits/chosen": -0.37035509943962097, + "logits/rejected": -0.54868483543396, + "logps/chosen": -169.4999542236328, + "logps/rejected": -246.3911590576172, + "loss": 1.2784, + "nll_loss": 1.0069705247879028, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.76138973236084, + "rewards/margins": 4.051873207092285, + "rewards/rejected": 1.7095155715942383, + "step": 7470 + }, + { + "epoch": 0.41497344003550574, + "grad_norm": 45.810035705566406, + "learning_rate": 6.319680459889995e-08, + "logits/chosen": -0.17138861119747162, + "logits/rejected": -0.3850787281990051, + "logps/chosen": -146.49534606933594, + "logps/rejected": -208.6310577392578, + "loss": 1.1755, + "nll_loss": 0.8154012560844421, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.7701897621154785, + "rewards/margins": 4.011888027191162, + "rewards/rejected": 0.7583020329475403, + "step": 7480 + }, + { + "epoch": 0.41552821736175644, + "grad_norm": 76.499267578125, + "learning_rate": 6.311272933205672e-08, + "logits/chosen": -0.31379497051239014, + "logits/rejected": -0.3845873475074768, + "logps/chosen": -177.3786163330078, + "logps/rejected": -214.22543334960938, + "loss": 1.3975, + "nll_loss": 1.2314379215240479, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.535956859588623, + "rewards/margins": 3.367109775543213, + "rewards/rejected": 2.1688480377197266, + "step": 7490 + }, + { + "epoch": 0.4160829946880071, + "grad_norm": 63.57807922363281, + "learning_rate": 6.302861423228967e-08, + "logits/chosen": -0.4402855932712555, + "logits/rejected": -0.5427097678184509, + "logps/chosen": -202.45889282226562, + "logps/rejected": -282.24188232421875, + "loss": 1.2701, + "nll_loss": 1.1113221645355225, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.235448360443115, + "rewards/margins": 3.985610246658325, + "rewards/rejected": 2.24983811378479, + "step": 7500 + }, + { + "epoch": 0.4160829946880071, + "eval_logits/chosen": -0.398401141166687, + "eval_logits/rejected": -0.49995607137680054, + "eval_logps/chosen": -193.20004272460938, + "eval_logps/rejected": -259.1756591796875, + "eval_loss": 1.2661241292953491, + "eval_nll_loss": 1.002261757850647, + "eval_rewards/accuracies": 0.9375, + "eval_rewards/chosen": 6.4761176109313965, + "eval_rewards/margins": 4.485833644866943, + "eval_rewards/rejected": 1.9902844429016113, + "eval_runtime": 17.0997, + "eval_samples_per_second": 14.971, + "eval_steps_per_second": 1.871, + "step": 7500 + }, + { + "epoch": 0.4166377720142578, + "grad_norm": 34.99973678588867, + "learning_rate": 6.294445955511774e-08, + "logits/chosen": -0.29553765058517456, + "logits/rejected": -0.4380635619163513, + "logps/chosen": -183.63485717773438, + "logps/rejected": -227.50021362304688, + "loss": 1.3205, + "nll_loss": 1.03634512424469, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.569860458374023, + "rewards/margins": 3.396176815032959, + "rewards/rejected": 2.173682689666748, + "step": 7510 + }, + { + "epoch": 0.4171925493405085, + "grad_norm": 48.45505905151367, + "learning_rate": 6.286026555618009e-08, + "logits/chosen": -0.3055883049964905, + "logits/rejected": -0.469032347202301, + "logps/chosen": -161.53659057617188, + "logps/rejected": -226.0988311767578, + "loss": 1.2877, + "nll_loss": 0.8650112152099609, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.755152225494385, + "rewards/margins": 4.042037010192871, + "rewards/rejected": 1.7131156921386719, + "step": 7520 + }, + { + "epoch": 0.4177473266667591, + "grad_norm": 55.78778076171875, + "learning_rate": 6.277603249123526e-08, + "logits/chosen": -0.36147961020469666, + "logits/rejected": -0.4718469977378845, + "logps/chosen": -143.3127899169922, + "logps/rejected": -181.94027709960938, + "loss": 1.2009, + "nll_loss": 0.9830204248428345, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.296572685241699, + "rewards/margins": 2.2858357429504395, + "rewards/rejected": 3.0107367038726807, + "step": 7530 + }, + { + "epoch": 0.4183021039930098, + "grad_norm": 80.88349151611328, + "learning_rate": 6.269176061616056e-08, + "logits/chosen": -0.14564813673496246, + "logits/rejected": -0.3274744749069214, + "logps/chosen": -134.18099975585938, + "logps/rejected": -197.35910034179688, + "loss": 1.3682, + "nll_loss": 0.7621486186981201, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.868192195892334, + "rewards/margins": 3.5857315063476562, + "rewards/rejected": 1.2824609279632568, + "step": 7540 + }, + { + "epoch": 0.41885688131926047, + "grad_norm": 51.41616439819336, + "learning_rate": 6.260745018695112e-08, + "logits/chosen": -0.36704492568969727, + "logits/rejected": -0.47699612379074097, + "logps/chosen": -164.365478515625, + "logps/rejected": -218.2649688720703, + "loss": 1.2076, + "nll_loss": 0.9271243810653687, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.707385540008545, + "rewards/margins": 3.966140031814575, + "rewards/rejected": 1.7412458658218384, + "step": 7550 + }, + { + "epoch": 0.41941165864551117, + "grad_norm": 49.04743576049805, + "learning_rate": 6.25231014597192e-08, + "logits/chosen": -0.3328538239002228, + "logits/rejected": -0.46134382486343384, + "logps/chosen": -156.789306640625, + "logps/rejected": -211.8652801513672, + "loss": 1.2794, + "nll_loss": 0.9207913279533386, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.7461256980896, + "rewards/margins": 3.3402061462402344, + "rewards/rejected": 2.4059205055236816, + "step": 7560 + }, + { + "epoch": 0.4199664359717618, + "grad_norm": 83.76142883300781, + "learning_rate": 6.243871469069344e-08, + "logits/chosen": -0.3034781813621521, + "logits/rejected": -0.49480685591697693, + "logps/chosen": -167.407470703125, + "logps/rejected": -225.42703247070312, + "loss": 1.2352, + "nll_loss": 0.9763292074203491, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.7099928855896, + "rewards/margins": 3.6460483074188232, + "rewards/rejected": 2.063944101333618, + "step": 7570 + }, + { + "epoch": 0.4205212132980125, + "grad_norm": 78.60682678222656, + "learning_rate": 6.235429013621798e-08, + "logits/chosen": -0.23759326338768005, + "logits/rejected": -0.4189354479312897, + "logps/chosen": -144.0743408203125, + "logps/rejected": -215.2904815673828, + "loss": 1.2802, + "nll_loss": 0.8422958254814148, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.4411234855651855, + "rewards/margins": 3.513167142868042, + "rewards/rejected": 1.927955985069275, + "step": 7580 + }, + { + "epoch": 0.4210759906242632, + "grad_norm": 56.77098083496094, + "learning_rate": 6.226982805275181e-08, + "logits/chosen": -0.32095006108283997, + "logits/rejected": -0.548249363899231, + "logps/chosen": -151.5083465576172, + "logps/rejected": -217.2533416748047, + "loss": 1.3749, + "nll_loss": 0.8653049468994141, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.12197732925415, + "rewards/margins": 3.2202446460723877, + "rewards/rejected": 1.9017328023910522, + "step": 7590 + }, + { + "epoch": 0.42163076795051385, + "grad_norm": 49.3482780456543, + "learning_rate": 6.218532869686786e-08, + "logits/chosen": -0.22720670700073242, + "logits/rejected": -0.4425369203090668, + "logps/chosen": -140.2606964111328, + "logps/rejected": -214.157470703125, + "loss": 1.2169, + "nll_loss": 0.7681783437728882, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.133579730987549, + "rewards/margins": 4.256910800933838, + "rewards/rejected": 0.8766688108444214, + "step": 7600 + }, + { + "epoch": 0.42218554527676455, + "grad_norm": 82.47681427001953, + "learning_rate": 6.210079232525232e-08, + "logits/chosen": -0.346763551235199, + "logits/rejected": -0.4864253103733063, + "logps/chosen": -176.0386962890625, + "logps/rejected": -258.1570129394531, + "loss": 1.2363, + "nll_loss": 0.9986612200737, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.702577590942383, + "rewards/margins": 3.368680477142334, + "rewards/rejected": 2.333897352218628, + "step": 7610 + }, + { + "epoch": 0.4227403226030152, + "grad_norm": 63.12968826293945, + "learning_rate": 6.201621919470382e-08, + "logits/chosen": -0.2454340159893036, + "logits/rejected": -0.38405701518058777, + "logps/chosen": -192.60916137695312, + "logps/rejected": -259.3959045410156, + "loss": 1.3147, + "nll_loss": 0.9730021357536316, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.158176422119141, + "rewards/margins": 4.365778923034668, + "rewards/rejected": 1.792396903038025, + "step": 7620 + }, + { + "epoch": 0.4232950999292659, + "grad_norm": 90.59529876708984, + "learning_rate": 6.193160956213261e-08, + "logits/chosen": -0.16950708627700806, + "logits/rejected": -0.4081265926361084, + "logps/chosen": -146.45358276367188, + "logps/rejected": -197.4040069580078, + "loss": 1.2613, + "nll_loss": 0.8664730787277222, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.412256240844727, + "rewards/margins": 3.448835849761963, + "rewards/rejected": 1.9634202718734741, + "step": 7630 + }, + { + "epoch": 0.4238498772555166, + "grad_norm": 41.387332916259766, + "learning_rate": 6.184696368455991e-08, + "logits/chosen": -0.3791458308696747, + "logits/rejected": -0.5487962961196899, + "logps/chosen": -178.0819549560547, + "logps/rejected": -246.73733520507812, + "loss": 1.2968, + "nll_loss": 0.9921107292175293, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.117079734802246, + "rewards/margins": 4.516340255737305, + "rewards/rejected": 1.6007391214370728, + "step": 7640 + }, + { + "epoch": 0.42440465458176724, + "grad_norm": 67.04618835449219, + "learning_rate": 6.176228181911699e-08, + "logits/chosen": -0.37947243452072144, + "logits/rejected": -0.49038830399513245, + "logps/chosen": -164.1470947265625, + "logps/rejected": -222.6766357421875, + "loss": 1.3031, + "nll_loss": 1.011852502822876, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.655206680297852, + "rewards/margins": 3.4010486602783203, + "rewards/rejected": 2.2541584968566895, + "step": 7650 + }, + { + "epoch": 0.42495943190801794, + "grad_norm": 80.08260345458984, + "learning_rate": 6.167756422304439e-08, + "logits/chosen": -0.21255847811698914, + "logits/rejected": -0.47834545373916626, + "logps/chosen": -147.35255432128906, + "logps/rejected": -204.34854125976562, + "loss": 1.2285, + "nll_loss": 0.8217668533325195, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.1367998123168945, + "rewards/margins": 3.8483917713165283, + "rewards/rejected": 1.288407564163208, + "step": 7660 + }, + { + "epoch": 0.4255142092342686, + "grad_norm": 72.43340301513672, + "learning_rate": 6.159281115369131e-08, + "logits/chosen": -0.34400323033332825, + "logits/rejected": -0.49123507738113403, + "logps/chosen": -186.2705078125, + "logps/rejected": -230.4435577392578, + "loss": 1.3026, + "nll_loss": 1.2007331848144531, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.783455848693848, + "rewards/margins": 3.2348504066467285, + "rewards/rejected": 2.5486056804656982, + "step": 7670 + }, + { + "epoch": 0.4260689865605193, + "grad_norm": 63.78506088256836, + "learning_rate": 6.150802286851461e-08, + "logits/chosen": -0.40512070059776306, + "logits/rejected": -0.5488404631614685, + "logps/chosen": -159.0139923095703, + "logps/rejected": -209.39480590820312, + "loss": 1.3335, + "nll_loss": 0.9720331430435181, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.443997383117676, + "rewards/margins": 3.3262696266174316, + "rewards/rejected": 2.117727279663086, + "step": 7680 + }, + { + "epoch": 0.4266237638867699, + "grad_norm": 102.8187484741211, + "learning_rate": 6.142319962507817e-08, + "logits/chosen": -0.3015509247779846, + "logits/rejected": -0.4275835156440735, + "logps/chosen": -171.422607421875, + "logps/rejected": -211.2342529296875, + "loss": 1.3355, + "nll_loss": 0.940872848033905, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.379124641418457, + "rewards/margins": 3.149885416030884, + "rewards/rejected": 2.2292397022247314, + "step": 7690 + }, + { + "epoch": 0.4271785412130206, + "grad_norm": 106.95166778564453, + "learning_rate": 6.133834168105205e-08, + "logits/chosen": -0.29108524322509766, + "logits/rejected": -0.5013249516487122, + "logps/chosen": -143.29544067382812, + "logps/rejected": -204.5666961669922, + "loss": 1.3016, + "nll_loss": 0.8779792785644531, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.290759086608887, + "rewards/margins": 3.5017638206481934, + "rewards/rejected": 1.7889950275421143, + "step": 7700 + }, + { + "epoch": 0.4277333185392713, + "grad_norm": 68.40714263916016, + "learning_rate": 6.125344929421172e-08, + "logits/chosen": -0.305257648229599, + "logits/rejected": -0.3654315769672394, + "logps/chosen": -197.51206970214844, + "logps/rejected": -251.21578979492188, + "loss": 1.2782, + "nll_loss": 1.050001859664917, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.111239910125732, + "rewards/margins": 3.144008159637451, + "rewards/rejected": 2.967231512069702, + "step": 7710 + }, + { + "epoch": 0.42828809586552197, + "grad_norm": 87.34870910644531, + "learning_rate": 6.116852272243728e-08, + "logits/chosen": -0.242166668176651, + "logits/rejected": -0.48238492012023926, + "logps/chosen": -162.7891082763672, + "logps/rejected": -229.2385711669922, + "loss": 1.3349, + "nll_loss": 0.8462405204772949, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.6684393882751465, + "rewards/margins": 3.6242847442626953, + "rewards/rejected": 2.044154405593872, + "step": 7720 + }, + { + "epoch": 0.42884287319177267, + "grad_norm": 76.2271499633789, + "learning_rate": 6.108356222371268e-08, + "logits/chosen": -0.12172901630401611, + "logits/rejected": -0.3345574736595154, + "logps/chosen": -122.95631408691406, + "logps/rejected": -185.2032470703125, + "loss": 1.3135, + "nll_loss": 0.8006790280342102, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.69210147857666, + "rewards/margins": 2.9726243019104004, + "rewards/rejected": 1.7194769382476807, + "step": 7730 + }, + { + "epoch": 0.4293976505180233, + "grad_norm": 69.55945587158203, + "learning_rate": 6.099856805612493e-08, + "logits/chosen": -0.25045618414878845, + "logits/rejected": -0.4043782651424408, + "logps/chosen": -142.79592895507812, + "logps/rejected": -229.7549285888672, + "loss": 1.2622, + "nll_loss": 0.8604512214660645, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.3889946937561035, + "rewards/margins": 4.070669651031494, + "rewards/rejected": 1.3183256387710571, + "step": 7740 + }, + { + "epoch": 0.429952427844274, + "grad_norm": 77.58515930175781, + "learning_rate": 6.091354047786332e-08, + "logits/chosen": -0.31509530544281006, + "logits/rejected": -0.42753204703330994, + "logps/chosen": -145.33651733398438, + "logps/rejected": -180.94540405273438, + "loss": 1.2589, + "nll_loss": 0.9200709462165833, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.249917030334473, + "rewards/margins": 3.6292223930358887, + "rewards/rejected": 1.6206945180892944, + "step": 7750 + }, + { + "epoch": 0.43050720517052465, + "grad_norm": 100.63086700439453, + "learning_rate": 6.082847974721861e-08, + "logits/chosen": -0.24173316359519958, + "logits/rejected": -0.40897685289382935, + "logps/chosen": -144.01553344726562, + "logps/rejected": -218.42141723632812, + "loss": 1.2609, + "nll_loss": 0.9144455790519714, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.449666500091553, + "rewards/margins": 3.5731265544891357, + "rewards/rejected": 1.876539945602417, + "step": 7760 + }, + { + "epoch": 0.43106198249677535, + "grad_norm": 48.98227310180664, + "learning_rate": 6.074338612258229e-08, + "logits/chosen": -0.4577174186706543, + "logits/rejected": -0.5763063430786133, + "logps/chosen": -181.20730590820312, + "logps/rejected": -251.55136108398438, + "loss": 1.3029, + "nll_loss": 1.092089056968689, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.994641304016113, + "rewards/margins": 3.4837639331817627, + "rewards/rejected": 2.5108776092529297, + "step": 7770 + }, + { + "epoch": 0.43161675982302605, + "grad_norm": 114.86874389648438, + "learning_rate": 6.065825986244578e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -161.11328125, + "logps/rejected": -193.9613037109375, + "loss": 1.272, + "nll_loss": NaN, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.616585731506348, + "rewards/margins": 3.6385693550109863, + "rewards/rejected": 1.9780166149139404, + "step": 7780 + }, + { + "epoch": 0.4321715371492767, + "grad_norm": 58.877315521240234, + "learning_rate": 6.057310122539963e-08, + "logits/chosen": -0.2762307822704315, + "logits/rejected": -0.420022577047348, + "logps/chosen": -127.3053207397461, + "logps/rejected": -196.6819305419922, + "loss": 1.3587, + "nll_loss": 0.8521549105644226, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.0783371925354, + "rewards/margins": 3.1709678173065186, + "rewards/rejected": 1.9073699712753296, + "step": 7790 + }, + { + "epoch": 0.4327263144755274, + "grad_norm": 61.59865188598633, + "learning_rate": 6.04879104701327e-08, + "logits/chosen": -0.3291458487510681, + "logits/rejected": -0.44707727432250977, + "logps/chosen": -156.1598358154297, + "logps/rejected": -205.094970703125, + "loss": 1.2417, + "nll_loss": 0.9146450161933899, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.510323524475098, + "rewards/margins": 3.3687164783477783, + "rewards/rejected": 2.1416075229644775, + "step": 7800 + }, + { + "epoch": 0.43328109180177804, + "grad_norm": 89.74889373779297, + "learning_rate": 6.04026878554315e-08, + "logits/chosen": -0.3558953106403351, + "logits/rejected": -0.4734135568141937, + "logps/chosen": -167.73020935058594, + "logps/rejected": -225.3911895751953, + "loss": 1.3202, + "nll_loss": 1.0521650314331055, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.655695915222168, + "rewards/margins": 3.121359348297119, + "rewards/rejected": 2.5343360900878906, + "step": 7810 + }, + { + "epoch": 0.43383586912802874, + "grad_norm": 48.8852653503418, + "learning_rate": 6.031743364017922e-08, + "logits/chosen": -0.31145888566970825, + "logits/rejected": -0.3819272518157959, + "logps/chosen": -169.56024169921875, + "logps/rejected": -249.2076416015625, + "loss": 1.2963, + "nll_loss": 0.9819513559341431, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.6514410972595215, + "rewards/margins": 3.171518087387085, + "rewards/rejected": 2.479923725128174, + "step": 7820 + }, + { + "epoch": 0.43439064645427944, + "grad_norm": 40.11418151855469, + "learning_rate": 6.023214808335516e-08, + "logits/chosen": -0.1905912607908249, + "logits/rejected": -0.36486178636550903, + "logps/chosen": -163.63925170898438, + "logps/rejected": -221.80239868164062, + "loss": 1.2658, + "nll_loss": 0.9231510162353516, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.2673845291137695, + "rewards/margins": 3.396528720855713, + "rewards/rejected": 1.8708559274673462, + "step": 7830 + }, + { + "epoch": 0.4349454237805301, + "grad_norm": 81.7651138305664, + "learning_rate": 6.014683144403374e-08, + "logits/chosen": -0.008128717541694641, + "logits/rejected": -0.19771425426006317, + "logps/chosen": -112.61724853515625, + "logps/rejected": -176.70272827148438, + "loss": 1.2552, + "nll_loss": 0.7131599187850952, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.644561767578125, + "rewards/margins": 3.75565767288208, + "rewards/rejected": 0.8889042735099792, + "step": 7840 + }, + { + "epoch": 0.4355002011067808, + "grad_norm": 65.13880920410156, + "learning_rate": 6.006148398138382e-08, + "logits/chosen": -0.3456365466117859, + "logits/rejected": -0.48094815015792847, + "logps/chosen": -161.97412109375, + "logps/rejected": -196.0426025390625, + "loss": 1.3188, + "nll_loss": 0.9527362585067749, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.310242652893066, + "rewards/margins": 3.3656864166259766, + "rewards/rejected": 1.944556474685669, + "step": 7850 + }, + { + "epoch": 0.4360549784330314, + "grad_norm": 49.75828170776367, + "learning_rate": 5.997610595466792e-08, + "logits/chosen": -0.2645300030708313, + "logits/rejected": -0.40959396958351135, + "logps/chosen": -147.76052856445312, + "logps/rejected": -211.4885711669922, + "loss": 1.308, + "nll_loss": 0.8695026636123657, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.175740718841553, + "rewards/margins": 4.212712287902832, + "rewards/rejected": 0.9630285501480103, + "step": 7860 + }, + { + "epoch": 0.4366097557592821, + "grad_norm": 45.04252243041992, + "learning_rate": 5.989069762324135e-08, + "logits/chosen": -0.1510230004787445, + "logits/rejected": -0.3848511874675751, + "logps/chosen": -152.15814208984375, + "logps/rejected": -230.59036254882812, + "loss": 1.3166, + "nll_loss": 0.8440617322921753, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.116968154907227, + "rewards/margins": 4.361637115478516, + "rewards/rejected": 0.7553306818008423, + "step": 7870 + }, + { + "epoch": 0.43716453308553277, + "grad_norm": 59.32204818725586, + "learning_rate": 5.980525924655152e-08, + "logits/chosen": -0.3439778685569763, + "logits/rejected": -0.4573608338832855, + "logps/chosen": -202.69509887695312, + "logps/rejected": -277.2159423828125, + "loss": 1.269, + "nll_loss": 1.026094675064087, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.560296058654785, + "rewards/margins": 4.462855339050293, + "rewards/rejected": 2.097440481185913, + "step": 7880 + }, + { + "epoch": 0.43771931041178347, + "grad_norm": 68.54647064208984, + "learning_rate": 5.971979108413714e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -156.3693389892578, + "logps/rejected": -215.7193603515625, + "loss": 1.3733, + "nll_loss": NaN, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.428963661193848, + "rewards/margins": 3.6827340126037598, + "rewards/rejected": 1.7462295293807983, + "step": 7890 + }, + { + "epoch": 0.43827408773803417, + "grad_norm": 77.02647399902344, + "learning_rate": 5.96342933956273e-08, + "logits/chosen": -0.24855390191078186, + "logits/rejected": -0.3666438162326813, + "logps/chosen": -145.66111755371094, + "logps/rejected": -181.38365173339844, + "loss": 1.2417, + "nll_loss": 1.033527135848999, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.633823871612549, + "rewards/margins": 2.5259013175964355, + "rewards/rejected": 2.1079225540161133, + "step": 7900 + }, + { + "epoch": 0.4388288650642848, + "grad_norm": 145.5952606201172, + "learning_rate": 5.9548766440740906e-08, + "logits/chosen": -0.31071895360946655, + "logits/rejected": -0.4172093868255615, + "logps/chosen": -172.99037170410156, + "logps/rejected": -224.00509643554688, + "loss": 1.3486, + "nll_loss": 0.9945386648178101, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.091534614562988, + "rewards/margins": 4.539047718048096, + "rewards/rejected": 1.5524866580963135, + "step": 7910 + }, + { + "epoch": 0.4393836423905355, + "grad_norm": 45.83077621459961, + "learning_rate": 5.9463210479285674e-08, + "logits/chosen": -0.20147080719470978, + "logits/rejected": -0.36271151900291443, + "logps/chosen": -136.16787719726562, + "logps/rejected": -197.44253540039062, + "loss": 1.2506, + "nll_loss": 0.8147318959236145, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.147400856018066, + "rewards/margins": 3.7860970497131348, + "rewards/rejected": 1.3613044023513794, + "step": 7920 + }, + { + "epoch": 0.43993841971678616, + "grad_norm": 51.59779739379883, + "learning_rate": 5.937762577115747e-08, + "logits/chosen": -0.37672844529151917, + "logits/rejected": -0.4909901022911072, + "logps/chosen": -157.14938354492188, + "logps/rejected": -196.3175048828125, + "loss": 1.2589, + "nll_loss": 0.977447509765625, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.745203971862793, + "rewards/margins": 2.8865718841552734, + "rewards/rejected": 2.8586316108703613, + "step": 7930 + }, + { + "epoch": 0.44049319704303685, + "grad_norm": 83.54605102539062, + "learning_rate": 5.929201257633948e-08, + "logits/chosen": -0.4630278944969177, + "logits/rejected": -0.5463398694992065, + "logps/chosen": -196.7548065185547, + "logps/rejected": -225.6660614013672, + "loss": 1.2924, + "nll_loss": 1.0636637210845947, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.932701587677002, + "rewards/margins": 2.5180087089538574, + "rewards/rejected": 3.4146926403045654, + "step": 7940 + }, + { + "epoch": 0.44104797436928755, + "grad_norm": 47.81173324584961, + "learning_rate": 5.920637115490141e-08, + "logits/chosen": -0.39089900255203247, + "logits/rejected": -0.4534800052642822, + "logps/chosen": -153.1407012939453, + "logps/rejected": -196.2056884765625, + "loss": 1.2349, + "nll_loss": 0.9944251775741577, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.820315361022949, + "rewards/margins": 3.712320327758789, + "rewards/rejected": 2.10799503326416, + "step": 7950 + }, + { + "epoch": 0.4416027516955382, + "grad_norm": 67.46906280517578, + "learning_rate": 5.9120701766998774e-08, + "logits/chosen": -0.21322064101696014, + "logits/rejected": -0.343554824590683, + "logps/chosen": -157.1032257080078, + "logps/rejected": -220.17630004882812, + "loss": 1.1961, + "nll_loss": 0.8462135195732117, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.530757904052734, + "rewards/margins": 3.692348003387451, + "rewards/rejected": 1.8384101390838623, + "step": 7960 + }, + { + "epoch": 0.4421575290217889, + "grad_norm": 33.015899658203125, + "learning_rate": 5.9035004672871936e-08, + "logits/chosen": -0.17605528235435486, + "logits/rejected": -0.31549564003944397, + "logps/chosen": -157.47113037109375, + "logps/rejected": -201.6073455810547, + "loss": 1.2521, + "nll_loss": 0.9050248861312866, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.284222602844238, + "rewards/margins": 2.9695611000061035, + "rewards/rejected": 2.3146615028381348, + "step": 7970 + }, + { + "epoch": 0.44271230634803954, + "grad_norm": 78.65603637695312, + "learning_rate": 5.89492801328455e-08, + "logits/chosen": -0.3612144887447357, + "logits/rejected": -0.4951377511024475, + "logps/chosen": -181.75048828125, + "logps/rejected": -274.47943115234375, + "loss": 1.2657, + "nll_loss": 0.9882364273071289, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.6054205894470215, + "rewards/margins": 4.520341873168945, + "rewards/rejected": 2.0850789546966553, + "step": 7980 + }, + { + "epoch": 0.44326708367429024, + "grad_norm": 58.317527770996094, + "learning_rate": 5.8863528407327456e-08, + "logits/chosen": -0.17070798575878143, + "logits/rejected": -0.3363649845123291, + "logps/chosen": -142.54373168945312, + "logps/rejected": -174.45375061035156, + "loss": 1.2523, + "nll_loss": 0.8363102078437805, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.006918907165527, + "rewards/margins": 3.044194221496582, + "rewards/rejected": 1.9627254009246826, + "step": 7990 + }, + { + "epoch": 0.4438218610005409, + "grad_norm": 45.0575065612793, + "learning_rate": 5.87777497568083e-08, + "logits/chosen": -0.21342894434928894, + "logits/rejected": -0.418354332447052, + "logps/chosen": -115.73429107666016, + "logps/rejected": -180.33444213867188, + "loss": 1.213, + "nll_loss": 0.7383561134338379, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.680453300476074, + "rewards/margins": 3.650794267654419, + "rewards/rejected": 1.0296586751937866, + "step": 8000 + }, + { + "epoch": 0.4438218610005409, + "eval_logits/chosen": -0.38671383261680603, + "eval_logits/rejected": -0.48004651069641113, + "eval_logps/chosen": -192.80557250976562, + "eval_logps/rejected": -259.6051940917969, + "eval_loss": 1.2556079626083374, + "eval_nll_loss": 1.0014536380767822, + "eval_rewards/accuracies": 0.875, + "eval_rewards/chosen": 6.515565395355225, + "eval_rewards/margins": 4.568235397338867, + "eval_rewards/rejected": 1.9473298788070679, + "eval_runtime": 16.7445, + "eval_samples_per_second": 15.289, + "eval_steps_per_second": 1.911, + "step": 8000 + }, + { + "epoch": 0.4443766383267916, + "grad_norm": 47.568199157714844, + "learning_rate": 5.86919444418604e-08, + "logits/chosen": -0.2681363523006439, + "logits/rejected": -0.4715178608894348, + "logps/chosen": -137.9403533935547, + "logps/rejected": -166.81297302246094, + "loss": 1.2904, + "nll_loss": 0.7847102284431458, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.970905303955078, + "rewards/margins": 2.872488498687744, + "rewards/rejected": 2.098417282104492, + "step": 8010 + }, + { + "epoch": 0.4449314156530423, + "grad_norm": 55.3165283203125, + "learning_rate": 5.860611272313706e-08, + "logits/chosen": -0.3643186390399933, + "logits/rejected": -0.48495370149612427, + "logps/chosen": -189.58767700195312, + "logps/rejected": -261.2303161621094, + "loss": 1.2308, + "nll_loss": 1.0931975841522217, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.005456447601318, + "rewards/margins": 4.076797962188721, + "rewards/rejected": 1.928658127784729, + "step": 8020 + }, + { + "epoch": 0.4454861929792929, + "grad_norm": 93.05540466308594, + "learning_rate": 5.852025486137182e-08, + "logits/chosen": -0.20182017982006073, + "logits/rejected": -0.3323633670806885, + "logps/chosen": -143.80166625976562, + "logps/rejected": -195.39883422851562, + "loss": 1.3046, + "nll_loss": 0.864376425743103, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.173520565032959, + "rewards/margins": 3.3077869415283203, + "rewards/rejected": 1.8657335042953491, + "step": 8030 + }, + { + "epoch": 0.4460409703055436, + "grad_norm": 80.69502258300781, + "learning_rate": 5.8434371117377645e-08, + "logits/chosen": -0.2694427967071533, + "logits/rejected": -0.40171557664871216, + "logps/chosen": -152.2576446533203, + "logps/rejected": -233.55606079101562, + "loss": 1.2215, + "nll_loss": 0.8913043141365051, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.948529243469238, + "rewards/margins": 4.456477165222168, + "rewards/rejected": 1.4920519590377808, + "step": 8040 + }, + { + "epoch": 0.44659574763179427, + "grad_norm": 38.91741943359375, + "learning_rate": 5.834846175204611e-08, + "logits/chosen": -0.1876542568206787, + "logits/rejected": -0.3668864965438843, + "logps/chosen": -171.0609588623047, + "logps/rejected": -219.9167022705078, + "loss": 1.2786, + "nll_loss": 0.9668686985969543, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.387633323669434, + "rewards/margins": 3.384929656982422, + "rewards/rejected": 2.00270414352417, + "step": 8050 + }, + { + "epoch": 0.44715052495804497, + "grad_norm": 54.68962860107422, + "learning_rate": 5.826252702634661e-08, + "logits/chosen": -0.30333903431892395, + "logits/rejected": -0.4032462239265442, + "logps/chosen": -168.95272827148438, + "logps/rejected": -223.0485076904297, + "loss": 1.2644, + "nll_loss": 0.9916449785232544, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.742356300354004, + "rewards/margins": 3.2595245838165283, + "rewards/rejected": 2.4828314781188965, + "step": 8060 + }, + { + "epoch": 0.4477053022842956, + "grad_norm": 72.57445526123047, + "learning_rate": 5.81765672013256e-08, + "logits/chosen": -0.25930994749069214, + "logits/rejected": -0.4766874313354492, + "logps/chosen": -138.98260498046875, + "logps/rejected": -199.86651611328125, + "loss": 1.2728, + "nll_loss": 0.9009540677070618, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.168518543243408, + "rewards/margins": 4.168160438537598, + "rewards/rejected": 1.0003578662872314, + "step": 8070 + }, + { + "epoch": 0.4482600796105463, + "grad_norm": 82.89904022216797, + "learning_rate": 5.809058253810577e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -138.91119384765625, + "logps/rejected": -177.7933349609375, + "loss": 1.2864, + "nll_loss": NaN, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.166953086853027, + "rewards/margins": 3.048910617828369, + "rewards/rejected": 2.118042230606079, + "step": 8080 + }, + { + "epoch": 0.448814856936797, + "grad_norm": 45.50212860107422, + "learning_rate": 5.8004573297885263e-08, + "logits/chosen": -0.1541905403137207, + "logits/rejected": -0.3506318926811218, + "logps/chosen": -135.6262969970703, + "logps/rejected": -191.90237426757812, + "loss": 1.2552, + "nll_loss": 0.8606699705123901, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.86007022857666, + "rewards/margins": 3.3907406330108643, + "rewards/rejected": 1.469329595565796, + "step": 8090 + }, + { + "epoch": 0.44936963426304766, + "grad_norm": 64.80812072753906, + "learning_rate": 5.791853974193688e-08, + "logits/chosen": -0.32614752650260925, + "logits/rejected": -0.46904468536376953, + "logps/chosen": -197.24119567871094, + "logps/rejected": -231.13327026367188, + "loss": 1.1762, + "nll_loss": 1.0258362293243408, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.1628522872924805, + "rewards/margins": 4.323519706726074, + "rewards/rejected": 1.8393325805664062, + "step": 8100 + }, + { + "epoch": 0.44992441158929836, + "grad_norm": 66.36274719238281, + "learning_rate": 5.783248213160729e-08, + "logits/chosen": -0.3098219633102417, + "logits/rejected": -0.4934779703617096, + "logps/chosen": -178.09774780273438, + "logps/rejected": -239.51025390625, + "loss": 1.2542, + "nll_loss": 0.9184685945510864, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.972233772277832, + "rewards/margins": 4.269294261932373, + "rewards/rejected": 1.7029390335083008, + "step": 8110 + }, + { + "epoch": 0.450479188915549, + "grad_norm": 68.26948547363281, + "learning_rate": 5.774640072831621e-08, + "logits/chosen": -0.3017955422401428, + "logits/rejected": -0.46287283301353455, + "logps/chosen": -183.70526123046875, + "logps/rejected": -258.19964599609375, + "loss": 1.2733, + "nll_loss": 0.9797613024711609, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.402983665466309, + "rewards/margins": 4.828103065490723, + "rewards/rejected": 1.5748809576034546, + "step": 8120 + }, + { + "epoch": 0.4510339662417997, + "grad_norm": 42.09645080566406, + "learning_rate": 5.766029579355567e-08, + "logits/chosen": -0.34000691771507263, + "logits/rejected": -0.5368366241455078, + "logps/chosen": -171.09927368164062, + "logps/rejected": -246.84521484375, + "loss": 1.2752, + "nll_loss": 0.9592534899711609, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.863207817077637, + "rewards/margins": 4.2700419425964355, + "rewards/rejected": 1.5931650400161743, + "step": 8130 + }, + { + "epoch": 0.4515887435680504, + "grad_norm": 61.56931686401367, + "learning_rate": 5.7574167588889155e-08, + "logits/chosen": -0.4223068654537201, + "logits/rejected": -0.5126476883888245, + "logps/chosen": -190.4967803955078, + "logps/rejected": -259.795654296875, + "loss": 1.2975, + "nll_loss": 1.0304621458053589, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.281733512878418, + "rewards/margins": 4.427463531494141, + "rewards/rejected": 1.8542697429656982, + "step": 8140 + }, + { + "epoch": 0.45214352089430104, + "grad_norm": 36.00699996948242, + "learning_rate": 5.7488016375950846e-08, + "logits/chosen": -0.2626270651817322, + "logits/rejected": -0.43936362862586975, + "logps/chosen": -160.6858673095703, + "logps/rejected": -210.3122100830078, + "loss": 1.2147, + "nll_loss": 0.9187489748001099, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.460170269012451, + "rewards/margins": 3.7872111797332764, + "rewards/rejected": 1.6729589700698853, + "step": 8150 + }, + { + "epoch": 0.45269829822055174, + "grad_norm": 56.432472229003906, + "learning_rate": 5.740184241644482e-08, + "logits/chosen": -0.3512900471687317, + "logits/rejected": -0.5015803575515747, + "logps/chosen": -176.67373657226562, + "logps/rejected": -235.1858673095703, + "loss": 1.2242, + "nll_loss": 0.967685341835022, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.801245212554932, + "rewards/margins": 4.152280330657959, + "rewards/rejected": 1.6489654779434204, + "step": 8160 + }, + { + "epoch": 0.4532530755468024, + "grad_norm": 42.60057830810547, + "learning_rate": 5.7315645972144264e-08, + "logits/chosen": -0.42258042097091675, + "logits/rejected": -0.561353862285614, + "logps/chosen": -172.57858276367188, + "logps/rejected": -234.5561065673828, + "loss": 1.3028, + "nll_loss": 0.972865104675293, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.304925441741943, + "rewards/margins": 4.14249324798584, + "rewards/rejected": 2.1624317169189453, + "step": 8170 + }, + { + "epoch": 0.4538078528730531, + "grad_norm": 75.68475341796875, + "learning_rate": 5.7229427304890644e-08, + "logits/chosen": -0.35305720567703247, + "logits/rejected": -0.5380151271820068, + "logps/chosen": -183.86520385742188, + "logps/rejected": -225.5127716064453, + "loss": 1.2598, + "nll_loss": 0.9746831059455872, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.931631088256836, + "rewards/margins": 4.358283042907715, + "rewards/rejected": 1.5733486413955688, + "step": 8180 + }, + { + "epoch": 0.45436263019930373, + "grad_norm": 70.02229309082031, + "learning_rate": 5.7143186676592935e-08, + "logits/chosen": -0.3164612650871277, + "logits/rejected": -0.41461247205734253, + "logps/chosen": -180.4053497314453, + "logps/rejected": -213.3650360107422, + "loss": 1.2599, + "nll_loss": 0.9668909907341003, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.490021705627441, + "rewards/margins": 2.3829715251922607, + "rewards/rejected": 3.1070501804351807, + "step": 8190 + }, + { + "epoch": 0.45491740752555443, + "grad_norm": 64.77501678466797, + "learning_rate": 5.705692434922683e-08, + "logits/chosen": -0.3636978268623352, + "logits/rejected": -0.5518943071365356, + "logps/chosen": -172.8309783935547, + "logps/rejected": -235.52029418945312, + "loss": 1.2163, + "nll_loss": 0.9208908081054688, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.8655290603637695, + "rewards/margins": 3.3627452850341797, + "rewards/rejected": 2.5027828216552734, + "step": 8200 + }, + { + "epoch": 0.45547218485180513, + "grad_norm": 63.131656646728516, + "learning_rate": 5.697064058483395e-08, + "logits/chosen": -0.4812677800655365, + "logits/rejected": -0.6031264066696167, + "logps/chosen": -204.84005737304688, + "logps/rejected": -278.93475341796875, + "loss": 1.3567, + "nll_loss": 1.2066683769226074, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.644658088684082, + "rewards/margins": 3.36299467086792, + "rewards/rejected": 3.281662702560425, + "step": 8210 + }, + { + "epoch": 0.4560269621780558, + "grad_norm": 80.57061004638672, + "learning_rate": 5.688433564552103e-08, + "logits/chosen": -0.4052867889404297, + "logits/rejected": -0.5403534770011902, + "logps/chosen": -149.51043701171875, + "logps/rejected": -223.8475799560547, + "loss": 1.3456, + "nll_loss": 0.9441035389900208, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.9508514404296875, + "rewards/margins": 4.125848293304443, + "rewards/rejected": 1.825002670288086, + "step": 8220 + }, + { + "epoch": 0.45658173950430647, + "grad_norm": 62.646263122558594, + "learning_rate": 5.67980097934591e-08, + "logits/chosen": -0.3800668716430664, + "logits/rejected": -0.4854021668434143, + "logps/chosen": -169.1044921875, + "logps/rejected": -245.3473663330078, + "loss": 1.2799, + "nll_loss": 1.067249059677124, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.665637016296387, + "rewards/margins": 3.1536102294921875, + "rewards/rejected": 2.512026309967041, + "step": 8230 + }, + { + "epoch": 0.4571365168305571, + "grad_norm": 59.984928131103516, + "learning_rate": 5.6711663290882774e-08, + "logits/chosen": -0.42158278822898865, + "logits/rejected": -0.5549692511558533, + "logps/chosen": -177.200927734375, + "logps/rejected": -229.2981414794922, + "loss": 1.3148, + "nll_loss": 1.011946439743042, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.219405651092529, + "rewards/margins": 3.8054656982421875, + "rewards/rejected": 2.413939952850342, + "step": 8240 + }, + { + "epoch": 0.4576912941568078, + "grad_norm": 125.51600646972656, + "learning_rate": 5.662529640008933e-08, + "logits/chosen": -0.3310701549053192, + "logits/rejected": -0.4897652566432953, + "logps/chosen": -168.09390258789062, + "logps/rejected": -217.44931030273438, + "loss": 1.3698, + "nll_loss": 0.9788816571235657, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.848081111907959, + "rewards/margins": 4.070723533630371, + "rewards/rejected": 1.777358055114746, + "step": 8250 + }, + { + "epoch": 0.4582460714830585, + "grad_norm": 57.40364456176758, + "learning_rate": 5.6538909383438046e-08, + "logits/chosen": -0.408261239528656, + "logits/rejected": -0.5265794396400452, + "logps/chosen": -196.43104553222656, + "logps/rejected": -253.6921844482422, + "loss": 1.3006, + "nll_loss": 1.097115159034729, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.171659469604492, + "rewards/margins": 3.760265827178955, + "rewards/rejected": 2.411393880844116, + "step": 8260 + }, + { + "epoch": 0.45880084880930916, + "grad_norm": 59.3066520690918, + "learning_rate": 5.645250250334931e-08, + "logits/chosen": -0.382033109664917, + "logits/rejected": -0.4673156142234802, + "logps/chosen": -200.22938537597656, + "logps/rejected": -272.4017333984375, + "loss": 1.2982, + "nll_loss": 1.0755808353424072, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.385659217834473, + "rewards/margins": 3.7684860229492188, + "rewards/rejected": 2.617173671722412, + "step": 8270 + }, + { + "epoch": 0.45935562613555986, + "grad_norm": 84.24140930175781, + "learning_rate": 5.636607602230379e-08, + "logits/chosen": -0.33731868863105774, + "logits/rejected": -0.5461875200271606, + "logps/chosen": -181.59646606445312, + "logps/rejected": -280.0567626953125, + "loss": 1.2524, + "nll_loss": 0.9656723737716675, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.201190948486328, + "rewards/margins": 5.510003089904785, + "rewards/rejected": 0.6911883354187012, + "step": 8280 + }, + { + "epoch": 0.4599104034618105, + "grad_norm": 57.184749603271484, + "learning_rate": 5.62796302028418e-08, + "logits/chosen": -0.26092246174812317, + "logits/rejected": -0.3896161615848541, + "logps/chosen": -164.76968383789062, + "logps/rejected": -215.32467651367188, + "loss": 1.2664, + "nll_loss": 0.9338585138320923, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.4399824142456055, + "rewards/margins": 4.290564060211182, + "rewards/rejected": 1.149418592453003, + "step": 8290 + }, + { + "epoch": 0.4604651807880612, + "grad_norm": 59.31813049316406, + "learning_rate": 5.619316530756233e-08, + "logits/chosen": -0.28790172934532166, + "logits/rejected": -0.45928388833999634, + "logps/chosen": -162.6776885986328, + "logps/rejected": -223.90475463867188, + "loss": 1.2686, + "nll_loss": 0.9045939445495605, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.451429843902588, + "rewards/margins": 3.5017216205596924, + "rewards/rejected": 1.9497079849243164, + "step": 8300 + }, + { + "epoch": 0.46101995811431185, + "grad_norm": 101.83157348632812, + "learning_rate": 5.610668159912235e-08, + "logits/chosen": -0.3260810375213623, + "logits/rejected": -0.4430045485496521, + "logps/chosen": -186.3931121826172, + "logps/rejected": -235.7809295654297, + "loss": 1.3681, + "nll_loss": 0.9977655410766602, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.799564361572266, + "rewards/margins": 3.3150525093078613, + "rewards/rejected": 2.484511613845825, + "step": 8310 + }, + { + "epoch": 0.46157473544056254, + "grad_norm": 101.32489776611328, + "learning_rate": 5.602017934023595e-08, + "logits/chosen": -0.3339731693267822, + "logits/rejected": -0.5301405191421509, + "logps/chosen": -139.90518188476562, + "logps/rejected": -179.57363891601562, + "loss": 1.3411, + "nll_loss": 0.942695140838623, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.0137505531311035, + "rewards/margins": 3.0543651580810547, + "rewards/rejected": 1.9593846797943115, + "step": 8320 + }, + { + "epoch": 0.46212951276681324, + "grad_norm": 84.96895599365234, + "learning_rate": 5.59336587936736e-08, + "logits/chosen": -0.46184906363487244, + "logits/rejected": -0.5722322463989258, + "logps/chosen": -195.6354217529297, + "logps/rejected": -262.1464538574219, + "loss": 1.2554, + "nll_loss": 1.0777390003204346, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.625528812408447, + "rewards/margins": 3.791055202484131, + "rewards/rejected": 2.834473133087158, + "step": 8330 + }, + { + "epoch": 0.4626842900930639, + "grad_norm": 91.9871826171875, + "learning_rate": 5.5847120222261315e-08, + "logits/chosen": -0.3586110770702362, + "logits/rejected": -0.5264121890068054, + "logps/chosen": -139.6001739501953, + "logps/rejected": -184.0530242919922, + "loss": 1.3581, + "nll_loss": 0.9735239148139954, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.088287353515625, + "rewards/margins": 3.1971933841705322, + "rewards/rejected": 1.8910939693450928, + "step": 8340 + }, + { + "epoch": 0.4632390674193146, + "grad_norm": 129.99005126953125, + "learning_rate": 5.5760563888879844e-08, + "logits/chosen": -0.3932887613773346, + "logits/rejected": -0.5743144750595093, + "logps/chosen": -142.7582550048828, + "logps/rejected": -206.0090789794922, + "loss": 1.2786, + "nll_loss": 0.8861738443374634, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.229488849639893, + "rewards/margins": 3.4306912422180176, + "rewards/rejected": 1.7987968921661377, + "step": 8350 + }, + { + "epoch": 0.46379384474556523, + "grad_norm": 50.705623626708984, + "learning_rate": 5.567399005646393e-08, + "logits/chosen": -0.372164249420166, + "logits/rejected": -0.518144428730011, + "logps/chosen": -165.54945373535156, + "logps/rejected": -218.5325469970703, + "loss": 1.2101, + "nll_loss": 0.8808409571647644, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.740856647491455, + "rewards/margins": 3.079467296600342, + "rewards/rejected": 2.661389112472534, + "step": 8360 + }, + { + "epoch": 0.46434862207181593, + "grad_norm": 68.00957489013672, + "learning_rate": 5.558739898800141e-08, + "logits/chosen": -0.4178202748298645, + "logits/rejected": -0.5774034857749939, + "logps/chosen": -168.80972290039062, + "logps/rejected": -226.04690551757812, + "loss": 1.2316, + "nll_loss": 1.0256147384643555, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.702794075012207, + "rewards/margins": 3.232090473175049, + "rewards/rejected": 2.470703601837158, + "step": 8370 + }, + { + "epoch": 0.4649033993980666, + "grad_norm": 56.85757827758789, + "learning_rate": 5.550079094653257e-08, + "logits/chosen": -0.28982049226760864, + "logits/rejected": -0.4554738998413086, + "logps/chosen": -168.7762451171875, + "logps/rejected": -216.87002563476562, + "loss": 1.364, + "nll_loss": 0.9375017285346985, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.515100479125977, + "rewards/margins": 2.744253635406494, + "rewards/rejected": 2.7708468437194824, + "step": 8380 + }, + { + "epoch": 0.4654581767243173, + "grad_norm": 62.08861541748047, + "learning_rate": 5.5414166195149194e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -122.28581237792969, + "logps/rejected": -182.76296997070312, + "loss": 1.2023, + "nll_loss": NaN, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.9859161376953125, + "rewards/margins": 3.501011371612549, + "rewards/rejected": 1.4849050045013428, + "step": 8390 + }, + { + "epoch": 0.466012954050568, + "grad_norm": 56.96398162841797, + "learning_rate": 5.53275249969938e-08, + "logits/chosen": -0.29233837127685547, + "logits/rejected": -0.42494410276412964, + "logps/chosen": -175.22488403320312, + "logps/rejected": -226.5401153564453, + "loss": 1.221, + "nll_loss": 0.9822790026664734, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.395953178405762, + "rewards/margins": 3.6122539043426514, + "rewards/rejected": 1.783699631690979, + "step": 8400 + }, + { + "epoch": 0.4665677313768186, + "grad_norm": 69.49365997314453, + "learning_rate": 5.524086761525896e-08, + "logits/chosen": -0.3085178732872009, + "logits/rejected": -0.5056566596031189, + "logps/chosen": -170.92762756347656, + "logps/rejected": -225.01986694335938, + "loss": 1.2412, + "nll_loss": 0.9332050085067749, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.219595909118652, + "rewards/margins": 3.47466778755188, + "rewards/rejected": 1.744927167892456, + "step": 8410 + }, + { + "epoch": 0.4671225087030693, + "grad_norm": 50.1507453918457, + "learning_rate": 5.515419431318632e-08, + "logits/chosen": -0.39892831444740295, + "logits/rejected": -0.49943628907203674, + "logps/chosen": -162.7307586669922, + "logps/rejected": -202.21437072753906, + "loss": 1.225, + "nll_loss": 1.015000343322754, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.446582317352295, + "rewards/margins": 3.4593796730041504, + "rewards/rejected": 1.9872024059295654, + "step": 8420 + }, + { + "epoch": 0.46767728602931996, + "grad_norm": 57.40410614013672, + "learning_rate": 5.506750535406594e-08, + "logits/chosen": -0.24424946308135986, + "logits/rejected": -0.47506189346313477, + "logps/chosen": -140.15203857421875, + "logps/rejected": -225.4398651123047, + "loss": 1.1725, + "nll_loss": 0.8635500073432922, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 5.439966678619385, + "rewards/margins": 4.405338287353516, + "rewards/rejected": 1.03462815284729, + "step": 8430 + }, + { + "epoch": 0.46823206335557066, + "grad_norm": 69.4198226928711, + "learning_rate": 5.49808010012354e-08, + "logits/chosen": -0.23678168654441833, + "logits/rejected": -0.3856371343135834, + "logps/chosen": -145.4683074951172, + "logps/rejected": -215.7239532470703, + "loss": 1.1816, + "nll_loss": 0.8480218052864075, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.327239036560059, + "rewards/margins": 3.5169034004211426, + "rewards/rejected": 1.8103358745574951, + "step": 8440 + }, + { + "epoch": 0.46878684068182136, + "grad_norm": 121.41065979003906, + "learning_rate": 5.489408151807907e-08, + "logits/chosen": -0.20533113181591034, + "logits/rejected": -0.386726438999176, + "logps/chosen": -164.6047821044922, + "logps/rejected": -240.8743133544922, + "loss": 1.2385, + "nll_loss": 0.946201503276825, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.50950288772583, + "rewards/margins": 3.9801108837127686, + "rewards/rejected": 1.5293917655944824, + "step": 8450 + }, + { + "epoch": 0.469341618008072, + "grad_norm": 67.52189636230469, + "learning_rate": 5.480734716802729e-08, + "logits/chosen": -0.047091174870729446, + "logits/rejected": -0.30713146924972534, + "logps/chosen": -107.66644287109375, + "logps/rejected": -164.18824768066406, + "loss": 1.219, + "nll_loss": 0.7501475214958191, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.337796688079834, + "rewards/margins": 4.029504299163818, + "rewards/rejected": 0.30829155445098877, + "step": 8460 + }, + { + "epoch": 0.4698963953343227, + "grad_norm": 45.683902740478516, + "learning_rate": 5.472059821455554e-08, + "logits/chosen": -0.3871431350708008, + "logits/rejected": -0.514873206615448, + "logps/chosen": -185.6728057861328, + "logps/rejected": -251.6055145263672, + "loss": 1.2143, + "nll_loss": 1.1259502172470093, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.96354341506958, + "rewards/margins": 4.187412261962891, + "rewards/rejected": 1.7761310338974, + "step": 8470 + }, + { + "epoch": 0.47045117266057335, + "grad_norm": 101.97664642333984, + "learning_rate": 5.4633834921183665e-08, + "logits/chosen": -0.2548361122608185, + "logits/rejected": -0.5008405447006226, + "logps/chosen": -151.93946838378906, + "logps/rejected": -212.5448760986328, + "loss": 1.314, + "nll_loss": 0.8620659112930298, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.392413139343262, + "rewards/margins": 3.4350333213806152, + "rewards/rejected": 1.9573793411254883, + "step": 8480 + }, + { + "epoch": 0.47100594998682405, + "grad_norm": 46.88422775268555, + "learning_rate": 5.454705755147508e-08, + "logits/chosen": -0.18084892630577087, + "logits/rejected": -0.3403630256652832, + "logps/chosen": -153.07003784179688, + "logps/rejected": -207.1527557373047, + "loss": 1.2825, + "nll_loss": 0.9178160429000854, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.4134650230407715, + "rewards/margins": 3.658824920654297, + "rewards/rejected": 1.7546402215957642, + "step": 8490 + }, + { + "epoch": 0.4715607273130747, + "grad_norm": 81.96524810791016, + "learning_rate": 5.4460266369035954e-08, + "logits/chosen": -0.154692143201828, + "logits/rejected": -0.39466938376426697, + "logps/chosen": -108.70941162109375, + "logps/rejected": -186.53506469726562, + "loss": 1.2631, + "nll_loss": 0.7526475787162781, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.2512054443359375, + "rewards/margins": 3.975069046020508, + "rewards/rejected": 0.27613669633865356, + "step": 8500 + }, + { + "epoch": 0.4715607273130747, + "eval_logits/chosen": -0.38347405195236206, + "eval_logits/rejected": -0.49657073616981506, + "eval_logps/chosen": -193.00897216796875, + "eval_logps/rejected": -264.6562194824219, + "eval_loss": 1.2516002655029297, + "eval_nll_loss": 1.0022295713424683, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.495223522186279, + "eval_rewards/margins": 5.052995681762695, + "eval_rewards/rejected": 1.442228078842163, + "eval_runtime": 17.0205, + "eval_samples_per_second": 15.041, + "eval_steps_per_second": 1.88, + "step": 8500 + }, + { + "epoch": 0.4721155046393254, + "grad_norm": 64.24961853027344, + "learning_rate": 5.4373461637514416e-08, + "logits/chosen": -0.15924356877803802, + "logits/rejected": -0.3522131145000458, + "logps/chosen": -130.67691040039062, + "logps/rejected": -195.55931091308594, + "loss": 1.1696, + "nll_loss": 0.8569602966308594, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.041111946105957, + "rewards/margins": 3.7395081520080566, + "rewards/rejected": 1.3016037940979004, + "step": 8510 + }, + { + "epoch": 0.4726702819655761, + "grad_norm": 83.25701904296875, + "learning_rate": 5.428664362059975e-08, + "logits/chosen": -0.3635895848274231, + "logits/rejected": -0.5300859212875366, + "logps/chosen": -167.30325317382812, + "logps/rejected": -218.53466796875, + "loss": 1.3063, + "nll_loss": 0.9899178743362427, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.74692440032959, + "rewards/margins": 3.647434711456299, + "rewards/rejected": 2.099489688873291, + "step": 8520 + }, + { + "epoch": 0.47322505929182673, + "grad_norm": 83.2901840209961, + "learning_rate": 5.41998125820216e-08, + "logits/chosen": -0.2491048276424408, + "logits/rejected": -0.39172905683517456, + "logps/chosen": -159.7559814453125, + "logps/rejected": -203.1042938232422, + "loss": 1.2351, + "nll_loss": 0.899651050567627, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.467600345611572, + "rewards/margins": 2.6668825149536133, + "rewards/rejected": 2.80071759223938, + "step": 8530 + }, + { + "epoch": 0.47377983661807743, + "grad_norm": 56.23780059814453, + "learning_rate": 5.4112968785549174e-08, + "logits/chosen": -0.15644797682762146, + "logits/rejected": -0.34491434693336487, + "logps/chosen": -113.5403823852539, + "logps/rejected": -181.77078247070312, + "loss": 1.2335, + "nll_loss": 0.8350857496261597, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.577191352844238, + "rewards/margins": 2.9863953590393066, + "rewards/rejected": 1.5907953977584839, + "step": 8540 + }, + { + "epoch": 0.4743346139443281, + "grad_norm": 90.52593994140625, + "learning_rate": 5.402611249499042e-08, + "logits/chosen": -0.2765730023384094, + "logits/rejected": -0.48341649770736694, + "logps/chosen": -156.0672607421875, + "logps/rejected": -248.24545288085938, + "loss": 1.2362, + "nll_loss": 0.9277788996696472, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.159286022186279, + "rewards/margins": 5.113110542297363, + "rewards/rejected": 1.0461763143539429, + "step": 8550 + }, + { + "epoch": 0.4748893912705788, + "grad_norm": 82.83740997314453, + "learning_rate": 5.393924397419126e-08, + "logits/chosen": -0.11408629268407822, + "logits/rejected": -0.34016355872154236, + "logps/chosen": -131.93338012695312, + "logps/rejected": -187.15115356445312, + "loss": 1.2929, + "nll_loss": 0.835770308971405, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.571127891540527, + "rewards/margins": 2.900984287261963, + "rewards/rejected": 1.6701438426971436, + "step": 8560 + }, + { + "epoch": 0.4754441685968295, + "grad_norm": 59.472686767578125, + "learning_rate": 5.385236348703474e-08, + "logits/chosen": -0.21524448692798615, + "logits/rejected": -0.46747082471847534, + "logps/chosen": -117.48472595214844, + "logps/rejected": -176.40567016601562, + "loss": 1.1203, + "nll_loss": 0.7838854789733887, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 5.015366554260254, + "rewards/margins": 4.129748344421387, + "rewards/rejected": 0.8856188058853149, + "step": 8570 + }, + { + "epoch": 0.4759989459230801, + "grad_norm": 52.82234573364258, + "learning_rate": 5.376547129744029e-08, + "logits/chosen": -0.3290513753890991, + "logits/rejected": -0.48952069878578186, + "logps/chosen": -157.22695922851562, + "logps/rejected": -216.16525268554688, + "loss": 1.2843, + "nll_loss": 0.9082091450691223, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.499343395233154, + "rewards/margins": 3.3456497192382812, + "rewards/rejected": 2.153693199157715, + "step": 8580 + }, + { + "epoch": 0.4765537232493308, + "grad_norm": 70.94815826416016, + "learning_rate": 5.367856766936286e-08, + "logits/chosen": -0.27662572264671326, + "logits/rejected": -0.4078160226345062, + "logps/chosen": -172.33746337890625, + "logps/rejected": -196.3927459716797, + "loss": 1.3422, + "nll_loss": 1.0085538625717163, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.572179317474365, + "rewards/margins": 2.264984607696533, + "rewards/rejected": 3.3071951866149902, + "step": 8590 + }, + { + "epoch": 0.47710850057558146, + "grad_norm": 103.72970581054688, + "learning_rate": 5.359165286679217e-08, + "logits/chosen": -0.36042019724845886, + "logits/rejected": -0.5228351354598999, + "logps/chosen": -179.4456024169922, + "logps/rejected": -268.6436462402344, + "loss": 1.2705, + "nll_loss": 1.020342469215393, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.373924732208252, + "rewards/margins": 4.730695724487305, + "rewards/rejected": 1.643228530883789, + "step": 8600 + }, + { + "epoch": 0.47766327790183216, + "grad_norm": 78.71900177001953, + "learning_rate": 5.350472715375186e-08, + "logits/chosen": -0.2985449433326721, + "logits/rejected": -0.4532528817653656, + "logps/chosen": -162.94070434570312, + "logps/rejected": -212.1439208984375, + "loss": 1.2438, + "nll_loss": 0.8956171274185181, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.750922203063965, + "rewards/margins": 3.4489173889160156, + "rewards/rejected": 2.30200457572937, + "step": 8610 + }, + { + "epoch": 0.4782180552280828, + "grad_norm": 58.050025939941406, + "learning_rate": 5.341779079429872e-08, + "logits/chosen": -0.3309435546398163, + "logits/rejected": -0.4891184866428375, + "logps/chosen": -140.02577209472656, + "logps/rejected": -206.16079711914062, + "loss": 1.3336, + "nll_loss": 0.8849604725837708, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.365493297576904, + "rewards/margins": 3.672743320465088, + "rewards/rejected": 1.6927497386932373, + "step": 8620 + }, + { + "epoch": 0.4787728325543335, + "grad_norm": 79.02501678466797, + "learning_rate": 5.333084405252192e-08, + "logits/chosen": -0.2821223735809326, + "logits/rejected": -0.45447272062301636, + "logps/chosen": -153.81051635742188, + "logps/rejected": -217.93423461914062, + "loss": 1.1836, + "nll_loss": 0.8838475346565247, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 5.901255130767822, + "rewards/margins": 4.203151226043701, + "rewards/rejected": 1.698103666305542, + "step": 8630 + }, + { + "epoch": 0.4793276098805842, + "grad_norm": 61.627197265625, + "learning_rate": 5.32438871925421e-08, + "logits/chosen": -0.40773114562034607, + "logits/rejected": -0.5189584493637085, + "logps/chosen": -181.827392578125, + "logps/rejected": -259.85040283203125, + "loss": 1.2252, + "nll_loss": 1.0588048696517944, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.054421901702881, + "rewards/margins": 3.4445533752441406, + "rewards/rejected": 2.609868288040161, + "step": 8640 + }, + { + "epoch": 0.47988238720683485, + "grad_norm": 69.41519165039062, + "learning_rate": 5.3156920478510695e-08, + "logits/chosen": -0.22698119282722473, + "logits/rejected": -0.4370526671409607, + "logps/chosen": -191.02120971679688, + "logps/rejected": -230.5593719482422, + "loss": 1.1659, + "nll_loss": 0.9413886070251465, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.745708465576172, + "rewards/margins": 4.41402006149292, + "rewards/rejected": 1.3316879272460938, + "step": 8650 + }, + { + "epoch": 0.48043716453308555, + "grad_norm": 77.55827331542969, + "learning_rate": 5.3069944174609046e-08, + "logits/chosen": -0.347109854221344, + "logits/rejected": -0.5010181665420532, + "logps/chosen": -180.91476440429688, + "logps/rejected": -262.96807861328125, + "loss": 1.3218, + "nll_loss": 0.9891365170478821, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.197776794433594, + "rewards/margins": 4.570927619934082, + "rewards/rejected": 1.6268491744995117, + "step": 8660 + }, + { + "epoch": 0.4809919418593362, + "grad_norm": 70.76117706298828, + "learning_rate": 5.298295854504764e-08, + "logits/chosen": -0.3689562678337097, + "logits/rejected": -0.4636387825012207, + "logps/chosen": -180.4010772705078, + "logps/rejected": -229.11538696289062, + "loss": 1.2623, + "nll_loss": 1.1043275594711304, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.257417678833008, + "rewards/margins": 3.5675837993621826, + "rewards/rejected": 2.6898341178894043, + "step": 8670 + }, + { + "epoch": 0.4815467191855869, + "grad_norm": 59.07448196411133, + "learning_rate": 5.2895963854065264e-08, + "logits/chosen": -0.42123499512672424, + "logits/rejected": -0.45919767022132874, + "logps/chosen": -197.53662109375, + "logps/rejected": -278.5355529785156, + "loss": 1.2533, + "nll_loss": 1.2173737287521362, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.918522834777832, + "rewards/margins": 4.231985569000244, + "rewards/rejected": 2.6865363121032715, + "step": 8680 + }, + { + "epoch": 0.48210149651183754, + "grad_norm": 122.18305969238281, + "learning_rate": 5.2808960365928266e-08, + "logits/chosen": -0.24855947494506836, + "logits/rejected": -0.35904207825660706, + "logps/chosen": -185.3103485107422, + "logps/rejected": -249.7644500732422, + "loss": 1.3725, + "nll_loss": 0.9928406476974487, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.964116096496582, + "rewards/margins": 3.4931252002716064, + "rewards/rejected": 2.4709906578063965, + "step": 8690 + }, + { + "epoch": 0.48265627383808823, + "grad_norm": 109.85166931152344, + "learning_rate": 5.272194834492969e-08, + "logits/chosen": -0.1494508683681488, + "logits/rejected": -0.33946704864501953, + "logps/chosen": -112.33955383300781, + "logps/rejected": -179.93511962890625, + "loss": 1.2902, + "nll_loss": 0.7621714472770691, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.732860088348389, + "rewards/margins": 3.625771999359131, + "rewards/rejected": 1.107088327407837, + "step": 8700 + }, + { + "epoch": 0.48321105116433893, + "grad_norm": 64.41929626464844, + "learning_rate": 5.263492805538853e-08, + "logits/chosen": -0.1143936887383461, + "logits/rejected": -0.3094675838947296, + "logps/chosen": -109.27293395996094, + "logps/rejected": -171.84115600585938, + "loss": 1.2469, + "nll_loss": 0.7721977233886719, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.127641677856445, + "rewards/margins": 3.933940887451172, + "rewards/rejected": 1.1937006711959839, + "step": 8710 + }, + { + "epoch": 0.4837658284905896, + "grad_norm": 58.07503128051758, + "learning_rate": 5.254789976164885e-08, + "logits/chosen": -0.17686712741851807, + "logits/rejected": -0.36666935682296753, + "logps/chosen": -161.06690979003906, + "logps/rejected": -220.7855987548828, + "loss": 1.2123, + "nll_loss": 1.0213537216186523, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.2469587326049805, + "rewards/margins": 3.10516357421875, + "rewards/rejected": 2.1417949199676514, + "step": 8720 + }, + { + "epoch": 0.4843206058168403, + "grad_norm": 45.921226501464844, + "learning_rate": 5.246086372807911e-08, + "logits/chosen": -0.1610218733549118, + "logits/rejected": -0.4254869520664215, + "logps/chosen": -122.7448959350586, + "logps/rejected": -185.78421020507812, + "loss": 1.3354, + "nll_loss": 0.7519733309745789, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.872366905212402, + "rewards/margins": 3.6194496154785156, + "rewards/rejected": 1.2529175281524658, + "step": 8730 + }, + { + "epoch": 0.4848753831430909, + "grad_norm": 51.10985565185547, + "learning_rate": 5.237382021907119e-08, + "logits/chosen": -0.3228822350502014, + "logits/rejected": -0.40563878417015076, + "logps/chosen": -178.90646362304688, + "logps/rejected": -218.18753051757812, + "loss": 1.3275, + "nll_loss": 1.1199922561645508, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.172387599945068, + "rewards/margins": 3.0818145275115967, + "rewards/rejected": 3.090573310852051, + "step": 8740 + }, + { + "epoch": 0.4854301604693416, + "grad_norm": 46.320003509521484, + "learning_rate": 5.228676949903973e-08, + "logits/chosen": -0.30700522661209106, + "logits/rejected": -0.4341781735420227, + "logps/chosen": -164.91897583007812, + "logps/rejected": -210.10507202148438, + "loss": 1.1947, + "nll_loss": 0.9750370979309082, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.663297653198242, + "rewards/margins": 3.347306728363037, + "rewards/rejected": 2.315990924835205, + "step": 8750 + }, + { + "epoch": 0.4859849377955923, + "grad_norm": 40.002342224121094, + "learning_rate": 5.219971183242125e-08, + "logits/chosen": -0.044834405183792114, + "logits/rejected": -0.23564691841602325, + "logps/chosen": -93.63855743408203, + "logps/rejected": -129.38055419921875, + "loss": 1.1847, + "nll_loss": 0.6099318265914917, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.2069268226623535, + "rewards/margins": 2.6457130908966064, + "rewards/rejected": 1.5612133741378784, + "step": 8760 + }, + { + "epoch": 0.48653971512184296, + "grad_norm": 79.6065902709961, + "learning_rate": 5.211264748367341e-08, + "logits/chosen": -0.3970792889595032, + "logits/rejected": -0.44048887491226196, + "logps/chosen": -162.2034149169922, + "logps/rejected": -220.4867401123047, + "loss": 1.2505, + "nll_loss": 0.9981945157051086, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.638679027557373, + "rewards/margins": 3.384411334991455, + "rewards/rejected": 2.2542672157287598, + "step": 8770 + }, + { + "epoch": 0.48709449244809366, + "grad_norm": 65.16060638427734, + "learning_rate": 5.2025576717274146e-08, + "logits/chosen": -0.1481800228357315, + "logits/rejected": -0.3821583390235901, + "logps/chosen": -129.69326782226562, + "logps/rejected": -197.94424438476562, + "loss": 1.3856, + "nll_loss": 0.7421929240226746, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.613026142120361, + "rewards/margins": 2.946195125579834, + "rewards/rejected": 1.6668307781219482, + "step": 8780 + }, + { + "epoch": 0.4876492697743443, + "grad_norm": 76.15536499023438, + "learning_rate": 5.193849979772086e-08, + "logits/chosen": -0.21147122979164124, + "logits/rejected": -0.413780152797699, + "logps/chosen": -92.40141296386719, + "logps/rejected": -118.81148529052734, + "loss": 1.363, + "nll_loss": 0.6374253630638123, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.012933731079102, + "rewards/margins": 2.4644181728363037, + "rewards/rejected": 1.548515796661377, + "step": 8790 + }, + { + "epoch": 0.488204047100595, + "grad_norm": 60.92570114135742, + "learning_rate": 5.1851416989529696e-08, + "logits/chosen": -0.3093903660774231, + "logits/rejected": -0.44280901551246643, + "logps/chosen": -137.2354736328125, + "logps/rejected": -195.80068969726562, + "loss": 1.2765, + "nll_loss": 0.866968035697937, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.153212070465088, + "rewards/margins": 2.812201976776123, + "rewards/rejected": 2.341010093688965, + "step": 8800 + }, + { + "epoch": 0.48875882442684565, + "grad_norm": 73.39197540283203, + "learning_rate": 5.176432855723466e-08, + "logits/chosen": -0.3896110951900482, + "logits/rejected": -0.510871946811676, + "logps/chosen": -172.58920288085938, + "logps/rejected": -250.2101593017578, + "loss": 1.4006, + "nll_loss": 0.951266884803772, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.155871391296387, + "rewards/margins": 3.3034446239471436, + "rewards/rejected": 2.8524270057678223, + "step": 8810 + }, + { + "epoch": 0.48931360175309635, + "grad_norm": 49.965972900390625, + "learning_rate": 5.167723476538682e-08, + "logits/chosen": -0.2232964038848877, + "logits/rejected": -0.39524856209754944, + "logps/chosen": -132.72500610351562, + "logps/rejected": -195.49729919433594, + "loss": 1.1924, + "nll_loss": 0.7789738774299622, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.062056064605713, + "rewards/margins": 3.040559768676758, + "rewards/rejected": 2.0214955806732178, + "step": 8820 + }, + { + "epoch": 0.48986837907934705, + "grad_norm": 73.50363159179688, + "learning_rate": 5.15901358785536e-08, + "logits/chosen": -0.3110652565956116, + "logits/rejected": -0.41810736060142517, + "logps/chosen": -155.49932861328125, + "logps/rejected": -210.6866455078125, + "loss": 1.2802, + "nll_loss": 0.9495540857315063, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.892726421356201, + "rewards/margins": 4.21453857421875, + "rewards/rejected": 1.6781879663467407, + "step": 8830 + }, + { + "epoch": 0.4904231564055977, + "grad_norm": 81.90071868896484, + "learning_rate": 5.1503032161317814e-08, + "logits/chosen": -0.3098284900188446, + "logits/rejected": -0.43176165223121643, + "logps/chosen": -173.8741455078125, + "logps/rejected": -229.93814086914062, + "loss": 1.3226, + "nll_loss": 0.9427685737609863, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.143521308898926, + "rewards/margins": 4.145721435546875, + "rewards/rejected": 1.9978001117706299, + "step": 8840 + }, + { + "epoch": 0.4909779337318484, + "grad_norm": 66.26280212402344, + "learning_rate": 5.1415923878277e-08, + "logits/chosen": -0.2731201648712158, + "logits/rejected": -0.4314287602901459, + "logps/chosen": -152.42535400390625, + "logps/rejected": -195.3851318359375, + "loss": 1.2677, + "nll_loss": 0.9041376113891602, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.4018168449401855, + "rewards/margins": 2.7450244426727295, + "rewards/rejected": 2.656792640686035, + "step": 8850 + }, + { + "epoch": 0.49153271105809904, + "grad_norm": 60.99270248413086, + "learning_rate": 5.132881129404256e-08, + "logits/chosen": -0.2745968997478485, + "logits/rejected": -0.4192644953727722, + "logps/chosen": -192.58935546875, + "logps/rejected": -237.8802490234375, + "loss": 1.2896, + "nll_loss": 1.0134674310684204, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.789472579956055, + "rewards/margins": 3.4546120166778564, + "rewards/rejected": 2.3348610401153564, + "step": 8860 + }, + { + "epoch": 0.49208748838434974, + "grad_norm": 58.93909454345703, + "learning_rate": 5.1241694673238924e-08, + "logits/chosen": -0.22993163764476776, + "logits/rejected": -0.3750077188014984, + "logps/chosen": -160.7397003173828, + "logps/rejected": -207.7425537109375, + "loss": 1.2323, + "nll_loss": 0.9182407259941101, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.209530830383301, + "rewards/margins": 3.3358845710754395, + "rewards/rejected": 1.8736464977264404, + "step": 8870 + }, + { + "epoch": 0.49264226571060044, + "grad_norm": 61.5848503112793, + "learning_rate": 5.115457428050285e-08, + "logits/chosen": -0.4301369786262512, + "logits/rejected": -0.5818208456039429, + "logps/chosen": -185.5221710205078, + "logps/rejected": -256.7213439941406, + "loss": 1.2948, + "nll_loss": 1.0040353536605835, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.889816761016846, + "rewards/margins": 3.5707008838653564, + "rewards/rejected": 2.31911563873291, + "step": 8880 + }, + { + "epoch": 0.4931970430368511, + "grad_norm": 52.07457733154297, + "learning_rate": 5.10674503804825e-08, + "logits/chosen": -0.16518327593803406, + "logits/rejected": -0.39885979890823364, + "logps/chosen": -133.54811096191406, + "logps/rejected": -192.93878173828125, + "loss": 1.1787, + "nll_loss": 0.7971738576889038, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.232979774475098, + "rewards/margins": 3.939906358718872, + "rewards/rejected": 1.2930728197097778, + "step": 8890 + }, + { + "epoch": 0.4937518203631018, + "grad_norm": 76.2165756225586, + "learning_rate": 5.098032323783672e-08, + "logits/chosen": -0.22320708632469177, + "logits/rejected": -0.36877983808517456, + "logps/chosen": -153.4722137451172, + "logps/rejected": -209.455078125, + "loss": 1.3544, + "nll_loss": 0.9227968454360962, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.538509845733643, + "rewards/margins": 3.450572967529297, + "rewards/rejected": 2.0879368782043457, + "step": 8900 + }, + { + "epoch": 0.4943065976893524, + "grad_norm": 54.87151336669922, + "learning_rate": 5.089319311723419e-08, + "logits/chosen": -0.3627270758152008, + "logits/rejected": -0.5207785964012146, + "logps/chosen": -149.60006713867188, + "logps/rejected": -223.32992553710938, + "loss": 1.2309, + "nll_loss": 0.9427006840705872, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.925708293914795, + "rewards/margins": 4.0009918212890625, + "rewards/rejected": 1.9247167110443115, + "step": 8910 + }, + { + "epoch": 0.4948613750156031, + "grad_norm": 47.99515151977539, + "learning_rate": 5.0806060283352636e-08, + "logits/chosen": -0.40103524923324585, + "logits/rejected": -0.508873462677002, + "logps/chosen": -192.48849487304688, + "logps/rejected": -243.2057342529297, + "loss": 1.2448, + "nll_loss": 1.0411288738250732, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.980090141296387, + "rewards/margins": 3.6297004222869873, + "rewards/rejected": 2.350389242172241, + "step": 8920 + }, + { + "epoch": 0.49541615234185377, + "grad_norm": 65.2103042602539, + "learning_rate": 5.0718925000878054e-08, + "logits/chosen": -0.4266236424446106, + "logits/rejected": -0.5535237193107605, + "logps/chosen": -175.2716064453125, + "logps/rejected": -242.2535400390625, + "loss": 1.1515, + "nll_loss": 0.9992551803588867, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.330218315124512, + "rewards/margins": 3.893296480178833, + "rewards/rejected": 2.436922550201416, + "step": 8930 + }, + { + "epoch": 0.49597092966810447, + "grad_norm": 101.24113464355469, + "learning_rate": 5.063178753450381e-08, + "logits/chosen": -0.13781292736530304, + "logits/rejected": -0.29342782497406006, + "logps/chosen": -114.6077880859375, + "logps/rejected": -174.44699096679688, + "loss": 1.2208, + "nll_loss": 0.7622730731964111, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.34787130355835, + "rewards/margins": 2.809565305709839, + "rewards/rejected": 1.5383061170578003, + "step": 8940 + }, + { + "epoch": 0.49652570699435516, + "grad_norm": 75.74856567382812, + "learning_rate": 5.0544648148930005e-08, + "logits/chosen": -0.2966635823249817, + "logits/rejected": -0.437977135181427, + "logps/chosen": -201.9712677001953, + "logps/rejected": -264.7134094238281, + "loss": 1.2483, + "nll_loss": 1.0638071298599243, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.1540117263793945, + "rewards/margins": 4.22914981842041, + "rewards/rejected": 1.9248619079589844, + "step": 8950 + }, + { + "epoch": 0.4970804843206058, + "grad_norm": 53.319393157958984, + "learning_rate": 5.0457507108862474e-08, + "logits/chosen": -0.20097801089286804, + "logits/rejected": -0.37530016899108887, + "logps/chosen": -157.8563690185547, + "logps/rejected": -205.6762237548828, + "loss": 1.3172, + "nll_loss": 0.9467114210128784, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.260502815246582, + "rewards/margins": 2.7744460105895996, + "rewards/rejected": 2.4860565662384033, + "step": 8960 + }, + { + "epoch": 0.4976352616468565, + "grad_norm": 103.03054809570312, + "learning_rate": 5.0370364679012134e-08, + "logits/chosen": -0.2561754286289215, + "logits/rejected": -0.4238462448120117, + "logps/chosen": -157.69876098632812, + "logps/rejected": -251.7804412841797, + "loss": 1.3476, + "nll_loss": 0.8771616220474243, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.624480247497559, + "rewards/margins": 4.1874284744262695, + "rewards/rejected": 1.4370521306991577, + "step": 8970 + }, + { + "epoch": 0.49819003897310715, + "grad_norm": 60.20049285888672, + "learning_rate": 5.028322112409412e-08, + "logits/chosen": -0.1408107429742813, + "logits/rejected": -0.3607892692089081, + "logps/chosen": -142.41387939453125, + "logps/rejected": -230.49667358398438, + "loss": 1.266, + "nll_loss": 0.7529163956642151, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.326937675476074, + "rewards/margins": 4.1454057693481445, + "rewards/rejected": 1.181531310081482, + "step": 8980 + }, + { + "epoch": 0.49874481629935785, + "grad_norm": 68.71036529541016, + "learning_rate": 5.019607670882696e-08, + "logits/chosen": -0.40607452392578125, + "logits/rejected": -0.4826090335845947, + "logps/chosen": -166.85350036621094, + "logps/rejected": -235.6752471923828, + "loss": 1.3243, + "nll_loss": 1.003832459449768, + "rewards/accuracies": 0.75, + "rewards/chosen": 6.251927852630615, + "rewards/margins": 3.4062037467956543, + "rewards/rejected": 2.845724105834961, + "step": 8990 + }, + { + "epoch": 0.4992995936256085, + "grad_norm": 53.574527740478516, + "learning_rate": 5.010893169793181e-08, + "logits/chosen": -0.19645099341869354, + "logits/rejected": -0.3450215458869934, + "logps/chosen": -160.8070068359375, + "logps/rejected": -215.6662139892578, + "loss": 1.2199, + "nll_loss": 0.9896215200424194, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.662204742431641, + "rewards/margins": 4.123475074768066, + "rewards/rejected": 1.5387299060821533, + "step": 9000 + }, + { + "epoch": 0.4992995936256085, + "eval_logits/chosen": -0.4024575352668762, + "eval_logits/rejected": -0.5044897198677063, + "eval_logps/chosen": -192.44297790527344, + "eval_logps/rejected": -259.1492919921875, + "eval_loss": 1.2387713193893433, + "eval_nll_loss": 0.9994342923164368, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.55182409286499, + "eval_rewards/margins": 4.558903217315674, + "eval_rewards/rejected": 1.9929208755493164, + "eval_runtime": 17.1585, + "eval_samples_per_second": 14.92, + "eval_steps_per_second": 1.865, + "step": 9000 + }, + { + "epoch": 0.4998543709518592, + "grad_norm": 54.899871826171875, + "learning_rate": 5.0021786356131635e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -167.99615478515625, + "logps/rejected": -205.74496459960938, + "loss": 1.3748, + "nll_loss": NaN, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.293550491333008, + "rewards/margins": 2.5636558532714844, + "rewards/rejected": 2.7298946380615234, + "step": 9010 + }, + { + "epoch": 0.5004091482781099, + "grad_norm": 58.67256546020508, + "learning_rate": 4.9934640948150405e-08, + "logits/chosen": -0.3223643898963928, + "logits/rejected": -0.47426262497901917, + "logps/chosen": -164.1899871826172, + "logps/rejected": -210.4023895263672, + "loss": 1.1899, + "nll_loss": 0.9342159032821655, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.415058612823486, + "rewards/margins": 3.5778567790985107, + "rewards/rejected": 1.837201476097107, + "step": 9020 + }, + { + "epoch": 0.5009639256043605, + "grad_norm": 49.9996223449707, + "learning_rate": 4.984749573871227e-08, + "logits/chosen": -0.3344659209251404, + "logits/rejected": -0.4435255527496338, + "logps/chosen": -145.72366333007812, + "logps/rejected": -205.8213348388672, + "loss": 1.2959, + "nll_loss": 0.9082239866256714, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.198209285736084, + "rewards/margins": 3.0010673999786377, + "rewards/rejected": 2.1971421241760254, + "step": 9030 + }, + { + "epoch": 0.5015187029306112, + "grad_norm": 47.84605026245117, + "learning_rate": 4.9760350992540836e-08, + "logits/chosen": -0.3379828631877899, + "logits/rejected": -0.48572176694869995, + "logps/chosen": -164.86590576171875, + "logps/rejected": -215.35763549804688, + "loss": 1.2641, + "nll_loss": 1.043215274810791, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.340142250061035, + "rewards/margins": 3.15108585357666, + "rewards/rejected": 2.189056396484375, + "step": 9040 + }, + { + "epoch": 0.5020734802568619, + "grad_norm": 69.17318725585938, + "learning_rate": 4.967320697435824e-08, + "logits/chosen": -0.43921709060668945, + "logits/rejected": -0.4727630615234375, + "logps/chosen": -218.1172637939453, + "logps/rejected": -279.47357177734375, + "loss": 1.3764, + "nll_loss": 1.1762627363204956, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 6.6129326820373535, + "rewards/margins": 3.7094714641571045, + "rewards/rejected": 2.90346097946167, + "step": 9050 + }, + { + "epoch": 0.5026282575831126, + "grad_norm": 45.45820999145508, + "learning_rate": 4.958606394888445e-08, + "logits/chosen": -0.23763099312782288, + "logits/rejected": -0.3468344211578369, + "logps/chosen": -150.2931671142578, + "logps/rejected": -200.9547576904297, + "loss": 1.2636, + "nll_loss": 0.8626793026924133, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.093936443328857, + "rewards/margins": 3.113210678100586, + "rewards/rejected": 1.9807260036468506, + "step": 9060 + }, + { + "epoch": 0.5031830349093632, + "grad_norm": 41.96353530883789, + "learning_rate": 4.949892218083638e-08, + "logits/chosen": -0.30053022503852844, + "logits/rejected": -0.4640938341617584, + "logps/chosen": -155.72662353515625, + "logps/rejected": -216.5644989013672, + "loss": 1.2078, + "nll_loss": 0.9101463556289673, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.764248847961426, + "rewards/margins": 3.896909236907959, + "rewards/rejected": 1.8673397302627563, + "step": 9070 + }, + { + "epoch": 0.503737812235614, + "grad_norm": 95.07010650634766, + "learning_rate": 4.941178193492713e-08, + "logits/chosen": -0.26831507682800293, + "logits/rejected": -0.3470011353492737, + "logps/chosen": -150.46742248535156, + "logps/rejected": -210.14584350585938, + "loss": 1.2531, + "nll_loss": 0.9086271524429321, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.583650588989258, + "rewards/margins": 2.7660763263702393, + "rewards/rejected": 2.8175745010375977, + "step": 9080 + }, + { + "epoch": 0.5042925895618646, + "grad_norm": 55.530311584472656, + "learning_rate": 4.932464347586522e-08, + "logits/chosen": -0.36913302540779114, + "logits/rejected": -0.43520718812942505, + "logps/chosen": -192.30624389648438, + "logps/rejected": -214.4044647216797, + "loss": 1.394, + "nll_loss": 1.1062183380126953, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.194088459014893, + "rewards/margins": 3.0849320888519287, + "rewards/rejected": 3.1091561317443848, + "step": 9090 + }, + { + "epoch": 0.5048473668881153, + "grad_norm": 45.58949661254883, + "learning_rate": 4.9237507068353705e-08, + "logits/chosen": -0.31153604388237, + "logits/rejected": -0.45507732033729553, + "logps/chosen": -167.9213104248047, + "logps/rejected": -238.15774536132812, + "loss": 1.2972, + "nll_loss": 0.9619966745376587, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.149403095245361, + "rewards/margins": 4.006768226623535, + "rewards/rejected": 2.142634630203247, + "step": 9100 + }, + { + "epoch": 0.5054021442143659, + "grad_norm": 43.78194808959961, + "learning_rate": 4.91503729770894e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -141.9030303955078, + "logps/rejected": -233.8895721435547, + "loss": 1.2054, + "nll_loss": NaN, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.323995113372803, + "rewards/margins": 3.1030120849609375, + "rewards/rejected": 2.2209832668304443, + "step": 9110 + }, + { + "epoch": 0.5059569215406167, + "grad_norm": 62.937625885009766, + "learning_rate": 4.906324146676212e-08, + "logits/chosen": -0.12656566500663757, + "logits/rejected": -0.3648197650909424, + "logps/chosen": -121.91495513916016, + "logps/rejected": -181.48928833007812, + "loss": 1.2619, + "nll_loss": 0.7313886880874634, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.360525608062744, + "rewards/margins": 3.2105624675750732, + "rewards/rejected": 1.1499627828598022, + "step": 9120 + }, + { + "epoch": 0.5065116988668673, + "grad_norm": 73.8137435913086, + "learning_rate": 4.897611280205377e-08, + "logits/chosen": -0.3255676031112671, + "logits/rejected": -0.4526425302028656, + "logps/chosen": -153.31971740722656, + "logps/rejected": -201.62258911132812, + "loss": 1.2252, + "nll_loss": 0.9077402353286743, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.789377212524414, + "rewards/margins": 3.29107928276062, + "rewards/rejected": 2.498297929763794, + "step": 9130 + }, + { + "epoch": 0.507066476193118, + "grad_norm": 136.17686462402344, + "learning_rate": 4.888898724763772e-08, + "logits/chosen": -0.37891626358032227, + "logits/rejected": -0.5509136319160461, + "logps/chosen": -166.04354858398438, + "logps/rejected": -227.5803680419922, + "loss": 1.334, + "nll_loss": 0.9960344433784485, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.237698078155518, + "rewards/margins": 3.9502997398376465, + "rewards/rejected": 2.2873973846435547, + "step": 9140 + }, + { + "epoch": 0.5076212535193687, + "grad_norm": 90.369384765625, + "learning_rate": 4.8801865068177804e-08, + "logits/chosen": -0.37518784403800964, + "logits/rejected": -0.48688000440597534, + "logps/chosen": -155.38690185546875, + "logps/rejected": -219.45492553710938, + "loss": 1.4013, + "nll_loss": 1.0259861946105957, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.525850296020508, + "rewards/margins": 3.486804962158203, + "rewards/rejected": 2.0390450954437256, + "step": 9150 + }, + { + "epoch": 0.5081760308456194, + "grad_norm": 62.08201599121094, + "learning_rate": 4.871474652832763e-08, + "logits/chosen": -0.5720881223678589, + "logits/rejected": -0.6331910490989685, + "logps/chosen": -214.214111328125, + "logps/rejected": -301.6823425292969, + "loss": 1.3024, + "nll_loss": 1.1901862621307373, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 7.2337236404418945, + "rewards/margins": 3.9172492027282715, + "rewards/rejected": 3.316474199295044, + "step": 9160 + }, + { + "epoch": 0.50873080817187, + "grad_norm": 104.32915496826172, + "learning_rate": 4.8627631892729755e-08, + "logits/chosen": -0.3542863726615906, + "logits/rejected": -0.47028714418411255, + "logps/chosen": -151.53012084960938, + "logps/rejected": -191.0808563232422, + "loss": 1.3353, + "nll_loss": 0.8963730931282043, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.543292999267578, + "rewards/margins": 3.2578887939453125, + "rewards/rejected": 2.2854042053222656, + "step": 9170 + }, + { + "epoch": 0.5092855854981206, + "grad_norm": 84.5309829711914, + "learning_rate": 4.854052142601485e-08, + "logits/chosen": -0.42346876859664917, + "logits/rejected": -0.45761674642562866, + "logps/chosen": -210.19479370117188, + "logps/rejected": -253.4579620361328, + "loss": 1.2789, + "nll_loss": 1.1564407348632812, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.436079978942871, + "rewards/margins": 3.428272247314453, + "rewards/rejected": 3.007807970046997, + "step": 9180 + }, + { + "epoch": 0.5098403628243714, + "grad_norm": 56.795047760009766, + "learning_rate": 4.8453415392800975e-08, + "logits/chosen": -0.34913143515586853, + "logits/rejected": -0.5700281262397766, + "logps/chosen": -198.15798950195312, + "logps/rejected": -287.1739196777344, + "loss": 1.3482, + "nll_loss": 0.9863710403442383, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.719827175140381, + "rewards/margins": 5.301693916320801, + "rewards/rejected": 1.4181333780288696, + "step": 9190 + }, + { + "epoch": 0.510395140150622, + "grad_norm": 66.92633056640625, + "learning_rate": 4.836631405769268e-08, + "logits/chosen": -0.33419111371040344, + "logits/rejected": -0.46793827414512634, + "logps/chosen": -167.11634826660156, + "logps/rejected": -215.9599151611328, + "loss": 1.2058, + "nll_loss": 0.9543848037719727, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.533473014831543, + "rewards/margins": 3.174680233001709, + "rewards/rejected": 2.358793020248413, + "step": 9200 + }, + { + "epoch": 0.5109499174768727, + "grad_norm": 61.43981170654297, + "learning_rate": 4.827921768528025e-08, + "logits/chosen": -0.25474053621292114, + "logits/rejected": -0.3410353660583496, + "logps/chosen": -163.89346313476562, + "logps/rejected": -216.3996124267578, + "loss": 1.3041, + "nll_loss": 0.9180054664611816, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.6906418800354, + "rewards/margins": 2.7940871715545654, + "rewards/rejected": 2.8965542316436768, + "step": 9210 + }, + { + "epoch": 0.5115046948031234, + "grad_norm": 94.89171600341797, + "learning_rate": 4.81921265401389e-08, + "logits/chosen": -0.30657464265823364, + "logits/rejected": -0.4406011998653412, + "logps/chosen": -154.62619018554688, + "logps/rejected": -199.08432006835938, + "loss": 1.2845, + "nll_loss": 0.9502051472663879, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.735767364501953, + "rewards/margins": 3.5599846839904785, + "rewards/rejected": 2.175783157348633, + "step": 9220 + }, + { + "epoch": 0.5120594721293741, + "grad_norm": 71.24378204345703, + "learning_rate": 4.810504088682795e-08, + "logits/chosen": -0.4792702794075012, + "logits/rejected": -0.5561822652816772, + "logps/chosen": -193.745849609375, + "logps/rejected": -241.57131958007812, + "loss": 1.2862, + "nll_loss": 1.1115574836730957, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 6.186019420623779, + "rewards/margins": 2.6864452362060547, + "rewards/rejected": 3.499574661254883, + "step": 9230 + }, + { + "epoch": 0.5126142494556247, + "grad_norm": 50.33415603637695, + "learning_rate": 4.8017960989890084e-08, + "logits/chosen": -0.275177538394928, + "logits/rejected": -0.39323943853378296, + "logps/chosen": -178.32994079589844, + "logps/rejected": -220.71353149414062, + "loss": 1.3117, + "nll_loss": 1.0821926593780518, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 4.963408946990967, + "rewards/margins": 2.5242600440979004, + "rewards/rejected": 2.4391491413116455, + "step": 9240 + }, + { + "epoch": 0.5131690267818754, + "grad_norm": 53.26866912841797, + "learning_rate": 4.793088711385044e-08, + "logits/chosen": -0.26483121514320374, + "logits/rejected": -0.4290854036808014, + "logps/chosen": -141.33038330078125, + "logps/rejected": -187.74212646484375, + "loss": 1.2095, + "nll_loss": 0.8839927911758423, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.510378360748291, + "rewards/margins": 3.708258867263794, + "rewards/rejected": 1.8021198511123657, + "step": 9250 + }, + { + "epoch": 0.5137238041081261, + "grad_norm": 58.61595153808594, + "learning_rate": 4.7843819523215904e-08, + "logits/chosen": -0.23977124691009521, + "logits/rejected": -0.3805224895477295, + "logps/chosen": -177.07260131835938, + "logps/rejected": -231.0491180419922, + "loss": 1.2659, + "nll_loss": 0.9728581309318542, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.701986789703369, + "rewards/margins": 3.4946236610412598, + "rewards/rejected": 2.2073636054992676, + "step": 9260 + }, + { + "epoch": 0.5142785814343768, + "grad_norm": 70.80389404296875, + "learning_rate": 4.7756758482474266e-08, + "logits/chosen": -0.33648785948753357, + "logits/rejected": -0.5013399720191956, + "logps/chosen": -190.47677612304688, + "logps/rejected": -251.017578125, + "loss": 1.2799, + "nll_loss": 1.0157908201217651, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.240078926086426, + "rewards/margins": 4.198624610900879, + "rewards/rejected": 2.0414538383483887, + "step": 9270 + }, + { + "epoch": 0.5148333587606274, + "grad_norm": 40.859596252441406, + "learning_rate": 4.766970425609338e-08, + "logits/chosen": -0.22810812294483185, + "logits/rejected": -0.43857163190841675, + "logps/chosen": -168.486083984375, + "logps/rejected": -222.28384399414062, + "loss": 1.1961, + "nll_loss": 0.9034191370010376, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.565457344055176, + "rewards/margins": 3.08699369430542, + "rewards/rejected": 2.478463649749756, + "step": 9280 + }, + { + "epoch": 0.5153881360868782, + "grad_norm": 60.25469970703125, + "learning_rate": 4.758265710852047e-08, + "logits/chosen": -0.3398720622062683, + "logits/rejected": -0.4708589017391205, + "logps/chosen": -140.80482482910156, + "logps/rejected": -200.8313751220703, + "loss": 1.2658, + "nll_loss": 1.0187203884124756, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.520662307739258, + "rewards/margins": 3.972774028778076, + "rewards/rejected": 1.5478891134262085, + "step": 9290 + }, + { + "epoch": 0.5159429134131288, + "grad_norm": 71.22685241699219, + "learning_rate": 4.749561730418121e-08, + "logits/chosen": -0.25368431210517883, + "logits/rejected": -0.3661794662475586, + "logps/chosen": -163.49826049804688, + "logps/rejected": -211.91513061523438, + "loss": 1.1583, + "nll_loss": 1.0610846281051636, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.6848297119140625, + "rewards/margins": 3.0135960578918457, + "rewards/rejected": 2.6712327003479004, + "step": 9300 + }, + { + "epoch": 0.5164976907393795, + "grad_norm": 40.56968688964844, + "learning_rate": 4.7408585107478966e-08, + "logits/chosen": -0.30613285303115845, + "logits/rejected": -0.463456928730011, + "logps/chosen": -157.46041870117188, + "logps/rejected": -230.9835968017578, + "loss": 1.1434, + "nll_loss": 0.8922632932662964, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.7883405685424805, + "rewards/margins": 4.518655776977539, + "rewards/rejected": 1.269684910774231, + "step": 9310 + }, + { + "epoch": 0.5170524680656302, + "grad_norm": 105.7554702758789, + "learning_rate": 4.7321560782794e-08, + "logits/chosen": -0.2926510274410248, + "logits/rejected": -0.4659983515739441, + "logps/chosen": -149.47518920898438, + "logps/rejected": -204.29615783691406, + "loss": 1.2726, + "nll_loss": 1.0316669940948486, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.572632789611816, + "rewards/margins": 3.1992945671081543, + "rewards/rejected": 2.373338460922241, + "step": 9320 + }, + { + "epoch": 0.5176072453918809, + "grad_norm": 45.35145950317383, + "learning_rate": 4.723454459448267e-08, + "logits/chosen": -0.39577716588974, + "logits/rejected": -0.5708610415458679, + "logps/chosen": -166.49581909179688, + "logps/rejected": -234.752197265625, + "loss": 1.2841, + "nll_loss": 1.0080711841583252, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.923181056976318, + "rewards/margins": 4.514263153076172, + "rewards/rejected": 1.408917784690857, + "step": 9330 + }, + { + "epoch": 0.5181620227181315, + "grad_norm": 49.20162582397461, + "learning_rate": 4.714753680687661e-08, + "logits/chosen": -0.29385411739349365, + "logits/rejected": -0.49700015783309937, + "logps/chosen": -186.43679809570312, + "logps/rejected": -231.0037841796875, + "loss": 1.2745, + "nll_loss": 0.9985346794128418, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.996167182922363, + "rewards/margins": 4.20475435256958, + "rewards/rejected": 1.7914127111434937, + "step": 9340 + }, + { + "epoch": 0.5187168000443821, + "grad_norm": 62.807979583740234, + "learning_rate": 4.706053768428194e-08, + "logits/chosen": -0.3283035159111023, + "logits/rejected": -0.45695775747299194, + "logps/chosen": -152.83058166503906, + "logps/rejected": -229.2628173828125, + "loss": 1.2467, + "nll_loss": 0.9692705869674683, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.828050136566162, + "rewards/margins": 3.535831928253174, + "rewards/rejected": 2.2922184467315674, + "step": 9350 + }, + { + "epoch": 0.5192715773706329, + "grad_norm": 50.36726379394531, + "learning_rate": 4.6973547490978464e-08, + "logits/chosen": -0.2728636860847473, + "logits/rejected": -0.3863973617553711, + "logps/chosen": -159.2672882080078, + "logps/rejected": -209.1708526611328, + "loss": 1.268, + "nll_loss": 0.9585272073745728, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.8927998542785645, + "rewards/margins": 4.422527313232422, + "rewards/rejected": 1.4702714681625366, + "step": 9360 + }, + { + "epoch": 0.5198263546968835, + "grad_norm": 40.91887283325195, + "learning_rate": 4.688656649121884e-08, + "logits/chosen": -0.2662855088710785, + "logits/rejected": -0.46767717599868774, + "logps/chosen": -149.1509246826172, + "logps/rejected": -214.30929565429688, + "loss": 1.1965, + "nll_loss": 0.9241575002670288, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.647675037384033, + "rewards/margins": 3.9500439167022705, + "rewards/rejected": 1.6976312398910522, + "step": 9370 + }, + { + "epoch": 0.5203811320231342, + "grad_norm": 42.508792877197266, + "learning_rate": 4.679959494922778e-08, + "logits/chosen": -0.3795866370201111, + "logits/rejected": -0.4913211464881897, + "logps/chosen": -180.6576385498047, + "logps/rejected": -218.34091186523438, + "loss": 1.3003, + "nll_loss": 1.0153512954711914, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.015936374664307, + "rewards/margins": 3.199903964996338, + "rewards/rejected": 2.8160321712493896, + "step": 9380 + }, + { + "epoch": 0.5209359093493849, + "grad_norm": 62.78444290161133, + "learning_rate": 4.6712633129201365e-08, + "logits/chosen": -0.34073182940483093, + "logits/rejected": -0.5074422955513, + "logps/chosen": -158.64015197753906, + "logps/rejected": -231.17691040039062, + "loss": 1.2725, + "nll_loss": 0.9371173977851868, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.835041046142578, + "rewards/margins": 4.626204490661621, + "rewards/rejected": 1.2088369131088257, + "step": 9390 + }, + { + "epoch": 0.5214906866756356, + "grad_norm": 58.16633987426758, + "learning_rate": 4.662568129530603e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -156.18252563476562, + "logps/rejected": -216.96456909179688, + "loss": 1.2165, + "nll_loss": NaN, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.5341057777404785, + "rewards/margins": 4.276349067687988, + "rewards/rejected": 1.2577569484710693, + "step": 9400 + }, + { + "epoch": 0.5220454640018862, + "grad_norm": 66.12977600097656, + "learning_rate": 4.6538739711677946e-08, + "logits/chosen": -0.2907131314277649, + "logits/rejected": -0.4284954071044922, + "logps/chosen": -148.6352996826172, + "logps/rejected": -196.28317260742188, + "loss": 1.2471, + "nll_loss": 0.9851943850517273, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.316567420959473, + "rewards/margins": 3.416973829269409, + "rewards/rejected": 1.899593710899353, + "step": 9410 + }, + { + "epoch": 0.5226002413281369, + "grad_norm": 104.19569396972656, + "learning_rate": 4.645180864242208e-08, + "logits/chosen": -0.2709147334098816, + "logits/rejected": -0.348580002784729, + "logps/chosen": -168.90243530273438, + "logps/rejected": -231.1278533935547, + "loss": 1.2662, + "nll_loss": 0.9691041111946106, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.149167060852051, + "rewards/margins": 3.7617859840393066, + "rewards/rejected": 2.3873813152313232, + "step": 9420 + }, + { + "epoch": 0.5231550186543876, + "grad_norm": 25.868131637573242, + "learning_rate": 4.636488835161151e-08, + "logits/chosen": -0.35118424892425537, + "logits/rejected": -0.5087558031082153, + "logps/chosen": -182.45474243164062, + "logps/rejected": -249.97134399414062, + "loss": 1.2443, + "nll_loss": 1.010013222694397, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.450409889221191, + "rewards/margins": 4.733773231506348, + "rewards/rejected": 1.7166366577148438, + "step": 9430 + }, + { + "epoch": 0.5237097959806383, + "grad_norm": 50.46853256225586, + "learning_rate": 4.6277979103286604e-08, + "logits/chosen": -0.2707952857017517, + "logits/rejected": -0.4313136637210846, + "logps/chosen": -158.99472045898438, + "logps/rejected": -214.5619354248047, + "loss": 1.3258, + "nll_loss": 0.9249902963638306, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.994052410125732, + "rewards/margins": 3.4994099140167236, + "rewards/rejected": 2.494642734527588, + "step": 9440 + }, + { + "epoch": 0.5242645733068889, + "grad_norm": 123.99307250976562, + "learning_rate": 4.6191081161454104e-08, + "logits/chosen": -0.3308565020561218, + "logits/rejected": -0.4992128312587738, + "logps/chosen": -195.032470703125, + "logps/rejected": -230.4774932861328, + "loss": 1.2792, + "nll_loss": 0.9932400584220886, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.721206188201904, + "rewards/margins": 3.06563138961792, + "rewards/rejected": 2.6555750370025635, + "step": 9450 + }, + { + "epoch": 0.5248193506331397, + "grad_norm": 40.34004592895508, + "learning_rate": 4.610419479008646e-08, + "logits/chosen": -0.2730061411857605, + "logits/rejected": -0.4122668206691742, + "logps/chosen": -177.38949584960938, + "logps/rejected": -219.531005859375, + "loss": 1.2804, + "nll_loss": 1.013285517692566, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.72647762298584, + "rewards/margins": 3.58758544921875, + "rewards/rejected": 2.1388919353485107, + "step": 9460 + }, + { + "epoch": 0.5253741279593903, + "grad_norm": 49.95905685424805, + "learning_rate": 4.601732025312094e-08, + "logits/chosen": -0.07873831689357758, + "logits/rejected": -0.35095566511154175, + "logps/chosen": -119.7325439453125, + "logps/rejected": -169.24378967285156, + "loss": 1.2533, + "nll_loss": 0.7773770093917847, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.6324920654296875, + "rewards/margins": 3.6657466888427734, + "rewards/rejected": 0.9667451977729797, + "step": 9470 + }, + { + "epoch": 0.525928905285641, + "grad_norm": 76.98511505126953, + "learning_rate": 4.5930457814458904e-08, + "logits/chosen": -0.2371709793806076, + "logits/rejected": -0.3776131272315979, + "logps/chosen": -136.942626953125, + "logps/rejected": -186.45135498046875, + "loss": 1.3325, + "nll_loss": 0.9275113940238953, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.964024543762207, + "rewards/margins": 4.096395015716553, + "rewards/rejected": 0.8676289319992065, + "step": 9480 + }, + { + "epoch": 0.5264836826118916, + "grad_norm": 31.29893684387207, + "learning_rate": 4.5843607737964936e-08, + "logits/chosen": -0.26233673095703125, + "logits/rejected": -0.3851124048233032, + "logps/chosen": -166.62661743164062, + "logps/rejected": -233.799072265625, + "loss": 1.2413, + "nll_loss": 1.006217122077942, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.642024040222168, + "rewards/margins": 4.011407852172852, + "rewards/rejected": 1.6306159496307373, + "step": 9490 + }, + { + "epoch": 0.5270384599381424, + "grad_norm": 68.17864227294922, + "learning_rate": 4.575677028746606e-08, + "logits/chosen": -0.31898969411849976, + "logits/rejected": -0.44344624876976013, + "logps/chosen": -159.30613708496094, + "logps/rejected": -202.1865234375, + "loss": 1.2221, + "nll_loss": 0.952996551990509, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.8765974044799805, + "rewards/margins": 4.293417930603027, + "rewards/rejected": 1.5831793546676636, + "step": 9500 + }, + { + "epoch": 0.5270384599381424, + "eval_logits/chosen": -0.3847997784614563, + "eval_logits/rejected": -0.48578494787216187, + "eval_logps/chosen": -191.77676391601562, + "eval_logps/rejected": -261.9651184082031, + "eval_loss": 1.2324037551879883, + "eval_nll_loss": 0.9943583011627197, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.618445873260498, + "eval_rewards/margins": 4.907104969024658, + "eval_rewards/rejected": 1.7113406658172607, + "eval_runtime": 16.8711, + "eval_samples_per_second": 15.174, + "eval_steps_per_second": 1.897, + "step": 9500 + }, + { + "epoch": 0.527593237264393, + "grad_norm": 63.11122131347656, + "learning_rate": 4.566994572675096e-08, + "logits/chosen": -0.33774352073669434, + "logits/rejected": -0.47510844469070435, + "logps/chosen": -159.69754028320312, + "logps/rejected": -214.65634155273438, + "loss": 1.3338, + "nll_loss": 0.9783695340156555, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 6.225392818450928, + "rewards/margins": 3.6365272998809814, + "rewards/rejected": 2.5888662338256836, + "step": 9510 + }, + { + "epoch": 0.5281480145906436, + "grad_norm": 27.57427215576172, + "learning_rate": 4.5583134319569135e-08, + "logits/chosen": -0.209748774766922, + "logits/rejected": -0.33275189995765686, + "logps/chosen": -148.1979217529297, + "logps/rejected": -178.30247497558594, + "loss": 1.2496, + "nll_loss": 0.9017229080200195, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.141815185546875, + "rewards/margins": 2.856659412384033, + "rewards/rejected": 2.285155773162842, + "step": 9520 + }, + { + "epoch": 0.5287027919168944, + "grad_norm": 61.95778274536133, + "learning_rate": 4.549633632963019e-08, + "logits/chosen": -0.2876083254814148, + "logits/rejected": -0.42835497856140137, + "logps/chosen": -196.5648956298828, + "logps/rejected": -240.12460327148438, + "loss": 1.2341, + "nll_loss": 0.9770992398262024, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.147914886474609, + "rewards/margins": 4.078275203704834, + "rewards/rejected": 2.069639205932617, + "step": 9530 + }, + { + "epoch": 0.529257569243145, + "grad_norm": 61.39414596557617, + "learning_rate": 4.540955202060293e-08, + "logits/chosen": -0.3222961723804474, + "logits/rejected": -0.5214440226554871, + "logps/chosen": -192.00732421875, + "logps/rejected": -298.51129150390625, + "loss": 1.2755, + "nll_loss": 0.9837193489074707, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.313649654388428, + "rewards/margins": 5.230317115783691, + "rewards/rejected": 1.0833323001861572, + "step": 9540 + }, + { + "epoch": 0.5298123465693957, + "grad_norm": 78.86589813232422, + "learning_rate": 4.532278165611458e-08, + "logits/chosen": -0.23326829075813293, + "logits/rejected": -0.39289242029190063, + "logps/chosen": -139.88034057617188, + "logps/rejected": -177.99623107910156, + "loss": 1.2034, + "nll_loss": 0.8656681180000305, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.314993858337402, + "rewards/margins": 3.008488178253174, + "rewards/rejected": 2.3065056800842285, + "step": 9550 + }, + { + "epoch": 0.5303671238956463, + "grad_norm": 163.3700714111328, + "learning_rate": 4.5236025499750055e-08, + "logits/chosen": -0.42433637380599976, + "logits/rejected": -0.5292702913284302, + "logps/chosen": -195.80093383789062, + "logps/rejected": -266.8990173339844, + "loss": 1.3414, + "nll_loss": 1.0810550451278687, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 6.120522499084473, + "rewards/margins": 3.1135170459747314, + "rewards/rejected": 3.007004976272583, + "step": 9560 + }, + { + "epoch": 0.5309219012218971, + "grad_norm": 39.24164962768555, + "learning_rate": 4.5149283815051045e-08, + "logits/chosen": -0.16320346295833588, + "logits/rejected": -0.2912190854549408, + "logps/chosen": -142.26515197753906, + "logps/rejected": -218.2663116455078, + "loss": 1.2095, + "nll_loss": 0.839637279510498, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.451222896575928, + "rewards/margins": 4.63043737411499, + "rewards/rejected": 0.8207852244377136, + "step": 9570 + }, + { + "epoch": 0.5314766785481477, + "grad_norm": 57.75300598144531, + "learning_rate": 4.506255686551537e-08, + "logits/chosen": -0.37713342905044556, + "logits/rejected": -0.5360641479492188, + "logps/chosen": -187.2853546142578, + "logps/rejected": -267.45574951171875, + "loss": 1.3412, + "nll_loss": 1.0303289890289307, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.236812114715576, + "rewards/margins": 4.56418514251709, + "rewards/rejected": 1.672626256942749, + "step": 9580 + }, + { + "epoch": 0.5320314558743984, + "grad_norm": 67.17675018310547, + "learning_rate": 4.497584491459601e-08, + "logits/chosen": -0.23767873644828796, + "logits/rejected": -0.42438793182373047, + "logps/chosen": -161.55792236328125, + "logps/rejected": -221.64810180664062, + "loss": 1.2575, + "nll_loss": 0.8866890072822571, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.824664115905762, + "rewards/margins": 4.561118125915527, + "rewards/rejected": 1.2635459899902344, + "step": 9590 + }, + { + "epoch": 0.5325862332006491, + "grad_norm": 58.9091796875, + "learning_rate": 4.48891482257004e-08, + "logits/chosen": -0.26320698857307434, + "logits/rejected": -0.3689182698726654, + "logps/chosen": -163.16287231445312, + "logps/rejected": -196.4301300048828, + "loss": 1.3376, + "nll_loss": 1.03379225730896, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.770537853240967, + "rewards/margins": 3.680947780609131, + "rewards/rejected": 2.089590072631836, + "step": 9600 + }, + { + "epoch": 0.5331410105268998, + "grad_norm": 61.47688293457031, + "learning_rate": 4.480246706218964e-08, + "logits/chosen": -0.2204124480485916, + "logits/rejected": -0.31043797731399536, + "logps/chosen": -161.21546936035156, + "logps/rejected": -218.0150909423828, + "loss": 1.306, + "nll_loss": 0.8733453750610352, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.2873029708862305, + "rewards/margins": 3.6474196910858154, + "rewards/rejected": 1.6398833990097046, + "step": 9610 + }, + { + "epoch": 0.5336957878531504, + "grad_norm": 48.654083251953125, + "learning_rate": 4.471580168737763e-08, + "logits/chosen": -0.20148694515228271, + "logits/rejected": -0.3757801055908203, + "logps/chosen": -156.82908630371094, + "logps/rejected": -218.42886352539062, + "loss": 1.3186, + "nll_loss": 0.9077130556106567, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.419076919555664, + "rewards/margins": 3.657808780670166, + "rewards/rejected": 1.7612682580947876, + "step": 9620 + }, + { + "epoch": 0.5342505651794012, + "grad_norm": 93.73970031738281, + "learning_rate": 4.462915236453037e-08, + "logits/chosen": -0.137271448969841, + "logits/rejected": -0.24905912578105927, + "logps/chosen": -151.14271545410156, + "logps/rejected": -197.56529235839844, + "loss": 1.2949, + "nll_loss": 0.8466536402702332, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.110020637512207, + "rewards/margins": 3.130384922027588, + "rewards/rejected": 1.9796355962753296, + "step": 9630 + }, + { + "epoch": 0.5348053425056518, + "grad_norm": 65.98847198486328, + "learning_rate": 4.4542519356865025e-08, + "logits/chosen": -0.17641180753707886, + "logits/rejected": -0.40553078055381775, + "logps/chosen": -115.76924133300781, + "logps/rejected": -193.11819458007812, + "loss": 1.176, + "nll_loss": 0.7737227082252502, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.877331733703613, + "rewards/margins": 3.8386740684509277, + "rewards/rejected": 1.0386579036712646, + "step": 9640 + }, + { + "epoch": 0.5353601198319025, + "grad_norm": 54.22218704223633, + "learning_rate": 4.445590292754926e-08, + "logits/chosen": -0.27017146348953247, + "logits/rejected": -0.44109922647476196, + "logps/chosen": -162.62646484375, + "logps/rejected": -268.144287109375, + "loss": 1.2267, + "nll_loss": 0.8913278579711914, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.323023319244385, + "rewards/margins": 4.736567497253418, + "rewards/rejected": 1.5864553451538086, + "step": 9650 + }, + { + "epoch": 0.5359148971581531, + "grad_norm": 54.476959228515625, + "learning_rate": 4.436930333970032e-08, + "logits/chosen": -0.38154515624046326, + "logits/rejected": -0.5023744702339172, + "logps/chosen": -179.91366577148438, + "logps/rejected": -209.097412109375, + "loss": 1.2911, + "nll_loss": 1.0336401462554932, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.029297828674316, + "rewards/margins": 3.258763551712036, + "rewards/rejected": 2.7705347537994385, + "step": 9660 + }, + { + "epoch": 0.5364696744844039, + "grad_norm": 41.79189682006836, + "learning_rate": 4.428272085638431e-08, + "logits/chosen": -0.23520343005657196, + "logits/rejected": -0.42612147331237793, + "logps/chosen": -166.47738647460938, + "logps/rejected": -210.00961303710938, + "loss": 1.181, + "nll_loss": 0.9167992472648621, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.926709175109863, + "rewards/margins": 4.105425834655762, + "rewards/rejected": 1.821282982826233, + "step": 9670 + }, + { + "epoch": 0.5370244518106545, + "grad_norm": 58.55911636352539, + "learning_rate": 4.4196155740615434e-08, + "logits/chosen": -0.3951946496963501, + "logits/rejected": -0.46657371520996094, + "logps/chosen": -187.66921997070312, + "logps/rejected": -247.8221893310547, + "loss": 1.2993, + "nll_loss": 1.1953412294387817, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.88215970993042, + "rewards/margins": 3.2391600608825684, + "rewards/rejected": 2.6429996490478516, + "step": 9680 + }, + { + "epoch": 0.5375792291369051, + "grad_norm": 67.51808166503906, + "learning_rate": 4.4109608255355066e-08, + "logits/chosen": -0.4071227014064789, + "logits/rejected": -0.4993719160556793, + "logps/chosen": -195.88389587402344, + "logps/rejected": -245.6428680419922, + "loss": 1.3397, + "nll_loss": 1.1394661664962769, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 6.183575630187988, + "rewards/margins": 3.207012891769409, + "rewards/rejected": 2.976562976837158, + "step": 9690 + }, + { + "epoch": 0.5381340064631559, + "grad_norm": 51.367610931396484, + "learning_rate": 4.4023078663511065e-08, + "logits/chosen": -0.32586947083473206, + "logits/rejected": -0.48920202255249023, + "logps/chosen": -170.09494018554688, + "logps/rejected": -206.68032836914062, + "loss": 1.2709, + "nll_loss": 0.9710670709609985, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.917175769805908, + "rewards/margins": 3.1624655723571777, + "rewards/rejected": 2.754709482192993, + "step": 9700 + }, + { + "epoch": 0.5386887837894065, + "grad_norm": 52.32279586791992, + "learning_rate": 4.393656722793689e-08, + "logits/chosen": -0.25959348678588867, + "logits/rejected": -0.41398563981056213, + "logps/chosen": -149.76123046875, + "logps/rejected": -193.00668334960938, + "loss": 1.3175, + "nll_loss": 0.936292827129364, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.5646162033081055, + "rewards/margins": 3.4865188598632812, + "rewards/rejected": 2.078097105026245, + "step": 9710 + }, + { + "epoch": 0.5392435611156572, + "grad_norm": 86.81396484375, + "learning_rate": 4.38500742114309e-08, + "logits/chosen": -0.2789912819862366, + "logits/rejected": -0.3998999297618866, + "logps/chosen": -200.81280517578125, + "logps/rejected": -261.4305114746094, + "loss": 1.2604, + "nll_loss": 1.061387300491333, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.401424407958984, + "rewards/margins": 3.613424777984619, + "rewards/rejected": 2.7879996299743652, + "step": 9720 + }, + { + "epoch": 0.5397983384419078, + "grad_norm": 75.32423400878906, + "learning_rate": 4.376359987673546e-08, + "logits/chosen": -0.26495617628097534, + "logits/rejected": -0.3843373954296112, + "logps/chosen": -143.46360778808594, + "logps/rejected": -198.763916015625, + "loss": 1.3028, + "nll_loss": 0.8967748880386353, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.674996852874756, + "rewards/margins": 3.3870913982391357, + "rewards/rejected": 2.287905216217041, + "step": 9730 + }, + { + "epoch": 0.5403531157681586, + "grad_norm": 37.746395111083984, + "learning_rate": 4.367714448653622e-08, + "logits/chosen": -0.24729077517986298, + "logits/rejected": -0.42625313997268677, + "logps/chosen": -166.9033966064453, + "logps/rejected": -199.24484252929688, + "loss": 1.3007, + "nll_loss": 1.0432840585708618, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.175162315368652, + "rewards/margins": 3.7920310497283936, + "rewards/rejected": 1.383131742477417, + "step": 9740 + }, + { + "epoch": 0.5409078930944092, + "grad_norm": 39.954654693603516, + "learning_rate": 4.3590708303461256e-08, + "logits/chosen": -0.24685220420360565, + "logits/rejected": -0.37900620698928833, + "logps/chosen": -172.75445556640625, + "logps/rejected": -252.6306610107422, + "loss": 1.2514, + "nll_loss": 0.9287413358688354, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.060909271240234, + "rewards/margins": 3.7459397315979004, + "rewards/rejected": 2.314969778060913, + "step": 9750 + }, + { + "epoch": 0.5414626704206599, + "grad_norm": 104.29252624511719, + "learning_rate": 4.350429159008029e-08, + "logits/chosen": 0.005691577680408955, + "logits/rejected": -0.12545771896839142, + "logps/chosen": -160.78424072265625, + "logps/rejected": -223.47946166992188, + "loss": 1.3204, + "nll_loss": 0.8255676031112671, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.460650444030762, + "rewards/margins": 3.252558469772339, + "rewards/rejected": 2.208092212677002, + "step": 9760 + }, + { + "epoch": 0.5420174477469106, + "grad_norm": 46.125762939453125, + "learning_rate": 4.341789460890391e-08, + "logits/chosen": -0.1777302771806717, + "logits/rejected": -0.33578577637672424, + "logps/chosen": -142.5837860107422, + "logps/rejected": -191.37918090820312, + "loss": 1.2636, + "nll_loss": 0.8883357048034668, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.09456729888916, + "rewards/margins": 2.4341490268707275, + "rewards/rejected": 2.6604175567626953, + "step": 9770 + }, + { + "epoch": 0.5425722250731613, + "grad_norm": 39.68583297729492, + "learning_rate": 4.3331517622382805e-08, + "logits/chosen": -0.22501273453235626, + "logits/rejected": -0.4059979319572449, + "logps/chosen": -159.4439239501953, + "logps/rejected": -229.55899047851562, + "loss": 1.2376, + "nll_loss": 0.900013267993927, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.4433722496032715, + "rewards/margins": 4.18411111831665, + "rewards/rejected": 1.2592613697052002, + "step": 9780 + }, + { + "epoch": 0.5431270023994119, + "grad_norm": 80.15180206298828, + "learning_rate": 4.324516089290688e-08, + "logits/chosen": -0.30077359080314636, + "logits/rejected": -0.41646233201026917, + "logps/chosen": -170.2849578857422, + "logps/rejected": -233.38253784179688, + "loss": 1.392, + "nll_loss": 0.9865023493766785, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.135927677154541, + "rewards/margins": 3.195493459701538, + "rewards/rejected": 2.940434217453003, + "step": 9790 + }, + { + "epoch": 0.5436817797256626, + "grad_norm": 62.00218200683594, + "learning_rate": 4.3158824682804495e-08, + "logits/chosen": -0.14447996020317078, + "logits/rejected": -0.2670975923538208, + "logps/chosen": -144.17227172851562, + "logps/rejected": -204.5887908935547, + "loss": 1.2697, + "nll_loss": 1.0111229419708252, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.344749450683594, + "rewards/margins": 3.5098907947540283, + "rewards/rejected": 1.8348582983016968, + "step": 9800 + }, + { + "epoch": 0.5442365570519133, + "grad_norm": 78.89068603515625, + "learning_rate": 4.3072509254341703e-08, + "logits/chosen": -0.29741746187210083, + "logits/rejected": -0.44821491837501526, + "logps/chosen": -179.11917114257812, + "logps/rejected": -236.12158203125, + "loss": 1.3057, + "nll_loss": 1.0103237628936768, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.0418572425842285, + "rewards/margins": 3.351210832595825, + "rewards/rejected": 2.6906466484069824, + "step": 9810 + }, + { + "epoch": 0.544791334378164, + "grad_norm": 53.10793685913086, + "learning_rate": 4.2986214869721414e-08, + "logits/chosen": -0.3061564564704895, + "logits/rejected": -0.4098960757255554, + "logps/chosen": -186.2776336669922, + "logps/rejected": -243.23342895507812, + "loss": 1.2418, + "nll_loss": 0.9879401922225952, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.188605308532715, + "rewards/margins": 4.205409049987793, + "rewards/rejected": 1.9831968545913696, + "step": 9820 + }, + { + "epoch": 0.5453461117044146, + "grad_norm": 57.80470275878906, + "learning_rate": 4.289994179108264e-08, + "logits/chosen": -0.16093483567237854, + "logits/rejected": -0.3503780961036682, + "logps/chosen": -138.66676330566406, + "logps/rejected": -183.24270629882812, + "loss": 1.2144, + "nll_loss": 0.8346014022827148, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.369721412658691, + "rewards/margins": 4.062706470489502, + "rewards/rejected": 1.307015299797058, + "step": 9830 + }, + { + "epoch": 0.5459008890306654, + "grad_norm": 95.7310562133789, + "learning_rate": 4.2813690280499635e-08, + "logits/chosen": -0.29143795371055603, + "logits/rejected": -0.3641031086444855, + "logps/chosen": -150.7353515625, + "logps/rejected": -200.9915008544922, + "loss": 1.3845, + "nll_loss": 1.0360045433044434, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.378697872161865, + "rewards/margins": 3.316908359527588, + "rewards/rejected": 2.0617895126342773, + "step": 9840 + }, + { + "epoch": 0.546455666356916, + "grad_norm": 49.37306213378906, + "learning_rate": 4.272746059998116e-08, + "logits/chosen": -0.37530088424682617, + "logits/rejected": -0.4528474807739258, + "logps/chosen": -190.97067260742188, + "logps/rejected": -257.160400390625, + "loss": 1.2963, + "nll_loss": 1.115810751914978, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.650545597076416, + "rewards/margins": 3.7628426551818848, + "rewards/rejected": 2.8877029418945312, + "step": 9850 + }, + { + "epoch": 0.5470104436831666, + "grad_norm": 60.86616134643555, + "learning_rate": 4.264125301146965e-08, + "logits/chosen": -0.3090120255947113, + "logits/rejected": -0.4056679308414459, + "logps/chosen": -162.8961944580078, + "logps/rejected": -217.0439453125, + "loss": 1.1644, + "nll_loss": 1.0370877981185913, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.953455924987793, + "rewards/margins": 3.510234832763672, + "rewards/rejected": 2.443220853805542, + "step": 9860 + }, + { + "epoch": 0.5475652210094173, + "grad_norm": 69.87907409667969, + "learning_rate": 4.2555067776840403e-08, + "logits/chosen": -0.1707712858915329, + "logits/rejected": -0.3207014799118042, + "logps/chosen": -162.30130004882812, + "logps/rejected": -231.8777313232422, + "loss": 1.2908, + "nll_loss": 0.9085710644721985, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.92129373550415, + "rewards/margins": 4.297608852386475, + "rewards/rejected": 1.6236846446990967, + "step": 9870 + }, + { + "epoch": 0.548119998335668, + "grad_norm": 76.71466064453125, + "learning_rate": 4.24689051579009e-08, + "logits/chosen": -0.21910777688026428, + "logits/rejected": -0.34946125745773315, + "logps/chosen": -162.58026123046875, + "logps/rejected": -215.5464324951172, + "loss": 1.2457, + "nll_loss": 0.9269440770149231, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.775429725646973, + "rewards/margins": 4.113760471343994, + "rewards/rejected": 1.6616685390472412, + "step": 9880 + }, + { + "epoch": 0.5486747756619187, + "grad_norm": 48.44591522216797, + "learning_rate": 4.238276541638984e-08, + "logits/chosen": -0.20550286769866943, + "logits/rejected": -0.38304099440574646, + "logps/chosen": -153.71751403808594, + "logps/rejected": -231.9537353515625, + "loss": 1.2689, + "nll_loss": 0.8824397325515747, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.776326656341553, + "rewards/margins": 4.239264011383057, + "rewards/rejected": 1.5370631217956543, + "step": 9890 + }, + { + "epoch": 0.5492295529881693, + "grad_norm": 54.35772705078125, + "learning_rate": 4.2296648813976446e-08, + "logits/chosen": -0.24491152167320251, + "logits/rejected": -0.4041506350040436, + "logps/chosen": -169.57485961914062, + "logps/rejected": -233.47744750976562, + "loss": 1.2242, + "nll_loss": 1.0028597116470337, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.852323055267334, + "rewards/margins": 3.8250155448913574, + "rewards/rejected": 2.0273072719573975, + "step": 9900 + }, + { + "epoch": 0.5497843303144201, + "grad_norm": 54.7266731262207, + "learning_rate": 4.221055561225965e-08, + "logits/chosen": -0.2844238579273224, + "logits/rejected": -0.43756571412086487, + "logps/chosen": -162.778076171875, + "logps/rejected": -210.55270385742188, + "loss": 1.2546, + "nll_loss": 0.8987213373184204, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.959184646606445, + "rewards/margins": 4.301226615905762, + "rewards/rejected": 1.6579582691192627, + "step": 9910 + }, + { + "epoch": 0.5503391076406707, + "grad_norm": 67.63874816894531, + "learning_rate": 4.212448607276729e-08, + "logits/chosen": -0.24086585640907288, + "logits/rejected": -0.4369584918022156, + "logps/chosen": -155.12686157226562, + "logps/rejected": -214.6593017578125, + "loss": 1.3173, + "nll_loss": 0.8765009045600891, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.212924003601074, + "rewards/margins": 2.9610352516174316, + "rewards/rejected": 2.2518887519836426, + "step": 9920 + }, + { + "epoch": 0.5508938849669214, + "grad_norm": 58.325653076171875, + "learning_rate": 4.203844045695538e-08, + "logits/chosen": -0.19549937546253204, + "logits/rejected": -0.32256126403808594, + "logps/chosen": -148.02536010742188, + "logps/rejected": -201.8097381591797, + "loss": 1.2249, + "nll_loss": 0.8750473260879517, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.65772008895874, + "rewards/margins": 3.355874538421631, + "rewards/rejected": 2.301845073699951, + "step": 9930 + }, + { + "epoch": 0.5514486622931721, + "grad_norm": 61.73947525024414, + "learning_rate": 4.19524190262072e-08, + "logits/chosen": -0.3503522574901581, + "logits/rejected": -0.4940328598022461, + "logps/chosen": -201.44822692871094, + "logps/rejected": -299.6861877441406, + "loss": 1.2018, + "nll_loss": 1.0702818632125854, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.758185386657715, + "rewards/margins": 5.5747480392456055, + "rewards/rejected": 1.1834368705749512, + "step": 9940 + }, + { + "epoch": 0.5520034396194228, + "grad_norm": 86.86346435546875, + "learning_rate": 4.186642204183258e-08, + "logits/chosen": -0.20920626819133759, + "logits/rejected": -0.3422732651233673, + "logps/chosen": -149.57943725585938, + "logps/rejected": -201.55697631835938, + "loss": 1.35, + "nll_loss": 0.9252266883850098, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.3443379402160645, + "rewards/margins": 3.2025299072265625, + "rewards/rejected": 2.141807794570923, + "step": 9950 + }, + { + "epoch": 0.5525582169456734, + "grad_norm": 41.97013854980469, + "learning_rate": 4.17804497650671e-08, + "logits/chosen": -0.40197426080703735, + "logits/rejected": -0.4947798252105713, + "logps/chosen": -180.4291534423828, + "logps/rejected": -249.99404907226562, + "loss": 1.2468, + "nll_loss": 1.0687518119812012, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.4140801429748535, + "rewards/margins": 4.156526565551758, + "rewards/rejected": 2.2575535774230957, + "step": 9960 + }, + { + "epoch": 0.5531129942719241, + "grad_norm": 40.65499496459961, + "learning_rate": 4.169450245707125e-08, + "logits/chosen": -0.25668230652809143, + "logits/rejected": -0.43165111541748047, + "logps/chosen": -163.89199829101562, + "logps/rejected": -226.5345001220703, + "loss": 1.2065, + "nll_loss": 0.9410833120346069, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.106402397155762, + "rewards/margins": 3.919654130935669, + "rewards/rejected": 2.1867482662200928, + "step": 9970 + }, + { + "epoch": 0.5536677715981748, + "grad_norm": 38.79108810424805, + "learning_rate": 4.160858037892973e-08, + "logits/chosen": -0.176131471991539, + "logits/rejected": -0.38565102219581604, + "logps/chosen": -157.2784881591797, + "logps/rejected": -213.25076293945312, + "loss": 1.1965, + "nll_loss": 0.8632275462150574, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.779348373413086, + "rewards/margins": 4.213487148284912, + "rewards/rejected": 1.5658613443374634, + "step": 9980 + }, + { + "epoch": 0.5542225489244255, + "grad_norm": 92.07147979736328, + "learning_rate": 4.152268379165054e-08, + "logits/chosen": -0.2727094292640686, + "logits/rejected": -0.42925962805747986, + "logps/chosen": -166.07444763183594, + "logps/rejected": -226.11392211914062, + "loss": 1.3282, + "nll_loss": 0.9418858289718628, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.982013702392578, + "rewards/margins": 3.9414381980895996, + "rewards/rejected": 2.0405755043029785, + "step": 9990 + }, + { + "epoch": 0.5547773262506761, + "grad_norm": 66.82201385498047, + "learning_rate": 4.143681295616429e-08, + "logits/chosen": -0.30561619997024536, + "logits/rejected": -0.4442395269870758, + "logps/chosen": -172.85299682617188, + "logps/rejected": -208.4752655029297, + "loss": 1.2903, + "nll_loss": 0.9878283739089966, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.75870418548584, + "rewards/margins": 3.4746947288513184, + "rewards/rejected": 2.2840092182159424, + "step": 10000 + }, + { + "epoch": 0.5547773262506761, + "eval_logits/chosen": -0.37051522731781006, + "eval_logits/rejected": -0.4826039671897888, + "eval_logps/chosen": -191.36302185058594, + "eval_logps/rejected": -262.9516296386719, + "eval_loss": 1.2380090951919556, + "eval_nll_loss": 0.9910838603973389, + "eval_rewards/accuracies": 0.9375, + "eval_rewards/chosen": 6.659819602966309, + "eval_rewards/margins": 5.047131538391113, + "eval_rewards/rejected": 1.6126880645751953, + "eval_runtime": 17.0446, + "eval_samples_per_second": 15.019, + "eval_steps_per_second": 1.877, + "step": 10000 + }, + { + "epoch": 0.5553321035769269, + "grad_norm": 58.526893615722656, + "learning_rate": 4.135096813332333e-08, + "logits/chosen": -0.30008548498153687, + "logits/rejected": -0.3977207839488983, + "logps/chosen": -166.9575958251953, + "logps/rejected": -219.8026885986328, + "loss": 1.2512, + "nll_loss": 1.047019600868225, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.596224784851074, + "rewards/margins": 3.707197904586792, + "rewards/rejected": 1.8890268802642822, + "step": 10010 + }, + { + "epoch": 0.5558868809031775, + "grad_norm": 66.96456909179688, + "learning_rate": 4.126514958390099e-08, + "logits/chosen": -0.2781417965888977, + "logits/rejected": -0.4583090841770172, + "logps/chosen": -158.1600799560547, + "logps/rejected": -217.20059204101562, + "loss": 1.2551, + "nll_loss": 0.959384560585022, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.230644702911377, + "rewards/margins": 3.299389362335205, + "rewards/rejected": 1.9312553405761719, + "step": 10020 + }, + { + "epoch": 0.5564416582294281, + "grad_norm": 41.96564865112305, + "learning_rate": 4.1179357568590836e-08, + "logits/chosen": -0.16881588101387024, + "logits/rejected": -0.3409457802772522, + "logps/chosen": -135.94915771484375, + "logps/rejected": -201.0962677001953, + "loss": 1.202, + "nll_loss": 0.8159046173095703, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.185823917388916, + "rewards/margins": 4.460925579071045, + "rewards/rejected": 0.7248983383178711, + "step": 10030 + }, + { + "epoch": 0.5569964355556788, + "grad_norm": 120.59881591796875, + "learning_rate": 4.109359234800579e-08, + "logits/chosen": -0.1846725046634674, + "logits/rejected": -0.36268824338912964, + "logps/chosen": -168.23143005371094, + "logps/rejected": -227.4480743408203, + "loss": 1.2565, + "nll_loss": 0.8777807354927063, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.901741027832031, + "rewards/margins": 3.807669162750244, + "rewards/rejected": 2.09407114982605, + "step": 10040 + }, + { + "epoch": 0.5575512128819295, + "grad_norm": 51.784149169921875, + "learning_rate": 4.1007854182677384e-08, + "logits/chosen": -0.08302908390760422, + "logits/rejected": -0.302766889333725, + "logps/chosen": -132.2078857421875, + "logps/rejected": -199.78074645996094, + "loss": 1.2573, + "nll_loss": 0.7507218718528748, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.031310081481934, + "rewards/margins": 3.765636920928955, + "rewards/rejected": 1.2656733989715576, + "step": 10050 + }, + { + "epoch": 0.5581059902081802, + "grad_norm": 44.574737548828125, + "learning_rate": 4.092214333305496e-08, + "logits/chosen": -0.17102904617786407, + "logits/rejected": -0.32034677267074585, + "logps/chosen": -147.87527465820312, + "logps/rejected": -222.16213989257812, + "loss": 1.3022, + "nll_loss": 0.8825603723526001, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.455257892608643, + "rewards/margins": 3.493680953979492, + "rewards/rejected": 1.9615771770477295, + "step": 10060 + }, + { + "epoch": 0.5586607675344308, + "grad_norm": 93.73436737060547, + "learning_rate": 4.0836460059504875e-08, + "logits/chosen": -0.16540075838565826, + "logits/rejected": -0.34344834089279175, + "logps/chosen": -149.68399047851562, + "logps/rejected": -181.32241821289062, + "loss": 1.2071, + "nll_loss": 0.8493715524673462, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.781062126159668, + "rewards/margins": 3.1879143714904785, + "rewards/rejected": 1.5931479930877686, + "step": 10070 + }, + { + "epoch": 0.5592155448606816, + "grad_norm": 73.01387786865234, + "learning_rate": 4.075080462230976e-08, + "logits/chosen": -0.2254190742969513, + "logits/rejected": -0.3859403431415558, + "logps/chosen": -163.48228454589844, + "logps/rejected": -199.42469787597656, + "loss": 1.2813, + "nll_loss": 0.9542709589004517, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.255620002746582, + "rewards/margins": 3.1761951446533203, + "rewards/rejected": 2.0794243812561035, + "step": 10080 + }, + { + "epoch": 0.5597703221869322, + "grad_norm": 60.718994140625, + "learning_rate": 4.066517728166765e-08, + "logits/chosen": -0.3345080316066742, + "logits/rejected": -0.43725553154945374, + "logps/chosen": -169.29473876953125, + "logps/rejected": -217.14334106445312, + "loss": 1.2574, + "nll_loss": 1.0213162899017334, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.477988243103027, + "rewards/margins": 3.135385751724243, + "rewards/rejected": 2.342602252960205, + "step": 10090 + }, + { + "epoch": 0.5603250995131829, + "grad_norm": 154.75498962402344, + "learning_rate": 4.0579578297691226e-08, + "logits/chosen": -0.20659950375556946, + "logits/rejected": -0.3297533392906189, + "logps/chosen": -164.67564392089844, + "logps/rejected": -219.97640991210938, + "loss": 1.3394, + "nll_loss": 0.9876292943954468, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.401577949523926, + "rewards/margins": 3.6367709636688232, + "rewards/rejected": 1.7648069858551025, + "step": 10100 + }, + { + "epoch": 0.5608798768394335, + "grad_norm": 52.578983306884766, + "learning_rate": 4.0494007930407046e-08, + "logits/chosen": -0.31942233443260193, + "logits/rejected": -0.4834202826023102, + "logps/chosen": -178.60403442382812, + "logps/rejected": -239.25717163085938, + "loss": 1.2078, + "nll_loss": 0.9782170057296753, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.414088249206543, + "rewards/margins": 4.0510382652282715, + "rewards/rejected": 2.3630499839782715, + "step": 10110 + }, + { + "epoch": 0.5614346541656843, + "grad_norm": 45.17326736450195, + "learning_rate": 4.040846643975473e-08, + "logits/chosen": -0.48407474160194397, + "logits/rejected": -0.5528632402420044, + "logps/chosen": -185.46778869628906, + "logps/rejected": -264.75201416015625, + "loss": 1.1985, + "nll_loss": 1.0744699239730835, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.133168697357178, + "rewards/margins": 3.387519121170044, + "rewards/rejected": 2.745649576187134, + "step": 10120 + }, + { + "epoch": 0.5619894314919349, + "grad_norm": 77.66555786132812, + "learning_rate": 4.032295408558619e-08, + "logits/chosen": -0.341526597738266, + "logits/rejected": -0.5378842949867249, + "logps/chosen": -184.19808959960938, + "logps/rejected": -249.416015625, + "loss": 1.1846, + "nll_loss": 0.9458611607551575, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.309676170349121, + "rewards/margins": 4.639254570007324, + "rewards/rejected": 1.6704213619232178, + "step": 10130 + }, + { + "epoch": 0.5625442088181856, + "grad_norm": 51.0809326171875, + "learning_rate": 4.023747112766482e-08, + "logits/chosen": -0.24474194645881653, + "logits/rejected": -0.4141760468482971, + "logps/chosen": -166.1620635986328, + "logps/rejected": -223.5570831298828, + "loss": 1.2322, + "nll_loss": 0.9518367648124695, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.8418779373168945, + "rewards/margins": 3.5767345428466797, + "rewards/rejected": 2.2651429176330566, + "step": 10140 + }, + { + "epoch": 0.5630989861444363, + "grad_norm": 52.535606384277344, + "learning_rate": 4.0152017825664705e-08, + "logits/chosen": -0.15986858308315277, + "logits/rejected": -0.4056181013584137, + "logps/chosen": -183.78421020507812, + "logps/rejected": -270.3041076660156, + "loss": 1.1969, + "nll_loss": 0.8582059144973755, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.990294933319092, + "rewards/margins": 5.1435980796813965, + "rewards/rejected": 0.8466971516609192, + "step": 10150 + }, + { + "epoch": 0.563653763470687, + "grad_norm": 32.545166015625, + "learning_rate": 4.006659443916987e-08, + "logits/chosen": -0.23594574630260468, + "logits/rejected": -0.3522702157497406, + "logps/chosen": -153.6573944091797, + "logps/rejected": -226.1883544921875, + "loss": 1.1999, + "nll_loss": 0.935680091381073, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.601001262664795, + "rewards/margins": 3.8492636680603027, + "rewards/rejected": 1.7517372369766235, + "step": 10160 + }, + { + "epoch": 0.5642085407969376, + "grad_norm": 122.06332397460938, + "learning_rate": 3.9981201227673424e-08, + "logits/chosen": -0.2976406514644623, + "logits/rejected": -0.43046554923057556, + "logps/chosen": -148.02928161621094, + "logps/rejected": -189.18588256835938, + "loss": 1.3542, + "nll_loss": 0.9401167631149292, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.531824111938477, + "rewards/margins": 3.1392407417297363, + "rewards/rejected": 2.3925833702087402, + "step": 10170 + }, + { + "epoch": 0.5647633181231883, + "grad_norm": 74.4186782836914, + "learning_rate": 3.989583845057688e-08, + "logits/chosen": -0.293215811252594, + "logits/rejected": -0.4330647587776184, + "logps/chosen": -166.47183227539062, + "logps/rejected": -227.81460571289062, + "loss": 1.2225, + "nll_loss": 0.9029404520988464, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.619418144226074, + "rewards/margins": 3.53371000289917, + "rewards/rejected": 2.085707902908325, + "step": 10180 + }, + { + "epoch": 0.565318095449439, + "grad_norm": 50.121543884277344, + "learning_rate": 3.9810506367189226e-08, + "logits/chosen": -0.23663277924060822, + "logits/rejected": -0.4094238877296448, + "logps/chosen": -141.23477172851562, + "logps/rejected": -214.24105834960938, + "loss": 1.217, + "nll_loss": 0.841219425201416, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.489045143127441, + "rewards/margins": 3.193114995956421, + "rewards/rejected": 2.295930862426758, + "step": 10190 + }, + { + "epoch": 0.5658728727756897, + "grad_norm": 29.837879180908203, + "learning_rate": 3.972520523672626e-08, + "logits/chosen": -0.21812088787555695, + "logits/rejected": -0.41568484902381897, + "logps/chosen": -146.0269317626953, + "logps/rejected": -192.945068359375, + "loss": 1.2653, + "nll_loss": 0.8905706405639648, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.3971052169799805, + "rewards/margins": 3.420079469680786, + "rewards/rejected": 1.9770259857177734, + "step": 10200 + }, + { + "epoch": 0.5664276501019403, + "grad_norm": 34.43855667114258, + "learning_rate": 3.963993531830973e-08, + "logits/chosen": -0.27292150259017944, + "logits/rejected": -0.45809444785118103, + "logps/chosen": -137.93099975585938, + "logps/rejected": -193.16009521484375, + "loss": 1.1547, + "nll_loss": 0.792144775390625, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.233412265777588, + "rewards/margins": 3.822312593460083, + "rewards/rejected": 1.4110995531082153, + "step": 10210 + }, + { + "epoch": 0.566982427428191, + "grad_norm": 38.51008224487305, + "learning_rate": 3.9554696870966566e-08, + "logits/chosen": -0.19013547897338867, + "logits/rejected": -0.38051319122314453, + "logps/chosen": -131.56764221191406, + "logps/rejected": -188.43069458007812, + "loss": 1.3362, + "nll_loss": 0.8542930483818054, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.759130954742432, + "rewards/margins": 3.1565465927124023, + "rewards/rejected": 1.6025844812393188, + "step": 10220 + }, + { + "epoch": 0.5675372047544417, + "grad_norm": 118.84576416015625, + "learning_rate": 3.9469490153628124e-08, + "logits/chosen": -0.21577802300453186, + "logits/rejected": -0.44243812561035156, + "logps/chosen": -150.70347595214844, + "logps/rejected": -203.06527709960938, + "loss": 1.23, + "nll_loss": 0.8202966451644897, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.528652191162109, + "rewards/margins": 4.659850120544434, + "rewards/rejected": 0.8688012957572937, + "step": 10230 + }, + { + "epoch": 0.5680919820806923, + "grad_norm": 41.868560791015625, + "learning_rate": 3.938431542512936e-08, + "logits/chosen": -0.24971739947795868, + "logits/rejected": -0.44349947571754456, + "logps/chosen": -148.19473266601562, + "logps/rejected": -202.19549560546875, + "loss": 1.2403, + "nll_loss": 0.8570820689201355, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.79079532623291, + "rewards/margins": 3.260967969894409, + "rewards/rejected": 2.5298266410827637, + "step": 10240 + }, + { + "epoch": 0.5686467594069431, + "grad_norm": 69.5272445678711, + "learning_rate": 3.9299172944208036e-08, + "logits/chosen": -0.47588104009628296, + "logits/rejected": -0.6104357838630676, + "logps/chosen": -205.761962890625, + "logps/rejected": -272.7579650878906, + "loss": 1.3107, + "nll_loss": 1.0669199228286743, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.797511100769043, + "rewards/margins": 3.980442762374878, + "rewards/rejected": 2.8170692920684814, + "step": 10250 + }, + { + "epoch": 0.5692015367331937, + "grad_norm": 47.80853271484375, + "learning_rate": 3.9214062969503995e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -149.69068908691406, + "logps/rejected": -196.16390991210938, + "loss": 1.1962, + "nll_loss": NaN, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.366769313812256, + "rewards/margins": 3.5515894889831543, + "rewards/rejected": 1.815179467201233, + "step": 10260 + }, + { + "epoch": 0.5697563140594444, + "grad_norm": 59.806941986083984, + "learning_rate": 3.912898575955826e-08, + "logits/chosen": -0.24707865715026855, + "logits/rejected": -0.4839915633201599, + "logps/chosen": -131.5078582763672, + "logps/rejected": -189.6587677001953, + "loss": 1.2316, + "nll_loss": 0.7790622711181641, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 4.850626468658447, + "rewards/margins": 3.8297977447509766, + "rewards/rejected": 1.0208286046981812, + "step": 10270 + }, + { + "epoch": 0.570311091385695, + "grad_norm": 55.08845520019531, + "learning_rate": 3.9043941572812436e-08, + "logits/chosen": -0.40836301445961, + "logits/rejected": -0.5811692476272583, + "logps/chosen": -166.8919219970703, + "logps/rejected": -252.9898681640625, + "loss": 1.2585, + "nll_loss": 0.9906598329544067, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.0761308670043945, + "rewards/margins": 4.707409858703613, + "rewards/rejected": 1.36872136592865, + "step": 10280 + }, + { + "epoch": 0.5708658687119458, + "grad_norm": 46.97995376586914, + "learning_rate": 3.8958930667607734e-08, + "logits/chosen": -0.21153624355793, + "logits/rejected": -0.43253570795059204, + "logps/chosen": -117.1732177734375, + "logps/rejected": -182.49441528320312, + "loss": 1.1881, + "nll_loss": 0.7652009725570679, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.728806018829346, + "rewards/margins": 3.9170024394989014, + "rewards/rejected": 0.8118033409118652, + "step": 10290 + }, + { + "epoch": 0.5714206460381964, + "grad_norm": 57.82447052001953, + "learning_rate": 3.887395330218428e-08, + "logits/chosen": -0.2495851218700409, + "logits/rejected": -0.39650827646255493, + "logps/chosen": -152.25137329101562, + "logps/rejected": -205.52682495117188, + "loss": 1.2071, + "nll_loss": 0.8481658101081848, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.563853740692139, + "rewards/margins": 4.029789924621582, + "rewards/rejected": 1.534063458442688, + "step": 10300 + }, + { + "epoch": 0.5719754233644471, + "grad_norm": 56.055057525634766, + "learning_rate": 3.878900973468031e-08, + "logits/chosen": -0.4390603005886078, + "logits/rejected": -0.5407704710960388, + "logps/chosen": -184.212890625, + "logps/rejected": -237.96286010742188, + "loss": 1.3313, + "nll_loss": 1.051011323928833, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.2777605056762695, + "rewards/margins": 4.191825866699219, + "rewards/rejected": 2.08593487739563, + "step": 10310 + }, + { + "epoch": 0.5725302006906978, + "grad_norm": 42.29498291015625, + "learning_rate": 3.87041002231314e-08, + "logits/chosen": -0.34510624408721924, + "logits/rejected": -0.5676770210266113, + "logps/chosen": -180.16378784179688, + "logps/rejected": -238.06961059570312, + "loss": 1.2451, + "nll_loss": 0.9839975237846375, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.807919979095459, + "rewards/margins": 4.66348934173584, + "rewards/rejected": 1.1444305181503296, + "step": 10320 + }, + { + "epoch": 0.5730849780169485, + "grad_norm": 43.10969543457031, + "learning_rate": 3.8619225025469684e-08, + "logits/chosen": -0.3275555670261383, + "logits/rejected": -0.47744303941726685, + "logps/chosen": -163.6125030517578, + "logps/rejected": -227.3212890625, + "loss": 1.2735, + "nll_loss": 0.9478472471237183, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.324474334716797, + "rewards/margins": 4.421574592590332, + "rewards/rejected": 1.902899980545044, + "step": 10330 + }, + { + "epoch": 0.5736397553431991, + "grad_norm": 47.634117126464844, + "learning_rate": 3.853438439952304e-08, + "logits/chosen": -0.3265872001647949, + "logits/rejected": -0.4961087703704834, + "logps/chosen": -192.4685821533203, + "logps/rejected": -227.6434783935547, + "loss": 1.2575, + "nll_loss": 0.9580524563789368, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.888747215270996, + "rewards/margins": 3.3545432090759277, + "rewards/rejected": 2.53420352935791, + "step": 10340 + }, + { + "epoch": 0.5741945326694498, + "grad_norm": 84.28260040283203, + "learning_rate": 3.844957860301433e-08, + "logits/chosen": -0.15577220916748047, + "logits/rejected": -0.37774962186813354, + "logps/chosen": -134.08277893066406, + "logps/rejected": -179.55654907226562, + "loss": 1.2275, + "nll_loss": 0.8151863813400269, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.769875526428223, + "rewards/margins": 3.951662540435791, + "rewards/rejected": 0.8182132840156555, + "step": 10350 + }, + { + "epoch": 0.5747493099957005, + "grad_norm": 65.98307037353516, + "learning_rate": 3.836480789356063e-08, + "logits/chosen": -0.3224945366382599, + "logits/rejected": -0.5012255311012268, + "logps/chosen": -182.64144897460938, + "logps/rejected": -218.14675903320312, + "loss": 1.2064, + "nll_loss": 1.0050209760665894, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.930918216705322, + "rewards/margins": 3.0051004886627197, + "rewards/rejected": 2.9258179664611816, + "step": 10360 + }, + { + "epoch": 0.5753040873219512, + "grad_norm": 68.94466400146484, + "learning_rate": 3.828007252867239e-08, + "logits/chosen": -0.1657598316669464, + "logits/rejected": -0.330579936504364, + "logps/chosen": -140.05340576171875, + "logps/rejected": -205.56869506835938, + "loss": 1.2834, + "nll_loss": 0.8427098393440247, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.017988681793213, + "rewards/margins": 3.076979398727417, + "rewards/rejected": 1.9410085678100586, + "step": 10370 + }, + { + "epoch": 0.5758588646482018, + "grad_norm": 83.23578643798828, + "learning_rate": 3.819537276575276e-08, + "logits/chosen": -0.38658449053764343, + "logits/rejected": -0.5110979080200195, + "logps/chosen": -163.3243865966797, + "logps/rejected": -236.9144744873047, + "loss": 1.2682, + "nll_loss": 1.0007117986679077, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.975901126861572, + "rewards/margins": 3.9243903160095215, + "rewards/rejected": 2.0515103340148926, + "step": 10380 + }, + { + "epoch": 0.5764136419744526, + "grad_norm": 69.59004211425781, + "learning_rate": 3.811070886209668e-08, + "logits/chosen": -0.14034242928028107, + "logits/rejected": -0.3311167359352112, + "logps/chosen": -171.51422119140625, + "logps/rejected": -240.3944854736328, + "loss": 1.253, + "nll_loss": 0.900901198387146, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.613317012786865, + "rewards/margins": 4.6014533042907715, + "rewards/rejected": 1.011863350868225, + "step": 10390 + }, + { + "epoch": 0.5769684193007032, + "grad_norm": 81.56526184082031, + "learning_rate": 3.80260810748902e-08, + "logits/chosen": -0.3824862837791443, + "logits/rejected": -0.4957052767276764, + "logps/chosen": -162.27566528320312, + "logps/rejected": -243.3124542236328, + "loss": 1.2452, + "nll_loss": 0.9620378613471985, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.729463577270508, + "rewards/margins": 3.5673599243164062, + "rewards/rejected": 2.1621041297912598, + "step": 10400 + }, + { + "epoch": 0.5775231966269538, + "grad_norm": 46.983177185058594, + "learning_rate": 3.79414896612096e-08, + "logits/chosen": -0.4101601541042328, + "logits/rejected": -0.4935119152069092, + "logps/chosen": -196.01905822753906, + "logps/rejected": -271.23883056640625, + "loss": 1.2677, + "nll_loss": 1.0919231176376343, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.388064384460449, + "rewards/margins": 3.650418519973755, + "rewards/rejected": 2.7376456260681152, + "step": 10410 + }, + { + "epoch": 0.5780779739532045, + "grad_norm": 51.8054313659668, + "learning_rate": 3.7856934878020746e-08, + "logits/chosen": -0.3905481696128845, + "logits/rejected": -0.5235660672187805, + "logps/chosen": -174.46282958984375, + "logps/rejected": -224.73825073242188, + "loss": 1.2783, + "nll_loss": 1.0395283699035645, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.0996575355529785, + "rewards/margins": 3.955806255340576, + "rewards/rejected": 2.143850803375244, + "step": 10420 + }, + { + "epoch": 0.5786327512794552, + "grad_norm": 61.640480041503906, + "learning_rate": 3.777241698217818e-08, + "logits/chosen": -0.23529402911663055, + "logits/rejected": -0.41466569900512695, + "logps/chosen": -154.8517608642578, + "logps/rejected": -242.61160278320312, + "loss": 1.2596, + "nll_loss": 0.8378097414970398, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.4309611320495605, + "rewards/margins": 3.666550397872925, + "rewards/rejected": 1.764411211013794, + "step": 10430 + }, + { + "epoch": 0.5791875286057059, + "grad_norm": 62.208614349365234, + "learning_rate": 3.7687936230424414e-08, + "logits/chosen": -0.26653987169265747, + "logits/rejected": -0.41189995408058167, + "logps/chosen": -182.07679748535156, + "logps/rejected": -250.5347137451172, + "loss": 1.2692, + "nll_loss": 0.9324018359184265, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.15363073348999, + "rewards/margins": 3.7218990325927734, + "rewards/rejected": 2.431732177734375, + "step": 10440 + }, + { + "epoch": 0.5797423059319565, + "grad_norm": 95.203125, + "learning_rate": 3.760349287938909e-08, + "logits/chosen": -0.20371286571025848, + "logits/rejected": -0.402310848236084, + "logps/chosen": -162.33380126953125, + "logps/rejected": -244.218505859375, + "loss": 1.2327, + "nll_loss": 0.8449538350105286, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.365697860717773, + "rewards/margins": 4.171332359313965, + "rewards/rejected": 1.1943647861480713, + "step": 10450 + }, + { + "epoch": 0.5802970832582073, + "grad_norm": 41.2097282409668, + "learning_rate": 3.751908718558826e-08, + "logits/chosen": -0.36653703451156616, + "logits/rejected": -0.5174434185028076, + "logps/chosen": -170.66348266601562, + "logps/rejected": -238.06167602539062, + "loss": 1.2665, + "nll_loss": 0.9725948572158813, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.139052391052246, + "rewards/margins": 4.696810722351074, + "rewards/rejected": 1.4422420263290405, + "step": 10460 + }, + { + "epoch": 0.5808518605844579, + "grad_norm": 130.22348022460938, + "learning_rate": 3.743471940542361e-08, + "logits/chosen": -0.3808217942714691, + "logits/rejected": -0.5171536803245544, + "logps/chosen": -178.21490478515625, + "logps/rejected": -234.63394165039062, + "loss": 1.2486, + "nll_loss": 1.1083260774612427, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.08544397354126, + "rewards/margins": 3.4438986778259277, + "rewards/rejected": 2.641545534133911, + "step": 10470 + }, + { + "epoch": 0.5814066379107086, + "grad_norm": 113.35897064208984, + "learning_rate": 3.735038979518161e-08, + "logits/chosen": -0.355629563331604, + "logits/rejected": -0.4770967364311218, + "logps/chosen": -167.3038787841797, + "logps/rejected": -203.6851348876953, + "loss": 1.1651, + "nll_loss": 0.9599370956420898, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.775060653686523, + "rewards/margins": 3.4364638328552246, + "rewards/rejected": 2.338596820831299, + "step": 10480 + }, + { + "epoch": 0.5819614152369592, + "grad_norm": 51.10672378540039, + "learning_rate": 3.72660986110328e-08, + "logits/chosen": -0.2594406008720398, + "logits/rejected": -0.40329861640930176, + "logps/chosen": -165.8750457763672, + "logps/rejected": -217.21444702148438, + "loss": 1.2455, + "nll_loss": 0.9032737612724304, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.570055961608887, + "rewards/margins": 3.448636293411255, + "rewards/rejected": 2.121419668197632, + "step": 10490 + }, + { + "epoch": 0.58251619256321, + "grad_norm": 61.78841781616211, + "learning_rate": 3.7181846109031e-08, + "logits/chosen": -0.255531907081604, + "logits/rejected": -0.4038110673427582, + "logps/chosen": -175.2942352294922, + "logps/rejected": -242.3545684814453, + "loss": 1.2611, + "nll_loss": 0.9725499153137207, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.6681036949157715, + "rewards/margins": 3.321598529815674, + "rewards/rejected": 2.3465051651000977, + "step": 10500 + }, + { + "epoch": 0.58251619256321, + "eval_logits/chosen": -0.4062573313713074, + "eval_logits/rejected": -0.5181547403335571, + "eval_logps/chosen": -191.35580444335938, + "eval_logps/rejected": -264.09014892578125, + "eval_loss": 1.240870475769043, + "eval_nll_loss": 0.9917998313903809, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.660539627075195, + "eval_rewards/margins": 5.161705017089844, + "eval_rewards/rejected": 1.4988348484039307, + "eval_runtime": 17.2273, + "eval_samples_per_second": 14.86, + "eval_steps_per_second": 1.858, + "step": 10500 + }, + { + "epoch": 0.5830709698894606, + "grad_norm": 94.24137878417969, + "learning_rate": 3.709763254511248e-08, + "logits/chosen": -0.2867467403411865, + "logits/rejected": -0.4277495741844177, + "logps/chosen": -157.75086975097656, + "logps/rejected": -211.0211944580078, + "loss": 1.2738, + "nll_loss": 0.9054722785949707, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.910742282867432, + "rewards/margins": 4.365904331207275, + "rewards/rejected": 1.544838309288025, + "step": 10510 + }, + { + "epoch": 0.5836257472157113, + "grad_norm": 52.160987854003906, + "learning_rate": 3.701345817509531e-08, + "logits/chosen": -0.3296714425086975, + "logits/rejected": -0.4535750448703766, + "logps/chosen": -185.8523406982422, + "logps/rejected": -232.6554718017578, + "loss": 1.2354, + "nll_loss": 1.038352370262146, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.936407566070557, + "rewards/margins": 3.9164319038391113, + "rewards/rejected": 2.0199761390686035, + "step": 10520 + }, + { + "epoch": 0.584180524541962, + "grad_norm": 123.69461059570312, + "learning_rate": 3.6929323254678435e-08, + "logits/chosen": -0.31326359510421753, + "logits/rejected": -0.46600404381752014, + "logps/chosen": -174.47178649902344, + "logps/rejected": -232.8724822998047, + "loss": 1.3301, + "nll_loss": 1.0197381973266602, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.738070487976074, + "rewards/margins": 3.5029759407043457, + "rewards/rejected": 2.235095500946045, + "step": 10530 + }, + { + "epoch": 0.5847353018682127, + "grad_norm": 66.86030578613281, + "learning_rate": 3.684522803944098e-08, + "logits/chosen": -0.4315427243709564, + "logits/rejected": -0.5430246591567993, + "logps/chosen": -192.14537048339844, + "logps/rejected": -238.3332977294922, + "loss": 1.2138, + "nll_loss": 1.0603669881820679, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.344313144683838, + "rewards/margins": 3.1130709648132324, + "rewards/rejected": 3.2312424182891846, + "step": 10540 + }, + { + "epoch": 0.5852900791944633, + "grad_norm": 45.89120101928711, + "learning_rate": 3.676117278484144e-08, + "logits/chosen": -0.2674533724784851, + "logits/rejected": -0.4509350657463074, + "logps/chosen": -159.71278381347656, + "logps/rejected": -221.8773956298828, + "loss": 1.2318, + "nll_loss": 0.8398923873901367, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.681628704071045, + "rewards/margins": 4.198636054992676, + "rewards/rejected": 1.4829930067062378, + "step": 10550 + }, + { + "epoch": 0.585844856520714, + "grad_norm": 67.97987365722656, + "learning_rate": 3.6677157746216934e-08, + "logits/chosen": -0.21639053523540497, + "logits/rejected": -0.4206268787384033, + "logps/chosen": -151.79837036132812, + "logps/rejected": -231.48812866210938, + "loss": 1.2648, + "nll_loss": 0.8437652587890625, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.830565929412842, + "rewards/margins": 4.830432891845703, + "rewards/rejected": 1.0001335144042969, + "step": 10560 + }, + { + "epoch": 0.5863996338469647, + "grad_norm": 24.88826560974121, + "learning_rate": 3.659318317878245e-08, + "logits/chosen": -0.3304033577442169, + "logits/rejected": -0.4739084839820862, + "logps/chosen": -188.98007202148438, + "logps/rejected": -256.1590881347656, + "loss": 1.2539, + "nll_loss": 1.040067195892334, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.439640998840332, + "rewards/margins": 4.042901992797852, + "rewards/rejected": 2.3967385292053223, + "step": 10570 + }, + { + "epoch": 0.5869544111732153, + "grad_norm": 76.99559783935547, + "learning_rate": 3.650924933762997e-08, + "logits/chosen": -0.32587510347366333, + "logits/rejected": -0.5330041646957397, + "logps/chosen": -151.07997131347656, + "logps/rejected": -200.09375, + "loss": 1.2803, + "nll_loss": 0.8896474838256836, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.428950786590576, + "rewards/margins": 3.8680737018585205, + "rewards/rejected": 1.5608775615692139, + "step": 10580 + }, + { + "epoch": 0.587509188499466, + "grad_norm": 46.51936340332031, + "learning_rate": 3.642535647772781e-08, + "logits/chosen": -0.5787987112998962, + "logits/rejected": -0.620763897895813, + "logps/chosen": -220.16983032226562, + "logps/rejected": -271.59197998046875, + "loss": 1.2583, + "nll_loss": 1.241948127746582, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 7.415195465087891, + "rewards/margins": 4.35185432434082, + "rewards/rejected": 3.0633416175842285, + "step": 10590 + }, + { + "epoch": 0.5880639658257167, + "grad_norm": 79.09966278076172, + "learning_rate": 3.634150485391977e-08, + "logits/chosen": -0.4392542839050293, + "logits/rejected": -0.5849173665046692, + "logps/chosen": -204.16653442382812, + "logps/rejected": -281.7826232910156, + "loss": 1.2942, + "nll_loss": 1.087228536605835, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.519995212554932, + "rewards/margins": 4.00660514831543, + "rewards/rejected": 2.5133910179138184, + "step": 10600 + }, + { + "epoch": 0.5886187431519674, + "grad_norm": 130.8496551513672, + "learning_rate": 3.62576947209244e-08, + "logits/chosen": -0.25583991408348083, + "logits/rejected": -0.40053972601890564, + "logps/chosen": -142.29135131835938, + "logps/rejected": -186.323486328125, + "loss": 1.3215, + "nll_loss": 0.8704953193664551, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 5.435754776000977, + "rewards/margins": 3.020576000213623, + "rewards/rejected": 2.4151787757873535, + "step": 10610 + }, + { + "epoch": 0.589173520478218, + "grad_norm": 43.7702751159668, + "learning_rate": 3.617392633333421e-08, + "logits/chosen": -0.2983597218990326, + "logits/rejected": -0.3602357804775238, + "logps/chosen": -185.18138122558594, + "logps/rejected": -221.4364471435547, + "loss": 1.2755, + "nll_loss": 1.1458022594451904, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.94400691986084, + "rewards/margins": 2.6994998455047607, + "rewards/rejected": 3.2445075511932373, + "step": 10620 + }, + { + "epoch": 0.5897282978044688, + "grad_norm": 63.04467010498047, + "learning_rate": 3.60901999456149e-08, + "logits/chosen": -0.22892241179943085, + "logits/rejected": -0.4272095561027527, + "logps/chosen": -138.57318115234375, + "logps/rejected": -188.59591674804688, + "loss": 1.2542, + "nll_loss": 0.8126301765441895, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.200963497161865, + "rewards/margins": 3.0934946537017822, + "rewards/rejected": 2.107468605041504, + "step": 10630 + }, + { + "epoch": 0.5902830751307194, + "grad_norm": 105.11898040771484, + "learning_rate": 3.6006515812104565e-08, + "logits/chosen": -0.39995405077934265, + "logits/rejected": -0.5333541035652161, + "logps/chosen": -170.89114379882812, + "logps/rejected": -247.3040313720703, + "loss": 1.1993, + "nll_loss": 0.9506009221076965, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.481733798980713, + "rewards/margins": 5.089728832244873, + "rewards/rejected": 1.392005443572998, + "step": 10640 + }, + { + "epoch": 0.5908378524569701, + "grad_norm": 59.90850067138672, + "learning_rate": 3.592287418701297e-08, + "logits/chosen": -0.32686835527420044, + "logits/rejected": -0.4767589569091797, + "logps/chosen": -154.9896240234375, + "logps/rejected": -234.1117706298828, + "loss": 1.1636, + "nll_loss": 0.8605798482894897, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.890946388244629, + "rewards/margins": 4.759671688079834, + "rewards/rejected": 1.1312743425369263, + "step": 10650 + }, + { + "epoch": 0.5913926297832207, + "grad_norm": 112.5676498413086, + "learning_rate": 3.5839275324420725e-08, + "logits/chosen": -0.31205517053604126, + "logits/rejected": -0.5330938100814819, + "logps/chosen": -156.6017303466797, + "logps/rejected": -212.99722290039062, + "loss": 1.264, + "nll_loss": 0.8573010563850403, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.145112037658691, + "rewards/margins": 3.8558971881866455, + "rewards/rejected": 1.2892147302627563, + "step": 10660 + }, + { + "epoch": 0.5919474071094715, + "grad_norm": 54.38540267944336, + "learning_rate": 3.5755719478278595e-08, + "logits/chosen": -0.31130069494247437, + "logits/rejected": -0.4759851098060608, + "logps/chosen": -157.94412231445312, + "logps/rejected": -222.71640014648438, + "loss": 1.3018, + "nll_loss": 0.908219039440155, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.573339939117432, + "rewards/margins": 3.681096315383911, + "rewards/rejected": 1.8922431468963623, + "step": 10670 + }, + { + "epoch": 0.5925021844357221, + "grad_norm": 104.39202880859375, + "learning_rate": 3.567220690240661e-08, + "logits/chosen": -0.3926613926887512, + "logits/rejected": -0.5438095331192017, + "logps/chosen": -174.09701538085938, + "logps/rejected": -228.9287872314453, + "loss": 1.2854, + "nll_loss": 0.9536849856376648, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.012936115264893, + "rewards/margins": 3.408534288406372, + "rewards/rejected": 2.6044020652770996, + "step": 10680 + }, + { + "epoch": 0.5930569617619728, + "grad_norm": 66.91699981689453, + "learning_rate": 3.5588737850493375e-08, + "logits/chosen": -0.31840792298316956, + "logits/rejected": -0.4478437900543213, + "logps/chosen": -159.5063018798828, + "logps/rejected": -211.3325653076172, + "loss": 1.2698, + "nll_loss": 0.9285990595817566, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.953314304351807, + "rewards/margins": 3.9582533836364746, + "rewards/rejected": 1.9950603246688843, + "step": 10690 + }, + { + "epoch": 0.5936117390882235, + "grad_norm": 85.52528381347656, + "learning_rate": 3.550531257609529e-08, + "logits/chosen": -0.3695484697818756, + "logits/rejected": -0.48085230588912964, + "logps/chosen": -152.49127197265625, + "logps/rejected": -218.1444091796875, + "loss": 1.3511, + "nll_loss": 0.9453736543655396, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.081177711486816, + "rewards/margins": 3.2808938026428223, + "rewards/rejected": 2.800283908843994, + "step": 10700 + }, + { + "epoch": 0.5941665164144742, + "grad_norm": 73.19735717773438, + "learning_rate": 3.542193133263576e-08, + "logits/chosen": -0.4986700117588043, + "logits/rejected": -0.5374937057495117, + "logps/chosen": -237.1245574951172, + "logps/rejected": -281.1835632324219, + "loss": 1.2915, + "nll_loss": 1.1597903966903687, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 7.327892303466797, + "rewards/margins": 4.328261852264404, + "rewards/rejected": 2.999631404876709, + "step": 10710 + }, + { + "epoch": 0.5947212937407248, + "grad_norm": 72.92422485351562, + "learning_rate": 3.533859437340445e-08, + "logits/chosen": -0.13615483045578003, + "logits/rejected": -0.30375248193740845, + "logps/chosen": -130.32920837402344, + "logps/rejected": -205.73660278320312, + "loss": 1.2372, + "nll_loss": 0.8853503465652466, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.057281494140625, + "rewards/margins": 3.348877429962158, + "rewards/rejected": 1.708404302597046, + "step": 10720 + }, + { + "epoch": 0.5952760710669754, + "grad_norm": 77.15365600585938, + "learning_rate": 3.5255301951556496e-08, + "logits/chosen": -0.4535873830318451, + "logits/rejected": -0.574942946434021, + "logps/chosen": -211.65103149414062, + "logps/rejected": -256.065673828125, + "loss": 1.2748, + "nll_loss": 1.1198481321334839, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 7.009836673736572, + "rewards/margins": 4.057226181030273, + "rewards/rejected": 2.952610492706299, + "step": 10730 + }, + { + "epoch": 0.5958308483932262, + "grad_norm": 54.13565444946289, + "learning_rate": 3.517205432011174e-08, + "logits/chosen": -0.42517557740211487, + "logits/rejected": -0.5419595241546631, + "logps/chosen": -200.6864471435547, + "logps/rejected": -258.7049255371094, + "loss": 1.3266, + "nll_loss": 1.0525949001312256, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.4116668701171875, + "rewards/margins": 3.5005390644073486, + "rewards/rejected": 2.911127805709839, + "step": 10740 + }, + { + "epoch": 0.5963856257194768, + "grad_norm": 59.02808380126953, + "learning_rate": 3.508885173195395e-08, + "logits/chosen": -0.45422667264938354, + "logits/rejected": -0.5683622360229492, + "logps/chosen": -187.98797607421875, + "logps/rejected": -259.0843505859375, + "loss": 1.2412, + "nll_loss": 1.0791963338851929, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.193970680236816, + "rewards/margins": 3.6251118183135986, + "rewards/rejected": 2.5688586235046387, + "step": 10750 + }, + { + "epoch": 0.5969404030457275, + "grad_norm": 65.45647430419922, + "learning_rate": 3.500569443983006e-08, + "logits/chosen": -0.21939226984977722, + "logits/rejected": -0.4209931492805481, + "logps/chosen": -130.20509338378906, + "logps/rejected": -179.78700256347656, + "loss": 1.2037, + "nll_loss": 0.7984130382537842, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.359994888305664, + "rewards/margins": 3.702338457107544, + "rewards/rejected": 1.6576560735702515, + "step": 10760 + }, + { + "epoch": 0.5974951803719782, + "grad_norm": 58.52310562133789, + "learning_rate": 3.492258269634948e-08, + "logits/chosen": -0.3713361322879791, + "logits/rejected": -0.5267468690872192, + "logps/chosen": -166.95346069335938, + "logps/rejected": -259.38800048828125, + "loss": 1.2701, + "nll_loss": 0.9824539422988892, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.330275058746338, + "rewards/margins": 4.58265495300293, + "rewards/rejected": 1.7476199865341187, + "step": 10770 + }, + { + "epoch": 0.5980499576982289, + "grad_norm": 67.48011779785156, + "learning_rate": 3.483951675398315e-08, + "logits/chosen": -0.4535156786441803, + "logits/rejected": -0.5738258957862854, + "logps/chosen": -194.94332885742188, + "logps/rejected": -265.9612731933594, + "loss": 1.2541, + "nll_loss": 1.1000378131866455, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.979112148284912, + "rewards/margins": 4.8542375564575195, + "rewards/rejected": 2.1248748302459717, + "step": 10780 + }, + { + "epoch": 0.5986047350244795, + "grad_norm": 79.3395767211914, + "learning_rate": 3.4756496865062966e-08, + "logits/chosen": -0.38299891352653503, + "logits/rejected": -0.5297697186470032, + "logps/chosen": -201.93260192871094, + "logps/rejected": -254.21224975585938, + "loss": 1.2738, + "nll_loss": 1.0478332042694092, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.4227614402771, + "rewards/margins": 4.124935150146484, + "rewards/rejected": 2.2978267669677734, + "step": 10790 + }, + { + "epoch": 0.5991595123507302, + "grad_norm": 45.63748550415039, + "learning_rate": 3.4673523281780856e-08, + "logits/chosen": -0.292010098695755, + "logits/rejected": -0.46596068143844604, + "logps/chosen": -162.84024047851562, + "logps/rejected": -217.80667114257812, + "loss": 1.2855, + "nll_loss": 0.8932517766952515, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.736856460571289, + "rewards/margins": 4.134681224822998, + "rewards/rejected": 1.6021745204925537, + "step": 10800 + }, + { + "epoch": 0.5997142896769809, + "grad_norm": 66.92276763916016, + "learning_rate": 3.45905962561881e-08, + "logits/chosen": -0.39471474289894104, + "logits/rejected": -0.5333040952682495, + "logps/chosen": -181.5998992919922, + "logps/rejected": -255.9614715576172, + "loss": 1.202, + "nll_loss": 1.0461175441741943, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.988242149353027, + "rewards/margins": 4.026206016540527, + "rewards/rejected": 1.9620361328125, + "step": 10810 + }, + { + "epoch": 0.6002690670032316, + "grad_norm": 76.19792175292969, + "learning_rate": 3.450771604019461e-08, + "logits/chosen": -0.3107849359512329, + "logits/rejected": -0.42468562722206116, + "logps/chosen": -168.02243041992188, + "logps/rejected": -228.12759399414062, + "loss": 1.2738, + "nll_loss": 0.9977830052375793, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.253368854522705, + "rewards/margins": 4.136763572692871, + "rewards/rejected": 2.116605758666992, + "step": 10820 + }, + { + "epoch": 0.6008238443294822, + "grad_norm": 63.823062896728516, + "learning_rate": 3.442488288556804e-08, + "logits/chosen": -0.5227320790290833, + "logits/rejected": -0.5916525721549988, + "logps/chosen": -183.53652954101562, + "logps/rejected": -252.58041381835938, + "loss": 1.2982, + "nll_loss": 1.063537359237671, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.722087860107422, + "rewards/margins": 3.78173565864563, + "rewards/rejected": 2.940351963043213, + "step": 10830 + }, + { + "epoch": 0.601378621655733, + "grad_norm": 69.8787841796875, + "learning_rate": 3.4342097043933096e-08, + "logits/chosen": -0.3921436369419098, + "logits/rejected": -0.4659816324710846, + "logps/chosen": -190.76670837402344, + "logps/rejected": -255.2912139892578, + "loss": 1.3615, + "nll_loss": 1.0732951164245605, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.0947136878967285, + "rewards/margins": 4.343235969543457, + "rewards/rejected": 1.7514781951904297, + "step": 10840 + }, + { + "epoch": 0.6019333989819836, + "grad_norm": 31.128889083862305, + "learning_rate": 3.425935876677076e-08, + "logits/chosen": -0.43652114272117615, + "logits/rejected": -0.5631170868873596, + "logps/chosen": -177.95590209960938, + "logps/rejected": -214.3686065673828, + "loss": 1.2299, + "nll_loss": 1.0268595218658447, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.997559547424316, + "rewards/margins": 3.688072681427002, + "rewards/rejected": 2.3094871044158936, + "step": 10850 + }, + { + "epoch": 0.6024881763082343, + "grad_norm": 77.23370361328125, + "learning_rate": 3.417666830541754e-08, + "logits/chosen": -0.45197612047195435, + "logits/rejected": -0.5712178945541382, + "logps/chosen": -177.9655303955078, + "logps/rejected": -214.2071533203125, + "loss": 1.289, + "nll_loss": 1.0503698587417603, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.04358434677124, + "rewards/margins": 3.6864120960235596, + "rewards/rejected": 2.3571720123291016, + "step": 10860 + }, + { + "epoch": 0.603042953634485, + "grad_norm": 74.53681182861328, + "learning_rate": 3.4094025911064686e-08, + "logits/chosen": -0.3006956875324249, + "logits/rejected": -0.46492305397987366, + "logps/chosen": -181.09133911132812, + "logps/rejected": -242.59927368164062, + "loss": 1.3742, + "nll_loss": 0.8726722002029419, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.730262279510498, + "rewards/margins": 3.801975727081299, + "rewards/rejected": 1.9282863140106201, + "step": 10870 + }, + { + "epoch": 0.6035977309607357, + "grad_norm": 42.141292572021484, + "learning_rate": 3.401143183475743e-08, + "logits/chosen": -0.30769291520118713, + "logits/rejected": -0.4697556495666504, + "logps/chosen": -160.7049102783203, + "logps/rejected": -226.4869842529297, + "loss": 1.1419, + "nll_loss": 0.8911483883857727, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.688979625701904, + "rewards/margins": 3.701953411102295, + "rewards/rejected": 1.9870258569717407, + "step": 10880 + }, + { + "epoch": 0.6041525082869863, + "grad_norm": 78.36772918701172, + "learning_rate": 3.392888632739424e-08, + "logits/chosen": -0.2836330235004425, + "logits/rejected": -0.412407249212265, + "logps/chosen": -148.725830078125, + "logps/rejected": -197.4859161376953, + "loss": 1.3034, + "nll_loss": 0.9351836442947388, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.380554676055908, + "rewards/margins": 2.8393402099609375, + "rewards/rejected": 2.5412139892578125, + "step": 10890 + }, + { + "epoch": 0.604707285613237, + "grad_norm": 65.53260803222656, + "learning_rate": 3.3846389639726e-08, + "logits/chosen": -0.31793665885925293, + "logits/rejected": -0.4620705246925354, + "logps/chosen": -169.74215698242188, + "logps/rejected": -242.336181640625, + "loss": 1.1877, + "nll_loss": 0.9373221397399902, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.978935241699219, + "rewards/margins": 3.973456621170044, + "rewards/rejected": 2.005479097366333, + "step": 10900 + }, + { + "epoch": 0.6052620629394877, + "grad_norm": 26.164304733276367, + "learning_rate": 3.376394202235534e-08, + "logits/chosen": -0.2988826632499695, + "logits/rejected": -0.3849804401397705, + "logps/chosen": -186.63185119628906, + "logps/rejected": -238.55679321289062, + "loss": 1.2071, + "nll_loss": 0.9764013290405273, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 6.3566179275512695, + "rewards/margins": 3.4110398292541504, + "rewards/rejected": 2.945578098297119, + "step": 10910 + }, + { + "epoch": 0.6058168402657383, + "grad_norm": 54.674285888671875, + "learning_rate": 3.368154372573584e-08, + "logits/chosen": -0.41310158371925354, + "logits/rejected": -0.4816606044769287, + "logps/chosen": -172.04104614257812, + "logps/rejected": -199.1383056640625, + "loss": 1.2596, + "nll_loss": 1.0648918151855469, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.158577919006348, + "rewards/margins": 3.6144866943359375, + "rewards/rejected": 2.544090986251831, + "step": 10920 + }, + { + "epoch": 0.606371617591989, + "grad_norm": 120.4090576171875, + "learning_rate": 3.35991950001712e-08, + "logits/chosen": -0.36028024554252625, + "logits/rejected": -0.5143309831619263, + "logps/chosen": -177.98716735839844, + "logps/rejected": -237.66494750976562, + "loss": 1.2803, + "nll_loss": 0.9472376108169556, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.03228759765625, + "rewards/margins": 4.048565864562988, + "rewards/rejected": 1.983722448348999, + "step": 10930 + }, + { + "epoch": 0.6069263949182397, + "grad_norm": 77.94869995117188, + "learning_rate": 3.351689609581458e-08, + "logits/chosen": -0.298319011926651, + "logits/rejected": -0.46092239022254944, + "logps/chosen": -141.98526000976562, + "logps/rejected": -201.03860473632812, + "loss": 1.1757, + "nll_loss": 0.8618471026420593, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.297152519226074, + "rewards/margins": 3.743290424346924, + "rewards/rejected": 1.5538616180419922, + "step": 10940 + }, + { + "epoch": 0.6074811722444904, + "grad_norm": 67.52654266357422, + "learning_rate": 3.3434647262667793e-08, + "logits/chosen": -0.3805920481681824, + "logits/rejected": -0.5205205678939819, + "logps/chosen": -196.33363342285156, + "logps/rejected": -256.85052490234375, + "loss": 1.2294, + "nll_loss": 1.0538861751556396, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.10671854019165, + "rewards/margins": 4.37234354019165, + "rewards/rejected": 1.734375238418579, + "step": 10950 + }, + { + "epoch": 0.608035949570741, + "grad_norm": 98.63069152832031, + "learning_rate": 3.335244875058051e-08, + "logits/chosen": -0.23003113269805908, + "logits/rejected": -0.4464952051639557, + "logps/chosen": -185.92897033691406, + "logps/rejected": -234.14205932617188, + "loss": 1.2685, + "nll_loss": 0.9236680269241333, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.777362823486328, + "rewards/margins": 4.378003120422363, + "rewards/rejected": 1.3993602991104126, + "step": 10960 + }, + { + "epoch": 0.6085907268969917, + "grad_norm": 60.71028137207031, + "learning_rate": 3.3270300809249596e-08, + "logits/chosen": -0.34472283720970154, + "logits/rejected": -0.5263240337371826, + "logps/chosen": -154.20912170410156, + "logps/rejected": -196.12025451660156, + "loss": 1.2432, + "nll_loss": 1.014953851699829, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.40725564956665, + "rewards/margins": 3.625709056854248, + "rewards/rejected": 1.7815459966659546, + "step": 10970 + }, + { + "epoch": 0.6091455042232424, + "grad_norm": 105.30965423583984, + "learning_rate": 3.318820368821826e-08, + "logits/chosen": -0.2851284444332123, + "logits/rejected": -0.45964550971984863, + "logps/chosen": -151.84864807128906, + "logps/rejected": -204.26199340820312, + "loss": 1.2432, + "nll_loss": 1.0300170183181763, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.299997806549072, + "rewards/margins": 3.8525798320770264, + "rewards/rejected": 1.4474176168441772, + "step": 10980 + }, + { + "epoch": 0.6097002815494931, + "grad_norm": 66.04049682617188, + "learning_rate": 3.310615763687535e-08, + "logits/chosen": -0.32158032059669495, + "logits/rejected": -0.42908206582069397, + "logps/chosen": -160.4062957763672, + "logps/rejected": -207.47634887695312, + "loss": 1.2513, + "nll_loss": 0.8961458206176758, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.963690757751465, + "rewards/margins": 4.239657402038574, + "rewards/rejected": 1.7240327596664429, + "step": 10990 + }, + { + "epoch": 0.6102550588757437, + "grad_norm": 105.71235656738281, + "learning_rate": 3.302416290445458e-08, + "logits/chosen": -0.3293169438838959, + "logits/rejected": -0.40171074867248535, + "logps/chosen": -168.55116271972656, + "logps/rejected": -217.5762176513672, + "loss": 1.3974, + "nll_loss": 0.9453521966934204, + "rewards/accuracies": 0.75, + "rewards/chosen": 6.0554094314575195, + "rewards/margins": 3.4899826049804688, + "rewards/rejected": 2.56542706489563, + "step": 11000 + }, + { + "epoch": 0.6102550588757437, + "eval_logits/chosen": -0.45493215322494507, + "eval_logits/rejected": -0.5725303292274475, + "eval_logps/chosen": -190.5392303466797, + "eval_logps/rejected": -259.3156433105469, + "eval_loss": 1.225435495376587, + "eval_nll_loss": 0.9884793758392334, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.742199897766113, + "eval_rewards/margins": 4.765914440155029, + "eval_rewards/rejected": 1.9762849807739258, + "eval_runtime": 17.2793, + "eval_samples_per_second": 14.815, + "eval_steps_per_second": 1.852, + "step": 11000 + }, + { + "epoch": 0.6108098362019945, + "grad_norm": 69.06470489501953, + "learning_rate": 3.2942219740033706e-08, + "logits/chosen": -0.33280569314956665, + "logits/rejected": -0.508167564868927, + "logps/chosen": -133.94076538085938, + "logps/rejected": -198.4490966796875, + "loss": 1.2233, + "nll_loss": 0.8154407739639282, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.323696136474609, + "rewards/margins": 2.990116596221924, + "rewards/rejected": 2.3335793018341064, + "step": 11010 + }, + { + "epoch": 0.6113646135282451, + "grad_norm": 67.94520568847656, + "learning_rate": 3.2860328392533964e-08, + "logits/chosen": -0.31693345308303833, + "logits/rejected": -0.4917060434818268, + "logps/chosen": -155.8323211669922, + "logps/rejected": -205.52706909179688, + "loss": 1.2193, + "nll_loss": 0.8596027493476868, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.675806999206543, + "rewards/margins": 3.0525705814361572, + "rewards/rejected": 2.6232364177703857, + "step": 11020 + }, + { + "epoch": 0.6119193908544958, + "grad_norm": 50.12065887451172, + "learning_rate": 3.277848911071908e-08, + "logits/chosen": -0.3317955732345581, + "logits/rejected": -0.5724862813949585, + "logps/chosen": -136.84103393554688, + "logps/rejected": -195.4505615234375, + "loss": 1.1545, + "nll_loss": 0.8111773729324341, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.188634395599365, + "rewards/margins": 3.3500258922576904, + "rewards/rejected": 1.8386093378067017, + "step": 11030 + }, + { + "epoch": 0.6124741681807464, + "grad_norm": 42.531307220458984, + "learning_rate": 3.269670214319464e-08, + "logits/chosen": -0.38377127051353455, + "logits/rejected": -0.486454576253891, + "logps/chosen": -164.94155883789062, + "logps/rejected": -203.86782836914062, + "loss": 1.3069, + "nll_loss": 0.9467847943305969, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.810660362243652, + "rewards/margins": 3.384552478790283, + "rewards/rejected": 2.4261085987091064, + "step": 11040 + }, + { + "epoch": 0.6130289455069972, + "grad_norm": 78.62700653076172, + "learning_rate": 3.261496773840733e-08, + "logits/chosen": -0.414069265127182, + "logits/rejected": -0.5053438544273376, + "logps/chosen": -187.6324462890625, + "logps/rejected": -246.0666046142578, + "loss": 1.3059, + "nll_loss": 1.0669176578521729, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.589938163757324, + "rewards/margins": 3.714946746826172, + "rewards/rejected": 2.8749921321868896, + "step": 11050 + }, + { + "epoch": 0.6135837228332478, + "grad_norm": 50.88478088378906, + "learning_rate": 3.253328614464413e-08, + "logits/chosen": -0.424401193857193, + "logits/rejected": -0.5055257081985474, + "logps/chosen": -191.259521484375, + "logps/rejected": -248.0755157470703, + "loss": 1.2733, + "nll_loss": 1.0479724407196045, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.432443141937256, + "rewards/margins": 3.278245449066162, + "rewards/rejected": 3.1541976928710938, + "step": 11060 + }, + { + "epoch": 0.6141385001594984, + "grad_norm": 49.54227828979492, + "learning_rate": 3.245165761003168e-08, + "logits/chosen": -0.38219529390335083, + "logits/rejected": -0.5165926218032837, + "logps/chosen": -180.51742553710938, + "logps/rejected": -230.5033721923828, + "loss": 1.2638, + "nll_loss": 0.976039707660675, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.015735149383545, + "rewards/margins": 3.3528194427490234, + "rewards/rejected": 2.662916421890259, + "step": 11070 + }, + { + "epoch": 0.6146932774857492, + "grad_norm": 70.92606353759766, + "learning_rate": 3.237008238253534e-08, + "logits/chosen": -0.3433719277381897, + "logits/rejected": -0.5679585337638855, + "logps/chosen": -178.82461547851562, + "logps/rejected": -279.38922119140625, + "loss": 1.3671, + "nll_loss": 0.9539782404899597, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.885937213897705, + "rewards/margins": 4.357199668884277, + "rewards/rejected": 1.5287374258041382, + "step": 11080 + }, + { + "epoch": 0.6152480548119998, + "grad_norm": 37.418861389160156, + "learning_rate": 3.2288560709958596e-08, + "logits/chosen": -0.3987385630607605, + "logits/rejected": -0.5709208846092224, + "logps/chosen": -168.68743896484375, + "logps/rejected": -234.5459442138672, + "loss": 1.2156, + "nll_loss": 1.0553100109100342, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.184418201446533, + "rewards/margins": 3.9626851081848145, + "rewards/rejected": 2.2217330932617188, + "step": 11090 + }, + { + "epoch": 0.6158028321382505, + "grad_norm": 36.667327880859375, + "learning_rate": 3.220709283994222e-08, + "logits/chosen": -0.20910899341106415, + "logits/rejected": -0.42044633626937866, + "logps/chosen": -140.78128051757812, + "logps/rejected": -202.6675567626953, + "loss": 1.1547, + "nll_loss": 0.7739453315734863, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.707763671875, + "rewards/margins": 3.4391884803771973, + "rewards/rejected": 2.268575668334961, + "step": 11100 + }, + { + "epoch": 0.6163576094645011, + "grad_norm": 46.022804260253906, + "learning_rate": 3.212567901996355e-08, + "logits/chosen": -0.2734110355377197, + "logits/rejected": -0.4417598843574524, + "logps/chosen": -156.12782287597656, + "logps/rejected": -210.34262084960938, + "loss": 1.1952, + "nll_loss": 0.9398097991943359, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.590392589569092, + "rewards/margins": 4.1208720207214355, + "rewards/rejected": 1.4695208072662354, + "step": 11110 + }, + { + "epoch": 0.6169123867907519, + "grad_norm": 56.89548110961914, + "learning_rate": 3.204431949733577e-08, + "logits/chosen": -0.33289042115211487, + "logits/rejected": -0.5147531032562256, + "logps/chosen": -167.79898071289062, + "logps/rejected": -241.2939910888672, + "loss": 1.179, + "nll_loss": 0.8868080377578735, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.794384479522705, + "rewards/margins": 4.220944404602051, + "rewards/rejected": 1.5734400749206543, + "step": 11120 + }, + { + "epoch": 0.6174671641170025, + "grad_norm": 62.29393005371094, + "learning_rate": 3.1963014519207074e-08, + "logits/chosen": -0.39875271916389465, + "logits/rejected": -0.5109494924545288, + "logps/chosen": -205.0402069091797, + "logps/rejected": -248.0024871826172, + "loss": 1.2758, + "nll_loss": 1.076166033744812, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.737574100494385, + "rewards/margins": 4.10688591003418, + "rewards/rejected": 2.630688428878784, + "step": 11130 + }, + { + "epoch": 0.6180219414432532, + "grad_norm": 130.27410888671875, + "learning_rate": 3.188176433256e-08, + "logits/chosen": -0.2652292251586914, + "logits/rejected": -0.42938145995140076, + "logps/chosen": -148.16464233398438, + "logps/rejected": -182.64065551757812, + "loss": 1.2944, + "nll_loss": 0.8596665263175964, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.408052444458008, + "rewards/margins": 3.4761035442352295, + "rewards/rejected": 1.9319489002227783, + "step": 11140 + }, + { + "epoch": 0.6185767187695039, + "grad_norm": 62.68735885620117, + "learning_rate": 3.180056918421062e-08, + "logits/chosen": -0.3436311185359955, + "logits/rejected": -0.5326007604598999, + "logps/chosen": -145.62283325195312, + "logps/rejected": -201.58200073242188, + "loss": 1.2259, + "nll_loss": 0.8367489576339722, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.320952892303467, + "rewards/margins": 3.4347481727600098, + "rewards/rejected": 1.8862043619155884, + "step": 11150 + }, + { + "epoch": 0.6191314960957546, + "grad_norm": 67.24079132080078, + "learning_rate": 3.171942932080782e-08, + "logits/chosen": -0.3893461525440216, + "logits/rejected": -0.5333869457244873, + "logps/chosen": -173.96942138671875, + "logps/rejected": -225.2637176513672, + "loss": 1.2849, + "nll_loss": 0.9754926562309265, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.036957740783691, + "rewards/margins": 3.80385160446167, + "rewards/rejected": 2.2331058979034424, + "step": 11160 + }, + { + "epoch": 0.6196862734220052, + "grad_norm": 152.81712341308594, + "learning_rate": 3.163834498883258e-08, + "logits/chosen": -0.28430917859077454, + "logits/rejected": -0.48452943563461304, + "logps/chosen": -150.76608276367188, + "logps/rejected": -196.15130615234375, + "loss": 1.2478, + "nll_loss": 0.8585501909255981, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.390089988708496, + "rewards/margins": 2.8869361877441406, + "rewards/rejected": 2.5031542778015137, + "step": 11170 + }, + { + "epoch": 0.620241050748256, + "grad_norm": 23.189393997192383, + "learning_rate": 3.155731643459715e-08, + "logits/chosen": -0.3876117467880249, + "logits/rejected": -0.47813859581947327, + "logps/chosen": -183.8386993408203, + "logps/rejected": -246.20559692382812, + "loss": 1.2264, + "nll_loss": 0.988499641418457, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.404829502105713, + "rewards/margins": 3.8341288566589355, + "rewards/rejected": 2.5707004070281982, + "step": 11180 + }, + { + "epoch": 0.6207958280745066, + "grad_norm": 71.80838775634766, + "learning_rate": 3.147634390424434e-08, + "logits/chosen": -0.3379099369049072, + "logits/rejected": -0.5105775594711304, + "logps/chosen": -168.1144256591797, + "logps/rejected": -225.281005859375, + "loss": 1.2116, + "nll_loss": 0.8586258888244629, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.672602653503418, + "rewards/margins": 3.8174891471862793, + "rewards/rejected": 1.8551137447357178, + "step": 11190 + }, + { + "epoch": 0.6213506054007573, + "grad_norm": 45.49497604370117, + "learning_rate": 3.13954276437468e-08, + "logits/chosen": -0.24583525955677032, + "logits/rejected": -0.3375547528266907, + "logps/chosen": -149.8053436279297, + "logps/rejected": -203.1839599609375, + "loss": 1.3485, + "nll_loss": 0.9066442251205444, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.1894211769104, + "rewards/margins": 3.2792041301727295, + "rewards/rejected": 1.910217046737671, + "step": 11200 + }, + { + "epoch": 0.6219053827270079, + "grad_norm": 78.665771484375, + "learning_rate": 3.131456789890622e-08, + "logits/chosen": -0.24614350497722626, + "logits/rejected": -0.40910449624061584, + "logps/chosen": -141.6895294189453, + "logps/rejected": -182.7397918701172, + "loss": 1.2449, + "nll_loss": 0.826746940612793, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.132467269897461, + "rewards/margins": 2.5911591053009033, + "rewards/rejected": 2.5413081645965576, + "step": 11210 + }, + { + "epoch": 0.6224601600532587, + "grad_norm": 75.47736358642578, + "learning_rate": 3.1233764915352644e-08, + "logits/chosen": -0.31453007459640503, + "logits/rejected": -0.39097392559051514, + "logps/chosen": -152.90516662597656, + "logps/rejected": -211.50540161132812, + "loss": 1.2088, + "nll_loss": 0.9485653638839722, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.528751373291016, + "rewards/margins": 2.273203134536743, + "rewards/rejected": 3.2555489540100098, + "step": 11220 + }, + { + "epoch": 0.6230149373795093, + "grad_norm": 62.74408721923828, + "learning_rate": 3.1153018938543674e-08, + "logits/chosen": -0.3634536862373352, + "logits/rejected": -0.48061853647232056, + "logps/chosen": -157.69241333007812, + "logps/rejected": -209.5538330078125, + "loss": 1.2621, + "nll_loss": 0.9580618143081665, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.60360050201416, + "rewards/margins": 2.582735538482666, + "rewards/rejected": 3.020864963531494, + "step": 11230 + }, + { + "epoch": 0.62356971470576, + "grad_norm": 65.83464813232422, + "learning_rate": 3.1072330213763734e-08, + "logits/chosen": -0.2914223074913025, + "logits/rejected": -0.44978776574134827, + "logps/chosen": -169.06375122070312, + "logps/rejected": -215.7394256591797, + "loss": 1.2399, + "nll_loss": 1.0182244777679443, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.937629222869873, + "rewards/margins": 4.014005184173584, + "rewards/rejected": 1.9236243963241577, + "step": 11240 + }, + { + "epoch": 0.6241244920320107, + "grad_norm": 81.04288482666016, + "learning_rate": 3.099169898612334e-08, + "logits/chosen": -0.42526236176490784, + "logits/rejected": -0.5600941181182861, + "logps/chosen": -196.39120483398438, + "logps/rejected": -255.0546417236328, + "loss": 1.2505, + "nll_loss": 1.025933861732483, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.386963844299316, + "rewards/margins": 3.5854058265686035, + "rewards/rejected": 2.8015573024749756, + "step": 11250 + }, + { + "epoch": 0.6246792693582613, + "grad_norm": 77.561767578125, + "learning_rate": 3.091112550055832e-08, + "logits/chosen": -0.263704389333725, + "logits/rejected": -0.3660004734992981, + "logps/chosen": -153.57029724121094, + "logps/rejected": -196.36021423339844, + "loss": 1.1754, + "nll_loss": 0.8896039128303528, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.714977741241455, + "rewards/margins": 3.1963276863098145, + "rewards/rejected": 2.5186495780944824, + "step": 11260 + }, + { + "epoch": 0.625234046684512, + "grad_norm": 64.04689025878906, + "learning_rate": 3.083061000182917e-08, + "logits/chosen": -0.2049088180065155, + "logits/rejected": -0.44929853081703186, + "logps/chosen": -120.06229400634766, + "logps/rejected": -159.4713592529297, + "loss": 1.289, + "nll_loss": 0.7990074753761292, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 5.157519340515137, + "rewards/margins": 3.633838653564453, + "rewards/rejected": 1.5236806869506836, + "step": 11270 + }, + { + "epoch": 0.6257888240107626, + "grad_norm": 75.26434326171875, + "learning_rate": 3.075015273452016e-08, + "logits/chosen": -0.1911907196044922, + "logits/rejected": -0.41610366106033325, + "logps/chosen": -136.66331481933594, + "logps/rejected": -191.37852478027344, + "loss": 1.1743, + "nll_loss": 0.7738946676254272, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.248966217041016, + "rewards/margins": 4.313348293304443, + "rewards/rejected": 0.9356174468994141, + "step": 11280 + }, + { + "epoch": 0.6263436013370134, + "grad_norm": 48.91354751586914, + "learning_rate": 3.0669753943038706e-08, + "logits/chosen": -0.3538931906223297, + "logits/rejected": -0.49591121077537537, + "logps/chosen": -180.0372772216797, + "logps/rejected": -241.81753540039062, + "loss": 1.202, + "nll_loss": 1.0452322959899902, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.185958385467529, + "rewards/margins": 4.097734451293945, + "rewards/rejected": 2.088223695755005, + "step": 11290 + }, + { + "epoch": 0.626898378663264, + "grad_norm": 67.3182144165039, + "learning_rate": 3.058941387161456e-08, + "logits/chosen": -0.38178586959838867, + "logits/rejected": -0.5090783834457397, + "logps/chosen": -180.46243286132812, + "logps/rejected": -235.2760772705078, + "loss": 1.2854, + "nll_loss": 1.0128010511398315, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.531017303466797, + "rewards/margins": 3.047769546508789, + "rewards/rejected": 3.483248233795166, + "step": 11300 + }, + { + "epoch": 0.6274531559895147, + "grad_norm": 45.30299377441406, + "learning_rate": 3.0509132764299164e-08, + "logits/chosen": -0.4374946057796478, + "logits/rejected": -0.5233668684959412, + "logps/chosen": -180.13943481445312, + "logps/rejected": -221.87490844726562, + "loss": 1.2035, + "nll_loss": 1.003361463546753, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.882946968078613, + "rewards/margins": 2.9304733276367188, + "rewards/rejected": 2.9524731636047363, + "step": 11310 + }, + { + "epoch": 0.6280079333157654, + "grad_norm": 85.49980926513672, + "learning_rate": 3.042891086496477e-08, + "logits/chosen": -0.2379666566848755, + "logits/rejected": -0.42325901985168457, + "logps/chosen": -152.56871032714844, + "logps/rejected": -216.0143585205078, + "loss": 1.1971, + "nll_loss": 0.8649711608886719, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.755773067474365, + "rewards/margins": 4.935097694396973, + "rewards/rejected": 0.820675253868103, + "step": 11320 + }, + { + "epoch": 0.6285627106420161, + "grad_norm": 59.26189422607422, + "learning_rate": 3.034874841730382e-08, + "logits/chosen": -0.3192819058895111, + "logits/rejected": -0.47028088569641113, + "logps/chosen": -156.40786743164062, + "logps/rejected": -207.82699584960938, + "loss": 1.2448, + "nll_loss": 0.9039347767829895, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.8771162033081055, + "rewards/margins": 3.4613919258117676, + "rewards/rejected": 2.415724277496338, + "step": 11330 + }, + { + "epoch": 0.6291174879682667, + "grad_norm": 30.611183166503906, + "learning_rate": 3.026864566482813e-08, + "logits/chosen": -0.16953524947166443, + "logits/rejected": -0.40352344512939453, + "logps/chosen": -134.0901641845703, + "logps/rejected": -189.51467895507812, + "loss": 1.2499, + "nll_loss": 0.774274468421936, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.538664817810059, + "rewards/margins": 4.124792098999023, + "rewards/rejected": 1.4138729572296143, + "step": 11340 + }, + { + "epoch": 0.6296722652945174, + "grad_norm": 81.96833801269531, + "learning_rate": 3.0188602850868185e-08, + "logits/chosen": -0.3460689187049866, + "logits/rejected": -0.5106383562088013, + "logps/chosen": -172.60360717773438, + "logps/rejected": -248.35702514648438, + "loss": 1.2077, + "nll_loss": 0.9729253053665161, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.984989643096924, + "rewards/margins": 3.9146087169647217, + "rewards/rejected": 2.070380687713623, + "step": 11350 + }, + { + "epoch": 0.6302270426207681, + "grad_norm": 99.990966796875, + "learning_rate": 3.01086202185724e-08, + "logits/chosen": -0.2937987446784973, + "logits/rejected": -0.44375085830688477, + "logps/chosen": -135.4306182861328, + "logps/rejected": -175.08709716796875, + "loss": 1.232, + "nll_loss": 0.8019644618034363, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.3385844230651855, + "rewards/margins": 3.8010196685791016, + "rewards/rejected": 1.5375645160675049, + "step": 11360 + }, + { + "epoch": 0.6307818199470188, + "grad_norm": 63.24259567260742, + "learning_rate": 3.002869801090638e-08, + "logits/chosen": -0.2819620370864868, + "logits/rejected": -0.43697643280029297, + "logps/chosen": -128.35104370117188, + "logps/rejected": -210.3355255126953, + "loss": 1.2474, + "nll_loss": 0.7971504926681519, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.393406867980957, + "rewards/margins": 4.027044773101807, + "rewards/rejected": 1.3663625717163086, + "step": 11370 + }, + { + "epoch": 0.6313365972732694, + "grad_norm": 74.21778869628906, + "learning_rate": 2.994883647065216e-08, + "logits/chosen": -0.4357389807701111, + "logits/rejected": -0.5347990989685059, + "logps/chosen": -178.0835723876953, + "logps/rejected": -229.2137908935547, + "loss": 1.2147, + "nll_loss": 0.9867460131645203, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.3596696853637695, + "rewards/margins": 3.9182181358337402, + "rewards/rejected": 2.441451072692871, + "step": 11380 + }, + { + "epoch": 0.6318913745995202, + "grad_norm": 59.573184967041016, + "learning_rate": 2.98690358404075e-08, + "logits/chosen": -0.3150174021720886, + "logits/rejected": -0.44475072622299194, + "logps/chosen": -152.2097625732422, + "logps/rejected": -202.0469512939453, + "loss": 1.3485, + "nll_loss": 0.9981307983398438, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.193087100982666, + "rewards/margins": 2.5515267848968506, + "rewards/rejected": 2.64155912399292, + "step": 11390 + }, + { + "epoch": 0.6324461519257708, + "grad_norm": 62.636783599853516, + "learning_rate": 2.978929636258508e-08, + "logits/chosen": -0.49559253454208374, + "logits/rejected": -0.6017329096794128, + "logps/chosen": -169.13633728027344, + "logps/rejected": -217.6695556640625, + "loss": 1.318, + "nll_loss": 1.0103613138198853, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.36987829208374, + "rewards/margins": 3.7874996662139893, + "rewards/rejected": 2.582379102706909, + "step": 11400 + }, + { + "epoch": 0.6330009292520214, + "grad_norm": 92.36743927001953, + "learning_rate": 2.970961827941192e-08, + "logits/chosen": -0.31166213750839233, + "logits/rejected": -0.43062323331832886, + "logps/chosen": -156.07284545898438, + "logps/rejected": -200.2255401611328, + "loss": 1.2467, + "nll_loss": 0.8944070935249329, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.562036991119385, + "rewards/margins": 3.1733527183532715, + "rewards/rejected": 2.3886845111846924, + "step": 11410 + }, + { + "epoch": 0.6335557065782721, + "grad_norm": 63.728736877441406, + "learning_rate": 2.9630001832928447e-08, + "logits/chosen": -0.3078330457210541, + "logits/rejected": -0.3683183193206787, + "logps/chosen": -191.08816528320312, + "logps/rejected": -222.91458129882812, + "loss": 1.3035, + "nll_loss": 1.1246745586395264, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.97721004486084, + "rewards/margins": 2.9635345935821533, + "rewards/rejected": 3.013674736022949, + "step": 11420 + }, + { + "epoch": 0.6341104839045228, + "grad_norm": 68.14006805419922, + "learning_rate": 2.955044726498789e-08, + "logits/chosen": -0.3305138051509857, + "logits/rejected": -0.43370646238327026, + "logps/chosen": -148.52413940429688, + "logps/rejected": -200.3426513671875, + "loss": 1.2064, + "nll_loss": 0.9519137144088745, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.201475620269775, + "rewards/margins": 3.2795891761779785, + "rewards/rejected": 1.9218858480453491, + "step": 11430 + }, + { + "epoch": 0.6346652612307735, + "grad_norm": 66.96622467041016, + "learning_rate": 2.94709548172555e-08, + "logits/chosen": -0.44262975454330444, + "logits/rejected": -0.5093342065811157, + "logps/chosen": -173.77906799316406, + "logps/rejected": -233.55960083007812, + "loss": 1.2975, + "nll_loss": 1.1695655584335327, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.275940418243408, + "rewards/margins": 3.912945508956909, + "rewards/rejected": 2.36299467086792, + "step": 11440 + }, + { + "epoch": 0.6352200385570241, + "grad_norm": 63.15080642700195, + "learning_rate": 2.9391524731207806e-08, + "logits/chosen": -0.2056857794523239, + "logits/rejected": -0.3412301540374756, + "logps/chosen": -136.90403747558594, + "logps/rejected": -157.2415313720703, + "loss": 1.2194, + "nll_loss": 0.8480414152145386, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.352352142333984, + "rewards/margins": 3.119447946548462, + "rewards/rejected": 2.2329041957855225, + "step": 11450 + }, + { + "epoch": 0.6357748158832749, + "grad_norm": 76.51830291748047, + "learning_rate": 2.931215724813195e-08, + "logits/chosen": -0.30280178785324097, + "logits/rejected": -0.44086456298828125, + "logps/chosen": -156.64993286132812, + "logps/rejected": -218.8702850341797, + "loss": 1.3205, + "nll_loss": 0.922582745552063, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.7424116134643555, + "rewards/margins": 3.8438477516174316, + "rewards/rejected": 1.898564100265503, + "step": 11460 + }, + { + "epoch": 0.6363295932095255, + "grad_norm": 77.34232330322266, + "learning_rate": 2.9232852609124865e-08, + "logits/chosen": -0.3481571674346924, + "logits/rejected": -0.41753000020980835, + "logps/chosen": -163.78369140625, + "logps/rejected": -222.6044921875, + "loss": 1.2826, + "nll_loss": 1.0413355827331543, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.836427688598633, + "rewards/margins": 3.0139377117156982, + "rewards/rejected": 2.822490692138672, + "step": 11470 + }, + { + "epoch": 0.6368843705357762, + "grad_norm": 39.352542877197266, + "learning_rate": 2.915361105509258e-08, + "logits/chosen": -0.2512122094631195, + "logits/rejected": -0.351836621761322, + "logps/chosen": -166.06838989257812, + "logps/rejected": -207.2430877685547, + "loss": 1.2009, + "nll_loss": 0.9286238551139832, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.441195487976074, + "rewards/margins": 2.800069808959961, + "rewards/rejected": 2.641125440597534, + "step": 11480 + }, + { + "epoch": 0.6374391478620269, + "grad_norm": 33.86089324951172, + "learning_rate": 2.9074432826749478e-08, + "logits/chosen": -0.18522383272647858, + "logits/rejected": -0.4233093857765198, + "logps/chosen": -163.3899383544922, + "logps/rejected": -212.404296875, + "loss": 1.175, + "nll_loss": 0.8490327000617981, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.2754621505737305, + "rewards/margins": 4.018345832824707, + "rewards/rejected": 1.2571160793304443, + "step": 11490 + }, + { + "epoch": 0.6379939251882776, + "grad_norm": 58.185585021972656, + "learning_rate": 2.899531816461761e-08, + "logits/chosen": -0.48071545362472534, + "logits/rejected": -0.6082251667976379, + "logps/chosen": -180.03013610839844, + "logps/rejected": -242.6453399658203, + "loss": 1.3563, + "nll_loss": 1.085688591003418, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.111035346984863, + "rewards/margins": 3.670149326324463, + "rewards/rejected": 2.4408864974975586, + "step": 11500 + }, + { + "epoch": 0.6379939251882776, + "eval_logits/chosen": -0.4249529242515564, + "eval_logits/rejected": -0.5514447689056396, + "eval_logps/chosen": -191.21336364746094, + "eval_logps/rejected": -262.6428527832031, + "eval_loss": 1.225059986114502, + "eval_nll_loss": 0.9916093349456787, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.674785614013672, + "eval_rewards/margins": 5.031222820281982, + "eval_rewards/rejected": 1.643563151359558, + "eval_runtime": 17.3547, + "eval_samples_per_second": 14.751, + "eval_steps_per_second": 1.844, + "step": 11500 + }, + { + "epoch": 0.6385487025145282, + "grad_norm": 34.527950286865234, + "learning_rate": 2.891626730902591e-08, + "logits/chosen": -0.3377942144870758, + "logits/rejected": -0.516598105430603, + "logps/chosen": -141.05381774902344, + "logps/rejected": -215.08511352539062, + "loss": 1.2723, + "nll_loss": 0.8781675100326538, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.726903915405273, + "rewards/margins": 4.327226161956787, + "rewards/rejected": 1.3996771574020386, + "step": 11510 + }, + { + "epoch": 0.6391034798407789, + "grad_norm": 65.67606353759766, + "learning_rate": 2.8837280500109513e-08, + "logits/chosen": -0.15242630243301392, + "logits/rejected": -0.3232906460762024, + "logps/chosen": -99.00645446777344, + "logps/rejected": -157.29441833496094, + "loss": 1.2496, + "nll_loss": 0.7182794809341431, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.500516891479492, + "rewards/margins": 3.2365946769714355, + "rewards/rejected": 1.2639222145080566, + "step": 11520 + }, + { + "epoch": 0.6396582571670296, + "grad_norm": 53.56328582763672, + "learning_rate": 2.8758357977808935e-08, + "logits/chosen": -0.25661394000053406, + "logits/rejected": -0.4976634085178375, + "logps/chosen": -157.01611328125, + "logps/rejected": -216.496826171875, + "loss": 1.2074, + "nll_loss": 0.8713720440864563, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.420714378356934, + "rewards/margins": 4.268970489501953, + "rewards/rejected": 1.1517441272735596, + "step": 11530 + }, + { + "epoch": 0.6402130344932803, + "grad_norm": 80.25347137451172, + "learning_rate": 2.8679499981869477e-08, + "logits/chosen": -0.4546354413032532, + "logits/rejected": -0.56840980052948, + "logps/chosen": -214.19345092773438, + "logps/rejected": -275.9617919921875, + "loss": 1.2605, + "nll_loss": 1.1772795915603638, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.892640590667725, + "rewards/margins": 3.761946201324463, + "rewards/rejected": 3.1306941509246826, + "step": 11540 + }, + { + "epoch": 0.6407678118195309, + "grad_norm": 43.13340377807617, + "learning_rate": 2.860070675184036e-08, + "logits/chosen": -0.3888665437698364, + "logits/rejected": -0.48605984449386597, + "logps/chosen": -158.18948364257812, + "logps/rejected": -224.8271026611328, + "loss": 1.2816, + "nll_loss": 0.9999931454658508, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.979371547698975, + "rewards/margins": 4.293971061706543, + "rewards/rejected": 1.6854002475738525, + "step": 11550 + }, + { + "epoch": 0.6413225891457817, + "grad_norm": 45.231292724609375, + "learning_rate": 2.8521978527074115e-08, + "logits/chosen": -0.3281251788139343, + "logits/rejected": -0.4574473798274994, + "logps/chosen": -141.13717651367188, + "logps/rejected": -199.07858276367188, + "loss": 1.2125, + "nll_loss": 0.8954921960830688, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.681342124938965, + "rewards/margins": 3.7514350414276123, + "rewards/rejected": 1.929907202720642, + "step": 11560 + }, + { + "epoch": 0.6418773664720323, + "grad_norm": 67.2186050415039, + "learning_rate": 2.844331554672581e-08, + "logits/chosen": -0.3187335431575775, + "logits/rejected": -0.42251071333885193, + "logps/chosen": -172.05410766601562, + "logps/rejected": -208.20034790039062, + "loss": 1.2633, + "nll_loss": 1.0226013660430908, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.629790782928467, + "rewards/margins": 2.8976128101348877, + "rewards/rejected": 2.732178211212158, + "step": 11570 + }, + { + "epoch": 0.642432143798283, + "grad_norm": 66.80694580078125, + "learning_rate": 2.836471804975225e-08, + "logits/chosen": -0.15100358426570892, + "logits/rejected": -0.3213760554790497, + "logps/chosen": -132.5699920654297, + "logps/rejected": -180.60330200195312, + "loss": 1.223, + "nll_loss": 0.7963281869888306, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.969555854797363, + "rewards/margins": 2.9499268531799316, + "rewards/rejected": 2.0196290016174316, + "step": 11580 + }, + { + "epoch": 0.6429869211245336, + "grad_norm": 70.9541244506836, + "learning_rate": 2.828618627491141e-08, + "logits/chosen": -0.39181265234947205, + "logits/rejected": -0.576582670211792, + "logps/chosen": -174.08493041992188, + "logps/rejected": -240.56185913085938, + "loss": 1.2268, + "nll_loss": 0.9803875684738159, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.388065814971924, + "rewards/margins": 4.782280921936035, + "rewards/rejected": 1.6057850122451782, + "step": 11590 + }, + { + "epoch": 0.6435416984507843, + "grad_norm": 43.877098083496094, + "learning_rate": 2.820772046076152e-08, + "logits/chosen": -0.2085866928100586, + "logits/rejected": -0.47025489807128906, + "logps/chosen": -110.3382568359375, + "logps/rejected": -189.8135528564453, + "loss": 1.189, + "nll_loss": 0.7289911508560181, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.165595054626465, + "rewards/margins": 4.362338066101074, + "rewards/rejected": 0.8032568097114563, + "step": 11600 + }, + { + "epoch": 0.644096475777035, + "grad_norm": 32.87038803100586, + "learning_rate": 2.8129320845660555e-08, + "logits/chosen": -0.1900126188993454, + "logits/rejected": -0.40282735228538513, + "logps/chosen": -125.1517333984375, + "logps/rejected": -196.83175659179688, + "loss": 1.2788, + "nll_loss": 0.7826611399650574, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.2875776290893555, + "rewards/margins": 4.222311019897461, + "rewards/rejected": 1.0652662515640259, + "step": 11610 + }, + { + "epoch": 0.6446512531032856, + "grad_norm": 28.977495193481445, + "learning_rate": 2.8050987667765286e-08, + "logits/chosen": -0.46628230810165405, + "logits/rejected": -0.5823394060134888, + "logps/chosen": -183.65231323242188, + "logps/rejected": -216.43801879882812, + "loss": 1.2014, + "nll_loss": 1.0308088064193726, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.078799247741699, + "rewards/margins": 3.7072880268096924, + "rewards/rejected": 2.3715109825134277, + "step": 11620 + }, + { + "epoch": 0.6452060304295364, + "grad_norm": 50.18671798706055, + "learning_rate": 2.797272116503075e-08, + "logits/chosen": -0.39025577902793884, + "logits/rejected": -0.4783563017845154, + "logps/chosen": -159.52896118164062, + "logps/rejected": -198.98770141601562, + "loss": 1.274, + "nll_loss": 1.0031954050064087, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.547030925750732, + "rewards/margins": 2.9004714488983154, + "rewards/rejected": 2.646559476852417, + "step": 11630 + }, + { + "epoch": 0.645760807755787, + "grad_norm": 89.77849578857422, + "learning_rate": 2.7894521575209363e-08, + "logits/chosen": -0.2552987337112427, + "logits/rejected": -0.44860172271728516, + "logps/chosen": -123.27348327636719, + "logps/rejected": -187.30453491210938, + "loss": 1.1879, + "nll_loss": 0.8547815084457397, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.886176109313965, + "rewards/margins": 3.252734422683716, + "rewards/rejected": 1.6334421634674072, + "step": 11640 + }, + { + "epoch": 0.6463155850820377, + "grad_norm": 75.75540161132812, + "learning_rate": 2.7816389135850348e-08, + "logits/chosen": -0.3667968213558197, + "logits/rejected": -0.49988412857055664, + "logps/chosen": -178.33102416992188, + "logps/rejected": -265.7437438964844, + "loss": 1.2416, + "nll_loss": 0.9843828082084656, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.9211835861206055, + "rewards/margins": 3.6456680297851562, + "rewards/rejected": 2.275515079498291, + "step": 11650 + }, + { + "epoch": 0.6468703624082883, + "grad_norm": 133.79086303710938, + "learning_rate": 2.7738324084298927e-08, + "logits/chosen": -0.46171092987060547, + "logits/rejected": -0.5715997219085693, + "logps/chosen": -183.53390502929688, + "logps/rejected": -259.72418212890625, + "loss": 1.2338, + "nll_loss": 1.0586216449737549, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.598612308502197, + "rewards/margins": 4.113223552703857, + "rewards/rejected": 2.4853885173797607, + "step": 11660 + }, + { + "epoch": 0.6474251397345391, + "grad_norm": 96.0303726196289, + "learning_rate": 2.7660326657695572e-08, + "logits/chosen": -0.3845437169075012, + "logits/rejected": -0.502238392829895, + "logps/chosen": -165.1953887939453, + "logps/rejected": -206.5979766845703, + "loss": 1.2835, + "nll_loss": 1.031736135482788, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.993288993835449, + "rewards/margins": 3.625528335571289, + "rewards/rejected": 2.367759943008423, + "step": 11670 + }, + { + "epoch": 0.6479799170607897, + "grad_norm": 50.27488327026367, + "learning_rate": 2.7582397092975395e-08, + "logits/chosen": -0.33000046014785767, + "logits/rejected": -0.5031772255897522, + "logps/chosen": -139.52232360839844, + "logps/rejected": -210.4247589111328, + "loss": 1.2628, + "nll_loss": 0.9663726687431335, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.999209403991699, + "rewards/margins": 3.4824111461639404, + "rewards/rejected": 1.5167982578277588, + "step": 11680 + }, + { + "epoch": 0.6485346943870404, + "grad_norm": 63.37350082397461, + "learning_rate": 2.7504535626867288e-08, + "logits/chosen": -0.3095516860485077, + "logits/rejected": -0.45925372838974, + "logps/chosen": -192.0953826904297, + "logps/rejected": -253.19265747070312, + "loss": 1.1787, + "nll_loss": 1.0016443729400635, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.674686431884766, + "rewards/margins": 4.775921821594238, + "rewards/rejected": 1.8987648487091064, + "step": 11690 + }, + { + "epoch": 0.6490894717132911, + "grad_norm": 80.29125213623047, + "learning_rate": 2.742674249589334e-08, + "logits/chosen": -0.3295501470565796, + "logits/rejected": -0.5117398500442505, + "logps/chosen": -196.1520538330078, + "logps/rejected": -272.27020263671875, + "loss": 1.2747, + "nll_loss": 1.0100562572479248, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.9651079177856445, + "rewards/margins": 4.8849310874938965, + "rewards/rejected": 1.0801770687103271, + "step": 11700 + }, + { + "epoch": 0.6496442490395418, + "grad_norm": 60.28606414794922, + "learning_rate": 2.7349017936368034e-08, + "logits/chosen": -0.26497378945350647, + "logits/rejected": -0.4506555199623108, + "logps/chosen": -155.94403076171875, + "logps/rejected": -207.8164520263672, + "loss": 1.433, + "nll_loss": 0.8810884356498718, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.338873863220215, + "rewards/margins": 3.31561279296875, + "rewards/rejected": 2.023261547088623, + "step": 11710 + }, + { + "epoch": 0.6501990263657924, + "grad_norm": 36.00334930419922, + "learning_rate": 2.7271362184397573e-08, + "logits/chosen": -0.5077487230300903, + "logits/rejected": -0.5495038628578186, + "logps/chosen": -198.93734741210938, + "logps/rejected": -220.6064453125, + "loss": 1.3674, + "nll_loss": 1.247666597366333, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 6.006911277770996, + "rewards/margins": 1.937212347984314, + "rewards/rejected": 4.069699287414551, + "step": 11720 + }, + { + "epoch": 0.650753803692043, + "grad_norm": 57.20671463012695, + "learning_rate": 2.7193775475879104e-08, + "logits/chosen": -0.25387778878211975, + "logits/rejected": -0.4570907652378082, + "logps/chosen": -136.553466796875, + "logps/rejected": -196.80422973632812, + "loss": 1.2568, + "nll_loss": 0.8721553683280945, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.297316551208496, + "rewards/margins": 2.8612141609191895, + "rewards/rejected": 2.4361026287078857, + "step": 11730 + }, + { + "epoch": 0.6513085810182938, + "grad_norm": 52.974388122558594, + "learning_rate": 2.711625804650003e-08, + "logits/chosen": -0.33820220828056335, + "logits/rejected": -0.42576026916503906, + "logps/chosen": -161.24368286132812, + "logps/rejected": -227.69442749023438, + "loss": 1.2178, + "nll_loss": 0.9846166372299194, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.212119102478027, + "rewards/margins": 3.714268207550049, + "rewards/rejected": 2.4978511333465576, + "step": 11740 + }, + { + "epoch": 0.6518633583445445, + "grad_norm": 57.019203186035156, + "learning_rate": 2.7038810131737344e-08, + "logits/chosen": -0.3755797743797302, + "logits/rejected": -0.5482282638549805, + "logps/chosen": -146.35415649414062, + "logps/rejected": -189.00033569335938, + "loss": 1.2881, + "nll_loss": 0.9598537683486938, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.358166694641113, + "rewards/margins": 3.091294050216675, + "rewards/rejected": 2.2668726444244385, + "step": 11750 + }, + { + "epoch": 0.6524181356707951, + "grad_norm": 74.11688232421875, + "learning_rate": 2.6961431966856862e-08, + "logits/chosen": -0.4043899476528168, + "logits/rejected": -0.47904711961746216, + "logps/chosen": -173.64895629882812, + "logps/rejected": -223.4357452392578, + "loss": 1.2868, + "nll_loss": 0.9924699068069458, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.230164527893066, + "rewards/margins": 3.5057075023651123, + "rewards/rejected": 2.724457263946533, + "step": 11760 + }, + { + "epoch": 0.6529729129970459, + "grad_norm": 48.800636291503906, + "learning_rate": 2.688412378691253e-08, + "logits/chosen": -0.30842381715774536, + "logits/rejected": -0.4647197723388672, + "logps/chosen": -157.63990783691406, + "logps/rejected": -211.4970245361328, + "loss": 1.2867, + "nll_loss": 0.9230083227157593, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.783360481262207, + "rewards/margins": 3.545384168624878, + "rewards/rejected": 2.237976312637329, + "step": 11770 + }, + { + "epoch": 0.6535276903232965, + "grad_norm": 68.97724914550781, + "learning_rate": 2.6806885826745644e-08, + "logits/chosen": -0.4042227268218994, + "logits/rejected": -0.5165958404541016, + "logps/chosen": -204.16995239257812, + "logps/rejected": -252.27609252929688, + "loss": 1.2793, + "nll_loss": 1.0783510208129883, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.4781599044799805, + "rewards/margins": 3.8852744102478027, + "rewards/rejected": 2.5928854942321777, + "step": 11780 + }, + { + "epoch": 0.6540824676495471, + "grad_norm": 59.931514739990234, + "learning_rate": 2.672971832098426e-08, + "logits/chosen": -0.2738388180732727, + "logits/rejected": -0.4349437654018402, + "logps/chosen": -182.16796875, + "logps/rejected": -263.86260986328125, + "loss": 1.1825, + "nll_loss": 0.9843899011611938, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.737701892852783, + "rewards/margins": 3.4806103706359863, + "rewards/rejected": 2.257091522216797, + "step": 11790 + }, + { + "epoch": 0.6546372449757979, + "grad_norm": 34.759620666503906, + "learning_rate": 2.665262150404236e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -164.31822204589844, + "logps/rejected": -224.2421417236328, + "loss": 1.1675, + "nll_loss": NaN, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.8863844871521, + "rewards/margins": 3.647648572921753, + "rewards/rejected": 2.238736152648926, + "step": 11800 + }, + { + "epoch": 0.6551920223020485, + "grad_norm": 34.575355529785156, + "learning_rate": 2.6575595610119217e-08, + "logits/chosen": -0.17474150657653809, + "logits/rejected": -0.4074668288230896, + "logps/chosen": -120.413330078125, + "logps/rejected": -199.8701629638672, + "loss": 1.2722, + "nll_loss": 0.734146773815155, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.292850017547607, + "rewards/margins": 4.236934661865234, + "rewards/rejected": 1.0559158325195312, + "step": 11810 + }, + { + "epoch": 0.6557467996282992, + "grad_norm": 77.0059814453125, + "learning_rate": 2.6498640873198674e-08, + "logits/chosen": -0.18045705556869507, + "logits/rejected": -0.3690612316131592, + "logps/chosen": -133.09133911132812, + "logps/rejected": -167.37332153320312, + "loss": 1.2525, + "nll_loss": 0.8449319005012512, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.069517612457275, + "rewards/margins": 3.9457669258117676, + "rewards/rejected": 1.1237508058547974, + "step": 11820 + }, + { + "epoch": 0.6563015769545498, + "grad_norm": 43.84608459472656, + "learning_rate": 2.6421757527048373e-08, + "logits/chosen": -0.24172338843345642, + "logits/rejected": -0.30262669920921326, + "logps/chosen": -159.74351501464844, + "logps/rejected": -218.16470336914062, + "loss": 1.2134, + "nll_loss": 0.9308856129646301, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.76108455657959, + "rewards/margins": 3.665342330932617, + "rewards/rejected": 2.0957419872283936, + "step": 11830 + }, + { + "epoch": 0.6568563542808006, + "grad_norm": 54.546878814697266, + "learning_rate": 2.6344945805219154e-08, + "logits/chosen": -0.3484647870063782, + "logits/rejected": -0.4647384583950043, + "logps/chosen": -181.24209594726562, + "logps/rejected": -244.99813842773438, + "loss": 1.188, + "nll_loss": 0.9683634042739868, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.014298439025879, + "rewards/margins": 3.6213595867156982, + "rewards/rejected": 2.3929383754730225, + "step": 11840 + }, + { + "epoch": 0.6574111316070512, + "grad_norm": 75.54428100585938, + "learning_rate": 2.6268205941044174e-08, + "logits/chosen": -0.31292271614074707, + "logits/rejected": -0.46505337953567505, + "logps/chosen": -183.8721466064453, + "logps/rejected": -241.2721710205078, + "loss": 1.1855, + "nll_loss": 0.9584083557128906, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.108242034912109, + "rewards/margins": 5.03185510635376, + "rewards/rejected": 1.076386570930481, + "step": 11850 + }, + { + "epoch": 0.6579659089333019, + "grad_norm": 86.36827087402344, + "learning_rate": 2.6191538167638473e-08, + "logits/chosen": -0.1850053369998932, + "logits/rejected": -0.3884859085083008, + "logps/chosen": -158.62387084960938, + "logps/rejected": -202.6744842529297, + "loss": 1.34, + "nll_loss": 0.9155745506286621, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.202109336853027, + "rewards/margins": 2.996246814727783, + "rewards/rejected": 2.205862522125244, + "step": 11860 + }, + { + "epoch": 0.6585206862595526, + "grad_norm": 34.229007720947266, + "learning_rate": 2.6114942717897924e-08, + "logits/chosen": -0.3144778907299042, + "logits/rejected": -0.47114571928977966, + "logps/chosen": -147.51065063476562, + "logps/rejected": -222.63412475585938, + "loss": 1.1944, + "nll_loss": 0.9164835810661316, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.97763729095459, + "rewards/margins": 3.2676658630371094, + "rewards/rejected": 2.7099711894989014, + "step": 11870 + }, + { + "epoch": 0.6590754635858033, + "grad_norm": 77.99613952636719, + "learning_rate": 2.6038419824498836e-08, + "logits/chosen": -0.45269614458084106, + "logits/rejected": -0.608244776725769, + "logps/chosen": -184.53318786621094, + "logps/rejected": -231.4016876220703, + "loss": 1.2404, + "nll_loss": 1.023178219795227, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.559817314147949, + "rewards/margins": 3.81182861328125, + "rewards/rejected": 2.7479889392852783, + "step": 11880 + }, + { + "epoch": 0.6596302409120539, + "grad_norm": 107.27423858642578, + "learning_rate": 2.5961969719897002e-08, + "logits/chosen": -0.4463115632534027, + "logits/rejected": -0.4912208616733551, + "logps/chosen": -204.35488891601562, + "logps/rejected": -226.47634887695312, + "loss": 1.3068, + "nll_loss": 1.0930724143981934, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.605230808258057, + "rewards/margins": 3.248439311981201, + "rewards/rejected": 3.3567910194396973, + "step": 11890 + }, + { + "epoch": 0.6601850182383046, + "grad_norm": 58.7518196105957, + "learning_rate": 2.5885592636327185e-08, + "logits/chosen": -0.308633416891098, + "logits/rejected": -0.47308340668678284, + "logps/chosen": -154.84756469726562, + "logps/rejected": -221.26101684570312, + "loss": 1.2456, + "nll_loss": 0.8628350496292114, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.491322994232178, + "rewards/margins": 3.4300694465637207, + "rewards/rejected": 2.0612540245056152, + "step": 11900 + }, + { + "epoch": 0.6607397955645553, + "grad_norm": 47.63860321044922, + "learning_rate": 2.5809288805802314e-08, + "logits/chosen": -0.2752883732318878, + "logits/rejected": -0.3745267987251282, + "logps/chosen": -156.73974609375, + "logps/rejected": -191.37518310546875, + "loss": 1.1428, + "nll_loss": 0.9624244570732117, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.198734283447266, + "rewards/margins": 3.503079891204834, + "rewards/rejected": 2.6956539154052734, + "step": 11910 + }, + { + "epoch": 0.661294572890806, + "grad_norm": 53.0847053527832, + "learning_rate": 2.5733058460112745e-08, + "logits/chosen": -0.28915005922317505, + "logits/rejected": -0.4263629913330078, + "logps/chosen": -154.884765625, + "logps/rejected": -195.12173461914062, + "loss": 1.2892, + "nll_loss": 0.9579075574874878, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.69394063949585, + "rewards/margins": 3.602841854095459, + "rewards/rejected": 2.091099262237549, + "step": 11920 + }, + { + "epoch": 0.6618493502170566, + "grad_norm": 52.77870178222656, + "learning_rate": 2.565690183082567e-08, + "logits/chosen": 0.02300594374537468, + "logits/rejected": -0.18990465998649597, + "logps/chosen": -117.44551849365234, + "logps/rejected": -164.27191162109375, + "loss": 1.2247, + "nll_loss": 0.6829160451889038, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 4.389244556427002, + "rewards/margins": 3.0034196376800537, + "rewards/rejected": 1.3858246803283691, + "step": 11930 + }, + { + "epoch": 0.6624041275433074, + "grad_norm": 64.58387756347656, + "learning_rate": 2.5580819149284294e-08, + "logits/chosen": -0.3967145085334778, + "logits/rejected": -0.5213366746902466, + "logps/chosen": -188.41629028320312, + "logps/rejected": -241.98794555664062, + "loss": 1.2769, + "nll_loss": 1.058941125869751, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.2895283699035645, + "rewards/margins": 3.641815185546875, + "rewards/rejected": 2.6477131843566895, + "step": 11940 + }, + { + "epoch": 0.662958904869558, + "grad_norm": 68.38165283203125, + "learning_rate": 2.550481064660724e-08, + "logits/chosen": -0.4660201966762543, + "logits/rejected": -0.5716916918754578, + "logps/chosen": -185.03787231445312, + "logps/rejected": -262.6652526855469, + "loss": 1.2245, + "nll_loss": 1.1056232452392578, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.7243852615356445, + "rewards/margins": 4.855156421661377, + "rewards/rejected": 1.869228720664978, + "step": 11950 + }, + { + "epoch": 0.6635136821958086, + "grad_norm": 75.75467681884766, + "learning_rate": 2.5428876553687785e-08, + "logits/chosen": -0.27877897024154663, + "logits/rejected": -0.44745931029319763, + "logps/chosen": -153.05589294433594, + "logps/rejected": -215.16299438476562, + "loss": 1.2257, + "nll_loss": 0.940390944480896, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.786623001098633, + "rewards/margins": 4.343803882598877, + "rewards/rejected": 1.442819356918335, + "step": 11960 + }, + { + "epoch": 0.6640684595220593, + "grad_norm": 132.7093963623047, + "learning_rate": 2.5353017101193118e-08, + "logits/chosen": -0.19808785617351532, + "logits/rejected": -0.30676984786987305, + "logps/chosen": -155.75851440429688, + "logps/rejected": -188.7913360595703, + "loss": 1.299, + "nll_loss": 1.008725643157959, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.505126953125, + "rewards/margins": 3.052074909210205, + "rewards/rejected": 2.453052043914795, + "step": 11970 + }, + { + "epoch": 0.66462323684831, + "grad_norm": 55.54659652709961, + "learning_rate": 2.5277232519563786e-08, + "logits/chosen": -0.37800878286361694, + "logits/rejected": -0.5081731081008911, + "logps/chosen": -205.9271240234375, + "logps/rejected": -258.99505615234375, + "loss": 1.2852, + "nll_loss": 1.035773754119873, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.5168352127075195, + "rewards/margins": 3.7608962059020996, + "rewards/rejected": 2.7559380531311035, + "step": 11980 + }, + { + "epoch": 0.6651780141745607, + "grad_norm": 58.10818099975586, + "learning_rate": 2.5201523039012786e-08, + "logits/chosen": -0.3581780791282654, + "logits/rejected": -0.45601707696914673, + "logps/chosen": -149.41586303710938, + "logps/rejected": -208.29531860351562, + "loss": 1.1857, + "nll_loss": 1.0122449398040771, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.706791877746582, + "rewards/margins": 3.4607417583465576, + "rewards/rejected": 2.2460505962371826, + "step": 11990 + }, + { + "epoch": 0.6657327915008113, + "grad_norm": 48.28881072998047, + "learning_rate": 2.5125888889525053e-08, + "logits/chosen": -0.43195661902427673, + "logits/rejected": -0.5321120023727417, + "logps/chosen": -217.3926544189453, + "logps/rejected": -306.47174072265625, + "loss": 1.2693, + "nll_loss": 1.1178908348083496, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 7.168890476226807, + "rewards/margins": 5.198139190673828, + "rewards/rejected": 1.9707515239715576, + "step": 12000 + }, + { + "epoch": 0.6657327915008113, + "eval_logits/chosen": -0.4029172360897064, + "eval_logits/rejected": -0.5176486372947693, + "eval_logps/chosen": -190.4573516845703, + "eval_logps/rejected": -261.09027099609375, + "eval_loss": 1.216133713722229, + "eval_nll_loss": 0.9862600564956665, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.750385761260986, + "eval_rewards/margins": 4.95156192779541, + "eval_rewards/rejected": 1.7988238334655762, + "eval_runtime": 16.8792, + "eval_samples_per_second": 15.167, + "eval_steps_per_second": 1.896, + "step": 12000 + }, + { + "epoch": 0.6662875688270621, + "grad_norm": 63.904869079589844, + "learning_rate": 2.505033030085668e-08, + "logits/chosen": -0.2707623839378357, + "logits/rejected": -0.41992464661598206, + "logps/chosen": -170.99859619140625, + "logps/rejected": -202.93099975585938, + "loss": 1.1845, + "nll_loss": 0.8949660062789917, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.781935691833496, + "rewards/margins": 4.267865180969238, + "rewards/rejected": 1.514070987701416, + "step": 12010 + }, + { + "epoch": 0.6668423461533127, + "grad_norm": 48.43641662597656, + "learning_rate": 2.4974847502534236e-08, + "logits/chosen": -0.21682846546173096, + "logits/rejected": -0.332996666431427, + "logps/chosen": -133.58831787109375, + "logps/rejected": -176.17198181152344, + "loss": 1.3191, + "nll_loss": 0.8816951513290405, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.321299076080322, + "rewards/margins": 2.6478521823883057, + "rewards/rejected": 2.6734461784362793, + "step": 12020 + }, + { + "epoch": 0.6673971234795634, + "grad_norm": 47.73326110839844, + "learning_rate": 2.4899440723853993e-08, + "logits/chosen": -0.32461345195770264, + "logits/rejected": -0.508495569229126, + "logps/chosen": -155.72108459472656, + "logps/rejected": -234.18374633789062, + "loss": 1.1253, + "nll_loss": 0.8847814798355103, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.887340545654297, + "rewards/margins": 4.291019439697266, + "rewards/rejected": 1.5963211059570312, + "step": 12030 + }, + { + "epoch": 0.667951900805814, + "grad_norm": 45.27253723144531, + "learning_rate": 2.482411019388138e-08, + "logits/chosen": -0.3711601495742798, + "logits/rejected": -0.46444177627563477, + "logps/chosen": -165.60052490234375, + "logps/rejected": -220.0888214111328, + "loss": 1.2507, + "nll_loss": 0.9290505647659302, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.9535369873046875, + "rewards/margins": 3.6149909496307373, + "rewards/rejected": 2.338545799255371, + "step": 12040 + }, + { + "epoch": 0.6685066781320648, + "grad_norm": 38.41141891479492, + "learning_rate": 2.474885614145013e-08, + "logits/chosen": -0.2623186707496643, + "logits/rejected": -0.40427589416503906, + "logps/chosen": -200.8873291015625, + "logps/rejected": -252.84957885742188, + "loss": 1.2134, + "nll_loss": 1.0127166509628296, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.942787170410156, + "rewards/margins": 3.569545269012451, + "rewards/rejected": 2.373241901397705, + "step": 12050 + }, + { + "epoch": 0.6690614554583154, + "grad_norm": 54.231590270996094, + "learning_rate": 2.467367879516171e-08, + "logits/chosen": -0.19070479273796082, + "logits/rejected": -0.3572729527950287, + "logps/chosen": -141.1498260498047, + "logps/rejected": -185.4639129638672, + "loss": 1.1299, + "nll_loss": 0.8368833661079407, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.0277299880981445, + "rewards/margins": 3.8939177989959717, + "rewards/rejected": 1.133811354637146, + "step": 12060 + }, + { + "epoch": 0.6696162327845661, + "grad_norm": 107.39879608154297, + "learning_rate": 2.4598578383384577e-08, + "logits/chosen": -0.19599628448486328, + "logits/rejected": -0.3971938490867615, + "logps/chosen": -150.45681762695312, + "logps/rejected": -206.2320098876953, + "loss": 1.198, + "nll_loss": 0.8617413640022278, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.54864501953125, + "rewards/margins": 4.213971138000488, + "rewards/rejected": 1.3346747159957886, + "step": 12070 + }, + { + "epoch": 0.6701710101108168, + "grad_norm": 69.48493957519531, + "learning_rate": 2.4523555134253427e-08, + "logits/chosen": -0.1980140507221222, + "logits/rejected": -0.3805929720401764, + "logps/chosen": -156.84254455566406, + "logps/rejected": -232.4204559326172, + "loss": 1.2391, + "nll_loss": 0.8746329545974731, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.7141547203063965, + "rewards/margins": 4.587075233459473, + "rewards/rejected": 1.1270796060562134, + "step": 12080 + }, + { + "epoch": 0.6707257874370675, + "grad_norm": 33.65756607055664, + "learning_rate": 2.4448609275668624e-08, + "logits/chosen": -0.325199693441391, + "logits/rejected": -0.4760337471961975, + "logps/chosen": -140.7031707763672, + "logps/rejected": -217.14047241210938, + "loss": 1.196, + "nll_loss": 0.9230989217758179, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.959973335266113, + "rewards/margins": 4.0895891189575195, + "rewards/rejected": 1.870383858680725, + "step": 12090 + }, + { + "epoch": 0.6712805647633181, + "grad_norm": 54.62273406982422, + "learning_rate": 2.4373741035295354e-08, + "logits/chosen": -0.385964959859848, + "logits/rejected": -0.5205613374710083, + "logps/chosen": -181.7679901123047, + "logps/rejected": -245.03591918945312, + "loss": 1.2331, + "nll_loss": 1.009312391281128, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.756820201873779, + "rewards/margins": 3.7107512950897217, + "rewards/rejected": 2.0460691452026367, + "step": 12100 + }, + { + "epoch": 0.6718353420895689, + "grad_norm": 42.30917739868164, + "learning_rate": 2.4298950640563153e-08, + "logits/chosen": -0.2956189215183258, + "logits/rejected": -0.4312848448753357, + "logps/chosen": -148.74661254882812, + "logps/rejected": -206.2273712158203, + "loss": 1.302, + "nll_loss": 0.9868310689926147, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.419343948364258, + "rewards/margins": 3.17564058303833, + "rewards/rejected": 2.2437033653259277, + "step": 12110 + }, + { + "epoch": 0.6723901194158195, + "grad_norm": 64.55408477783203, + "learning_rate": 2.422423831866494e-08, + "logits/chosen": -0.37799012660980225, + "logits/rejected": -0.494337797164917, + "logps/chosen": -189.30006408691406, + "logps/rejected": -283.4101867675781, + "loss": 1.3102, + "nll_loss": 1.084997296333313, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.286576271057129, + "rewards/margins": 4.620743274688721, + "rewards/rejected": 1.665832281112671, + "step": 12120 + }, + { + "epoch": 0.6729448967420701, + "grad_norm": 68.30670928955078, + "learning_rate": 2.4149604296556582e-08, + "logits/chosen": -0.17044074833393097, + "logits/rejected": -0.3540880084037781, + "logps/chosen": -150.18106079101562, + "logps/rejected": -228.80203247070312, + "loss": 1.1923, + "nll_loss": 0.8533521890640259, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.593564033508301, + "rewards/margins": 4.3736572265625, + "rewards/rejected": 1.2199056148529053, + "step": 12130 + }, + { + "epoch": 0.6734996740683208, + "grad_norm": 42.05036163330078, + "learning_rate": 2.4075048800955994e-08, + "logits/chosen": -0.2699395716190338, + "logits/rejected": -0.4457497000694275, + "logps/chosen": -174.43775939941406, + "logps/rejected": -227.38082885742188, + "loss": 1.1834, + "nll_loss": 0.9571785926818848, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.090694904327393, + "rewards/margins": 4.016547679901123, + "rewards/rejected": 2.0741469860076904, + "step": 12140 + }, + { + "epoch": 0.6740544513945715, + "grad_norm": 33.59762954711914, + "learning_rate": 2.4000572058342634e-08, + "logits/chosen": -0.2383767068386078, + "logits/rejected": -0.3165499269962311, + "logps/chosen": -158.05007934570312, + "logps/rejected": -225.517822265625, + "loss": 1.2319, + "nll_loss": 0.9493352770805359, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.893635272979736, + "rewards/margins": 4.028491497039795, + "rewards/rejected": 1.8651440143585205, + "step": 12150 + }, + { + "epoch": 0.6746092287208222, + "grad_norm": 77.18594360351562, + "learning_rate": 2.3926174294956696e-08, + "logits/chosen": -0.2202729731798172, + "logits/rejected": -0.40815192461013794, + "logps/chosen": -154.131591796875, + "logps/rejected": -213.88162231445312, + "loss": 1.3252, + "nll_loss": 0.9396843910217285, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.392186164855957, + "rewards/margins": 4.005577087402344, + "rewards/rejected": 1.386609435081482, + "step": 12160 + }, + { + "epoch": 0.6751640060470728, + "grad_norm": 56.84512710571289, + "learning_rate": 2.3851855736798433e-08, + "logits/chosen": -0.40499407052993774, + "logits/rejected": -0.46631139516830444, + "logps/chosen": -187.3240966796875, + "logps/rejected": -227.35302734375, + "loss": 1.2733, + "nll_loss": 1.0406402349472046, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.996212005615234, + "rewards/margins": 2.9431254863739014, + "rewards/rejected": 3.053086280822754, + "step": 12170 + }, + { + "epoch": 0.6757187833733236, + "grad_norm": 62.96339797973633, + "learning_rate": 2.377761660962754e-08, + "logits/chosen": -0.19384492933750153, + "logits/rejected": -0.4142443537712097, + "logps/chosen": -120.52742004394531, + "logps/rejected": -154.69898986816406, + "loss": 1.2966, + "nll_loss": 0.6951400637626648, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.903653144836426, + "rewards/margins": 3.6983203887939453, + "rewards/rejected": 1.2053325176239014, + "step": 12180 + }, + { + "epoch": 0.6762735606995742, + "grad_norm": 41.99937438964844, + "learning_rate": 2.3703457138962373e-08, + "logits/chosen": -0.3248792290687561, + "logits/rejected": -0.4245285987854004, + "logps/chosen": -170.15814208984375, + "logps/rejected": -214.8865509033203, + "loss": 1.2082, + "nll_loss": 1.0116769075393677, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.878182411193848, + "rewards/margins": 3.5163872241973877, + "rewards/rejected": 2.361795425415039, + "step": 12190 + }, + { + "epoch": 0.6768283380258249, + "grad_norm": 54.36767578125, + "learning_rate": 2.362937755007935e-08, + "logits/chosen": -0.25324520468711853, + "logits/rejected": -0.3982846140861511, + "logps/chosen": -148.38656616210938, + "logps/rejected": -229.2409210205078, + "loss": 1.2597, + "nll_loss": 0.8752374649047852, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.8527655601501465, + "rewards/margins": 3.713012218475342, + "rewards/rejected": 2.139753818511963, + "step": 12200 + }, + { + "epoch": 0.6773831153520755, + "grad_norm": 73.44265747070312, + "learning_rate": 2.355537806801224e-08, + "logits/chosen": -0.3576010763645172, + "logits/rejected": -0.48581376671791077, + "logps/chosen": -172.70126342773438, + "logps/rejected": -222.27035522460938, + "loss": 1.2391, + "nll_loss": 0.9975396990776062, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.035371780395508, + "rewards/margins": 3.65791392326355, + "rewards/rejected": 2.377457618713379, + "step": 12210 + }, + { + "epoch": 0.6779378926783263, + "grad_norm": 135.85301208496094, + "learning_rate": 2.3481458917551412e-08, + "logits/chosen": -0.2711308002471924, + "logits/rejected": -0.4493893086910248, + "logps/chosen": -121.0526351928711, + "logps/rejected": -175.5157012939453, + "loss": 1.2526, + "nll_loss": 0.8514910936355591, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 4.768159866333008, + "rewards/margins": 2.7525486946105957, + "rewards/rejected": 2.015611171722412, + "step": 12220 + }, + { + "epoch": 0.6784926700045769, + "grad_norm": 52.4161491394043, + "learning_rate": 2.3407620323243276e-08, + "logits/chosen": -0.29157713055610657, + "logits/rejected": -0.4096639156341553, + "logps/chosen": -169.6468048095703, + "logps/rejected": -217.779296875, + "loss": 1.2374, + "nll_loss": 0.9138143658638, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.091879367828369, + "rewards/margins": 3.7146849632263184, + "rewards/rejected": 2.3771939277648926, + "step": 12230 + }, + { + "epoch": 0.6790474473308276, + "grad_norm": 58.094173431396484, + "learning_rate": 2.3333862509389453e-08, + "logits/chosen": -0.2922077775001526, + "logits/rejected": -0.43591269850730896, + "logps/chosen": -153.40057373046875, + "logps/rejected": -206.0133056640625, + "loss": 1.2634, + "nll_loss": 0.921014666557312, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.6673455238342285, + "rewards/margins": 3.7713565826416016, + "rewards/rejected": 1.8959894180297852, + "step": 12240 + }, + { + "epoch": 0.6796022246570783, + "grad_norm": 44.03510665893555, + "learning_rate": 2.326018570004629e-08, + "logits/chosen": -0.36777248978614807, + "logits/rejected": -0.4938937723636627, + "logps/chosen": -175.78761291503906, + "logps/rejected": -244.18557739257812, + "loss": 1.2377, + "nll_loss": 0.9222499132156372, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.203314781188965, + "rewards/margins": 4.028185844421387, + "rewards/rejected": 2.175128698348999, + "step": 12250 + }, + { + "epoch": 0.680157001983329, + "grad_norm": 55.41645812988281, + "learning_rate": 2.3186590119023957e-08, + "logits/chosen": -0.3484051823616028, + "logits/rejected": -0.4971703886985779, + "logps/chosen": -181.65892028808594, + "logps/rejected": -213.5335235595703, + "loss": 1.2372, + "nll_loss": 0.9844461679458618, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.447892665863037, + "rewards/margins": 3.9748847484588623, + "rewards/rejected": 2.4730076789855957, + "step": 12260 + }, + { + "epoch": 0.6807117793095796, + "grad_norm": 84.85285186767578, + "learning_rate": 2.311307598988595e-08, + "logits/chosen": -0.21466335654258728, + "logits/rejected": -0.4452625811100006, + "logps/chosen": -107.22621154785156, + "logps/rejected": -147.81466674804688, + "loss": 1.1798, + "nll_loss": 0.6963762044906616, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.621092796325684, + "rewards/margins": 3.5071613788604736, + "rewards/rejected": 1.1139312982559204, + "step": 12270 + }, + { + "epoch": 0.6812665566358302, + "grad_norm": 35.29098892211914, + "learning_rate": 2.3039643535948254e-08, + "logits/chosen": -0.2706051468849182, + "logits/rejected": -0.4521883428096771, + "logps/chosen": -117.00657653808594, + "logps/rejected": -175.45608520507812, + "loss": 1.2768, + "nll_loss": 0.8625116348266602, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.758450508117676, + "rewards/margins": 3.15194034576416, + "rewards/rejected": 1.6065105199813843, + "step": 12280 + }, + { + "epoch": 0.681821333962081, + "grad_norm": 56.05775451660156, + "learning_rate": 2.2966292980278822e-08, + "logits/chosen": -0.3769080936908722, + "logits/rejected": -0.4741719365119934, + "logps/chosen": -185.7469940185547, + "logps/rejected": -212.88265991210938, + "loss": 1.2799, + "nll_loss": 1.052215576171875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.125279426574707, + "rewards/margins": 3.444147825241089, + "rewards/rejected": 2.681131601333618, + "step": 12290 + }, + { + "epoch": 0.6823761112883316, + "grad_norm": 58.406681060791016, + "learning_rate": 2.289302454569682e-08, + "logits/chosen": -0.22093644738197327, + "logits/rejected": -0.4181094169616699, + "logps/chosen": -139.93206787109375, + "logps/rejected": -175.80638122558594, + "loss": 1.2401, + "nll_loss": 0.8091662526130676, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.987369537353516, + "rewards/margins": 3.4839394092559814, + "rewards/rejected": 1.5034297704696655, + "step": 12300 + }, + { + "epoch": 0.6829308886145823, + "grad_norm": 76.34058380126953, + "learning_rate": 2.2819838454771883e-08, + "logits/chosen": -0.21736867725849152, + "logits/rejected": -0.42537808418273926, + "logps/chosen": -135.9351348876953, + "logps/rejected": -181.4181365966797, + "loss": 1.2302, + "nll_loss": 0.936695396900177, + "rewards/accuracies": 0.75, + "rewards/chosen": 4.897767066955566, + "rewards/margins": 2.7751545906066895, + "rewards/rejected": 2.1226117610931396, + "step": 12310 + }, + { + "epoch": 0.683485665940833, + "grad_norm": 33.38760757446289, + "learning_rate": 2.2746734929823592e-08, + "logits/chosen": -0.3998182713985443, + "logits/rejected": -0.49476099014282227, + "logps/chosen": -178.2798309326172, + "logps/rejected": -253.4151611328125, + "loss": 1.2437, + "nll_loss": 1.123988389968872, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.30886173248291, + "rewards/margins": 3.9014792442321777, + "rewards/rejected": 2.4073822498321533, + "step": 12320 + }, + { + "epoch": 0.6840404432670837, + "grad_norm": 60.61362075805664, + "learning_rate": 2.267371419292064e-08, + "logits/chosen": -0.2885650396347046, + "logits/rejected": -0.447486013174057, + "logps/chosen": -176.38241577148438, + "logps/rejected": -235.9733428955078, + "loss": 1.2304, + "nll_loss": 0.8946449160575867, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.297074317932129, + "rewards/margins": 4.915509223937988, + "rewards/rejected": 1.3815653324127197, + "step": 12330 + }, + { + "epoch": 0.6845952205933343, + "grad_norm": 41.32902908325195, + "learning_rate": 2.2600776465880284e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -137.27139282226562, + "logps/rejected": -175.2365264892578, + "loss": 1.2429, + "nll_loss": NaN, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.061639308929443, + "rewards/margins": 3.0307247638702393, + "rewards/rejected": 2.0309150218963623, + "step": 12340 + }, + { + "epoch": 0.685149997919585, + "grad_norm": 46.262428283691406, + "learning_rate": 2.252792197026761e-08, + "logits/chosen": -0.2002539336681366, + "logits/rejected": -0.460933119058609, + "logps/chosen": -125.36739349365234, + "logps/rejected": -214.4919891357422, + "loss": 1.1804, + "nll_loss": 0.7614500522613525, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.050937652587891, + "rewards/margins": 3.953423023223877, + "rewards/rejected": 1.0975148677825928, + "step": 12350 + }, + { + "epoch": 0.6857047752458357, + "grad_norm": 86.92090606689453, + "learning_rate": 2.2455150927394877e-08, + "logits/chosen": -0.11846522241830826, + "logits/rejected": -0.2859603464603424, + "logps/chosen": -144.65676879882812, + "logps/rejected": -190.19805908203125, + "loss": 1.248, + "nll_loss": 0.8645520210266113, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.232255458831787, + "rewards/margins": 3.135033130645752, + "rewards/rejected": 2.0972225666046143, + "step": 12360 + }, + { + "epoch": 0.6862595525720864, + "grad_norm": 71.14341735839844, + "learning_rate": 2.2382463558320785e-08, + "logits/chosen": -0.4598962664604187, + "logits/rejected": -0.5810025930404663, + "logps/chosen": -209.81283569335938, + "logps/rejected": -272.2491760253906, + "loss": 1.2829, + "nll_loss": 1.1209025382995605, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.895010471343994, + "rewards/margins": 4.252721309661865, + "rewards/rejected": 2.642289161682129, + "step": 12370 + }, + { + "epoch": 0.686814329898337, + "grad_norm": 116.52542877197266, + "learning_rate": 2.230986008384994e-08, + "logits/chosen": -0.18051250278949738, + "logits/rejected": -0.3351711332798004, + "logps/chosen": -145.1734161376953, + "logps/rejected": -210.15237426757812, + "loss": 1.2633, + "nll_loss": 0.8040771484375, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.451181411743164, + "rewards/margins": 3.6372859477996826, + "rewards/rejected": 1.8138954639434814, + "step": 12380 + }, + { + "epoch": 0.6873691072245878, + "grad_norm": 65.41537475585938, + "learning_rate": 2.2237340724532007e-08, + "logits/chosen": -0.3516872525215149, + "logits/rejected": -0.5156417489051819, + "logps/chosen": -184.73089599609375, + "logps/rejected": -243.6831512451172, + "loss": 1.1755, + "nll_loss": 0.9614565968513489, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.4388322830200195, + "rewards/margins": 4.41310977935791, + "rewards/rejected": 2.0257222652435303, + "step": 12390 + }, + { + "epoch": 0.6879238845508384, + "grad_norm": 44.749759674072266, + "learning_rate": 2.2164905700661197e-08, + "logits/chosen": -0.2982178330421448, + "logits/rejected": -0.4795723855495453, + "logps/chosen": -167.46710205078125, + "logps/rejected": -209.0430145263672, + "loss": 1.2054, + "nll_loss": 0.9289814233779907, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.628087043762207, + "rewards/margins": 3.88411283493042, + "rewards/rejected": 1.7439743280410767, + "step": 12400 + }, + { + "epoch": 0.6884786618770891, + "grad_norm": 52.118690490722656, + "learning_rate": 2.209255523227554e-08, + "logits/chosen": -0.35844165086746216, + "logits/rejected": -0.5388228893280029, + "logps/chosen": -179.90591430664062, + "logps/rejected": -232.36801147460938, + "loss": 1.1813, + "nll_loss": 0.9430680274963379, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.065617084503174, + "rewards/margins": 4.8700337409973145, + "rewards/rejected": 1.1955829858779907, + "step": 12410 + }, + { + "epoch": 0.6890334392033398, + "grad_norm": 90.59578704833984, + "learning_rate": 2.202028953915614e-08, + "logits/chosen": -0.2993447482585907, + "logits/rejected": -0.4592467248439789, + "logps/chosen": -156.58291625976562, + "logps/rejected": -223.3266143798828, + "loss": 1.2281, + "nll_loss": 0.9128421545028687, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.2060675621032715, + "rewards/margins": 4.218282222747803, + "rewards/rejected": 1.9877853393554688, + "step": 12420 + }, + { + "epoch": 0.6895882165295905, + "grad_norm": 158.9627685546875, + "learning_rate": 2.194810884082665e-08, + "logits/chosen": -0.37616056203842163, + "logits/rejected": -0.45298343896865845, + "logps/chosen": -163.0350799560547, + "logps/rejected": -223.75033569335938, + "loss": 1.3006, + "nll_loss": 0.9525953531265259, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.8995537757873535, + "rewards/margins": 3.986276626586914, + "rewards/rejected": 1.9132773876190186, + "step": 12430 + }, + { + "epoch": 0.6901429938558411, + "grad_norm": 56.218318939208984, + "learning_rate": 2.1876013356552482e-08, + "logits/chosen": -0.21467992663383484, + "logits/rejected": -0.4005191922187805, + "logps/chosen": -158.6463623046875, + "logps/rejected": -223.93447875976562, + "loss": 1.208, + "nll_loss": 0.8587056398391724, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.5468034744262695, + "rewards/margins": 4.3985066413879395, + "rewards/rejected": 1.1482973098754883, + "step": 12440 + }, + { + "epoch": 0.6906977711820917, + "grad_norm": 59.432518005371094, + "learning_rate": 2.1804003305340212e-08, + "logits/chosen": -0.3025331199169159, + "logits/rejected": -0.4626920223236084, + "logps/chosen": -211.9149932861328, + "logps/rejected": -250.5555877685547, + "loss": 1.3116, + "nll_loss": 1.023355484008789, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.563776969909668, + "rewards/margins": 4.3968400955200195, + "rewards/rejected": 2.166937828063965, + "step": 12450 + }, + { + "epoch": 0.6912525485083425, + "grad_norm": 84.86593627929688, + "learning_rate": 2.1732078905936923e-08, + "logits/chosen": -0.2796228229999542, + "logits/rejected": -0.38698163628578186, + "logps/chosen": -167.61642456054688, + "logps/rejected": -239.12905883789062, + "loss": 1.2759, + "nll_loss": 1.0216405391693115, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.806312561035156, + "rewards/margins": 2.9036295413970947, + "rewards/rejected": 2.9026830196380615, + "step": 12460 + }, + { + "epoch": 0.6918073258345931, + "grad_norm": 45.21405029296875, + "learning_rate": 2.1660240376829437e-08, + "logits/chosen": -0.2210051268339157, + "logits/rejected": -0.40046876668930054, + "logps/chosen": -171.15985107421875, + "logps/rejected": -228.51998901367188, + "loss": 1.2174, + "nll_loss": 0.8844130635261536, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.903317928314209, + "rewards/margins": 4.436379909515381, + "rewards/rejected": 1.4669368267059326, + "step": 12470 + }, + { + "epoch": 0.6923621031608438, + "grad_norm": 75.68427276611328, + "learning_rate": 2.1588487936243805e-08, + "logits/chosen": -0.29069143533706665, + "logits/rejected": -0.4780551493167877, + "logps/chosen": -165.8023681640625, + "logps/rejected": -217.4993438720703, + "loss": 1.1407, + "nll_loss": 0.9273978471755981, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.701485633850098, + "rewards/margins": 3.7327961921691895, + "rewards/rejected": 1.9686896800994873, + "step": 12480 + }, + { + "epoch": 0.6929168804870945, + "grad_norm": 131.2355194091797, + "learning_rate": 2.151682180214447e-08, + "logits/chosen": -0.2729392647743225, + "logits/rejected": -0.41850152611732483, + "logps/chosen": -146.65773010253906, + "logps/rejected": -186.9139404296875, + "loss": 1.1893, + "nll_loss": 0.9260753393173218, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.724074363708496, + "rewards/margins": 3.4622206687927246, + "rewards/rejected": 2.2618539333343506, + "step": 12490 + }, + { + "epoch": 0.6934716578133452, + "grad_norm": 199.65155029296875, + "learning_rate": 2.1445242192233832e-08, + "logits/chosen": -0.31922265887260437, + "logits/rejected": -0.4389330744743347, + "logps/chosen": -195.14793395996094, + "logps/rejected": -262.5490417480469, + "loss": 1.2636, + "nll_loss": 1.0710331201553345, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.984295845031738, + "rewards/margins": 3.9620003700256348, + "rewards/rejected": 2.0222959518432617, + "step": 12500 + }, + { + "epoch": 0.6934716578133452, + "eval_logits/chosen": -0.4087273180484772, + "eval_logits/rejected": -0.5315932631492615, + "eval_logps/chosen": -190.18540954589844, + "eval_logps/rejected": -263.9999694824219, + "eval_loss": 1.2172236442565918, + "eval_nll_loss": 0.986419141292572, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.777579307556152, + "eval_rewards/margins": 5.269726753234863, + "eval_rewards/rejected": 1.5078527927398682, + "eval_runtime": 16.8595, + "eval_samples_per_second": 15.184, + "eval_steps_per_second": 1.898, + "step": 12500 + }, + { + "epoch": 0.6940264351395958, + "grad_norm": 28.3609619140625, + "learning_rate": 2.137374932395133e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -116.59901428222656, + "logps/rejected": -184.24917602539062, + "loss": 1.2296, + "nll_loss": NaN, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.856743335723877, + "rewards/margins": 4.400498390197754, + "rewards/rejected": 0.4562453627586365, + "step": 12510 + }, + { + "epoch": 0.6945812124658465, + "grad_norm": 66.7529525756836, + "learning_rate": 2.130234341447298e-08, + "logits/chosen": -0.26258862018585205, + "logits/rejected": -0.44541144371032715, + "logps/chosen": -151.9936981201172, + "logps/rejected": -226.0979766845703, + "loss": 1.3037, + "nll_loss": 0.9197772741317749, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.724644660949707, + "rewards/margins": 3.637474775314331, + "rewards/rejected": 2.087170124053955, + "step": 12520 + }, + { + "epoch": 0.6951359897920972, + "grad_norm": 81.84940338134766, + "learning_rate": 2.123102468071058e-08, + "logits/chosen": -0.3097625970840454, + "logits/rejected": -0.5346147418022156, + "logps/chosen": -158.1197967529297, + "logps/rejected": -219.06069946289062, + "loss": 1.2766, + "nll_loss": 0.959620475769043, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.531791687011719, + "rewards/margins": 4.247246742248535, + "rewards/rejected": 1.284545660018921, + "step": 12530 + }, + { + "epoch": 0.6956907671183479, + "grad_norm": 87.16346740722656, + "learning_rate": 2.115979333931117e-08, + "logits/chosen": -0.34987300634384155, + "logits/rejected": -0.5129455924034119, + "logps/chosen": -187.62423706054688, + "logps/rejected": -233.568115234375, + "loss": 1.4026, + "nll_loss": 0.9986729621887207, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.0716729164123535, + "rewards/margins": 3.5790810585021973, + "rewards/rejected": 2.4925918579101562, + "step": 12540 + }, + { + "epoch": 0.6962455444445985, + "grad_norm": 66.4018325805664, + "learning_rate": 2.108864960665631e-08, + "logits/chosen": -0.3691956698894501, + "logits/rejected": -0.48615536093711853, + "logps/chosen": -150.43435668945312, + "logps/rejected": -215.7544708251953, + "loss": 1.2317, + "nll_loss": 0.9678544998168945, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 5.999693870544434, + "rewards/margins": 2.967386484146118, + "rewards/rejected": 3.0323076248168945, + "step": 12550 + }, + { + "epoch": 0.6968003217708493, + "grad_norm": 65.25221252441406, + "learning_rate": 2.101759369886137e-08, + "logits/chosen": -0.3581236004829407, + "logits/rejected": -0.4774579107761383, + "logps/chosen": -196.92544555664062, + "logps/rejected": -245.4114990234375, + "loss": 1.3096, + "nll_loss": 1.1450408697128296, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.050604343414307, + "rewards/margins": 3.169355869293213, + "rewards/rejected": 2.881248950958252, + "step": 12560 + }, + { + "epoch": 0.6973550990970999, + "grad_norm": 99.85542297363281, + "learning_rate": 2.094662583177501e-08, + "logits/chosen": -0.4287230372428894, + "logits/rejected": -0.5906526446342468, + "logps/chosen": -203.54275512695312, + "logps/rejected": -253.23587036132812, + "loss": 1.3315, + "nll_loss": 1.0714521408081055, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.651005744934082, + "rewards/margins": 3.783782482147217, + "rewards/rejected": 2.867222309112549, + "step": 12570 + }, + { + "epoch": 0.6979098764233506, + "grad_norm": 88.76417541503906, + "learning_rate": 2.0875746220978375e-08, + "logits/chosen": -0.25520652532577515, + "logits/rejected": -0.42219337821006775, + "logps/chosen": -159.36973571777344, + "logps/rejected": -202.49530029296875, + "loss": 1.3895, + "nll_loss": 0.9047862887382507, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.3983473777771, + "rewards/margins": 3.299961566925049, + "rewards/rejected": 2.0983855724334717, + "step": 12580 + }, + { + "epoch": 0.6984646537496012, + "grad_norm": 121.02467346191406, + "learning_rate": 2.0804955081784557e-08, + "logits/chosen": -0.3737090528011322, + "logits/rejected": -0.4600960314273834, + "logps/chosen": -152.117919921875, + "logps/rejected": -201.46641540527344, + "loss": 1.3058, + "nll_loss": 0.9616680145263672, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.8062543869018555, + "rewards/margins": 3.2586758136749268, + "rewards/rejected": 2.547579050064087, + "step": 12590 + }, + { + "epoch": 0.699019431075852, + "grad_norm": 64.31996154785156, + "learning_rate": 2.0734252629237893e-08, + "logits/chosen": -0.16399219632148743, + "logits/rejected": -0.4218166470527649, + "logps/chosen": -125.17207336425781, + "logps/rejected": -188.16537475585938, + "loss": 1.1864, + "nll_loss": 0.6983264684677124, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 4.9564127922058105, + "rewards/margins": 3.527151107788086, + "rewards/rejected": 1.4292614459991455, + "step": 12600 + }, + { + "epoch": 0.6995742084021026, + "grad_norm": 122.69866180419922, + "learning_rate": 2.0663639078113305e-08, + "logits/chosen": -0.19395720958709717, + "logits/rejected": -0.3099953532218933, + "logps/chosen": -157.4480743408203, + "logps/rejected": -206.3944091796875, + "loss": 1.2991, + "nll_loss": 0.8979552388191223, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.614788055419922, + "rewards/margins": 3.3870463371276855, + "rewards/rejected": 2.2277417182922363, + "step": 12610 + }, + { + "epoch": 0.7001289857283532, + "grad_norm": 38.911495208740234, + "learning_rate": 2.0593114642915637e-08, + "logits/chosen": -0.2975391447544098, + "logits/rejected": -0.46102046966552734, + "logps/chosen": -164.1873779296875, + "logps/rejected": -248.583740234375, + "loss": 1.2425, + "nll_loss": 0.9558102488517761, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.593968868255615, + "rewards/margins": 4.030261993408203, + "rewards/rejected": 1.563706398010254, + "step": 12620 + }, + { + "epoch": 0.700683763054604, + "grad_norm": 49.26844787597656, + "learning_rate": 2.052267953787907e-08, + "logits/chosen": -0.35543131828308105, + "logits/rejected": -0.5265085101127625, + "logps/chosen": -166.32180786132812, + "logps/rejected": -224.92971801757812, + "loss": 1.3341, + "nll_loss": 0.964970588684082, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.636528491973877, + "rewards/margins": 3.437178134918213, + "rewards/rejected": 2.199349880218506, + "step": 12630 + }, + { + "epoch": 0.7012385403808546, + "grad_norm": 120.4012680053711, + "learning_rate": 2.0452333976966353e-08, + "logits/chosen": -0.1976948380470276, + "logits/rejected": -0.40840989351272583, + "logps/chosen": -144.37417602539062, + "logps/rejected": -214.06057739257812, + "loss": 1.1906, + "nll_loss": 0.7425335645675659, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.459261894226074, + "rewards/margins": 4.588191032409668, + "rewards/rejected": 0.8710712194442749, + "step": 12640 + }, + { + "epoch": 0.7017933177071053, + "grad_norm": 46.45096206665039, + "learning_rate": 2.0382078173868294e-08, + "logits/chosen": -0.3347860276699066, + "logits/rejected": -0.4544796049594879, + "logps/chosen": -184.65481567382812, + "logps/rejected": -199.2404327392578, + "loss": 1.305, + "nll_loss": 0.9165772199630737, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.64373254776001, + "rewards/margins": 3.0978517532348633, + "rewards/rejected": 2.5458810329437256, + "step": 12650 + }, + { + "epoch": 0.7023480950333559, + "grad_norm": 56.71844482421875, + "learning_rate": 2.031191234200303e-08, + "logits/chosen": -0.349911630153656, + "logits/rejected": -0.5095638036727905, + "logps/chosen": -175.8561553955078, + "logps/rejected": -242.212646484375, + "loss": 1.248, + "nll_loss": 0.9571784734725952, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.067541122436523, + "rewards/margins": 3.7752602100372314, + "rewards/rejected": 2.292280673980713, + "step": 12660 + }, + { + "epoch": 0.7029028723596067, + "grad_norm": 61.5022087097168, + "learning_rate": 2.0241836694515335e-08, + "logits/chosen": -0.28449350595474243, + "logits/rejected": -0.4594908654689789, + "logps/chosen": -179.852783203125, + "logps/rejected": -265.66815185546875, + "loss": 1.2322, + "nll_loss": 0.9275538325309753, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.3422346115112305, + "rewards/margins": 4.554647922515869, + "rewards/rejected": 1.7875865697860718, + "step": 12670 + }, + { + "epoch": 0.7034576496858573, + "grad_norm": 123.11775970458984, + "learning_rate": 2.01718514442761e-08, + "logits/chosen": -0.22520117461681366, + "logits/rejected": -0.4005351960659027, + "logps/chosen": -170.02728271484375, + "logps/rejected": -232.5746612548828, + "loss": 1.3099, + "nll_loss": 0.8446242213249207, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.153702735900879, + "rewards/margins": 4.833800315856934, + "rewards/rejected": 1.319903016090393, + "step": 12680 + }, + { + "epoch": 0.704012427012108, + "grad_norm": 70.78916931152344, + "learning_rate": 2.0101956803881555e-08, + "logits/chosen": -0.17443840205669403, + "logits/rejected": -0.3497583866119385, + "logps/chosen": -157.9462890625, + "logps/rejected": -216.73391723632812, + "loss": 1.252, + "nll_loss": 0.8217185139656067, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.5470476150512695, + "rewards/margins": 3.8540797233581543, + "rewards/rejected": 1.692967176437378, + "step": 12690 + }, + { + "epoch": 0.7045672043383587, + "grad_norm": 46.204471588134766, + "learning_rate": 2.0032152985652707e-08, + "logits/chosen": -0.2721540629863739, + "logits/rejected": -0.4170974791049957, + "logps/chosen": -184.45590209960938, + "logps/rejected": -244.99560546875, + "loss": 1.2509, + "nll_loss": 0.9397276043891907, + "rewards/accuracies": 0.75, + "rewards/chosen": 6.371033668518066, + "rewards/margins": 4.111761569976807, + "rewards/rejected": 2.2592720985412598, + "step": 12700 + }, + { + "epoch": 0.7051219816646094, + "grad_norm": 34.63050842285156, + "learning_rate": 1.9962440201634696e-08, + "logits/chosen": -0.30988579988479614, + "logits/rejected": -0.45380598306655884, + "logps/chosen": -152.26637268066406, + "logps/rejected": -203.39842224121094, + "loss": 1.2288, + "nll_loss": 0.8955994844436646, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.418598651885986, + "rewards/margins": 3.852252960205078, + "rewards/rejected": 1.566345453262329, + "step": 12710 + }, + { + "epoch": 0.70567675899086, + "grad_norm": 108.41631317138672, + "learning_rate": 1.989281866359606e-08, + "logits/chosen": -0.2549108862876892, + "logits/rejected": -0.3928259015083313, + "logps/chosen": -146.1342010498047, + "logps/rejected": -203.2252197265625, + "loss": 1.2525, + "nll_loss": 0.8680016398429871, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.755482196807861, + "rewards/margins": 3.5762786865234375, + "rewards/rejected": 2.179203510284424, + "step": 12720 + }, + { + "epoch": 0.7062315363171108, + "grad_norm": 30.510683059692383, + "learning_rate": 1.982328858302823e-08, + "logits/chosen": -0.24541839957237244, + "logits/rejected": -0.45684748888015747, + "logps/chosen": -145.65972900390625, + "logps/rejected": -205.34811401367188, + "loss": 1.2206, + "nll_loss": 0.8304737210273743, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.457831382751465, + "rewards/margins": 3.5541675090789795, + "rewards/rejected": 1.903663992881775, + "step": 12730 + }, + { + "epoch": 0.7067863136433614, + "grad_norm": 41.52299118041992, + "learning_rate": 1.9753850171144725e-08, + "logits/chosen": -0.1176132932305336, + "logits/rejected": -0.28097209334373474, + "logps/chosen": -115.817138671875, + "logps/rejected": -158.190185546875, + "loss": 1.2493, + "nll_loss": 0.7526054382324219, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.40674352645874, + "rewards/margins": 2.9769506454467773, + "rewards/rejected": 1.429793119430542, + "step": 12740 + }, + { + "epoch": 0.7073410909696121, + "grad_norm": 52.08366012573242, + "learning_rate": 1.968450363888073e-08, + "logits/chosen": -0.36436715722084045, + "logits/rejected": -0.42022258043289185, + "logps/chosen": -163.3020782470703, + "logps/rejected": -211.4189453125, + "loss": 1.2257, + "nll_loss": 0.983964741230011, + "rewards/accuracies": 0.75, + "rewards/chosen": 6.1039533615112305, + "rewards/margins": 3.2575669288635254, + "rewards/rejected": 2.846386432647705, + "step": 12750 + }, + { + "epoch": 0.7078958682958627, + "grad_norm": 33.801876068115234, + "learning_rate": 1.961524919689218e-08, + "logits/chosen": -0.135419100522995, + "logits/rejected": -0.32483750581741333, + "logps/chosen": -149.5926971435547, + "logps/rejected": -221.0042266845703, + "loss": 1.2294, + "nll_loss": 0.790702223777771, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.895544528961182, + "rewards/margins": 4.437676429748535, + "rewards/rejected": 1.4578684568405151, + "step": 12760 + }, + { + "epoch": 0.7084506456221135, + "grad_norm": 70.3134765625, + "learning_rate": 1.9546087055555375e-08, + "logits/chosen": -0.40542134642601013, + "logits/rejected": -0.5466644167900085, + "logps/chosen": -181.45196533203125, + "logps/rejected": -260.3875732421875, + "loss": 1.3914, + "nll_loss": 0.9717741012573242, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.495024681091309, + "rewards/margins": 4.145397186279297, + "rewards/rejected": 2.3496272563934326, + "step": 12770 + }, + { + "epoch": 0.7090054229483641, + "grad_norm": 76.55762481689453, + "learning_rate": 1.9477017424966152e-08, + "logits/chosen": -0.2543894648551941, + "logits/rejected": -0.42837873101234436, + "logps/chosen": -144.65731811523438, + "logps/rejected": -203.85897827148438, + "loss": 1.2742, + "nll_loss": 0.8862441182136536, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.567269325256348, + "rewards/margins": 3.940202236175537, + "rewards/rejected": 1.6270668506622314, + "step": 12780 + }, + { + "epoch": 0.7095602002746147, + "grad_norm": 75.84803771972656, + "learning_rate": 1.9408040514939377e-08, + "logits/chosen": -0.3257814049720764, + "logits/rejected": -0.44889751076698303, + "logps/chosen": -152.11138916015625, + "logps/rejected": -206.82565307617188, + "loss": 1.2851, + "nll_loss": 0.8820212483406067, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.982528209686279, + "rewards/margins": 4.001893043518066, + "rewards/rejected": 1.9806352853775024, + "step": 12790 + }, + { + "epoch": 0.7101149776008655, + "grad_norm": 40.27317810058594, + "learning_rate": 1.933915653500826e-08, + "logits/chosen": -0.2914651930332184, + "logits/rejected": -0.46833962202072144, + "logps/chosen": -149.4254913330078, + "logps/rejected": -205.02490234375, + "loss": 1.214, + "nll_loss": 0.8474915623664856, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.0263285636901855, + "rewards/margins": 3.7715892791748047, + "rewards/rejected": 2.254739761352539, + "step": 12800 + }, + { + "epoch": 0.7106697549271161, + "grad_norm": 74.58905792236328, + "learning_rate": 1.927036569442365e-08, + "logits/chosen": -0.27327996492385864, + "logits/rejected": -0.4386266767978668, + "logps/chosen": -203.98887634277344, + "logps/rejected": -243.96932983398438, + "loss": 1.2942, + "nll_loss": 0.9820057153701782, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.953102111816406, + "rewards/margins": 3.2255806922912598, + "rewards/rejected": 2.7275218963623047, + "step": 12810 + }, + { + "epoch": 0.7112245322533668, + "grad_norm": 42.248138427734375, + "learning_rate": 1.9201668202153554e-08, + "logits/chosen": -0.2995825409889221, + "logits/rejected": -0.47395235300064087, + "logps/chosen": -150.34645080566406, + "logps/rejected": -229.35995483398438, + "loss": 1.2174, + "nll_loss": 0.9468528032302856, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.46729850769043, + "rewards/margins": 3.862170457839966, + "rewards/rejected": 1.6051280498504639, + "step": 12820 + }, + { + "epoch": 0.7117793095796174, + "grad_norm": 57.27659606933594, + "learning_rate": 1.9133064266882328e-08, + "logits/chosen": -0.23794226348400116, + "logits/rejected": -0.39174434542655945, + "logps/chosen": -119.78440856933594, + "logps/rejected": -176.40623474121094, + "loss": 1.2189, + "nll_loss": 0.7929913401603699, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.895203590393066, + "rewards/margins": 2.8227221965789795, + "rewards/rejected": 2.0724809169769287, + "step": 12830 + }, + { + "epoch": 0.7123340869058682, + "grad_norm": 23.0706844329834, + "learning_rate": 1.9064554097010176e-08, + "logits/chosen": -0.37945157289505005, + "logits/rejected": -0.5358896255493164, + "logps/chosen": -153.80322265625, + "logps/rejected": -219.34814453125, + "loss": 1.2837, + "nll_loss": 0.977512001991272, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.117849349975586, + "rewards/margins": 3.8214268684387207, + "rewards/rejected": 2.296422004699707, + "step": 12840 + }, + { + "epoch": 0.7128888642321188, + "grad_norm": 102.89293670654297, + "learning_rate": 1.8996137900652466e-08, + "logits/chosen": -0.27641671895980835, + "logits/rejected": -0.40077847242355347, + "logps/chosen": -167.06674194335938, + "logps/rejected": -219.5724639892578, + "loss": 1.2671, + "nll_loss": 0.9891948699951172, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.899290561676025, + "rewards/margins": 3.168273687362671, + "rewards/rejected": 2.7310166358947754, + "step": 12850 + }, + { + "epoch": 0.7134436415583695, + "grad_norm": 57.34820556640625, + "learning_rate": 1.8927815885639097e-08, + "logits/chosen": -0.32612407207489014, + "logits/rejected": -0.4492993950843811, + "logps/chosen": -146.11221313476562, + "logps/rejected": -177.49276733398438, + "loss": 1.2811, + "nll_loss": 0.9233980178833008, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.71115779876709, + "rewards/margins": 3.345907688140869, + "rewards/rejected": 2.3652498722076416, + "step": 12860 + }, + { + "epoch": 0.7139984188846202, + "grad_norm": 54.08071517944336, + "learning_rate": 1.8859588259513864e-08, + "logits/chosen": -0.3142291009426117, + "logits/rejected": -0.49519652128219604, + "logps/chosen": -166.45726013183594, + "logps/rejected": -248.1087188720703, + "loss": 1.2014, + "nll_loss": 0.91033536195755, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.033982276916504, + "rewards/margins": 4.530450820922852, + "rewards/rejected": 1.5035309791564941, + "step": 12870 + }, + { + "epoch": 0.7145531962108709, + "grad_norm": 56.40658950805664, + "learning_rate": 1.8791455229533804e-08, + "logits/chosen": -0.322214275598526, + "logits/rejected": -0.44496506452560425, + "logps/chosen": -172.56436157226562, + "logps/rejected": -215.24002075195312, + "loss": 1.1968, + "nll_loss": 0.9485975503921509, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.363347053527832, + "rewards/margins": 3.617643356323242, + "rewards/rejected": 2.745704174041748, + "step": 12880 + }, + { + "epoch": 0.7151079735371215, + "grad_norm": 43.77219772338867, + "learning_rate": 1.8723417002668652e-08, + "logits/chosen": -0.26582399010658264, + "logits/rejected": -0.43083304166793823, + "logps/chosen": -143.25259399414062, + "logps/rejected": -204.05868530273438, + "loss": 1.29, + "nll_loss": 0.8451235890388489, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.363948822021484, + "rewards/margins": 3.3561675548553467, + "rewards/rejected": 2.0077812671661377, + "step": 12890 + }, + { + "epoch": 0.7156627508633722, + "grad_norm": 53.658660888671875, + "learning_rate": 1.8655473785600122e-08, + "logits/chosen": -0.2778196632862091, + "logits/rejected": -0.3918890953063965, + "logps/chosen": -161.07229614257812, + "logps/rejected": -195.16146850585938, + "loss": 1.2338, + "nll_loss": 0.9494892358779907, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.543831825256348, + "rewards/margins": 3.4626071453094482, + "rewards/rejected": 2.0812244415283203, + "step": 12900 + }, + { + "epoch": 0.7162175281896229, + "grad_norm": 40.95535659790039, + "learning_rate": 1.8587625784721356e-08, + "logits/chosen": -0.2518269717693329, + "logits/rejected": -0.42011457681655884, + "logps/chosen": -142.2730712890625, + "logps/rejected": -199.7687530517578, + "loss": 1.2362, + "nll_loss": 0.8660953640937805, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.482463836669922, + "rewards/margins": 4.113658428192139, + "rewards/rejected": 1.3688055276870728, + "step": 12910 + }, + { + "epoch": 0.7167723055158736, + "grad_norm": 42.07647705078125, + "learning_rate": 1.8519873206136177e-08, + "logits/chosen": -0.33359354734420776, + "logits/rejected": -0.458385705947876, + "logps/chosen": -149.71578979492188, + "logps/rejected": -203.4271240234375, + "loss": 1.3138, + "nll_loss": 1.0241000652313232, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.560154914855957, + "rewards/margins": 2.454641580581665, + "rewards/rejected": 3.105512857437134, + "step": 12920 + }, + { + "epoch": 0.7173270828421242, + "grad_norm": 39.50285720825195, + "learning_rate": 1.8452216255658626e-08, + "logits/chosen": -0.4769509434700012, + "logits/rejected": -0.5878731608390808, + "logps/chosen": -214.20791625976562, + "logps/rejected": -306.9803771972656, + "loss": 1.1628, + "nll_loss": 1.1445338726043701, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.931177616119385, + "rewards/margins": 4.8411078453063965, + "rewards/rejected": 2.090069055557251, + "step": 12930 + }, + { + "epoch": 0.717881860168375, + "grad_norm": 41.24628448486328, + "learning_rate": 1.8384655138812178e-08, + "logits/chosen": -0.349549800157547, + "logits/rejected": -0.5109783411026001, + "logps/chosen": -139.32525634765625, + "logps/rejected": -206.759033203125, + "loss": 1.1471, + "nll_loss": 0.9063628911972046, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.7531890869140625, + "rewards/margins": 3.5763237476348877, + "rewards/rejected": 2.176865339279175, + "step": 12940 + }, + { + "epoch": 0.7184366374946256, + "grad_norm": 66.80060577392578, + "learning_rate": 1.831719006082924e-08, + "logits/chosen": -0.31192249059677124, + "logits/rejected": -0.5079909563064575, + "logps/chosen": -163.4386444091797, + "logps/rejected": -230.68496704101562, + "loss": 1.1593, + "nll_loss": 0.9192889928817749, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.040367126464844, + "rewards/margins": 4.6038713455200195, + "rewards/rejected": 1.4364957809448242, + "step": 12950 + }, + { + "epoch": 0.7189914148208763, + "grad_norm": 45.7085075378418, + "learning_rate": 1.8249821226650486e-08, + "logits/chosen": -0.38364288210868835, + "logits/rejected": -0.49500417709350586, + "logps/chosen": -182.65310668945312, + "logps/rejected": -227.1117706298828, + "loss": 1.1732, + "nll_loss": 0.9986478090286255, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.40786600112915, + "rewards/margins": 3.7794127464294434, + "rewards/rejected": 2.628453254699707, + "step": 12960 + }, + { + "epoch": 0.7195461921471269, + "grad_norm": 58.808650970458984, + "learning_rate": 1.8182548840924172e-08, + "logits/chosen": -0.2475881278514862, + "logits/rejected": -0.3897285759449005, + "logps/chosen": -145.7071990966797, + "logps/rejected": -188.16705322265625, + "loss": 1.1754, + "nll_loss": 0.9295892715454102, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.440359115600586, + "rewards/margins": 3.4720706939697266, + "rewards/rejected": 1.9682880640029907, + "step": 12970 + }, + { + "epoch": 0.7201009694733777, + "grad_norm": 39.131309509277344, + "learning_rate": 1.8115373108005638e-08, + "logits/chosen": -0.37623220682144165, + "logits/rejected": -0.5077120661735535, + "logps/chosen": -178.80911254882812, + "logps/rejected": -245.22372436523438, + "loss": 1.2162, + "nll_loss": 0.9660285711288452, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.500500679016113, + "rewards/margins": 4.713377952575684, + "rewards/rejected": 1.787122130393982, + "step": 12980 + }, + { + "epoch": 0.7206557467996283, + "grad_norm": 42.33364486694336, + "learning_rate": 1.804829423195653e-08, + "logits/chosen": -0.26987963914871216, + "logits/rejected": -0.41641178727149963, + "logps/chosen": -166.83279418945312, + "logps/rejected": -231.6823272705078, + "loss": 1.2484, + "nll_loss": 0.9490247964859009, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 6.052439212799072, + "rewards/margins": 4.058259010314941, + "rewards/rejected": 1.994180679321289, + "step": 12990 + }, + { + "epoch": 0.7212105241258789, + "grad_norm": 45.6847038269043, + "learning_rate": 1.798131241654439e-08, + "logits/chosen": -0.4033544063568115, + "logits/rejected": -0.5462941527366638, + "logps/chosen": -166.350341796875, + "logps/rejected": -248.04190063476562, + "loss": 1.343, + "nll_loss": 1.0180920362472534, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.182610988616943, + "rewards/margins": 4.22403621673584, + "rewards/rejected": 1.9585742950439453, + "step": 13000 + }, + { + "epoch": 0.7212105241258789, + "eval_logits/chosen": -0.4150004982948303, + "eval_logits/rejected": -0.5308792591094971, + "eval_logps/chosen": -190.27316284179688, + "eval_logps/rejected": -260.8743591308594, + "eval_loss": 1.2157081365585327, + "eval_nll_loss": 0.987297773361206, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.768805503845215, + "eval_rewards/margins": 4.948392868041992, + "eval_rewards/rejected": 1.8204128742218018, + "eval_runtime": 17.1826, + "eval_samples_per_second": 14.899, + "eval_steps_per_second": 1.862, + "step": 13000 + }, + { + "epoch": 0.7217653014521297, + "grad_norm": 35.98946762084961, + "learning_rate": 1.791442786524181e-08, + "logits/chosen": -0.19918230175971985, + "logits/rejected": -0.3975090980529785, + "logps/chosen": -166.979248046875, + "logps/rejected": -215.09579467773438, + "loss": 1.0687, + "nll_loss": 0.8822317123413086, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.629497528076172, + "rewards/margins": 4.045687675476074, + "rewards/rejected": 1.58380925655365, + "step": 13010 + }, + { + "epoch": 0.7223200787783803, + "grad_norm": 94.03396606445312, + "learning_rate": 1.784764078122598e-08, + "logits/chosen": -0.2538452744483948, + "logits/rejected": -0.42120417952537537, + "logps/chosen": -149.91822814941406, + "logps/rejected": -221.2766876220703, + "loss": 1.3123, + "nll_loss": 0.8378020524978638, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.508445739746094, + "rewards/margins": 3.8266403675079346, + "rewards/rejected": 1.6818052530288696, + "step": 13020 + }, + { + "epoch": 0.722874856104631, + "grad_norm": 62.408546447753906, + "learning_rate": 1.7780951367377972e-08, + "logits/chosen": -0.33915066719055176, + "logits/rejected": -0.4873233437538147, + "logps/chosen": -145.5653839111328, + "logps/rejected": -212.60238647460938, + "loss": 1.2537, + "nll_loss": 0.9451497793197632, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.885664939880371, + "rewards/margins": 3.323911190032959, + "rewards/rejected": 2.561753749847412, + "step": 13030 + }, + { + "epoch": 0.7234296334308816, + "grad_norm": 90.34113311767578, + "learning_rate": 1.771435982628219e-08, + "logits/chosen": -0.25658300518989563, + "logits/rejected": -0.391094833612442, + "logps/chosen": -154.4763641357422, + "logps/rejected": -213.06753540039062, + "loss": 1.2512, + "nll_loss": 0.9946179389953613, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.304181098937988, + "rewards/margins": 3.5390563011169434, + "rewards/rejected": 1.7651245594024658, + "step": 13040 + }, + { + "epoch": 0.7239844107571324, + "grad_norm": 76.4636001586914, + "learning_rate": 1.7647866360225726e-08, + "logits/chosen": -0.31367915868759155, + "logits/rejected": -0.4467683732509613, + "logps/chosen": -163.2528076171875, + "logps/rejected": -188.37747192382812, + "loss": 1.1973, + "nll_loss": 0.9559744000434875, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.3748369216918945, + "rewards/margins": 3.4286091327667236, + "rewards/rejected": 1.9462273120880127, + "step": 13050 + }, + { + "epoch": 0.724539188083383, + "grad_norm": 106.77983856201172, + "learning_rate": 1.7581471171197722e-08, + "logits/chosen": -0.16705089807510376, + "logits/rejected": -0.4071389138698578, + "logps/chosen": -132.18258666992188, + "logps/rejected": -180.0357208251953, + "loss": 1.2273, + "nll_loss": 0.7544218897819519, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.956098556518555, + "rewards/margins": 3.1437020301818848, + "rewards/rejected": 1.8123964071273804, + "step": 13060 + }, + { + "epoch": 0.7250939654096337, + "grad_norm": 69.83487701416016, + "learning_rate": 1.7515174460888816e-08, + "logits/chosen": -0.29171133041381836, + "logits/rejected": -0.4464386999607086, + "logps/chosen": -165.49127197265625, + "logps/rejected": -199.56341552734375, + "loss": 1.3437, + "nll_loss": 0.8982936143875122, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 5.108651161193848, + "rewards/margins": 2.5784244537353516, + "rewards/rejected": 2.530226230621338, + "step": 13070 + }, + { + "epoch": 0.7256487427358844, + "grad_norm": 53.608699798583984, + "learning_rate": 1.7448976430690438e-08, + "logits/chosen": -0.2807365357875824, + "logits/rejected": -0.4650154709815979, + "logps/chosen": -143.1229705810547, + "logps/rejected": -221.6487274169922, + "loss": 1.1904, + "nll_loss": 0.898482620716095, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.50841760635376, + "rewards/margins": 4.433836460113525, + "rewards/rejected": 1.0745811462402344, + "step": 13080 + }, + { + "epoch": 0.7262035200621351, + "grad_norm": 42.33846664428711, + "learning_rate": 1.7382877281694354e-08, + "logits/chosen": -0.42869797348976135, + "logits/rejected": -0.5276592373847961, + "logps/chosen": -212.95223999023438, + "logps/rejected": -263.3831481933594, + "loss": 1.3195, + "nll_loss": 1.1187633275985718, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.4709601402282715, + "rewards/margins": 4.3836140632629395, + "rewards/rejected": 2.087346076965332, + "step": 13090 + }, + { + "epoch": 0.7267582973883857, + "grad_norm": 44.90936279296875, + "learning_rate": 1.7316877214691862e-08, + "logits/chosen": -0.2935159206390381, + "logits/rejected": -0.4234120845794678, + "logps/chosen": -152.82589721679688, + "logps/rejected": -200.16238403320312, + "loss": 1.2297, + "nll_loss": 0.9580494165420532, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.570136070251465, + "rewards/margins": 3.5980517864227295, + "rewards/rejected": 1.9720847606658936, + "step": 13100 + }, + { + "epoch": 0.7273130747146365, + "grad_norm": 72.38946533203125, + "learning_rate": 1.7250976430173285e-08, + "logits/chosen": -0.35018840432167053, + "logits/rejected": -0.46431058645248413, + "logps/chosen": -165.9536590576172, + "logps/rejected": -224.0995330810547, + "loss": 1.2554, + "nll_loss": 1.0881538391113281, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.744930267333984, + "rewards/margins": 3.581627607345581, + "rewards/rejected": 2.1633026599884033, + "step": 13110 + }, + { + "epoch": 0.7278678520408871, + "grad_norm": 39.84886169433594, + "learning_rate": 1.7185175128327418e-08, + "logits/chosen": -0.20200283825397491, + "logits/rejected": -0.35957229137420654, + "logps/chosen": -162.9403076171875, + "logps/rejected": -203.55410766601562, + "loss": 1.2343, + "nll_loss": 0.8529999852180481, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.79220724105835, + "rewards/margins": 3.306241989135742, + "rewards/rejected": 2.4859659671783447, + "step": 13120 + }, + { + "epoch": 0.7284226293671378, + "grad_norm": 35.085819244384766, + "learning_rate": 1.7119473509040756e-08, + "logits/chosen": -0.21651801466941833, + "logits/rejected": -0.364332914352417, + "logps/chosen": -163.5776824951172, + "logps/rejected": -206.1197509765625, + "loss": 1.1998, + "nll_loss": 0.9077421426773071, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.570407867431641, + "rewards/margins": 2.973717212677002, + "rewards/rejected": 2.5966904163360596, + "step": 13130 + }, + { + "epoch": 0.7289774066933884, + "grad_norm": 84.82121276855469, + "learning_rate": 1.7053871771897115e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -160.59307861328125, + "logps/rejected": -225.750244140625, + "loss": 1.3315, + "nll_loss": NaN, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.173449516296387, + "rewards/margins": 4.374788284301758, + "rewards/rejected": 1.7986600399017334, + "step": 13140 + }, + { + "epoch": 0.7295321840196392, + "grad_norm": 61.24282455444336, + "learning_rate": 1.6988370116176764e-08, + "logits/chosen": -0.34403157234191895, + "logits/rejected": -0.5174924731254578, + "logps/chosen": -186.6648712158203, + "logps/rejected": -223.6381072998047, + "loss": 1.3006, + "nll_loss": 0.9689914584159851, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.245591163635254, + "rewards/margins": 4.800109386444092, + "rewards/rejected": 1.4454818964004517, + "step": 13150 + }, + { + "epoch": 0.7300869613458898, + "grad_norm": 79.50798797607422, + "learning_rate": 1.692296874085605e-08, + "logits/chosen": -0.34989652037620544, + "logits/rejected": -0.4624873101711273, + "logps/chosen": -162.43450927734375, + "logps/rejected": -224.67764282226562, + "loss": 1.2184, + "nll_loss": 1.0137053728103638, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.597512722015381, + "rewards/margins": 3.3539185523986816, + "rewards/rejected": 2.2435946464538574, + "step": 13160 + }, + { + "epoch": 0.7306417386721404, + "grad_norm": 59.73739242553711, + "learning_rate": 1.6857667844606616e-08, + "logits/chosen": -0.41657987236976624, + "logits/rejected": -0.566390335559845, + "logps/chosen": -184.5636749267578, + "logps/rejected": -233.48483276367188, + "loss": 1.2721, + "nll_loss": 1.0097143650054932, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.789811134338379, + "rewards/margins": 3.3764655590057373, + "rewards/rejected": 2.4133458137512207, + "step": 13170 + }, + { + "epoch": 0.7311965159983912, + "grad_norm": 61.35866165161133, + "learning_rate": 1.6792467625794942e-08, + "logits/chosen": -0.2814289629459381, + "logits/rejected": -0.4504537582397461, + "logps/chosen": -174.3564910888672, + "logps/rejected": -246.03244018554688, + "loss": 1.2339, + "nll_loss": 0.8952564001083374, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.157135009765625, + "rewards/margins": 4.755683422088623, + "rewards/rejected": 1.4014512300491333, + "step": 13180 + }, + { + "epoch": 0.7317512933246418, + "grad_norm": 49.41172409057617, + "learning_rate": 1.6727368282481656e-08, + "logits/chosen": -0.30663132667541504, + "logits/rejected": -0.42407432198524475, + "logps/chosen": -167.57131958007812, + "logps/rejected": -204.41546630859375, + "loss": 1.193, + "nll_loss": 0.895298957824707, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.07155704498291, + "rewards/margins": 3.683244228363037, + "rewards/rejected": 2.388312816619873, + "step": 13190 + }, + { + "epoch": 0.7323060706508925, + "grad_norm": 40.43097686767578, + "learning_rate": 1.666237001242093e-08, + "logits/chosen": -0.4561356008052826, + "logits/rejected": -0.5546432137489319, + "logps/chosen": -188.98721313476562, + "logps/rejected": -239.19589233398438, + "loss": 1.2181, + "nll_loss": 1.0610122680664062, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.779797554016113, + "rewards/margins": 3.9123573303222656, + "rewards/rejected": 2.8674397468566895, + "step": 13200 + }, + { + "epoch": 0.7328608479771431, + "grad_norm": 32.458961486816406, + "learning_rate": 1.6597473013059943e-08, + "logits/chosen": -0.4126416742801666, + "logits/rejected": -0.5425761342048645, + "logps/chosen": -212.3006591796875, + "logps/rejected": -268.7002868652344, + "loss": 1.2087, + "nll_loss": 1.0621838569641113, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.994882106781006, + "rewards/margins": 4.066960334777832, + "rewards/rejected": 2.927921772003174, + "step": 13210 + }, + { + "epoch": 0.7334156253033939, + "grad_norm": 79.28108978271484, + "learning_rate": 1.6532677481538194e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -134.73980712890625, + "logps/rejected": -203.62240600585938, + "loss": 1.1323, + "nll_loss": NaN, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.300518035888672, + "rewards/margins": 3.7317371368408203, + "rewards/rejected": 1.5687808990478516, + "step": 13220 + }, + { + "epoch": 0.7339704026296445, + "grad_norm": 120.80438995361328, + "learning_rate": 1.6467983614686992e-08, + "logits/chosen": -0.3328816592693329, + "logits/rejected": -0.46285098791122437, + "logps/chosen": -168.47970581054688, + "logps/rejected": -232.7246551513672, + "loss": 1.2559, + "nll_loss": 0.9353010058403015, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.086447715759277, + "rewards/margins": 3.551722288131714, + "rewards/rejected": 2.5347256660461426, + "step": 13230 + }, + { + "epoch": 0.7345251799558952, + "grad_norm": 45.92528533935547, + "learning_rate": 1.6403391609028793e-08, + "logits/chosen": -0.32340607047080994, + "logits/rejected": -0.5008508563041687, + "logps/chosen": -192.72293090820312, + "logps/rejected": -246.63119506835938, + "loss": 1.2068, + "nll_loss": 0.9869135022163391, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.534943580627441, + "rewards/margins": 4.894669055938721, + "rewards/rejected": 1.6402740478515625, + "step": 13240 + }, + { + "epoch": 0.7350799572821459, + "grad_norm": 48.16605758666992, + "learning_rate": 1.6338901660776662e-08, + "logits/chosen": -0.2729222774505615, + "logits/rejected": -0.3413732945919037, + "logps/chosen": -201.21612548828125, + "logps/rejected": -254.92153930664062, + "loss": 1.2211, + "nll_loss": 1.0235908031463623, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.458531379699707, + "rewards/margins": 3.4676146507263184, + "rewards/rejected": 2.9909164905548096, + "step": 13250 + }, + { + "epoch": 0.7356347346083966, + "grad_norm": 59.24027633666992, + "learning_rate": 1.6274513965833565e-08, + "logits/chosen": -0.2952631413936615, + "logits/rejected": -0.4671157896518707, + "logps/chosen": -158.8353729248047, + "logps/rejected": -238.5457000732422, + "loss": 1.1925, + "nll_loss": 0.910784900188446, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.756814479827881, + "rewards/margins": 4.049107551574707, + "rewards/rejected": 1.707707166671753, + "step": 13260 + }, + { + "epoch": 0.7361895119346472, + "grad_norm": 36.049800872802734, + "learning_rate": 1.6210228719791947e-08, + "logits/chosen": -0.2845991849899292, + "logits/rejected": -0.45980915427207947, + "logps/chosen": -157.11216735839844, + "logps/rejected": -205.4685516357422, + "loss": 1.2241, + "nll_loss": 0.8929013013839722, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.706349849700928, + "rewards/margins": 3.1936049461364746, + "rewards/rejected": 2.512744426727295, + "step": 13270 + }, + { + "epoch": 0.7367442892608979, + "grad_norm": 135.76869201660156, + "learning_rate": 1.6146046117932942e-08, + "logits/chosen": -0.3357810378074646, + "logits/rejected": -0.4589572548866272, + "logps/chosen": -173.140869140625, + "logps/rejected": -245.12350463867188, + "loss": 1.2488, + "nll_loss": 1.0283997058868408, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.2359771728515625, + "rewards/margins": 4.565829753875732, + "rewards/rejected": 1.6701467037200928, + "step": 13280 + }, + { + "epoch": 0.7372990665871486, + "grad_norm": 71.30635070800781, + "learning_rate": 1.608196635522596e-08, + "logits/chosen": -0.24882233142852783, + "logits/rejected": -0.4175376296043396, + "logps/chosen": -144.53494262695312, + "logps/rejected": -198.82943725585938, + "loss": 1.2672, + "nll_loss": 0.9700925946235657, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.016253471374512, + "rewards/margins": 3.1141557693481445, + "rewards/rejected": 1.9020977020263672, + "step": 13290 + }, + { + "epoch": 0.7378538439133993, + "grad_norm": 31.621423721313477, + "learning_rate": 1.601798962632799e-08, + "logits/chosen": -0.35084596276283264, + "logits/rejected": -0.47968751192092896, + "logps/chosen": -169.7008514404297, + "logps/rejected": -235.3212127685547, + "loss": 1.1461, + "nll_loss": 0.9612873792648315, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.660435676574707, + "rewards/margins": 3.5851454734802246, + "rewards/rejected": 2.0752902030944824, + "step": 13300 + }, + { + "epoch": 0.7384086212396499, + "grad_norm": 51.74677276611328, + "learning_rate": 1.5954116125582996e-08, + "logits/chosen": -0.2961025834083557, + "logits/rejected": -0.41584667563438416, + "logps/chosen": -168.86094665527344, + "logps/rejected": -218.5751190185547, + "loss": 1.206, + "nll_loss": 1.0752700567245483, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.798641204833984, + "rewards/margins": 4.416439533233643, + "rewards/rejected": 1.3822017908096313, + "step": 13310 + }, + { + "epoch": 0.7389633985659007, + "grad_norm": 50.81084442138672, + "learning_rate": 1.589034604702142e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -135.4510955810547, + "logps/rejected": -203.21034240722656, + "loss": 1.1936, + "nll_loss": NaN, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.300278663635254, + "rewards/margins": 4.057191848754883, + "rewards/rejected": 1.243086576461792, + "step": 13320 + }, + { + "epoch": 0.7395181758921513, + "grad_norm": 74.6451416015625, + "learning_rate": 1.5826679584359454e-08, + "logits/chosen": -0.15505166351795197, + "logits/rejected": -0.3529577851295471, + "logps/chosen": -140.4691619873047, + "logps/rejected": -204.4829559326172, + "loss": 1.1831, + "nll_loss": 0.7784417271614075, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.117010116577148, + "rewards/margins": 3.7283904552459717, + "rewards/rejected": 1.3886195421218872, + "step": 13330 + }, + { + "epoch": 0.7400729532184019, + "grad_norm": 141.3043670654297, + "learning_rate": 1.576311693099866e-08, + "logits/chosen": -0.22378918528556824, + "logits/rejected": -0.3400081694126129, + "logps/chosen": -156.72413635253906, + "logps/rejected": -186.8373565673828, + "loss": 1.3799, + "nll_loss": 0.9340991973876953, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.177123546600342, + "rewards/margins": 2.554708242416382, + "rewards/rejected": 2.622415065765381, + "step": 13340 + }, + { + "epoch": 0.7406277305446526, + "grad_norm": 70.24127197265625, + "learning_rate": 1.569965828002514e-08, + "logits/chosen": -0.27799028158187866, + "logits/rejected": -0.4888533055782318, + "logps/chosen": -137.8538818359375, + "logps/rejected": -201.4366912841797, + "loss": 1.2301, + "nll_loss": 0.869592547416687, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.519021034240723, + "rewards/margins": 4.259803771972656, + "rewards/rejected": 1.2592167854309082, + "step": 13350 + }, + { + "epoch": 0.7411825078709033, + "grad_norm": 57.17934036254883, + "learning_rate": 1.5636303824209098e-08, + "logits/chosen": -0.1766783595085144, + "logits/rejected": -0.40922850370407104, + "logps/chosen": -135.87318420410156, + "logps/rejected": -201.8750762939453, + "loss": 1.2799, + "nll_loss": 0.7687122821807861, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.150801181793213, + "rewards/margins": 3.9855704307556152, + "rewards/rejected": 1.1652311086654663, + "step": 13360 + }, + { + "epoch": 0.741737285197154, + "grad_norm": 62.709877014160156, + "learning_rate": 1.5573053756004252e-08, + "logits/chosen": -0.13567259907722473, + "logits/rejected": -0.27794989943504333, + "logps/chosen": -152.32171630859375, + "logps/rejected": -213.1107177734375, + "loss": 1.2109, + "nll_loss": 0.8560276031494141, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.995692253112793, + "rewards/margins": 3.5224056243896484, + "rewards/rejected": 1.473286747932434, + "step": 13370 + }, + { + "epoch": 0.7422920625234046, + "grad_norm": 46.401878356933594, + "learning_rate": 1.550990826754715e-08, + "logits/chosen": -0.3182279169559479, + "logits/rejected": -0.4443788528442383, + "logps/chosen": -173.28939819335938, + "logps/rejected": -241.54397583007812, + "loss": 1.2117, + "nll_loss": 0.919338047504425, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.1595540046691895, + "rewards/margins": 3.7008984088897705, + "rewards/rejected": 2.458655595779419, + "step": 13380 + }, + { + "epoch": 0.7428468398496554, + "grad_norm": 62.002140045166016, + "learning_rate": 1.5446867550656767e-08, + "logits/chosen": -0.32122185826301575, + "logits/rejected": -0.4888898730278015, + "logps/chosen": -134.274658203125, + "logps/rejected": -183.2400360107422, + "loss": 1.168, + "nll_loss": 0.8901262283325195, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.329556465148926, + "rewards/margins": 3.9345555305480957, + "rewards/rejected": 1.3950015306472778, + "step": 13390 + }, + { + "epoch": 0.743401617175906, + "grad_norm": 31.993404388427734, + "learning_rate": 1.53839317968337e-08, + "logits/chosen": -0.13947448134422302, + "logits/rejected": -0.36338135600090027, + "logps/chosen": -126.97871398925781, + "logps/rejected": -188.83485412597656, + "loss": 1.2051, + "nll_loss": 0.7395111918449402, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.282337188720703, + "rewards/margins": 4.470915794372559, + "rewards/rejected": 0.8114216923713684, + "step": 13400 + }, + { + "epoch": 0.7439563945021567, + "grad_norm": 33.4692497253418, + "learning_rate": 1.532110119725976e-08, + "logits/chosen": -0.41327300667762756, + "logits/rejected": -0.5486120581626892, + "logps/chosen": -183.75914001464844, + "logps/rejected": -263.6907043457031, + "loss": 1.2326, + "nll_loss": 1.069012999534607, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.653592109680176, + "rewards/margins": 4.541326522827148, + "rewards/rejected": 2.1122653484344482, + "step": 13410 + }, + { + "epoch": 0.7445111718284074, + "grad_norm": 108.77604675292969, + "learning_rate": 1.5258375942797292e-08, + "logits/chosen": -0.21581730246543884, + "logits/rejected": -0.4722031056880951, + "logps/chosen": -146.3475799560547, + "logps/rejected": -222.5902862548828, + "loss": 1.2368, + "nll_loss": 0.7659646272659302, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.876851558685303, + "rewards/margins": 4.014065742492676, + "rewards/rejected": 1.8627859354019165, + "step": 13420 + }, + { + "epoch": 0.7450659491546581, + "grad_norm": 85.56887817382812, + "learning_rate": 1.519575622398865e-08, + "logits/chosen": -0.3675265610218048, + "logits/rejected": -0.5397453308105469, + "logps/chosen": -163.51470947265625, + "logps/rejected": -223.89694213867188, + "loss": 1.2145, + "nll_loss": 0.9677974581718445, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.7679643630981445, + "rewards/margins": 3.14967942237854, + "rewards/rejected": 2.6182851791381836, + "step": 13430 + }, + { + "epoch": 0.7456207264809087, + "grad_norm": 52.506587982177734, + "learning_rate": 1.513324223105562e-08, + "logits/chosen": -0.3206063210964203, + "logits/rejected": -0.4452175199985504, + "logps/chosen": -146.18118286132812, + "logps/rejected": -199.52804565429688, + "loss": 1.2252, + "nll_loss": 0.9559661149978638, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.652331352233887, + "rewards/margins": 3.6066582202911377, + "rewards/rejected": 2.04567289352417, + "step": 13440 + }, + { + "epoch": 0.7461755038071594, + "grad_norm": 65.94963836669922, + "learning_rate": 1.5070834153898766e-08, + "logits/chosen": -0.2580520510673523, + "logits/rejected": -0.3323266804218292, + "logps/chosen": -116.36967468261719, + "logps/rejected": -184.68650817871094, + "loss": 1.3069, + "nll_loss": 0.8281615972518921, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.108590602874756, + "rewards/margins": 3.4536919593811035, + "rewards/rejected": 1.6548986434936523, + "step": 13450 + }, + { + "epoch": 0.7467302811334101, + "grad_norm": 88.79898834228516, + "learning_rate": 1.5008532182096968e-08, + "logits/chosen": -0.291323721408844, + "logits/rejected": -0.46528196334838867, + "logps/chosen": -156.42576599121094, + "logps/rejected": -208.47030639648438, + "loss": 1.3222, + "nll_loss": 0.8707913160324097, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.347197532653809, + "rewards/margins": 3.129718065261841, + "rewards/rejected": 2.2174792289733887, + "step": 13460 + }, + { + "epoch": 0.7472850584596608, + "grad_norm": 79.50917053222656, + "learning_rate": 1.4946336504906733e-08, + "logits/chosen": -0.24390359222888947, + "logits/rejected": -0.4485185146331787, + "logps/chosen": -141.30801391601562, + "logps/rejected": -194.8183135986328, + "loss": 1.2848, + "nll_loss": 0.8318487405776978, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.60951566696167, + "rewards/margins": 3.9858298301696777, + "rewards/rejected": 1.6236860752105713, + "step": 13470 + }, + { + "epoch": 0.7478398357859114, + "grad_norm": 62.15235137939453, + "learning_rate": 1.4884247311261706e-08, + "logits/chosen": -0.34686800837516785, + "logits/rejected": -0.4664524495601654, + "logps/chosen": -171.86383056640625, + "logps/rejected": -223.7300262451172, + "loss": 1.2128, + "nll_loss": 1.0052675008773804, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.725604057312012, + "rewards/margins": 3.4819304943084717, + "rewards/rejected": 2.2436728477478027, + "step": 13480 + }, + { + "epoch": 0.7483946131121622, + "grad_norm": 73.1287841796875, + "learning_rate": 1.4822264789772071e-08, + "logits/chosen": -0.23515813052654266, + "logits/rejected": -0.3965316414833069, + "logps/chosen": -149.0940399169922, + "logps/rejected": -209.34872436523438, + "loss": 1.1761, + "nll_loss": 0.8091143369674683, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.670095443725586, + "rewards/margins": 4.315380573272705, + "rewards/rejected": 1.3547146320343018, + "step": 13490 + }, + { + "epoch": 0.7489493904384128, + "grad_norm": 29.182003021240234, + "learning_rate": 1.4760389128723965e-08, + "logits/chosen": -0.4131618142127991, + "logits/rejected": -0.5330209732055664, + "logps/chosen": -212.5506134033203, + "logps/rejected": -287.2911071777344, + "loss": 1.2237, + "nll_loss": 1.0589938163757324, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.932608604431152, + "rewards/margins": 5.323004722595215, + "rewards/rejected": 1.6096042394638062, + "step": 13500 + }, + { + "epoch": 0.7489493904384128, + "eval_logits/chosen": -0.416965126991272, + "eval_logits/rejected": -0.5322977900505066, + "eval_logps/chosen": -189.99722290039062, + "eval_logps/rejected": -260.26898193359375, + "eval_loss": 1.2166118621826172, + "eval_nll_loss": 0.9845010042190552, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.796399116516113, + "eval_rewards/margins": 4.915448188781738, + "eval_rewards/rejected": 1.8809503316879272, + "eval_runtime": 16.693, + "eval_samples_per_second": 15.336, + "eval_steps_per_second": 1.917, + "step": 13500 + }, + { + "epoch": 0.7495041677646634, + "grad_norm": 39.3568000793457, + "learning_rate": 1.469862051607888e-08, + "logits/chosen": -0.36322319507598877, + "logits/rejected": -0.5250617265701294, + "logps/chosen": -160.3135986328125, + "logps/rejected": -216.87191772460938, + "loss": 1.2106, + "nll_loss": 0.9671661257743835, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.912907123565674, + "rewards/margins": 3.5368614196777344, + "rewards/rejected": 2.3760459423065186, + "step": 13510 + }, + { + "epoch": 0.7500589450909141, + "grad_norm": 85.17237854003906, + "learning_rate": 1.463695913947317e-08, + "logits/chosen": -0.19930145144462585, + "logits/rejected": -0.39999374747276306, + "logps/chosen": -144.89492797851562, + "logps/rejected": -218.9955291748047, + "loss": 1.2911, + "nll_loss": 0.8356701135635376, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.16513204574585, + "rewards/margins": 3.6399447917938232, + "rewards/rejected": 1.5251868963241577, + "step": 13520 + }, + { + "epoch": 0.7506137224171648, + "grad_norm": 57.78459930419922, + "learning_rate": 1.4575405186217392e-08, + "logits/chosen": -0.30035391449928284, + "logits/rejected": -0.46901971101760864, + "logps/chosen": -185.02407836914062, + "logps/rejected": -220.53857421875, + "loss": 1.1251, + "nll_loss": 1.061841607093811, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.5068511962890625, + "rewards/margins": 4.961834907531738, + "rewards/rejected": 1.5450154542922974, + "step": 13530 + }, + { + "epoch": 0.7511684997434155, + "grad_norm": 74.79740905761719, + "learning_rate": 1.451395884329581e-08, + "logits/chosen": -0.37808674573898315, + "logits/rejected": -0.44419270753860474, + "logps/chosen": -172.4527587890625, + "logps/rejected": -230.8903045654297, + "loss": 1.3499, + "nll_loss": 1.083181619644165, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.872241020202637, + "rewards/margins": 3.1443417072296143, + "rewards/rejected": 2.7278990745544434, + "step": 13540 + }, + { + "epoch": 0.7517232770696661, + "grad_norm": 43.86543655395508, + "learning_rate": 1.4452620297365802e-08, + "logits/chosen": -0.3932144045829773, + "logits/rejected": -0.505740761756897, + "logps/chosen": -179.28599548339844, + "logps/rejected": -243.1675567626953, + "loss": 1.2663, + "nll_loss": 1.0719324350357056, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.5402960777282715, + "rewards/margins": 4.369576930999756, + "rewards/rejected": 2.1707186698913574, + "step": 13550 + }, + { + "epoch": 0.7522780543959169, + "grad_norm": 49.34416198730469, + "learning_rate": 1.4391389734757254e-08, + "logits/chosen": -0.319457083940506, + "logits/rejected": -0.47524309158325195, + "logps/chosen": -163.37777709960938, + "logps/rejected": -251.42660522460938, + "loss": 1.1682, + "nll_loss": 0.9476040601730347, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.27634334564209, + "rewards/margins": 4.794894218444824, + "rewards/rejected": 1.4814488887786865, + "step": 13560 + }, + { + "epoch": 0.7528328317221675, + "grad_norm": 70.49656677246094, + "learning_rate": 1.4330267341472069e-08, + "logits/chosen": -0.3233944773674011, + "logits/rejected": -0.44154053926467896, + "logps/chosen": -179.5468292236328, + "logps/rejected": -223.44540405273438, + "loss": 1.2648, + "nll_loss": 1.0197670459747314, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.219475746154785, + "rewards/margins": 3.2971725463867188, + "rewards/rejected": 2.9223031997680664, + "step": 13570 + }, + { + "epoch": 0.7533876090484182, + "grad_norm": 50.081268310546875, + "learning_rate": 1.4269253303183515e-08, + "logits/chosen": -0.2911186218261719, + "logits/rejected": -0.4075024724006653, + "logps/chosen": -156.9920196533203, + "logps/rejected": -201.3787384033203, + "loss": 1.1953, + "nll_loss": 0.966240406036377, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.275843620300293, + "rewards/margins": 3.2944388389587402, + "rewards/rejected": 1.9814043045043945, + "step": 13580 + }, + { + "epoch": 0.7539423863746688, + "grad_norm": 53.76469039916992, + "learning_rate": 1.4208347805235743e-08, + "logits/chosen": -0.3274695873260498, + "logits/rejected": -0.5015454888343811, + "logps/chosen": -149.1398468017578, + "logps/rejected": -196.37017822265625, + "loss": 1.2699, + "nll_loss": 0.9796813726425171, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.354022026062012, + "rewards/margins": 3.6555702686309814, + "rewards/rejected": 1.6984519958496094, + "step": 13590 + }, + { + "epoch": 0.7544971637009196, + "grad_norm": 72.05597686767578, + "learning_rate": 1.414755103264319e-08, + "logits/chosen": -0.3498299717903137, + "logits/rejected": -0.4781871736049652, + "logps/chosen": -172.31446838378906, + "logps/rejected": -226.637939453125, + "loss": 1.3502, + "nll_loss": 0.9635981321334839, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.778754234313965, + "rewards/margins": 3.1914174556732178, + "rewards/rejected": 2.587336540222168, + "step": 13600 + }, + { + "epoch": 0.7550519410271702, + "grad_norm": 56.078861236572266, + "learning_rate": 1.4086863170089975e-08, + "logits/chosen": -0.371195524930954, + "logits/rejected": -0.4768117368221283, + "logps/chosen": -174.6502685546875, + "logps/rejected": -226.41098022460938, + "loss": 1.2606, + "nll_loss": 1.113525629043579, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.1385674476623535, + "rewards/margins": 3.322308301925659, + "rewards/rejected": 2.8162596225738525, + "step": 13610 + }, + { + "epoch": 0.7556067183534209, + "grad_norm": 78.32002258300781, + "learning_rate": 1.4026284401929439e-08, + "logits/chosen": -0.27597662806510925, + "logits/rejected": -0.41690319776535034, + "logps/chosen": -175.29898071289062, + "logps/rejected": -219.8903350830078, + "loss": 1.2891, + "nll_loss": 0.9898480176925659, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.125199794769287, + "rewards/margins": 4.231071472167969, + "rewards/rejected": 1.8941287994384766, + "step": 13620 + }, + { + "epoch": 0.7561614956796716, + "grad_norm": 73.02739715576172, + "learning_rate": 1.3965814912183432e-08, + "logits/chosen": -0.34426796436309814, + "logits/rejected": -0.46403923630714417, + "logps/chosen": -191.7817840576172, + "logps/rejected": -241.34689331054688, + "loss": 1.2583, + "nll_loss": 1.01153564453125, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.102602481842041, + "rewards/margins": 3.521191120147705, + "rewards/rejected": 2.581411123275757, + "step": 13630 + }, + { + "epoch": 0.7567162730059223, + "grad_norm": 83.91927337646484, + "learning_rate": 1.3905454884541967e-08, + "logits/chosen": -0.3065240681171417, + "logits/rejected": -0.4858540892601013, + "logps/chosen": -160.86172485351562, + "logps/rejected": -191.63796997070312, + "loss": 1.3385, + "nll_loss": 0.8707631230354309, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.422104835510254, + "rewards/margins": 3.1213173866271973, + "rewards/rejected": 2.3007874488830566, + "step": 13640 + }, + { + "epoch": 0.7572710503321729, + "grad_norm": 70.45728302001953, + "learning_rate": 1.384520450236244e-08, + "logits/chosen": -0.19798685610294342, + "logits/rejected": -0.3198954164981842, + "logps/chosen": -133.4322967529297, + "logps/rejected": -211.990478515625, + "loss": 1.1453, + "nll_loss": 0.8883682489395142, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.187527179718018, + "rewards/margins": 3.546415328979492, + "rewards/rejected": 1.6411120891571045, + "step": 13650 + }, + { + "epoch": 0.7578258276584235, + "grad_norm": 117.31558990478516, + "learning_rate": 1.3785063948669229e-08, + "logits/chosen": -0.2564094066619873, + "logits/rejected": -0.45179280638694763, + "logps/chosen": -143.52139282226562, + "logps/rejected": -181.74449157714844, + "loss": 1.2219, + "nll_loss": 0.8907234072685242, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.215444087982178, + "rewards/margins": 3.032702684402466, + "rewards/rejected": 2.1827406883239746, + "step": 13660 + }, + { + "epoch": 0.7583806049846743, + "grad_norm": 92.70428466796875, + "learning_rate": 1.3725033406153042e-08, + "logits/chosen": -0.27764803171157837, + "logits/rejected": -0.39444833993911743, + "logps/chosen": -157.85360717773438, + "logps/rejected": -223.7536163330078, + "loss": 1.25, + "nll_loss": 0.9519385099411011, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.345429420471191, + "rewards/margins": 4.418973445892334, + "rewards/rejected": 1.9264558553695679, + "step": 13670 + }, + { + "epoch": 0.7589353823109249, + "grad_norm": 52.74095153808594, + "learning_rate": 1.3665113057170429e-08, + "logits/chosen": -0.29531174898147583, + "logits/rejected": -0.46530881524086, + "logps/chosen": -165.24893188476562, + "logps/rejected": -206.25045776367188, + "loss": 1.3079, + "nll_loss": 0.8828876614570618, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 5.421899318695068, + "rewards/margins": 2.826404094696045, + "rewards/rejected": 2.5954947471618652, + "step": 13680 + }, + { + "epoch": 0.7594901596371756, + "grad_norm": 54.86754608154297, + "learning_rate": 1.3605303083743225e-08, + "logits/chosen": -0.10553546994924545, + "logits/rejected": -0.33541515469551086, + "logps/chosen": -115.08282470703125, + "logps/rejected": -159.0226593017578, + "loss": 1.3038, + "nll_loss": 0.7253124117851257, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.589519500732422, + "rewards/margins": 3.5651519298553467, + "rewards/rejected": 1.0243679285049438, + "step": 13690 + }, + { + "epoch": 0.7600449369634263, + "grad_norm": 56.478858947753906, + "learning_rate": 1.3545603667557909e-08, + "logits/chosen": -0.15537983179092407, + "logits/rejected": -0.28376904129981995, + "logps/chosen": -156.2434539794922, + "logps/rejected": -185.52598571777344, + "loss": 1.2042, + "nll_loss": 0.8980692625045776, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.184126853942871, + "rewards/margins": 3.451261043548584, + "rewards/rejected": 1.732865333557129, + "step": 13700 + }, + { + "epoch": 0.760599714289677, + "grad_norm": 53.57682800292969, + "learning_rate": 1.3486014989965183e-08, + "logits/chosen": -0.30913281440734863, + "logits/rejected": -0.4532663822174072, + "logps/chosen": -135.60108947753906, + "logps/rejected": -189.81686401367188, + "loss": 1.1902, + "nll_loss": 0.8723493814468384, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.170324802398682, + "rewards/margins": 3.189711570739746, + "rewards/rejected": 1.980613112449646, + "step": 13710 + }, + { + "epoch": 0.7611544916159276, + "grad_norm": 58.94258499145508, + "learning_rate": 1.3426537231979307e-08, + "logits/chosen": -0.21132151782512665, + "logits/rejected": -0.37680166959762573, + "logps/chosen": -159.8973846435547, + "logps/rejected": -198.5689697265625, + "loss": 1.1732, + "nll_loss": 0.8618567585945129, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.449318885803223, + "rewards/margins": 3.848485231399536, + "rewards/rejected": 1.6008336544036865, + "step": 13720 + }, + { + "epoch": 0.7617092689421784, + "grad_norm": 72.0971450805664, + "learning_rate": 1.3367170574277619e-08, + "logits/chosen": -0.2789410948753357, + "logits/rejected": -0.4663736820220947, + "logps/chosen": -159.549072265625, + "logps/rejected": -221.7187042236328, + "loss": 1.2737, + "nll_loss": 0.8865306973457336, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.672693252563477, + "rewards/margins": 4.128529071807861, + "rewards/rejected": 1.5441645383834839, + "step": 13730 + }, + { + "epoch": 0.762264046268429, + "grad_norm": 143.35287475585938, + "learning_rate": 1.330791519719997e-08, + "logits/chosen": -0.2800445854663849, + "logits/rejected": -0.37771207094192505, + "logps/chosen": -151.23098754882812, + "logps/rejected": -208.29928588867188, + "loss": 1.2792, + "nll_loss": 0.9862383008003235, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.22971248626709, + "rewards/margins": 3.126978874206543, + "rewards/rejected": 2.1027328968048096, + "step": 13740 + }, + { + "epoch": 0.7628188235946797, + "grad_norm": 118.14602661132812, + "learning_rate": 1.3248771280748172e-08, + "logits/chosen": -0.1906663477420807, + "logits/rejected": -0.39341455698013306, + "logps/chosen": -156.43212890625, + "logps/rejected": -188.81900024414062, + "loss": 1.2115, + "nll_loss": 0.8189682960510254, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.237671852111816, + "rewards/margins": 3.4505152702331543, + "rewards/rejected": 1.7871557474136353, + "step": 13750 + }, + { + "epoch": 0.7633736009209303, + "grad_norm": 45.56608200073242, + "learning_rate": 1.318973900458542e-08, + "logits/chosen": -0.3441595733165741, + "logits/rejected": -0.47740721702575684, + "logps/chosen": -151.74215698242188, + "logps/rejected": -225.3459014892578, + "loss": 1.1899, + "nll_loss": 0.9371752738952637, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.4622697830200195, + "rewards/margins": 3.729787826538086, + "rewards/rejected": 1.7324821949005127, + "step": 13760 + }, + { + "epoch": 0.7639283782471811, + "grad_norm": 83.97145080566406, + "learning_rate": 1.3130818548035816e-08, + "logits/chosen": -0.27460265159606934, + "logits/rejected": -0.4410218596458435, + "logps/chosen": -145.2313995361328, + "logps/rejected": -205.31820678710938, + "loss": 1.2731, + "nll_loss": 0.883343517780304, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.396486759185791, + "rewards/margins": 3.477076292037964, + "rewards/rejected": 1.9194103479385376, + "step": 13770 + }, + { + "epoch": 0.7644831555734317, + "grad_norm": 69.6474380493164, + "learning_rate": 1.3072010090083747e-08, + "logits/chosen": -0.47135257720947266, + "logits/rejected": -0.5717315673828125, + "logps/chosen": -197.0193634033203, + "logps/rejected": -259.94525146484375, + "loss": 1.3495, + "nll_loss": 1.0664008855819702, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.381795406341553, + "rewards/margins": 3.5582733154296875, + "rewards/rejected": 2.8235225677490234, + "step": 13780 + }, + { + "epoch": 0.7650379328996824, + "grad_norm": 59.73896789550781, + "learning_rate": 1.3013313809373394e-08, + "logits/chosen": -0.3769679665565491, + "logits/rejected": -0.526505708694458, + "logps/chosen": -156.14852905273438, + "logps/rejected": -213.15328979492188, + "loss": 1.2536, + "nll_loss": 0.8887110948562622, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.734314918518066, + "rewards/margins": 3.6810462474823, + "rewards/rejected": 2.0532686710357666, + "step": 13790 + }, + { + "epoch": 0.7655927102259331, + "grad_norm": 76.70157623291016, + "learning_rate": 1.295472988420821e-08, + "logits/chosen": -0.3603596091270447, + "logits/rejected": -0.4884079098701477, + "logps/chosen": -172.51190185546875, + "logps/rejected": -238.10107421875, + "loss": 1.347, + "nll_loss": 0.9501543045043945, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.018237590789795, + "rewards/margins": 4.109791278839111, + "rewards/rejected": 1.9084469079971313, + "step": 13800 + }, + { + "epoch": 0.7661474875521838, + "grad_norm": 80.48224639892578, + "learning_rate": 1.2896258492550266e-08, + "logits/chosen": -0.3924103081226349, + "logits/rejected": -0.5263667702674866, + "logps/chosen": -150.50424194335938, + "logps/rejected": -214.1674346923828, + "loss": 1.3143, + "nll_loss": 0.9524089694023132, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.103146076202393, + "rewards/margins": 3.896202802658081, + "rewards/rejected": 2.2069430351257324, + "step": 13810 + }, + { + "epoch": 0.7667022648784344, + "grad_norm": 39.00547409057617, + "learning_rate": 1.2837899812019864e-08, + "logits/chosen": -0.37665650248527527, + "logits/rejected": -0.5423328876495361, + "logps/chosen": -169.2489013671875, + "logps/rejected": -217.527587890625, + "loss": 1.2466, + "nll_loss": 0.8893179893493652, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.886086940765381, + "rewards/margins": 3.2424449920654297, + "rewards/rejected": 2.643641948699951, + "step": 13820 + }, + { + "epoch": 0.767257042204685, + "grad_norm": 43.614009857177734, + "learning_rate": 1.2779654019894853e-08, + "logits/chosen": -0.23461337387561798, + "logits/rejected": -0.4687643051147461, + "logps/chosen": -152.0112762451172, + "logps/rejected": -216.02536010742188, + "loss": 1.2577, + "nll_loss": 0.8669017553329468, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.299442768096924, + "rewards/margins": 3.8248748779296875, + "rewards/rejected": 1.4745676517486572, + "step": 13830 + }, + { + "epoch": 0.7678118195309358, + "grad_norm": 53.92696762084961, + "learning_rate": 1.272152129311021e-08, + "logits/chosen": -0.4088827967643738, + "logits/rejected": -0.4966079294681549, + "logps/chosen": -179.58731079101562, + "logps/rejected": -244.9889373779297, + "loss": 1.3564, + "nll_loss": 1.0453494787216187, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.4819841384887695, + "rewards/margins": 3.3247177600860596, + "rewards/rejected": 3.157266139984131, + "step": 13840 + }, + { + "epoch": 0.7683665968571864, + "grad_norm": 63.71945571899414, + "learning_rate": 1.2663501808257443e-08, + "logits/chosen": -0.20103967189788818, + "logits/rejected": -0.3945234417915344, + "logps/chosen": -152.06423950195312, + "logps/rejected": -193.32913208007812, + "loss": 1.2031, + "nll_loss": 0.8020838499069214, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.715639591217041, + "rewards/margins": 4.018339157104492, + "rewards/rejected": 1.697300672531128, + "step": 13850 + }, + { + "epoch": 0.7689213741834371, + "grad_norm": 83.6656265258789, + "learning_rate": 1.2605595741584013e-08, + "logits/chosen": -0.5328843593597412, + "logits/rejected": -0.6139092445373535, + "logps/chosen": -225.92556762695312, + "logps/rejected": -270.70611572265625, + "loss": 1.2327, + "nll_loss": 1.1312992572784424, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.906135559082031, + "rewards/margins": 3.619173765182495, + "rewards/rejected": 3.286961793899536, + "step": 13860 + }, + { + "epoch": 0.7694761515096878, + "grad_norm": 50.409156799316406, + "learning_rate": 1.2547803268992917e-08, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": -155.8525848388672, + "logps/rejected": -206.296875, + "loss": 1.2289, + "nll_loss": NaN, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.228343963623047, + "rewards/margins": 4.358792304992676, + "rewards/rejected": 1.869551658630371, + "step": 13870 + }, + { + "epoch": 0.7700309288359385, + "grad_norm": 41.457496643066406, + "learning_rate": 1.2490124566042004e-08, + "logits/chosen": -0.3529255986213684, + "logits/rejected": -0.47922688722610474, + "logps/chosen": -173.4693603515625, + "logps/rejected": -212.62060546875, + "loss": 1.264, + "nll_loss": 0.972199559211731, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.152778148651123, + "rewards/margins": 4.0120134353637695, + "rewards/rejected": 2.1407644748687744, + "step": 13880 + }, + { + "epoch": 0.7705857061621891, + "grad_norm": 78.218994140625, + "learning_rate": 1.2432559807943632e-08, + "logits/chosen": -0.44029346108436584, + "logits/rejected": -0.4879940450191498, + "logps/chosen": -194.955078125, + "logps/rejected": -263.9924621582031, + "loss": 1.2328, + "nll_loss": 1.0761487483978271, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.286138534545898, + "rewards/margins": 4.106390476226807, + "rewards/rejected": 2.179748296737671, + "step": 13890 + }, + { + "epoch": 0.7711404834884398, + "grad_norm": 48.475379943847656, + "learning_rate": 1.2375109169563913e-08, + "logits/chosen": -0.295167475938797, + "logits/rejected": -0.44352278113365173, + "logps/chosen": -157.22340393066406, + "logps/rejected": -215.6311798095703, + "loss": 1.2638, + "nll_loss": 0.8964277505874634, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.457936763763428, + "rewards/margins": 2.9228222370147705, + "rewards/rejected": 2.535114288330078, + "step": 13900 + }, + { + "epoch": 0.7716952608146905, + "grad_norm": 63.257591247558594, + "learning_rate": 1.2317772825422367e-08, + "logits/chosen": -0.34150010347366333, + "logits/rejected": -0.43900713324546814, + "logps/chosen": -138.32891845703125, + "logps/rejected": -192.40408325195312, + "loss": 1.2421, + "nll_loss": 1.113840103149414, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.1876373291015625, + "rewards/margins": 3.5217552185058594, + "rewards/rejected": 1.6658827066421509, + "step": 13910 + }, + { + "epoch": 0.7722500381409412, + "grad_norm": 60.22822189331055, + "learning_rate": 1.2260550949691268e-08, + "logits/chosen": -0.3614691197872162, + "logits/rejected": -0.449531227350235, + "logps/chosen": -152.67042541503906, + "logps/rejected": -222.02505493164062, + "loss": 1.3089, + "nll_loss": 0.8986543416976929, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.665240287780762, + "rewards/margins": 3.4257895946502686, + "rewards/rejected": 2.2394509315490723, + "step": 13920 + }, + { + "epoch": 0.7728048154671918, + "grad_norm": 38.36786651611328, + "learning_rate": 1.2203443716195211e-08, + "logits/chosen": -0.35673871636390686, + "logits/rejected": -0.5140501260757446, + "logps/chosen": -162.87362670898438, + "logps/rejected": -248.33712768554688, + "loss": 1.2613, + "nll_loss": 0.9390610456466675, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.909486293792725, + "rewards/margins": 3.5899605751037598, + "rewards/rejected": 2.319525718688965, + "step": 13930 + }, + { + "epoch": 0.7733595927934426, + "grad_norm": 42.55559158325195, + "learning_rate": 1.2146451298410526e-08, + "logits/chosen": -0.43257418274879456, + "logits/rejected": -0.5539957284927368, + "logps/chosen": -188.33963012695312, + "logps/rejected": -237.80563354492188, + "loss": 1.3806, + "nll_loss": 1.0820258855819702, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 6.638883113861084, + "rewards/margins": 3.8393142223358154, + "rewards/rejected": 2.7995693683624268, + "step": 13940 + }, + { + "epoch": 0.7739143701196932, + "grad_norm": 81.97818756103516, + "learning_rate": 1.2089573869464736e-08, + "logits/chosen": -0.44080132246017456, + "logits/rejected": -0.5881060361862183, + "logps/chosen": -170.52700805664062, + "logps/rejected": -240.5904083251953, + "loss": 1.1844, + "nll_loss": 0.9856440424919128, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.976235866546631, + "rewards/margins": 4.229222297668457, + "rewards/rejected": 1.7470133304595947, + "step": 13950 + }, + { + "epoch": 0.7744691474459439, + "grad_norm": 58.12779235839844, + "learning_rate": 1.2032811602136107e-08, + "logits/chosen": -0.3213277757167816, + "logits/rejected": -0.5130875706672668, + "logps/chosen": -161.09698486328125, + "logps/rejected": -242.45492553710938, + "loss": 1.2264, + "nll_loss": 0.9088759422302246, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.524542808532715, + "rewards/margins": 4.230862617492676, + "rewards/rejected": 1.2936804294586182, + "step": 13960 + }, + { + "epoch": 0.7750239247721945, + "grad_norm": 70.44070434570312, + "learning_rate": 1.1976164668853e-08, + "logits/chosen": -0.38771852850914, + "logits/rejected": -0.5312173366546631, + "logps/chosen": -164.395751953125, + "logps/rejected": -250.2509765625, + "loss": 1.292, + "nll_loss": 0.9301018714904785, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.082016468048096, + "rewards/margins": 4.434224605560303, + "rewards/rejected": 1.6477924585342407, + "step": 13970 + }, + { + "epoch": 0.7755787020984453, + "grad_norm": 134.88778686523438, + "learning_rate": 1.1919633241693538e-08, + "logits/chosen": -0.2789040207862854, + "logits/rejected": -0.4455975890159607, + "logps/chosen": -153.7880401611328, + "logps/rejected": -211.2582550048828, + "loss": 1.2049, + "nll_loss": 0.8475750088691711, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.729306697845459, + "rewards/margins": 4.2116804122924805, + "rewards/rejected": 1.5176265239715576, + "step": 13980 + }, + { + "epoch": 0.7761334794246959, + "grad_norm": 70.38463592529297, + "learning_rate": 1.1863217492384853e-08, + "logits/chosen": -0.34892693161964417, + "logits/rejected": -0.5211832523345947, + "logps/chosen": -158.76480102539062, + "logps/rejected": -215.03958129882812, + "loss": 1.249, + "nll_loss": 0.9328628778457642, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.606719017028809, + "rewards/margins": 3.7106430530548096, + "rewards/rejected": 1.8960764408111572, + "step": 13990 + }, + { + "epoch": 0.7766882567509465, + "grad_norm": 76.29339599609375, + "learning_rate": 1.1806917592302761e-08, + "logits/chosen": -0.31255191564559937, + "logits/rejected": -0.48688554763793945, + "logps/chosen": -150.01837158203125, + "logps/rejected": -215.4327392578125, + "loss": 1.1815, + "nll_loss": 0.880654513835907, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.435760498046875, + "rewards/margins": 3.266252040863037, + "rewards/rejected": 2.1695079803466797, + "step": 14000 + }, + { + "epoch": 0.7766882567509465, + "eval_logits/chosen": -0.4256032109260559, + "eval_logits/rejected": -0.5457690358161926, + "eval_logps/chosen": -189.90115356445312, + "eval_logps/rejected": -260.0768737792969, + "eval_loss": 1.2177233695983887, + "eval_nll_loss": 0.9840515851974487, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.806005477905273, + "eval_rewards/margins": 4.90584135055542, + "eval_rewards/rejected": 1.9001634120941162, + "eval_runtime": 17.2756, + "eval_samples_per_second": 14.819, + "eval_steps_per_second": 1.852, + "step": 14000 + }, + { + "epoch": 0.7772430340771973, + "grad_norm": 65.32649230957031, + "learning_rate": 1.1750733712471106e-08, + "logits/chosen": -0.21402129530906677, + "logits/rejected": -0.30491748452186584, + "logps/chosen": -181.6761016845703, + "logps/rejected": -221.7805938720703, + "loss": 1.2736, + "nll_loss": 0.9396146535873413, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.532603740692139, + "rewards/margins": 2.3283419609069824, + "rewards/rejected": 3.2042622566223145, + "step": 14010 + }, + { + "epoch": 0.777797811403448, + "grad_norm": 73.90982055664062, + "learning_rate": 1.1694666023561284e-08, + "logits/chosen": -0.16523988544940948, + "logits/rejected": -0.3659622073173523, + "logps/chosen": -144.54388427734375, + "logps/rejected": -205.26248168945312, + "loss": 1.2495, + "nll_loss": 0.8604100346565247, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.111564636230469, + "rewards/margins": 3.390612840652466, + "rewards/rejected": 1.7209514379501343, + "step": 14020 + }, + { + "epoch": 0.7783525887296986, + "grad_norm": 52.22819900512695, + "learning_rate": 1.1638714695891822e-08, + "logits/chosen": -0.33601441979408264, + "logits/rejected": -0.47834569215774536, + "logps/chosen": -168.5770263671875, + "logps/rejected": -232.70596313476562, + "loss": 1.2159, + "nll_loss": 0.8969131708145142, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.402365207672119, + "rewards/margins": 4.196316719055176, + "rewards/rejected": 2.2060484886169434, + "step": 14030 + }, + { + "epoch": 0.7789073660559493, + "grad_norm": 59.168701171875, + "learning_rate": 1.1582879899427672e-08, + "logits/chosen": -0.1585932970046997, + "logits/rejected": -0.34851229190826416, + "logps/chosen": -131.6605682373047, + "logps/rejected": -196.3135528564453, + "loss": 1.2516, + "nll_loss": 0.7813040018081665, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.3250579833984375, + "rewards/margins": 3.896998882293701, + "rewards/rejected": 1.4280592203140259, + "step": 14040 + }, + { + "epoch": 0.7794621433822, + "grad_norm": 72.20557403564453, + "learning_rate": 1.1527161803779866e-08, + "logits/chosen": -0.25169992446899414, + "logits/rejected": -0.37294498085975647, + "logps/chosen": -162.71420288085938, + "logps/rejected": -191.31991577148438, + "loss": 1.2654, + "nll_loss": 0.9724845886230469, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.3179030418396, + "rewards/margins": 2.5933871269226074, + "rewards/rejected": 2.7245163917541504, + "step": 14050 + }, + { + "epoch": 0.7800169207084506, + "grad_norm": 34.26319885253906, + "learning_rate": 1.1471560578204875e-08, + "logits/chosen": -0.30690228939056396, + "logits/rejected": -0.45574530959129333, + "logps/chosen": -146.76254272460938, + "logps/rejected": -204.77993774414062, + "loss": 1.2631, + "nll_loss": 0.9296310544013977, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.883360862731934, + "rewards/margins": 4.266750335693359, + "rewards/rejected": 1.6166105270385742, + "step": 14060 + }, + { + "epoch": 0.7805716980347013, + "grad_norm": 74.3058853149414, + "learning_rate": 1.1416076391604195e-08, + "logits/chosen": -0.32706719636917114, + "logits/rejected": -0.4510704576969147, + "logps/chosen": -145.9856719970703, + "logps/rejected": -195.59005737304688, + "loss": 1.2075, + "nll_loss": 0.8912612795829773, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.421122074127197, + "rewards/margins": 3.1206881999969482, + "rewards/rejected": 2.300433397293091, + "step": 14070 + }, + { + "epoch": 0.781126475360952, + "grad_norm": 58.19743728637695, + "learning_rate": 1.1360709412523789e-08, + "logits/chosen": -0.34424278140068054, + "logits/rejected": -0.46891456842422485, + "logps/chosen": -163.01699829101562, + "logps/rejected": -211.4050750732422, + "loss": 1.2561, + "nll_loss": 0.9696332812309265, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.362013816833496, + "rewards/margins": 2.8251922130584717, + "rewards/rejected": 2.536821126937866, + "step": 14080 + }, + { + "epoch": 0.7816812526872027, + "grad_norm": 79.0674819946289, + "learning_rate": 1.1305459809153523e-08, + "logits/chosen": -0.3033314645290375, + "logits/rejected": -0.4799756407737732, + "logps/chosen": -140.81182861328125, + "logps/rejected": -201.52999877929688, + "loss": 1.3443, + "nll_loss": 0.837742030620575, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.111077308654785, + "rewards/margins": 3.7379837036132812, + "rewards/rejected": 1.373093843460083, + "step": 14090 + }, + { + "epoch": 0.7822360300134533, + "grad_norm": 64.90271759033203, + "learning_rate": 1.1250327749326772e-08, + "logits/chosen": -0.4450332224369049, + "logits/rejected": -0.5516294836997986, + "logps/chosen": -165.32357788085938, + "logps/rejected": -222.63339233398438, + "loss": 1.2532, + "nll_loss": 1.0632097721099854, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.119533538818359, + "rewards/margins": 3.5711288452148438, + "rewards/rejected": 2.5484046936035156, + "step": 14100 + }, + { + "epoch": 0.7827908073397041, + "grad_norm": 61.68737030029297, + "learning_rate": 1.119531340051979e-08, + "logits/chosen": -0.18699191510677338, + "logits/rejected": -0.29720041155815125, + "logps/chosen": -156.30979919433594, + "logps/rejected": -236.480224609375, + "loss": 1.2732, + "nll_loss": 0.9045829772949219, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.382053375244141, + "rewards/margins": 3.5788066387176514, + "rewards/rejected": 1.8032464981079102, + "step": 14110 + }, + { + "epoch": 0.7833455846659547, + "grad_norm": 77.37608337402344, + "learning_rate": 1.1140416929851304e-08, + "logits/chosen": -0.29755669832229614, + "logits/rejected": -0.47017520666122437, + "logps/chosen": -151.44454956054688, + "logps/rejected": -226.88577270507812, + "loss": 1.2929, + "nll_loss": 0.8588264584541321, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.481460094451904, + "rewards/margins": 3.87780499458313, + "rewards/rejected": 1.603655219078064, + "step": 14120 + }, + { + "epoch": 0.7839003619922054, + "grad_norm": 52.44789123535156, + "learning_rate": 1.108563850408193e-08, + "logits/chosen": -0.4048032760620117, + "logits/rejected": -0.4983956813812256, + "logps/chosen": -188.40444946289062, + "logps/rejected": -270.88568115234375, + "loss": 1.2048, + "nll_loss": 1.058279275894165, + "rewards/accuracies": 0.75, + "rewards/chosen": 6.549439907073975, + "rewards/margins": 3.9809250831604004, + "rewards/rejected": 2.5685155391693115, + "step": 14130 + }, + { + "epoch": 0.784455139318456, + "grad_norm": 157.4579620361328, + "learning_rate": 1.1030978289613724e-08, + "logits/chosen": -0.3345903158187866, + "logits/rejected": -0.4269779324531555, + "logps/chosen": -167.9392547607422, + "logps/rejected": -245.6435089111328, + "loss": 1.2268, + "nll_loss": 1.0074113607406616, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.121491432189941, + "rewards/margins": 3.8833796977996826, + "rewards/rejected": 2.238111972808838, + "step": 14140 + }, + { + "epoch": 0.7850099166447068, + "grad_norm": 42.360111236572266, + "learning_rate": 1.097643645248959e-08, + "logits/chosen": -0.32162588834762573, + "logits/rejected": -0.47896361351013184, + "logps/chosen": -156.4175262451172, + "logps/rejected": -198.45626831054688, + "loss": 1.1511, + "nll_loss": 0.9309374094009399, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.966883182525635, + "rewards/margins": 4.057782173156738, + "rewards/rejected": 1.9091007709503174, + "step": 14150 + }, + { + "epoch": 0.7855646939709574, + "grad_norm": 75.29857635498047, + "learning_rate": 1.0922013158392912e-08, + "logits/chosen": -0.20973214507102966, + "logits/rejected": -0.3338400721549988, + "logps/chosen": -159.48536682128906, + "logps/rejected": -205.24050903320312, + "loss": 1.1851, + "nll_loss": 0.8414397239685059, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.741863250732422, + "rewards/margins": 3.673130512237549, + "rewards/rejected": 2.0687320232391357, + "step": 14160 + }, + { + "epoch": 0.786119471297208, + "grad_norm": 56.388431549072266, + "learning_rate": 1.08677085726469e-08, + "logits/chosen": -0.16435568034648895, + "logits/rejected": -0.378712922334671, + "logps/chosen": -152.93850708007812, + "logps/rejected": -208.55862426757812, + "loss": 1.2624, + "nll_loss": 0.8205984830856323, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.232509136199951, + "rewards/margins": 4.504544734954834, + "rewards/rejected": 0.7279645204544067, + "step": 14170 + }, + { + "epoch": 0.7866742486234588, + "grad_norm": 60.83591079711914, + "learning_rate": 1.0813522860214208e-08, + "logits/chosen": -0.41574448347091675, + "logits/rejected": -0.5650998950004578, + "logps/chosen": -190.96762084960938, + "logps/rejected": -260.410400390625, + "loss": 1.2285, + "nll_loss": 1.052651047706604, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.427077293395996, + "rewards/margins": 4.325234413146973, + "rewards/rejected": 2.1018431186676025, + "step": 14180 + }, + { + "epoch": 0.7872290259497094, + "grad_norm": 93.42770385742188, + "learning_rate": 1.0759456185696375e-08, + "logits/chosen": -0.429645836353302, + "logits/rejected": -0.5378237962722778, + "logps/chosen": -188.11366271972656, + "logps/rejected": -282.5850524902344, + "loss": 1.2489, + "nll_loss": 1.0643198490142822, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 7.1027092933654785, + "rewards/margins": 4.801459312438965, + "rewards/rejected": 2.3012492656707764, + "step": 14190 + }, + { + "epoch": 0.7877838032759601, + "grad_norm": 150.52871704101562, + "learning_rate": 1.0705508713333312e-08, + "logits/chosen": -0.24871881306171417, + "logits/rejected": -0.44136008620262146, + "logps/chosen": -174.60910034179688, + "logps/rejected": -233.86111450195312, + "loss": 1.2012, + "nll_loss": 0.9185620546340942, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.834150791168213, + "rewards/margins": 4.138575077056885, + "rewards/rejected": 1.695575475692749, + "step": 14200 + }, + { + "epoch": 0.7883385806022107, + "grad_norm": 80.00650024414062, + "learning_rate": 1.065168060700286e-08, + "logits/chosen": -0.2197830229997635, + "logits/rejected": -0.39777567982673645, + "logps/chosen": -131.45639038085938, + "logps/rejected": -192.24990844726562, + "loss": 1.1549, + "nll_loss": 0.7730545997619629, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 5.495772361755371, + "rewards/margins": 4.274158954620361, + "rewards/rejected": 1.2216134071350098, + "step": 14210 + }, + { + "epoch": 0.7888933579284615, + "grad_norm": 78.72735595703125, + "learning_rate": 1.0597972030220214e-08, + "logits/chosen": -0.5054045915603638, + "logits/rejected": -0.6108459234237671, + "logps/chosen": -198.7064971923828, + "logps/rejected": -261.1624450683594, + "loss": 1.2565, + "nll_loss": 1.0970631837844849, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.776255130767822, + "rewards/margins": 3.514409303665161, + "rewards/rejected": 3.261845827102661, + "step": 14220 + }, + { + "epoch": 0.7894481352547121, + "grad_norm": 89.0277099609375, + "learning_rate": 1.0544383146137542e-08, + "logits/chosen": -0.13528604805469513, + "logits/rejected": -0.30049964785575867, + "logps/chosen": -124.52632141113281, + "logps/rejected": -187.10525512695312, + "loss": 1.2313, + "nll_loss": 0.905049204826355, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 4.986789226531982, + "rewards/margins": 3.6463348865509033, + "rewards/rejected": 1.3404542207717896, + "step": 14230 + }, + { + "epoch": 0.7900029125809628, + "grad_norm": 61.415306091308594, + "learning_rate": 1.0490914117543353e-08, + "logits/chosen": -0.306907057762146, + "logits/rejected": -0.504341721534729, + "logps/chosen": -150.9152374267578, + "logps/rejected": -225.4998016357422, + "loss": 1.1882, + "nll_loss": 0.8629624247550964, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 5.909262657165527, + "rewards/margins": 4.346271514892578, + "rewards/rejected": 1.562990665435791, + "step": 14240 + }, + { + "epoch": 0.7905576899072135, + "grad_norm": 54.4163932800293, + "learning_rate": 1.0437565106862073e-08, + "logits/chosen": -0.2065925896167755, + "logits/rejected": -0.37647438049316406, + "logps/chosen": -154.891845703125, + "logps/rejected": -226.71542358398438, + "loss": 1.216, + "nll_loss": 0.9100456237792969, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.1754865646362305, + "rewards/margins": 4.063882827758789, + "rewards/rejected": 2.1116039752960205, + "step": 14250 + }, + { + "epoch": 0.7911124672334642, + "grad_norm": 81.15811920166016, + "learning_rate": 1.0384336276153588e-08, + "logits/chosen": -0.3366141617298126, + "logits/rejected": -0.4981306493282318, + "logps/chosen": -150.25709533691406, + "logps/rejected": -204.87472534179688, + "loss": 1.3039, + "nll_loss": 0.9259662628173828, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.649740219116211, + "rewards/margins": 3.9433753490448, + "rewards/rejected": 1.7063640356063843, + "step": 14260 + }, + { + "epoch": 0.7916672445597148, + "grad_norm": 87.83256530761719, + "learning_rate": 1.0331227787112645e-08, + "logits/chosen": -0.2888008952140808, + "logits/rejected": -0.42626166343688965, + "logps/chosen": -166.24411010742188, + "logps/rejected": -204.98977661132812, + "loss": 1.2784, + "nll_loss": 0.8952625393867493, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.862424850463867, + "rewards/margins": 3.5198276042938232, + "rewards/rejected": 2.342597007751465, + "step": 14270 + }, + { + "epoch": 0.7922220218859655, + "grad_norm": 63.025001525878906, + "learning_rate": 1.0278239801068517e-08, + "logits/chosen": -0.2715142071247101, + "logits/rejected": -0.4095768928527832, + "logps/chosen": -135.20944213867188, + "logps/rejected": -169.39744567871094, + "loss": 1.2523, + "nll_loss": 0.7992717623710632, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.300876617431641, + "rewards/margins": 3.151837110519409, + "rewards/rejected": 2.1490390300750732, + "step": 14280 + }, + { + "epoch": 0.7927767992122162, + "grad_norm": 90.99301147460938, + "learning_rate": 1.0225372478984324e-08, + "logits/chosen": -0.19432711601257324, + "logits/rejected": -0.4075043797492981, + "logps/chosen": -129.4255828857422, + "logps/rejected": -188.38177490234375, + "loss": 1.2615, + "nll_loss": 0.8069963455200195, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.241572380065918, + "rewards/margins": 3.424506664276123, + "rewards/rejected": 1.8170654773712158, + "step": 14290 + }, + { + "epoch": 0.7933315765384669, + "grad_norm": 54.6978645324707, + "learning_rate": 1.0172625981456723e-08, + "logits/chosen": -0.34659120440483093, + "logits/rejected": -0.4400635361671448, + "logps/chosen": -189.80599975585938, + "logps/rejected": -243.38217163085938, + "loss": 1.2849, + "nll_loss": 1.0428221225738525, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.767068386077881, + "rewards/margins": 3.213662624359131, + "rewards/rejected": 3.553405284881592, + "step": 14300 + }, + { + "epoch": 0.7938863538647175, + "grad_norm": 77.70732116699219, + "learning_rate": 1.0120000468715267e-08, + "logits/chosen": -0.351653516292572, + "logits/rejected": -0.5094562768936157, + "logps/chosen": -189.60104370117188, + "logps/rejected": -238.1971893310547, + "loss": 1.2378, + "nll_loss": 0.9134159088134766, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.133057594299316, + "rewards/margins": 3.4282565116882324, + "rewards/rejected": 2.704801559448242, + "step": 14310 + }, + { + "epoch": 0.7944411311909683, + "grad_norm": 63.889896392822266, + "learning_rate": 1.0067496100622041e-08, + "logits/chosen": -0.23658093810081482, + "logits/rejected": -0.33566293120384216, + "logps/chosen": -132.89378356933594, + "logps/rejected": -160.03530883789062, + "loss": 1.323, + "nll_loss": 0.9023619890213013, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.8944854736328125, + "rewards/margins": 2.4125888347625732, + "rewards/rejected": 2.48189640045166, + "step": 14320 + }, + { + "epoch": 0.7949959085172189, + "grad_norm": 64.3019790649414, + "learning_rate": 1.0015113036671119e-08, + "logits/chosen": -0.263753741979599, + "logits/rejected": -0.37547627091407776, + "logps/chosen": -170.27474975585938, + "logps/rejected": -205.6151123046875, + "loss": 1.2795, + "nll_loss": 0.945264458656311, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.726595878601074, + "rewards/margins": 3.4203312397003174, + "rewards/rejected": 2.306265115737915, + "step": 14330 + }, + { + "epoch": 0.7955506858434696, + "grad_norm": 67.1368179321289, + "learning_rate": 9.962851435988056e-09, + "logits/chosen": -0.3096924126148224, + "logits/rejected": -0.4432447850704193, + "logps/chosen": -152.05868530273438, + "logps/rejected": -206.04177856445312, + "loss": 1.2941, + "nll_loss": 0.9117245674133301, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.722899913787842, + "rewards/margins": 3.8970985412597656, + "rewards/rejected": 1.8258016109466553, + "step": 14340 + }, + { + "epoch": 0.7961054631697203, + "grad_norm": 51.44058609008789, + "learning_rate": 9.910711457329479e-09, + "logits/chosen": -0.35930752754211426, + "logits/rejected": -0.5132125020027161, + "logps/chosen": -147.76963806152344, + "logps/rejected": -210.6107635498047, + "loss": 1.256, + "nll_loss": 0.9329828023910522, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.823217868804932, + "rewards/margins": 3.7123653888702393, + "rewards/rejected": 2.1108522415161133, + "step": 14350 + }, + { + "epoch": 0.796660240495971, + "grad_norm": 75.3576431274414, + "learning_rate": 9.8586932590825e-09, + "logits/chosen": -0.3371516764163971, + "logits/rejected": -0.4289192259311676, + "logps/chosen": -177.5013885498047, + "logps/rejected": -230.17385864257812, + "loss": 1.2697, + "nll_loss": 0.9882882833480835, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.371665000915527, + "rewards/margins": 3.8917853832244873, + "rewards/rejected": 2.4798789024353027, + "step": 14360 + }, + { + "epoch": 0.7972150178222216, + "grad_norm": 82.46305847167969, + "learning_rate": 9.806796999264361e-09, + "logits/chosen": -0.2205454409122467, + "logits/rejected": -0.3871150612831116, + "logps/chosen": -127.50831604003906, + "logps/rejected": -173.83023071289062, + "loss": 1.1455, + "nll_loss": 0.7921987175941467, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.2621378898620605, + "rewards/margins": 3.2860114574432373, + "rewards/rejected": 1.9761260747909546, + "step": 14370 + }, + { + "epoch": 0.7977697951484722, + "grad_norm": 68.38803100585938, + "learning_rate": 9.755022835521843e-09, + "logits/chosen": -0.2221817523241043, + "logits/rejected": -0.4071694016456604, + "logps/chosen": -130.88552856445312, + "logps/rejected": -202.6993408203125, + "loss": 1.1171, + "nll_loss": 0.8255695104598999, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.904606819152832, + "rewards/margins": 3.8406734466552734, + "rewards/rejected": 2.0639336109161377, + "step": 14380 + }, + { + "epoch": 0.798324572474723, + "grad_norm": 55.854034423828125, + "learning_rate": 9.703370925130865e-09, + "logits/chosen": -0.4654787480831146, + "logits/rejected": -0.5669001340866089, + "logps/chosen": -206.08377075195312, + "logps/rejected": -264.7427673339844, + "loss": 1.3264, + "nll_loss": 1.115120530128479, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 6.440249443054199, + "rewards/margins": 2.270873546600342, + "rewards/rejected": 4.169375896453857, + "step": 14390 + }, + { + "epoch": 0.7988793498009736, + "grad_norm": 55.6180419921875, + "learning_rate": 9.651841424995932e-09, + "logits/chosen": -0.3281271755695343, + "logits/rejected": -0.3929949104785919, + "logps/chosen": -184.00228881835938, + "logps/rejected": -233.5999755859375, + "loss": 1.323, + "nll_loss": 1.0436643362045288, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.61121129989624, + "rewards/margins": 4.3487677574157715, + "rewards/rejected": 2.262443780899048, + "step": 14400 + }, + { + "epoch": 0.7994341271272243, + "grad_norm": 74.48028564453125, + "learning_rate": 9.600434491649745e-09, + "logits/chosen": -0.31366071105003357, + "logits/rejected": -0.4073302149772644, + "logps/chosen": -182.0845184326172, + "logps/rejected": -251.82730102539062, + "loss": 1.2855, + "nll_loss": 0.9698610305786133, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.493363857269287, + "rewards/margins": 3.790088176727295, + "rewards/rejected": 2.703275442123413, + "step": 14410 + }, + { + "epoch": 0.799988904453475, + "grad_norm": 39.76643753051758, + "learning_rate": 9.549150281252633e-09, + "logits/chosen": -0.3544641137123108, + "logits/rejected": -0.4706074595451355, + "logps/chosen": -193.2956085205078, + "logps/rejected": -253.80166625976562, + "loss": 1.1707, + "nll_loss": 0.987841010093689, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 6.459817409515381, + "rewards/margins": 4.018962860107422, + "rewards/rejected": 2.440854549407959, + "step": 14420 + }, + { + "epoch": 0.8005436817797257, + "grad_norm": 68.87491607666016, + "learning_rate": 9.497988949592161e-09, + "logits/chosen": -0.2607325613498688, + "logits/rejected": -0.3864908218383789, + "logps/chosen": -145.94537353515625, + "logps/rejected": -185.57022094726562, + "loss": 1.3224, + "nll_loss": 0.9280544519424438, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.35838508605957, + "rewards/margins": 3.4789459705352783, + "rewards/rejected": 1.879439115524292, + "step": 14430 + }, + { + "epoch": 0.8010984591059763, + "grad_norm": 35.218536376953125, + "learning_rate": 9.446950652082636e-09, + "logits/chosen": -0.2851913571357727, + "logits/rejected": -0.43879151344299316, + "logps/chosen": -151.75906372070312, + "logps/rejected": -198.59219360351562, + "loss": 1.2342, + "nll_loss": 0.8864370584487915, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.473379135131836, + "rewards/margins": 4.034270286560059, + "rewards/rejected": 1.4391090869903564, + "step": 14440 + }, + { + "epoch": 0.801653236432227, + "grad_norm": 59.406585693359375, + "learning_rate": 9.396035543764558e-09, + "logits/chosen": -0.1484692394733429, + "logits/rejected": -0.40019527077674866, + "logps/chosen": -110.6164321899414, + "logps/rejected": -167.54690551757812, + "loss": 1.2621, + "nll_loss": 0.7206388711929321, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 4.720696449279785, + "rewards/margins": 3.0079617500305176, + "rewards/rejected": 1.7127354145050049, + "step": 14450 + }, + { + "epoch": 0.8022080137584777, + "grad_norm": 98.18295288085938, + "learning_rate": 9.345243779304285e-09, + "logits/chosen": -0.39211538434028625, + "logits/rejected": -0.5174371600151062, + "logps/chosen": -179.41665649414062, + "logps/rejected": -245.5238494873047, + "loss": 1.2218, + "nll_loss": 0.9766333699226379, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.362366676330566, + "rewards/margins": 3.984196186065674, + "rewards/rejected": 2.3781704902648926, + "step": 14460 + }, + { + "epoch": 0.8027627910847284, + "grad_norm": 43.433929443359375, + "learning_rate": 9.294575512993408e-09, + "logits/chosen": -0.3674396574497223, + "logits/rejected": -0.47984933853149414, + "logps/chosen": -171.6109619140625, + "logps/rejected": -235.5369110107422, + "loss": 1.2055, + "nll_loss": 0.9847660064697266, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.441687107086182, + "rewards/margins": 4.69619083404541, + "rewards/rejected": 1.7454957962036133, + "step": 14470 + }, + { + "epoch": 0.803317568410979, + "grad_norm": 39.43125915527344, + "learning_rate": 9.244030898748472e-09, + "logits/chosen": -0.28768596053123474, + "logits/rejected": -0.44666361808776855, + "logps/chosen": -157.49334716796875, + "logps/rejected": -208.1215362548828, + "loss": 1.1838, + "nll_loss": 0.869096577167511, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.3726806640625, + "rewards/margins": 3.0028040409088135, + "rewards/rejected": 2.3698766231536865, + "step": 14480 + }, + { + "epoch": 0.8038723457372298, + "grad_norm": 49.01121520996094, + "learning_rate": 9.193610090110304e-09, + "logits/chosen": -0.2439526617527008, + "logits/rejected": -0.43513980507850647, + "logps/chosen": -172.5922393798828, + "logps/rejected": -231.7100067138672, + "loss": 1.1373, + "nll_loss": 0.9176927804946899, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.124392509460449, + "rewards/margins": 4.6425018310546875, + "rewards/rejected": 1.4818907976150513, + "step": 14490 + }, + { + "epoch": 0.8044271230634804, + "grad_norm": 32.72774887084961, + "learning_rate": 9.143313240243667e-09, + "logits/chosen": -0.2891438901424408, + "logits/rejected": -0.45568108558654785, + "logps/chosen": -154.89642333984375, + "logps/rejected": -236.8765411376953, + "loss": 1.3321, + "nll_loss": 0.9265453219413757, + "rewards/accuracies": 0.75, + "rewards/chosen": 5.802674293518066, + "rewards/margins": 3.1932332515716553, + "rewards/rejected": 2.609440565109253, + "step": 14500 + }, + { + "epoch": 0.8044271230634804, + "eval_logits/chosen": -0.40663790702819824, + "eval_logits/rejected": -0.520878255367279, + "eval_logps/chosen": -190.1840057373047, + "eval_logps/rejected": -261.1012268066406, + "eval_loss": 1.2122180461883545, + "eval_nll_loss": 0.9856801629066467, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.777721881866455, + "eval_rewards/margins": 4.979991912841797, + "eval_rewards/rejected": 1.7977294921875, + "eval_runtime": 17.1334, + "eval_samples_per_second": 14.942, + "eval_steps_per_second": 1.868, + "step": 14500 + }, + { + "epoch": 0.804981900389731, + "grad_norm": 49.28852844238281, + "learning_rate": 9.093140501936813e-09, + "logits/chosen": -0.30533546209335327, + "logits/rejected": -0.4386633336544037, + "logps/chosen": -161.4071502685547, + "logps/rejected": -197.94219970703125, + "loss": 1.2158, + "nll_loss": 0.9138886332511902, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.987029075622559, + "rewards/margins": 3.8081512451171875, + "rewards/rejected": 2.178877353668213, + "step": 14510 + }, + { + "epoch": 0.8055366777159817, + "grad_norm": 71.35733032226562, + "learning_rate": 9.043092027600901e-09, + "logits/chosen": -0.35957610607147217, + "logits/rejected": -0.5047857165336609, + "logps/chosen": -147.0637969970703, + "logps/rejected": -201.2028350830078, + "loss": 1.2649, + "nll_loss": 0.9333122372627258, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.66337776184082, + "rewards/margins": 4.2670488357543945, + "rewards/rejected": 1.3963292837142944, + "step": 14520 + }, + { + "epoch": 0.8060914550422325, + "grad_norm": 73.6759033203125, + "learning_rate": 8.993167969269716e-09, + "logits/chosen": -0.43993091583251953, + "logits/rejected": -0.536629319190979, + "logps/chosen": -206.188720703125, + "logps/rejected": -273.0359802246094, + "loss": 1.2285, + "nll_loss": 1.0613583326339722, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 7.028354644775391, + "rewards/margins": 4.819065093994141, + "rewards/rejected": 2.209289789199829, + "step": 14530 + }, + { + "epoch": 0.8066462323684831, + "grad_norm": 78.81867980957031, + "learning_rate": 8.943368478598989e-09, + "logits/chosen": -0.46705374121665955, + "logits/rejected": -0.5537182092666626, + "logps/chosen": -191.93948364257812, + "logps/rejected": -271.65008544921875, + "loss": 1.2468, + "nll_loss": 1.0935325622558594, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 7.112006187438965, + "rewards/margins": 4.076033115386963, + "rewards/rejected": 3.035973072052002, + "step": 14540 + }, + { + "epoch": 0.8072010096947337, + "grad_norm": 54.17938995361328, + "learning_rate": 8.893693706866124e-09, + "logits/chosen": -0.20283639430999756, + "logits/rejected": -0.45410043001174927, + "logps/chosen": -108.3819580078125, + "logps/rejected": -176.9251251220703, + "loss": 1.2593, + "nll_loss": 0.7126230001449585, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 4.9742279052734375, + "rewards/margins": 4.209775924682617, + "rewards/rejected": 0.7644524574279785, + "step": 14550 + }, + { + "epoch": 0.8077557870209845, + "grad_norm": 32.046836853027344, + "learning_rate": 8.844143804969623e-09, + "logits/chosen": -0.24128413200378418, + "logits/rejected": -0.4615742564201355, + "logps/chosen": -135.6195831298828, + "logps/rejected": -219.5218505859375, + "loss": 1.2179, + "nll_loss": 0.837755560874939, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.290262699127197, + "rewards/margins": 3.544344425201416, + "rewards/rejected": 1.7459179162979126, + "step": 14560 + }, + { + "epoch": 0.8083105643472351, + "grad_norm": 45.38715744018555, + "learning_rate": 8.794718923428685e-09, + "logits/chosen": -0.23258860409259796, + "logits/rejected": -0.39432069659233093, + "logps/chosen": -156.38214111328125, + "logps/rejected": -210.4923553466797, + "loss": 1.1779, + "nll_loss": 0.9198177456855774, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 5.690469264984131, + "rewards/margins": 3.7175514698028564, + "rewards/rejected": 1.9729175567626953, + "step": 14570 + }, + { + "epoch": 0.8088653416734858, + "grad_norm": 62.882816314697266, + "learning_rate": 8.745419212382738e-09, + "logits/chosen": -0.1419857293367386, + "logits/rejected": -0.3641354739665985, + "logps/chosen": -128.32472229003906, + "logps/rejected": -188.57131958007812, + "loss": 1.213, + "nll_loss": 0.7510377764701843, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.057511806488037, + "rewards/margins": 3.2589499950408936, + "rewards/rejected": 1.7985626459121704, + "step": 14580 + }, + { + "epoch": 0.8094201189997364, + "grad_norm": 100.9625015258789, + "learning_rate": 8.696244821590948e-09, + "logits/chosen": -0.31309443712234497, + "logits/rejected": -0.4752843379974365, + "logps/chosen": -187.08206176757812, + "logps/rejected": -265.50408935546875, + "loss": 1.3192, + "nll_loss": 0.9962458610534668, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.294480323791504, + "rewards/margins": 5.051595211029053, + "rewards/rejected": 1.2428849935531616, + "step": 14590 + }, + { + "epoch": 0.8099748963259872, + "grad_norm": 94.37689208984375, + "learning_rate": 8.64719590043183e-09, + "logits/chosen": -0.19047455489635468, + "logits/rejected": -0.37319186329841614, + "logps/chosen": -131.11935424804688, + "logps/rejected": -172.666259765625, + "loss": 1.2171, + "nll_loss": 0.8773058652877808, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.016050338745117, + "rewards/margins": 2.4018607139587402, + "rewards/rejected": 2.614189624786377, + "step": 14600 + }, + { + "epoch": 0.8105296736522378, + "grad_norm": 24.281864166259766, + "learning_rate": 8.598272597902706e-09, + "logits/chosen": -0.18936872482299805, + "logits/rejected": -0.3464515805244446, + "logps/chosen": -151.7962646484375, + "logps/rejected": -206.4762725830078, + "loss": 1.25, + "nll_loss": 0.8511675000190735, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.5301618576049805, + "rewards/margins": 3.685591220855713, + "rewards/rejected": 1.8445703983306885, + "step": 14610 + }, + { + "epoch": 0.8110844509784885, + "grad_norm": 49.428993225097656, + "learning_rate": 8.549475062619354e-09, + "logits/chosen": -0.2897084355354309, + "logits/rejected": -0.48308873176574707, + "logps/chosen": -149.5684051513672, + "logps/rejected": -236.5645751953125, + "loss": 1.2051, + "nll_loss": 0.8345033526420593, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.602771282196045, + "rewards/margins": 4.437640190124512, + "rewards/rejected": 1.1651312112808228, + "step": 14620 + }, + { + "epoch": 0.8116392283047392, + "grad_norm": 56.484107971191406, + "learning_rate": 8.500803442815474e-09, + "logits/chosen": -0.23302340507507324, + "logits/rejected": -0.39650648832321167, + "logps/chosen": -153.33529663085938, + "logps/rejected": -210.3851776123047, + "loss": 1.2246, + "nll_loss": 0.9016008377075195, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.680493354797363, + "rewards/margins": 4.056046962738037, + "rewards/rejected": 1.624446153640747, + "step": 14630 + }, + { + "epoch": 0.8121940056309899, + "grad_norm": 111.01287841796875, + "learning_rate": 8.452257886342295e-09, + "logits/chosen": -0.1292901188135147, + "logits/rejected": -0.30237165093421936, + "logps/chosen": -119.23050689697266, + "logps/rejected": -178.7151336669922, + "loss": 1.3316, + "nll_loss": 1.0425434112548828, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 4.750171661376953, + "rewards/margins": 3.0412964820861816, + "rewards/rejected": 1.708875298500061, + "step": 14640 + }, + { + "epoch": 0.8127487829572405, + "grad_norm": 59.906715393066406, + "learning_rate": 8.403838540668057e-09, + "logits/chosen": -0.3006291687488556, + "logits/rejected": -0.42718249559402466, + "logps/chosen": -134.45458984375, + "logps/rejected": -182.62513732910156, + "loss": 1.2352, + "nll_loss": 0.8543822169303894, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 5.532834529876709, + "rewards/margins": 3.3276023864746094, + "rewards/rejected": 2.2052321434020996, + "step": 14650 + }, + { + "epoch": 0.8133035602834913, + "grad_norm": 71.21964263916016, + "learning_rate": 8.355545552877658e-09, + "logits/chosen": -0.45056334137916565, + "logits/rejected": -0.5881573557853699, + "logps/chosen": -176.95181274414062, + "logps/rejected": -240.5482940673828, + "loss": 1.2533, + "nll_loss": 1.017821192741394, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.567188262939453, + "rewards/margins": 4.7127227783203125, + "rewards/rejected": 1.8544645309448242, + "step": 14660 + }, + { + "epoch": 0.8138583376097419, + "grad_norm": 52.82645797729492, + "learning_rate": 8.307379069672099e-09, + "logits/chosen": -0.37268659472465515, + "logits/rejected": -0.5259225964546204, + "logps/chosen": -173.6767578125, + "logps/rejected": -238.0189666748047, + "loss": 1.2661, + "nll_loss": 0.9858657717704773, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.159924507141113, + "rewards/margins": 3.906090497970581, + "rewards/rejected": 2.253833293914795, + "step": 14670 + }, + { + "epoch": 0.8144131149359926, + "grad_norm": 73.35997772216797, + "learning_rate": 8.259339237368134e-09, + "logits/chosen": -0.3108692467212677, + "logits/rejected": -0.4505864679813385, + "logps/chosen": -141.55963134765625, + "logps/rejected": -208.3193817138672, + "loss": 1.2022, + "nll_loss": 0.915246307849884, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.435148239135742, + "rewards/margins": 3.543468952178955, + "rewards/rejected": 1.8916794061660767, + "step": 14680 + }, + { + "epoch": 0.8149678922622432, + "grad_norm": 57.747432708740234, + "learning_rate": 8.211426201897797e-09, + "logits/chosen": -0.25365111231803894, + "logits/rejected": -0.395337849855423, + "logps/chosen": -165.84991455078125, + "logps/rejected": -255.8370819091797, + "loss": 1.1484, + "nll_loss": 0.9351493120193481, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.533167839050293, + "rewards/margins": 4.6734185218811035, + "rewards/rejected": 1.8597490787506104, + "step": 14690 + }, + { + "epoch": 0.815522669588494, + "grad_norm": 49.915531158447266, + "learning_rate": 8.163640108807896e-09, + "logits/chosen": -0.2948180139064789, + "logits/rejected": -0.4152253568172455, + "logps/chosen": -157.45738220214844, + "logps/rejected": -204.9689178466797, + "loss": 1.1782, + "nll_loss": 0.904376208782196, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.626477241516113, + "rewards/margins": 3.7865302562713623, + "rewards/rejected": 1.8399465084075928, + "step": 14700 + }, + { + "epoch": 0.8160774469147446, + "grad_norm": 61.01435089111328, + "learning_rate": 8.115981103259678e-09, + "logits/chosen": -0.20418615639209747, + "logits/rejected": -0.3977503180503845, + "logps/chosen": -155.60549926757812, + "logps/rejected": -204.55967712402344, + "loss": 1.2328, + "nll_loss": 0.8879886865615845, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.589022636413574, + "rewards/margins": 3.396660566329956, + "rewards/rejected": 2.19236159324646, + "step": 14710 + }, + { + "epoch": 0.8166322242409952, + "grad_norm": 32.84076690673828, + "learning_rate": 8.068449330028282e-09, + "logits/chosen": -0.3054724931716919, + "logits/rejected": -0.45483383536338806, + "logps/chosen": -165.53244018554688, + "logps/rejected": -215.5263671875, + "loss": 1.2858, + "nll_loss": 1.0018367767333984, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.649083614349365, + "rewards/margins": 4.386407852172852, + "rewards/rejected": 1.2626762390136719, + "step": 14720 + }, + { + "epoch": 0.817187001567246, + "grad_norm": 52.607566833496094, + "learning_rate": 8.02104493350238e-09, + "logits/chosen": -0.2687918543815613, + "logits/rejected": -0.38726913928985596, + "logps/chosen": -161.62387084960938, + "logps/rejected": -228.53164672851562, + "loss": 1.2302, + "nll_loss": 1.0006484985351562, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.204294204711914, + "rewards/margins": 4.660628318786621, + "rewards/rejected": 1.5436656475067139, + "step": 14730 + }, + { + "epoch": 0.8177417788934966, + "grad_norm": 153.74290466308594, + "learning_rate": 7.973768057683728e-09, + "logits/chosen": -0.2631340026855469, + "logits/rejected": -0.48361843824386597, + "logps/chosen": -153.59364318847656, + "logps/rejected": -218.3295135498047, + "loss": 1.331, + "nll_loss": 0.9026532173156738, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.447627067565918, + "rewards/margins": 4.096835136413574, + "rewards/rejected": 1.3507912158966064, + "step": 14740 + }, + { + "epoch": 0.8182965562197473, + "grad_norm": 29.862611770629883, + "learning_rate": 7.926618846186645e-09, + "logits/chosen": -0.44401612877845764, + "logits/rejected": -0.5922880172729492, + "logps/chosen": -186.78976440429688, + "logps/rejected": -259.10577392578125, + "loss": 1.2862, + "nll_loss": 1.0834470987319946, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.416709899902344, + "rewards/margins": 4.0009002685546875, + "rewards/rejected": 2.415809154510498, + "step": 14750 + }, + { + "epoch": 0.8188513335459979, + "grad_norm": 49.80220031738281, + "learning_rate": 7.879597442237712e-09, + "logits/chosen": -0.22588615119457245, + "logits/rejected": -0.37203675508499146, + "logps/chosen": -145.67984008789062, + "logps/rejected": -199.66519165039062, + "loss": 1.244, + "nll_loss": 0.867226243019104, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.510145664215088, + "rewards/margins": 3.3055522441864014, + "rewards/rejected": 2.204594135284424, + "step": 14760 + }, + { + "epoch": 0.8194061108722487, + "grad_norm": 45.6759033203125, + "learning_rate": 7.832703988675194e-09, + "logits/chosen": -0.2180647850036621, + "logits/rejected": -0.4713711738586426, + "logps/chosen": -137.48190307617188, + "logps/rejected": -186.2786865234375, + "loss": 1.1119, + "nll_loss": 0.7633231282234192, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.3695783615112305, + "rewards/margins": 3.9644503593444824, + "rewards/rejected": 1.4051278829574585, + "step": 14770 + }, + { + "epoch": 0.8199608881984993, + "grad_norm": 70.81529235839844, + "learning_rate": 7.785938627948757e-09, + "logits/chosen": -0.29169461131095886, + "logits/rejected": -0.4414834976196289, + "logps/chosen": -140.72491455078125, + "logps/rejected": -196.05258178710938, + "loss": 1.1999, + "nll_loss": 0.9778728485107422, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.375916957855225, + "rewards/margins": 3.624210834503174, + "rewards/rejected": 1.7517064809799194, + "step": 14780 + }, + { + "epoch": 0.82051566552475, + "grad_norm": 72.59806060791016, + "learning_rate": 7.739301502118884e-09, + "logits/chosen": -0.2551764249801636, + "logits/rejected": -0.41377201676368713, + "logps/chosen": -139.05166625976562, + "logps/rejected": -197.09945678710938, + "loss": 1.2325, + "nll_loss": 0.8698671460151672, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.665285110473633, + "rewards/margins": 3.6509602069854736, + "rewards/rejected": 2.014324903488159, + "step": 14790 + }, + { + "epoch": 0.8210704428510007, + "grad_norm": 68.07139587402344, + "learning_rate": 7.692792752856563e-09, + "logits/chosen": -0.2664044499397278, + "logits/rejected": -0.403535932302475, + "logps/chosen": -150.14035034179688, + "logps/rejected": -195.5835418701172, + "loss": 1.19, + "nll_loss": 0.85888671875, + "rewards/accuracies": 0.875, + "rewards/chosen": 5.65519905090332, + "rewards/margins": 3.977385997772217, + "rewards/rejected": 1.6778132915496826, + "step": 14800 + }, + { + "epoch": 0.8216252201772514, + "grad_norm": 66.05326843261719, + "learning_rate": 7.646412521442775e-09, + "logits/chosen": -0.275273859500885, + "logits/rejected": -0.41081708669662476, + "logps/chosen": -160.2446746826172, + "logps/rejected": -218.191162109375, + "loss": 1.2886, + "nll_loss": 0.8886381983757019, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.96896505355835, + "rewards/margins": 4.09000301361084, + "rewards/rejected": 1.878962755203247, + "step": 14810 + }, + { + "epoch": 0.822179997503502, + "grad_norm": 56.362491607666016, + "learning_rate": 7.600160948768119e-09, + "logits/chosen": -0.42797571420669556, + "logits/rejected": -0.5637251138687134, + "logps/chosen": -196.26580810546875, + "logps/rejected": -264.8769226074219, + "loss": 1.2708, + "nll_loss": 1.0535155534744263, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 6.409218788146973, + "rewards/margins": 4.376527309417725, + "rewards/rejected": 2.032691240310669, + "step": 14820 + }, + { + "epoch": 0.8227347748297527, + "grad_norm": 78.95390319824219, + "learning_rate": 7.554038175332372e-09, + "logits/chosen": -0.19425630569458008, + "logits/rejected": -0.4434455931186676, + "logps/chosen": -122.21415710449219, + "logps/rejected": -181.78126525878906, + "loss": 1.1581, + "nll_loss": 0.7200466990470886, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.086915493011475, + "rewards/margins": 4.251137733459473, + "rewards/rejected": 0.8357783555984497, + "step": 14830 + }, + { + "epoch": 0.8232895521560034, + "grad_norm": 90.60800170898438, + "learning_rate": 7.508044341244014e-09, + "logits/chosen": -0.35558730363845825, + "logits/rejected": -0.4984716773033142, + "logps/chosen": -177.00253295898438, + "logps/rejected": -234.00106811523438, + "loss": 1.2573, + "nll_loss": 0.8866392374038696, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.090183258056641, + "rewards/margins": 4.173531532287598, + "rewards/rejected": 1.916651725769043, + "step": 14840 + }, + { + "epoch": 0.8238443294822541, + "grad_norm": 92.40534973144531, + "learning_rate": 7.462179586219896e-09, + "logits/chosen": -0.26048916578292847, + "logits/rejected": -0.44145363569259644, + "logps/chosen": -142.84750366210938, + "logps/rejected": -187.72445678710938, + "loss": 1.298, + "nll_loss": 0.8607121706008911, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.610686302185059, + "rewards/margins": 3.019132375717163, + "rewards/rejected": 2.5915539264678955, + "step": 14850 + }, + { + "epoch": 0.8243991068085047, + "grad_norm": 40.92693328857422, + "learning_rate": 7.416444049584713e-09, + "logits/chosen": -0.37518757581710815, + "logits/rejected": -0.5347386598587036, + "logps/chosen": -136.802978515625, + "logps/rejected": -192.44764709472656, + "loss": 1.1889, + "nll_loss": 0.9797961115837097, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 5.848901271820068, + "rewards/margins": 3.5814437866210938, + "rewards/rejected": 2.2674574851989746, + "step": 14860 + }, + { + "epoch": 0.8249538841347555, + "grad_norm": 43.93336868286133, + "learning_rate": 7.370837870270657e-09, + "logits/chosen": -0.19765231013298035, + "logits/rejected": -0.31762415170669556, + "logps/chosen": -167.2465057373047, + "logps/rejected": -211.02896118164062, + "loss": 1.2521, + "nll_loss": 0.9774270057678223, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.4940571784973145, + "rewards/margins": 2.8652596473693848, + "rewards/rejected": 2.6287970542907715, + "step": 14870 + }, + { + "epoch": 0.8255086614610061, + "grad_norm": 77.28921508789062, + "learning_rate": 7.325361186816958e-09, + "logits/chosen": -0.3661887049674988, + "logits/rejected": -0.47705668210983276, + "logps/chosen": -183.70973205566406, + "logps/rejected": -248.5480499267578, + "loss": 1.3064, + "nll_loss": 0.9975088238716125, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 6.510737419128418, + "rewards/margins": 3.8452117443084717, + "rewards/rejected": 2.665524959564209, + "step": 14880 + }, + { + "epoch": 0.8260634387872567, + "grad_norm": 53.095848083496094, + "learning_rate": 7.2800141373695e-09, + "logits/chosen": -0.4615301489830017, + "logits/rejected": -0.5704389810562134, + "logps/chosen": -202.3080596923828, + "logps/rejected": -265.6336975097656, + "loss": 1.2323, + "nll_loss": 1.1122227907180786, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 6.346624374389648, + "rewards/margins": 3.7509427070617676, + "rewards/rejected": 2.595681667327881, + "step": 14890 + }, + { + "epoch": 0.8266182161135074, + "grad_norm": 59.551517486572266, + "learning_rate": 7.234796859680309e-09, + "logits/chosen": -0.18715207278728485, + "logits/rejected": -0.43487709760665894, + "logps/chosen": -134.09478759765625, + "logps/rejected": -210.234130859375, + "loss": 1.2, + "nll_loss": 0.8074381947517395, + "rewards/accuracies": 0.875, + "rewards/chosen": 4.857832431793213, + "rewards/margins": 4.174450874328613, + "rewards/rejected": 0.6833813786506653, + "step": 14900 + }, + { + "epoch": 0.8271729934397581, + "grad_norm": 53.82889175415039, + "learning_rate": 7.189709491107271e-09, + "logits/chosen": -0.18827755749225616, + "logits/rejected": -0.3777596056461334, + "logps/chosen": -146.95791625976562, + "logps/rejected": -196.8974609375, + "loss": 1.2265, + "nll_loss": 0.8681344985961914, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.362422943115234, + "rewards/margins": 3.6606945991516113, + "rewards/rejected": 1.7017284631729126, + "step": 14910 + }, + { + "epoch": 0.8277277707660088, + "grad_norm": 59.87836837768555, + "learning_rate": 7.1447521686136045e-09, + "logits/chosen": -0.22889253497123718, + "logits/rejected": -0.4294372498989105, + "logps/chosen": -171.40213012695312, + "logps/rejected": -246.50717163085938, + "loss": 1.1909, + "nll_loss": 0.8252578973770142, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 5.834525108337402, + "rewards/margins": 3.9994735717773438, + "rewards/rejected": 1.8350521326065063, + "step": 14920 + }, + { + "epoch": 0.8282825480922594, + "grad_norm": 49.997886657714844, + "learning_rate": 7.099925028767484e-09, + "logits/chosen": -0.20189666748046875, + "logits/rejected": -0.34705278277397156, + "logps/chosen": -142.7606658935547, + "logps/rejected": -208.4573211669922, + "loss": 1.1941, + "nll_loss": 0.8327042460441589, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.526029109954834, + "rewards/margins": 3.9469552040100098, + "rewards/rejected": 1.5790737867355347, + "step": 14930 + }, + { + "epoch": 0.8288373254185102, + "grad_norm": 36.117759704589844, + "learning_rate": 7.055228207741648e-09, + "logits/chosen": -0.24866139888763428, + "logits/rejected": -0.41052132844924927, + "logps/chosen": -156.0734405517578, + "logps/rejected": -208.0565643310547, + "loss": 1.142, + "nll_loss": 0.8504625558853149, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.5310258865356445, + "rewards/margins": 3.632554531097412, + "rewards/rejected": 1.8984712362289429, + "step": 14940 + }, + { + "epoch": 0.8293921027447608, + "grad_norm": 78.30645751953125, + "learning_rate": 7.010661841312921e-09, + "logits/chosen": -0.3344518840312958, + "logits/rejected": -0.42739883065223694, + "logps/chosen": -174.8288116455078, + "logps/rejected": -229.3480224609375, + "loss": 1.2936, + "nll_loss": 1.0034263134002686, + "rewards/accuracies": 0.875, + "rewards/chosen": 6.538097381591797, + "rewards/margins": 4.158072471618652, + "rewards/rejected": 2.3800246715545654, + "step": 14950 + }, + { + "epoch": 0.8299468800710115, + "grad_norm": 78.57463073730469, + "learning_rate": 6.96622606486188e-09, + "logits/chosen": -0.22606225311756134, + "logits/rejected": -0.4104226529598236, + "logps/chosen": -128.64364624023438, + "logps/rejected": -194.7679901123047, + "loss": 1.179, + "nll_loss": 0.7998219728469849, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 5.099642276763916, + "rewards/margins": 2.9102585315704346, + "rewards/rejected": 2.189384698867798, + "step": 14960 + }, + { + "epoch": 0.8305016573972622, + "grad_norm": 42.729522705078125, + "learning_rate": 6.921921013372401e-09, + "logits/chosen": -0.09530682861804962, + "logits/rejected": -0.24463346600532532, + "logps/chosen": -139.29718017578125, + "logps/rejected": -214.3907470703125, + "loss": 1.2336, + "nll_loss": 0.835990309715271, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 5.2614030838012695, + "rewards/margins": 3.730445384979248, + "rewards/rejected": 1.5309584140777588, + "step": 14970 + }, + { + "epoch": 0.8310564347235129, + "grad_norm": 80.23871612548828, + "learning_rate": 6.877746821431218e-09, + "logits/chosen": -0.33773642778396606, + "logits/rejected": -0.47674092650413513, + "logps/chosen": -158.9118194580078, + "logps/rejected": -242.2969207763672, + "loss": 1.2778, + "nll_loss": 0.994554877281189, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 6.165432453155518, + "rewards/margins": 4.255608081817627, + "rewards/rejected": 1.909824013710022, + "step": 14980 + }, + { + "epoch": 0.8316112120497635, + "grad_norm": 67.4317398071289, + "learning_rate": 6.833703623227599e-09, + "logits/chosen": -0.39678817987442017, + "logits/rejected": -0.5164974927902222, + "logps/chosen": -187.6385955810547, + "logps/rejected": -282.72918701171875, + "loss": 1.2539, + "nll_loss": 1.0270280838012695, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.897919654846191, + "rewards/margins": 5.087853908538818, + "rewards/rejected": 1.810065507888794, + "step": 14990 + }, + { + "epoch": 0.8321659893760142, + "grad_norm": 78.31485748291016, + "learning_rate": 6.789791552552837e-09, + "logits/chosen": -0.3030152916908264, + "logits/rejected": -0.42236360907554626, + "logps/chosen": -158.3644561767578, + "logps/rejected": -209.4776153564453, + "loss": 1.2471, + "nll_loss": 0.9546947479248047, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 5.328968524932861, + "rewards/margins": 3.3644192218780518, + "rewards/rejected": 1.9645494222640991, + "step": 15000 + }, + { + "epoch": 0.8321659893760142, + "eval_logits/chosen": -0.40984421968460083, + "eval_logits/rejected": -0.5251399874687195, + "eval_logps/chosen": -190.08694458007812, + "eval_logps/rejected": -261.3680419921875, + "eval_loss": 1.215467929840088, + "eval_nll_loss": 0.9851780533790588, + "eval_rewards/accuracies": 0.90625, + "eval_rewards/chosen": 6.787428379058838, + "eval_rewards/margins": 5.016385078430176, + "eval_rewards/rejected": 1.7710434198379517, + "eval_runtime": 17.1295, + "eval_samples_per_second": 14.945, + "eval_steps_per_second": 1.868, + "step": 15000 + } + ], + "logging_steps": 10, + "max_steps": 18025, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}