{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.986666666666667, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.047407407407407405, "grad_norm": 1128.0, "learning_rate": 7.8125e-06, "log_odds_chosen": -1.058196783065796, "log_odds_ratio": -11.008082389831543, "logps/chosen": -22.712032318115234, "logps/rejected": -21.654064178466797, "loss": 467.0638, "nll_loss": 9.837631225585938, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -11.356016159057617, "rewards/margins": -0.5289839506149292, "rewards/rejected": -10.827032089233398, "step": 5 }, { "epoch": 0.09481481481481481, "grad_norm": 588.0, "learning_rate": 1.5625e-05, "log_odds_chosen": -3.1185920238494873, "log_odds_ratio": -11.042587280273438, "logps/chosen": -21.993783950805664, "logps/rejected": -18.875520706176758, "loss": 435.3677, "nll_loss": 8.753497123718262, "rewards/accuracies": 0.453125, "rewards/chosen": -10.996891975402832, "rewards/margins": -1.5591304302215576, "rewards/rejected": -9.437760353088379, "step": 10 }, { "epoch": 0.14222222222222222, "grad_norm": 468.0, "learning_rate": 2.34375e-05, "log_odds_chosen": -1.2942270040512085, "log_odds_ratio": -11.319032669067383, "logps/chosen": -21.984771728515625, "logps/rejected": -20.689090728759766, "loss": 412.2578, "nll_loss": 7.999310493469238, "rewards/accuracies": 0.515625, "rewards/chosen": -10.992385864257812, "rewards/margins": -0.6478394269943237, "rewards/rejected": -10.344545364379883, "step": 15 }, { "epoch": 0.18962962962962962, "grad_norm": 8384.0, "learning_rate": 3.125e-05, "log_odds_chosen": -4.4439616203308105, "log_odds_ratio": -9.70390510559082, "logps/chosen": -17.518526077270508, "logps/rejected": -13.076098442077637, "loss": 338.9335, "nll_loss": 6.398016929626465, "rewards/accuracies": 0.4281249940395355, "rewards/chosen": -8.759263038635254, "rewards/margins": -2.2212135791778564, "rewards/rejected": -6.538049221038818, "step": 20 }, { "epoch": 0.23703703703703705, "grad_norm": 242.0, "learning_rate": 3.90625e-05, "log_odds_chosen": -0.14175763726234436, "log_odds_ratio": -2.161510944366455, "logps/chosen": -4.730892658233643, "logps/rejected": -4.572934150695801, "loss": 107.5018, "nll_loss": 2.8825061321258545, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -2.3654463291168213, "rewards/margins": -0.07897911965847015, "rewards/rejected": -2.2864670753479004, "step": 25 }, { "epoch": 0.28444444444444444, "grad_norm": 186.0, "learning_rate": 4.6875e-05, "log_odds_chosen": 0.1054491400718689, "log_odds_ratio": -0.8509915471076965, "logps/chosen": -1.8616880178451538, "logps/rejected": -1.9526846408843994, "loss": 65.1113, "nll_loss": 2.0738651752471924, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.9308440089225769, "rewards/margins": 0.04549835994839668, "rewards/rejected": -0.9763423204421997, "step": 30 }, { "epoch": 0.33185185185185184, "grad_norm": 205.0, "learning_rate": 4.998613757348784e-05, "log_odds_chosen": 0.31355661153793335, "log_odds_ratio": -0.7803260684013367, "logps/chosen": -1.765001654624939, "logps/rejected": -2.0435233116149902, "loss": 58.3669, "nll_loss": 1.9382717609405518, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.8825008273124695, "rewards/margins": 0.1392608880996704, "rewards/rejected": -1.0217616558074951, "step": 35 }, { "epoch": 0.37925925925925924, "grad_norm": 167.0, "learning_rate": 4.990147841143462e-05, "log_odds_chosen": 0.5204086899757385, "log_odds_ratio": -0.653488278388977, "logps/chosen": -1.6190803050994873, "logps/rejected": -2.0669641494750977, "loss": 53.3821, "nll_loss": 1.8854446411132812, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -0.8095401525497437, "rewards/margins": 0.22394177317619324, "rewards/rejected": -1.0334820747375488, "step": 40 }, { "epoch": 0.4266666666666667, "grad_norm": 350.0, "learning_rate": 4.97401218720448e-05, "log_odds_chosen": 1.1803003549575806, "log_odds_ratio": -0.5899690389633179, "logps/chosen": -1.6516567468643188, "logps/rejected": -2.7521023750305176, "loss": 46.1493, "nll_loss": 1.8530938625335693, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.8258283734321594, "rewards/margins": 0.5502227544784546, "rewards/rejected": -1.3760511875152588, "step": 45 }, { "epoch": 0.4740740740740741, "grad_norm": 230.0, "learning_rate": 4.9502564938797946e-05, "log_odds_chosen": 0.8049520254135132, "log_odds_ratio": -0.6416460871696472, "logps/chosen": -1.554616928100586, "logps/rejected": -2.283681869506836, "loss": 48.6877, "nll_loss": 1.7986338138580322, "rewards/accuracies": 0.659375011920929, "rewards/chosen": -0.777308464050293, "rewards/margins": 0.36453238129615784, "rewards/rejected": -1.141840934753418, "step": 50 }, { "epoch": 0.5214814814814814, "grad_norm": 155.0, "learning_rate": 4.918953929490768e-05, "log_odds_chosen": 1.1016016006469727, "log_odds_ratio": -0.6536484360694885, "logps/chosen": -1.6193764209747314, "logps/rejected": -2.638171434402466, "loss": 47.3989, "nll_loss": 1.8205287456512451, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -0.8096882104873657, "rewards/margins": 0.5093975067138672, "rewards/rejected": -1.319085717201233, "step": 55 }, { "epoch": 0.5688888888888889, "grad_norm": 348.0, "learning_rate": 4.88020090697132e-05, "log_odds_chosen": 1.6798295974731445, "log_odds_ratio": -0.6031836271286011, "logps/chosen": -1.5959837436676025, "logps/rejected": -3.1869189739227295, "loss": 46.0557, "nll_loss": 1.8288695812225342, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7979918718338013, "rewards/margins": 0.7954676151275635, "rewards/rejected": -1.5934594869613647, "step": 60 }, { "epoch": 0.6162962962962963, "grad_norm": 155.0, "learning_rate": 4.834116786912897e-05, "log_odds_chosen": 1.7133772373199463, "log_odds_ratio": -0.7503857016563416, "logps/chosen": -1.8493322134017944, "logps/rejected": -3.4778411388397217, "loss": 52.1616, "nll_loss": 1.9849662780761719, "rewards/accuracies": 0.671875, "rewards/chosen": -0.9246661067008972, "rewards/margins": 0.8142545819282532, "rewards/rejected": -1.7389205694198608, "step": 65 }, { "epoch": 0.6637037037037037, "grad_norm": 348.0, "learning_rate": 4.7808435099299045e-05, "log_odds_chosen": 1.9798109531402588, "log_odds_ratio": -0.5159353017807007, "logps/chosen": -1.7272640466690063, "logps/rejected": -3.5779659748077393, "loss": 38.9021, "nll_loss": 1.842188835144043, "rewards/accuracies": 0.7718750238418579, "rewards/chosen": -0.8636320233345032, "rewards/margins": 0.9253507852554321, "rewards/rejected": -1.7889829874038696, "step": 70 }, { "epoch": 0.7111111111111111, "grad_norm": 224.0, "learning_rate": 4.720545159477922e-05, "log_odds_chosen": 1.3524739742279053, "log_odds_ratio": -0.6269196271896362, "logps/chosen": -1.5649521350860596, "logps/rejected": -2.8103561401367188, "loss": 42.7284, "nll_loss": 1.7544043064117432, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.7824760675430298, "rewards/margins": 0.6227020025253296, "rewards/rejected": -1.4051780700683594, "step": 75 }, { "epoch": 0.7585185185185185, "grad_norm": 318.0, "learning_rate": 4.653407456471222e-05, "log_odds_chosen": 1.2641541957855225, "log_odds_ratio": -0.7116214036941528, "logps/chosen": -1.672009825706482, "logps/rejected": -2.8201100826263428, "loss": 45.8222, "nll_loss": 1.8372602462768555, "rewards/accuracies": 0.640625, "rewards/chosen": -0.836004912853241, "rewards/margins": 0.57405024766922, "rewards/rejected": -1.4100550413131714, "step": 80 }, { "epoch": 0.8059259259259259, "grad_norm": 812.0, "learning_rate": 4.579637187256222e-05, "log_odds_chosen": 2.119652509689331, "log_odds_ratio": -0.7182853817939758, "logps/chosen": -1.69955575466156, "logps/rejected": -3.706132411956787, "loss": 44.067, "nll_loss": 1.8409528732299805, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.84977787733078, "rewards/margins": 1.0032880306243896, "rewards/rejected": -1.8530662059783936, "step": 85 }, { "epoch": 0.8533333333333334, "grad_norm": 632.0, "learning_rate": 4.499461566702685e-05, "log_odds_chosen": 3.368741512298584, "log_odds_ratio": -0.9751121401786804, "logps/chosen": -1.8076425790786743, "logps/rejected": -5.029626369476318, "loss": 48.4689, "nll_loss": 1.954244613647461, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.9038212895393372, "rewards/margins": 1.6109920740127563, "rewards/rejected": -2.514813184738159, "step": 90 }, { "epoch": 0.9007407407407407, "grad_norm": 354.0, "learning_rate": 4.413127538374411e-05, "log_odds_chosen": 0.9373680353164673, "log_odds_ratio": -0.6437951922416687, "logps/chosen": -1.4754348993301392, "logps/rejected": -2.3128228187561035, "loss": 42.7029, "nll_loss": 1.6802858114242554, "rewards/accuracies": 0.6468750238418579, "rewards/chosen": -0.7377174496650696, "rewards/margins": 0.418693870306015, "rewards/rejected": -1.1564114093780518, "step": 95 }, { "epoch": 0.9481481481481482, "grad_norm": 292.0, "learning_rate": 4.320901013934887e-05, "log_odds_chosen": 1.350513219833374, "log_odds_ratio": -0.5488158464431763, "logps/chosen": -1.4655238389968872, "logps/rejected": -2.687412738800049, "loss": 37.986, "nll_loss": 1.686754822731018, "rewards/accuracies": 0.703125, "rewards/chosen": -0.7327619194984436, "rewards/margins": 0.610944390296936, "rewards/rejected": -1.3437063694000244, "step": 100 }, { "epoch": 0.9955555555555555, "grad_norm": 167.0, "learning_rate": 4.223066054130568e-05, "log_odds_chosen": 1.7379270792007446, "log_odds_ratio": -0.6734561324119568, "logps/chosen": -2.0734519958496094, "logps/rejected": -3.697786808013916, "loss": 47.1636, "nll_loss": 2.014409065246582, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": -1.0367259979248047, "rewards/margins": 0.8121673464775085, "rewards/rejected": -1.848893404006958, "step": 105 }, { "epoch": 1.0429629629629629, "grad_norm": 544.0, "learning_rate": 4.1199239938743797e-05, "log_odds_chosen": 3.0268468856811523, "log_odds_ratio": -0.44861817359924316, "logps/chosen": -1.4808493852615356, "logps/rejected": -4.2748918533325195, "loss": 27.2254, "nll_loss": 1.616323709487915, "rewards/accuracies": 0.78125, "rewards/chosen": -0.7404246926307678, "rewards/margins": 1.3970211744308472, "rewards/rejected": -2.1374459266662598, "step": 110 }, { "epoch": 1.0903703703703704, "grad_norm": 193.0, "learning_rate": 4.0117925141242174e-05, "log_odds_chosen": 2.6167469024658203, "log_odds_ratio": -0.4948674738407135, "logps/chosen": -1.3744416236877441, "logps/rejected": -3.7343673706054688, "loss": 25.6633, "nll_loss": 1.5660358667373657, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6872208118438721, "rewards/margins": 1.1799629926681519, "rewards/rejected": -1.8671836853027344, "step": 115 }, { "epoch": 1.1377777777777778, "grad_norm": 358.0, "learning_rate": 3.899004663415084e-05, "log_odds_chosen": 2.8587183952331543, "log_odds_ratio": -0.4674352705478668, "logps/chosen": -1.3489106893539429, "logps/rejected": -3.9463298320770264, "loss": 24.1664, "nll_loss": 1.5956518650054932, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.6744553446769714, "rewards/margins": 1.2987096309661865, "rewards/rejected": -1.9731649160385132, "step": 120 }, { "epoch": 1.1851851851851851, "grad_norm": 228.0, "learning_rate": 3.781907832058587e-05, "log_odds_chosen": 2.298691511154175, "log_odds_ratio": -0.44407153129577637, "logps/chosen": -1.3952347040176392, "logps/rejected": -3.443612575531006, "loss": 26.9036, "nll_loss": 1.621145248413086, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.6976173520088196, "rewards/margins": 1.0241888761520386, "rewards/rejected": -1.721806287765503, "step": 125 }, { "epoch": 1.2325925925925927, "grad_norm": 470.0, "learning_rate": 3.660862682169282e-05, "log_odds_chosen": 3.3581974506378174, "log_odds_ratio": -0.37174850702285767, "logps/chosen": -1.641959547996521, "logps/rejected": -4.736272811889648, "loss": 19.7077, "nll_loss": 1.707945466041565, "rewards/accuracies": 0.828125, "rewards/chosen": -0.8209797739982605, "rewards/margins": 1.5471566915512085, "rewards/rejected": -2.368136405944824, "step": 130 }, { "epoch": 1.28, "grad_norm": 260.0, "learning_rate": 3.5362420368134356e-05, "log_odds_chosen": 2.5022172927856445, "log_odds_ratio": -0.4535306990146637, "logps/chosen": -1.4220046997070312, "logps/rejected": -3.6646904945373535, "loss": 25.0471, "nll_loss": 1.5830011367797852, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7110023498535156, "rewards/margins": 1.1213428974151611, "rewards/rejected": -1.8323452472686768, "step": 135 }, { "epoch": 1.3274074074074074, "grad_norm": 484.0, "learning_rate": 3.408429731701635e-05, "log_odds_chosen": 2.461456775665283, "log_odds_ratio": -0.4378852844238281, "logps/chosen": -1.438861608505249, "logps/rejected": -3.6586761474609375, "loss": 25.1637, "nll_loss": 1.5763061046600342, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.7194308042526245, "rewards/margins": 1.1099072694778442, "rewards/rejected": -1.8293380737304688, "step": 140 }, { "epoch": 1.374814814814815, "grad_norm": 366.0, "learning_rate": 3.2778194329621104e-05, "log_odds_chosen": 3.443436861038208, "log_odds_ratio": -0.3327510952949524, "logps/chosen": -1.639411211013794, "logps/rejected": -4.814173221588135, "loss": 19.2385, "nll_loss": 1.6921018362045288, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.819705605506897, "rewards/margins": 1.5873807668685913, "rewards/rejected": -2.4070866107940674, "step": 145 }, { "epoch": 1.4222222222222223, "grad_norm": 460.0, "learning_rate": 3.144813424636031e-05, "log_odds_chosen": 3.7978568077087402, "log_odds_ratio": -0.3528765141963959, "logps/chosen": -1.41143000125885, "logps/rejected": -4.935047149658203, "loss": 16.6229, "nll_loss": 1.5774896144866943, "rewards/accuracies": 0.846875011920929, "rewards/chosen": -0.705715000629425, "rewards/margins": 1.7618083953857422, "rewards/rejected": -2.4675235748291016, "step": 150 }, { "epoch": 1.4696296296296296, "grad_norm": 414.0, "learning_rate": 3.0098213696293542e-05, "log_odds_chosen": 3.518047332763672, "log_odds_ratio": -0.5385881662368774, "logps/chosen": -1.7592281103134155, "logps/rejected": -5.035131931304932, "loss": 23.1146, "nll_loss": 1.7055237293243408, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8796140551567078, "rewards/margins": 1.6379520893096924, "rewards/rejected": -2.517565965652466, "step": 155 }, { "epoch": 1.5170370370370372, "grad_norm": 688.0, "learning_rate": 2.8732590479375165e-05, "log_odds_chosen": 3.530190944671631, "log_odds_ratio": -0.4366869032382965, "logps/chosen": -1.6791108846664429, "logps/rejected": -4.922083854675293, "loss": 17.4724, "nll_loss": 1.6354753971099854, "rewards/accuracies": 0.809374988079071, "rewards/chosen": -0.8395554423332214, "rewards/margins": 1.6214866638183594, "rewards/rejected": -2.4610419273376465, "step": 160 }, { "epoch": 1.5644444444444443, "grad_norm": 288.0, "learning_rate": 2.7355470760292956e-05, "log_odds_chosen": 3.844754695892334, "log_odds_ratio": -0.3585518002510071, "logps/chosen": -1.4751592874526978, "logps/rejected": -5.015917778015137, "loss": 14.768, "nll_loss": 1.5972576141357422, "rewards/accuracies": 0.846875011920929, "rewards/chosen": -0.7375796437263489, "rewards/margins": 1.7703793048858643, "rewards/rejected": -2.5079588890075684, "step": 165 }, { "epoch": 1.6118518518518519, "grad_norm": 388.0, "learning_rate": 2.597109611334169e-05, "log_odds_chosen": 4.079934597015381, "log_odds_ratio": -0.44897276163101196, "logps/chosen": -1.6552518606185913, "logps/rejected": -5.458821773529053, "loss": 15.6283, "nll_loss": 1.6442257165908813, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.8276259303092957, "rewards/margins": 1.901785135269165, "rewards/rejected": -2.7294108867645264, "step": 170 }, { "epoch": 1.6592592592592592, "grad_norm": 356.0, "learning_rate": 2.458373045823404e-05, "log_odds_chosen": 3.561838150024414, "log_odds_ratio": -0.3351458013057709, "logps/chosen": -1.2622171640396118, "logps/rejected": -4.530435085296631, "loss": 14.9451, "nll_loss": 1.494937777519226, "rewards/accuracies": 0.828125, "rewards/chosen": -0.6311085820198059, "rewards/margins": 1.6341091394424438, "rewards/rejected": -2.2652175426483154, "step": 175 }, { "epoch": 1.7066666666666666, "grad_norm": 424.0, "learning_rate": 2.3197646927086697e-05, "log_odds_chosen": 4.188170433044434, "log_odds_ratio": -0.39013969898223877, "logps/chosen": -1.56984281539917, "logps/rejected": -5.4578375816345215, "loss": 14.8288, "nll_loss": 1.6269553899765015, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.784921407699585, "rewards/margins": 1.9439977407455444, "rewards/rejected": -2.7289187908172607, "step": 180 }, { "epoch": 1.7540740740740741, "grad_norm": 213.0, "learning_rate": 2.1817114703032176e-05, "log_odds_chosen": 4.228732585906982, "log_odds_ratio": -0.46826067566871643, "logps/chosen": -1.6550719738006592, "logps/rejected": -5.639611721038818, "loss": 15.1508, "nll_loss": 1.6106717586517334, "rewards/accuracies": 0.809374988079071, "rewards/chosen": -0.8275359869003296, "rewards/margins": 1.9922701120376587, "rewards/rejected": -2.819805860519409, "step": 185 }, { "epoch": 1.8014814814814815, "grad_norm": 322.0, "learning_rate": 2.0446385870993467e-05, "log_odds_chosen": 3.5051357746124268, "log_odds_ratio": -0.3553612530231476, "logps/chosen": -1.289097785949707, "logps/rejected": -4.496476650238037, "loss": 13.3992, "nll_loss": 1.4740468263626099, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.6445488929748535, "rewards/margins": 1.603689193725586, "rewards/rejected": -2.2482383251190186, "step": 190 }, { "epoch": 1.8488888888888888, "grad_norm": 440.0, "learning_rate": 1.9089682321121834e-05, "log_odds_chosen": 4.617656230926514, "log_odds_ratio": -0.3456394374370575, "logps/chosen": -1.4975018501281738, "logps/rejected": -5.828963279724121, "loss": 13.1488, "nll_loss": 1.6538751125335693, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.7487509250640869, "rewards/margins": 2.1657304763793945, "rewards/rejected": -2.9144816398620605, "step": 195 }, { "epoch": 1.8962962962962964, "grad_norm": 516.0, "learning_rate": 1.775118274523545e-05, "log_odds_chosen": 4.322084426879883, "log_odds_ratio": -0.5411938428878784, "logps/chosen": -1.8952579498291016, "logps/rejected": -5.973208904266357, "loss": 21.0282, "nll_loss": 1.7580324411392212, "rewards/accuracies": 0.809374988079071, "rewards/chosen": -0.9476289749145508, "rewards/margins": 2.038975477218628, "rewards/rejected": -2.9866044521331787, "step": 200 }, { "epoch": 1.9437037037037037, "grad_norm": 504.0, "learning_rate": 1.643500976631037e-05, "log_odds_chosen": 3.8178462982177734, "log_odds_ratio": -0.41045910120010376, "logps/chosen": -1.422102451324463, "logps/rejected": -4.953678131103516, "loss": 16.8609, "nll_loss": 1.5927026271820068, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7110512256622314, "rewards/margins": 1.7657878398895264, "rewards/rejected": -2.476839065551758, "step": 205 }, { "epoch": 1.991111111111111, "grad_norm": 450.0, "learning_rate": 1.514521724066537e-05, "log_odds_chosen": 3.2968242168426514, "log_odds_ratio": -0.5133435130119324, "logps/chosen": -1.5117247104644775, "logps/rejected": -4.539895057678223, "loss": 19.1746, "nll_loss": 1.5904091596603394, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7558623552322388, "rewards/margins": 1.514085292816162, "rewards/rejected": -2.2699475288391113, "step": 210 }, { "epoch": 2.0385185185185186, "grad_norm": 258.0, "learning_rate": 1.3885777771950348e-05, "log_odds_chosen": 4.693060398101807, "log_odds_ratio": -0.3281119465827942, "logps/chosen": -1.2254581451416016, "logps/rejected": -5.486399173736572, "loss": 2.4338, "nll_loss": 1.4177120923995972, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.6127290725708008, "rewards/margins": 2.1304705142974854, "rewards/rejected": -2.743199586868286, "step": 215 }, { "epoch": 2.0859259259259257, "grad_norm": 696.0, "learning_rate": 1.2660570475395683e-05, "log_odds_chosen": 5.181081295013428, "log_odds_ratio": -0.31730157136917114, "logps/chosen": -1.451982855796814, "logps/rejected": -6.270176887512207, "loss": 2.9052, "nll_loss": 1.5581401586532593, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.725991427898407, "rewards/margins": 2.4090969562530518, "rewards/rejected": -3.1350884437561035, "step": 220 }, { "epoch": 2.1333333333333333, "grad_norm": 294.0, "learning_rate": 1.1473369030008974e-05, "log_odds_chosen": 6.072526454925537, "log_odds_ratio": -0.17935991287231445, "logps/chosen": -1.4028112888336182, "logps/rejected": -7.085482120513916, "loss": -6.7103, "nll_loss": 1.5210641622543335, "rewards/accuracies": 0.9156249761581421, "rewards/chosen": -0.7014056444168091, "rewards/margins": 2.8413350582122803, "rewards/rejected": -3.542741060256958, "step": 225 }, { "epoch": 2.180740740740741, "grad_norm": 452.0, "learning_rate": 1.0327830055518842e-05, "log_odds_chosen": 5.810656547546387, "log_odds_ratio": -0.17343996465206146, "logps/chosen": -1.257643222808838, "logps/rejected": -6.6586809158325195, "loss": -6.8616, "nll_loss": 1.463210105895996, "rewards/accuracies": 0.9156249761581421, "rewards/chosen": -0.628821611404419, "rewards/margins": 2.70051908493042, "rewards/rejected": -3.3293404579162598, "step": 230 }, { "epoch": 2.228148148148148, "grad_norm": 348.0, "learning_rate": 9.227481849865235e-06, "log_odds_chosen": 6.33745813369751, "log_odds_ratio": -0.2248232066631317, "logps/chosen": -1.2892390489578247, "logps/rejected": -7.214311122894287, "loss": -7.1128, "nll_loss": 1.427039384841919, "rewards/accuracies": 0.9156249761581421, "rewards/chosen": -0.6446195244789124, "rewards/margins": 2.962535858154297, "rewards/rejected": -3.6071555614471436, "step": 235 }, { "epoch": 2.2755555555555556, "grad_norm": 1248.0, "learning_rate": 8.175713521924978e-06, "log_odds_chosen": 7.022365570068359, "log_odds_ratio": -0.15484580397605896, "logps/chosen": -1.3367576599121094, "logps/rejected": -7.955646514892578, "loss": -10.1801, "nll_loss": 1.4348043203353882, "rewards/accuracies": 0.934374988079071, "rewards/chosen": -0.6683788299560547, "rewards/margins": 3.3094444274902344, "rewards/rejected": -3.977823257446289, "step": 240 }, { "epoch": 2.322962962962963, "grad_norm": 640.0, "learning_rate": 7.1757645529443665e-06, "log_odds_chosen": 7.711653709411621, "log_odds_ratio": -0.1372586041688919, "logps/chosen": -1.3331178426742554, "logps/rejected": -8.624441146850586, "loss": -11.5177, "nll_loss": 1.4856178760528564, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.6665589213371277, "rewards/margins": 3.6456611156463623, "rewards/rejected": -4.312220573425293, "step": 245 }, { "epoch": 2.3703703703703702, "grad_norm": 696.0, "learning_rate": 6.230714818829733e-06, "log_odds_chosen": 7.437263488769531, "log_odds_ratio": -0.1762746274471283, "logps/chosen": -1.2413113117218018, "logps/rejected": -8.23326301574707, "loss": -7.4037, "nll_loss": 1.4623820781707764, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.6206556558609009, "rewards/margins": 3.495976686477661, "rewards/rejected": -4.116631507873535, "step": 250 }, { "epoch": 2.417777777777778, "grad_norm": 672.0, "learning_rate": 5.343475104027743e-06, "log_odds_chosen": 7.334293365478516, "log_odds_ratio": -0.1758766919374466, "logps/chosen": -1.3234690427780151, "logps/rejected": -8.235211372375488, "loss": -9.7904, "nll_loss": 1.4475064277648926, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.6617345213890076, "rewards/margins": 3.4558708667755127, "rewards/rejected": -4.117605686187744, "step": 255 }, { "epoch": 2.4651851851851854, "grad_norm": 624.0, "learning_rate": 4.516778136213037e-06, "log_odds_chosen": 7.498259544372559, "log_odds_ratio": -0.2119707614183426, "logps/chosen": -1.427954912185669, "logps/rejected": -8.50899600982666, "loss": -4.1434, "nll_loss": 1.576634407043457, "rewards/accuracies": 0.9281250238418579, "rewards/chosen": -0.7139774560928345, "rewards/margins": 3.540520191192627, "rewards/rejected": -4.25449800491333, "step": 260 }, { "epoch": 2.5125925925925925, "grad_norm": 572.0, "learning_rate": 3.7531701693965554e-06, "log_odds_chosen": 7.056248664855957, "log_odds_ratio": -0.2280276119709015, "logps/chosen": -1.4004108905792236, "logps/rejected": -8.065291404724121, "loss": -7.3232, "nll_loss": 1.5201685428619385, "rewards/accuracies": 0.9156249761581421, "rewards/chosen": -0.7002054452896118, "rewards/margins": 3.332440137863159, "rewards/rejected": -4.0326457023620605, "step": 265 }, { "epoch": 2.56, "grad_norm": 366.0, "learning_rate": 3.055003141378948e-06, "log_odds_chosen": 7.216971397399902, "log_odds_ratio": -0.21173417568206787, "logps/chosen": -1.2760790586471558, "logps/rejected": -8.050346374511719, "loss": -8.7459, "nll_loss": 1.481745958328247, "rewards/accuracies": 0.90625, "rewards/chosen": -0.6380395293235779, "rewards/margins": 3.3871333599090576, "rewards/rejected": -4.025173187255859, "step": 270 }, { "epoch": 2.6074074074074076, "grad_norm": 330.0, "learning_rate": 2.424427429704365e-06, "log_odds_chosen": 6.806351661682129, "log_odds_ratio": -0.17685401439666748, "logps/chosen": -1.2406129837036133, "logps/rejected": -7.61539363861084, "loss": -8.063, "nll_loss": 1.4348475933074951, "rewards/accuracies": 0.934374988079071, "rewards/chosen": -0.6203064918518066, "rewards/margins": 3.187389850616455, "rewards/rejected": -3.80769681930542, "step": 275 }, { "epoch": 2.6548148148148147, "grad_norm": 800.0, "learning_rate": 1.8633852284264508e-06, "log_odds_chosen": 6.517434120178223, "log_odds_ratio": -0.24552707374095917, "logps/chosen": -1.3278234004974365, "logps/rejected": -7.444277286529541, "loss": -4.5078, "nll_loss": 1.491350769996643, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.6639117002487183, "rewards/margins": 3.0582268238067627, "rewards/rejected": -3.7221386432647705, "step": 280 }, { "epoch": 2.7022222222222223, "grad_norm": 520.0, "learning_rate": 1.3736045660864034e-06, "log_odds_chosen": 6.652522087097168, "log_odds_ratio": -0.22169987857341766, "logps/chosen": -1.27732515335083, "logps/rejected": -7.5334601402282715, "loss": -5.8514, "nll_loss": 1.4527100324630737, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.638662576675415, "rewards/margins": 3.1280672550201416, "rewards/rejected": -3.7667300701141357, "step": 285 }, { "epoch": 2.74962962962963, "grad_norm": 470.0, "learning_rate": 9.565939833279192e-07, "log_odds_chosen": 6.609714508056641, "log_odds_ratio": -0.1993839591741562, "logps/chosen": -1.4217256307601929, "logps/rejected": -7.654360294342041, "loss": -3.682, "nll_loss": 1.5446010828018188, "rewards/accuracies": 0.921875, "rewards/chosen": -0.7108628153800964, "rewards/margins": 3.1163172721862793, "rewards/rejected": -3.8271801471710205, "step": 290 }, { "epoch": 2.797037037037037, "grad_norm": 418.0, "learning_rate": 6.136378865420872e-07, "log_odds_chosen": 6.339820861816406, "log_odds_ratio": -0.2236245572566986, "logps/chosen": -1.2482751607894897, "logps/rejected": -7.204880714416504, "loss": -6.4554, "nll_loss": 1.4326238632202148, "rewards/accuracies": 0.8843749761581421, "rewards/chosen": -0.6241375803947449, "rewards/margins": 2.9783027172088623, "rewards/rejected": -3.602440357208252, "step": 295 }, { "epoch": 2.8444444444444446, "grad_norm": 478.0, "learning_rate": 3.45792591853214e-07, "log_odds_chosen": 7.071162223815918, "log_odds_ratio": -0.20901203155517578, "logps/chosen": -1.270916223526001, "logps/rejected": -7.936681270599365, "loss": -6.6607, "nll_loss": 1.460141897201538, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.6354581117630005, "rewards/margins": 3.3328824043273926, "rewards/rejected": -3.9683406352996826, "step": 300 }, { "epoch": 2.891851851851852, "grad_norm": 410.0, "learning_rate": 1.538830716302092e-07, "log_odds_chosen": 6.791069030761719, "log_odds_ratio": -0.19654326140880585, "logps/chosen": -1.2482593059539795, "logps/rejected": -7.608553886413574, "loss": -7.3947, "nll_loss": 1.447270154953003, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.6241296529769897, "rewards/margins": 3.180147647857666, "rewards/rejected": -3.804276943206787, "step": 305 }, { "epoch": 2.9392592592592592, "grad_norm": 540.0, "learning_rate": 3.8500413544415025e-08, "log_odds_chosen": 7.128172874450684, "log_odds_ratio": -0.1423414945602417, "logps/chosen": -1.2163145542144775, "logps/rejected": -7.905344486236572, "loss": -10.5115, "nll_loss": 1.436652421951294, "rewards/accuracies": 0.953125, "rewards/chosen": -0.6081572771072388, "rewards/margins": 3.344515323638916, "rewards/rejected": -3.952672243118286, "step": 310 }, { "epoch": 2.986666666666667, "grad_norm": 494.0, "learning_rate": 0.0, "log_odds_chosen": 6.501699924468994, "log_odds_ratio": -0.21002642810344696, "logps/chosen": -1.2352025508880615, "logps/rejected": -7.282286167144775, "loss": -7.439, "nll_loss": 1.396925687789917, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.6176012754440308, "rewards/margins": 3.0235419273376465, "rewards/rejected": -3.6411430835723877, "step": 315 }, { "epoch": 2.986666666666667, "step": 315, "total_flos": 0.0, "train_loss": 44.452726506430004, "train_runtime": 7306.7465, "train_samples_per_second": 2.771, "train_steps_per_second": 0.043 } ], "logging_steps": 5, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }