uf-mistral-it-sft-iopo-iter1 / trainer_state.json
nlee-208's picture
Model save
56a4e57 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997038791827065,
"eval_steps": 500,
"global_step": 1688,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005922416345869114,
"grad_norm": 25.375,
"learning_rate": 2.9585798816568044e-08,
"log_odds_chosen": -0.4997142255306244,
"log_odds_ratio": -1.0621646642684937,
"logits/chosen": -2.2295050621032715,
"logits/rejected": -2.215860366821289,
"logps/chosen": -0.7159513235092163,
"logps/rejected": -0.47170203924179077,
"loss": 1.2686,
"nll_loss": 1.285839319229126,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 10
},
{
"epoch": 0.011844832691738229,
"grad_norm": 25.25,
"learning_rate": 5.917159763313609e-08,
"log_odds_chosen": -0.6078722476959229,
"log_odds_ratio": -1.1548207998275757,
"logits/chosen": -2.1872293949127197,
"logits/rejected": -2.1639022827148438,
"logps/chosen": -0.8250460624694824,
"logps/rejected": -0.4715689718723297,
"loss": 1.2301,
"nll_loss": 1.2283066511154175,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 20
},
{
"epoch": 0.017767249037607343,
"grad_norm": 27.125,
"learning_rate": 8.875739644970414e-08,
"log_odds_chosen": -0.5964034199714661,
"log_odds_ratio": -1.1720728874206543,
"logits/chosen": -2.155057191848755,
"logits/rejected": -2.146630048751831,
"logps/chosen": -0.8543933033943176,
"logps/rejected": -0.4923427104949951,
"loss": 1.2398,
"nll_loss": 1.313323736190796,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 30
},
{
"epoch": 0.023689665383476458,
"grad_norm": 28.25,
"learning_rate": 1.1834319526627217e-07,
"log_odds_chosen": -0.5351605415344238,
"log_odds_ratio": -1.0927046537399292,
"logits/chosen": -2.2190463542938232,
"logits/rejected": -2.206223964691162,
"logps/chosen": -0.7575310468673706,
"logps/rejected": -0.4635254740715027,
"loss": 1.2735,
"nll_loss": 1.2356092929840088,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 40
},
{
"epoch": 0.029612081729345572,
"grad_norm": 26.0,
"learning_rate": 1.4792899408284022e-07,
"log_odds_chosen": -0.4550475478172302,
"log_odds_ratio": -1.0262255668640137,
"logits/chosen": -2.163825750350952,
"logits/rejected": -2.148223400115967,
"logps/chosen": -0.7005314826965332,
"logps/rejected": -0.47106480598449707,
"loss": 1.2103,
"nll_loss": 1.2403192520141602,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 50
},
{
"epoch": 0.035534498075214686,
"grad_norm": 21.375,
"learning_rate": 1.7751479289940827e-07,
"log_odds_chosen": -0.6598173975944519,
"log_odds_ratio": -1.2315865755081177,
"logits/chosen": -2.2192461490631104,
"logits/rejected": -2.1879701614379883,
"logps/chosen": -0.8897407650947571,
"logps/rejected": -0.4609861969947815,
"loss": 1.2416,
"nll_loss": 1.2300336360931396,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 60
},
{
"epoch": 0.041456914421083804,
"grad_norm": 27.375,
"learning_rate": 2.0710059171597633e-07,
"log_odds_chosen": -0.5370969772338867,
"log_odds_ratio": -1.1101651191711426,
"logits/chosen": -2.233755588531494,
"logits/rejected": -2.201343297958374,
"logps/chosen": -0.7967244386672974,
"logps/rejected": -0.4630069136619568,
"loss": 1.2546,
"nll_loss": 1.2548679113388062,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 70
},
{
"epoch": 0.047379330766952915,
"grad_norm": 26.625,
"learning_rate": 2.3668639053254435e-07,
"log_odds_chosen": -0.5750253796577454,
"log_odds_ratio": -1.1556330919265747,
"logits/chosen": -2.19846773147583,
"logits/rejected": -2.187711715698242,
"logps/chosen": -0.7946293950080872,
"logps/rejected": -0.4594718813896179,
"loss": 1.2238,
"nll_loss": 1.2166999578475952,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 80
},
{
"epoch": 0.05330174711282203,
"grad_norm": 22.375,
"learning_rate": 2.662721893491124e-07,
"log_odds_chosen": -0.502492368221283,
"log_odds_ratio": -1.0737704038619995,
"logits/chosen": -2.18656063079834,
"logits/rejected": -2.1636054515838623,
"logps/chosen": -0.7198958992958069,
"logps/rejected": -0.4653542935848236,
"loss": 1.1987,
"nll_loss": 1.2203375101089478,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 90
},
{
"epoch": 0.059224163458691144,
"grad_norm": 22.375,
"learning_rate": 2.9585798816568045e-07,
"log_odds_chosen": -0.41660839319229126,
"log_odds_ratio": -0.9962056279182434,
"logits/chosen": -2.247572422027588,
"logits/rejected": -2.2023332118988037,
"logps/chosen": -0.6892199516296387,
"logps/rejected": -0.4913715422153473,
"loss": 1.1737,
"nll_loss": 1.2142550945281982,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 100
},
{
"epoch": 0.06514657980456026,
"grad_norm": 17.25,
"learning_rate": 3.254437869822485e-07,
"log_odds_chosen": -0.4817970395088196,
"log_odds_ratio": -1.0484726428985596,
"logits/chosen": -2.1959776878356934,
"logits/rejected": -2.172440767288208,
"logps/chosen": -0.7387205958366394,
"logps/rejected": -0.47952842712402344,
"loss": 1.1196,
"nll_loss": 1.1109485626220703,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 110
},
{
"epoch": 0.07106899615042937,
"grad_norm": 17.75,
"learning_rate": 3.5502958579881655e-07,
"log_odds_chosen": -0.5072614550590515,
"log_odds_ratio": -1.073188066482544,
"logits/chosen": -2.2234084606170654,
"logits/rejected": -2.212110996246338,
"logps/chosen": -0.7518635988235474,
"logps/rejected": -0.4725222587585449,
"loss": 1.1538,
"nll_loss": 1.1456319093704224,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 120
},
{
"epoch": 0.07699141249629848,
"grad_norm": 22.875,
"learning_rate": 3.8461538461538463e-07,
"log_odds_chosen": -0.6446342468261719,
"log_odds_ratio": -1.2178680896759033,
"logits/chosen": -2.2080233097076416,
"logits/rejected": -2.1998672485351562,
"logps/chosen": -0.8730036020278931,
"logps/rejected": -0.4481457769870758,
"loss": 1.1644,
"nll_loss": 1.1509124040603638,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 130
},
{
"epoch": 0.08291382884216761,
"grad_norm": 16.25,
"learning_rate": 4.1420118343195265e-07,
"log_odds_chosen": -0.45662721991539,
"log_odds_ratio": -1.0147430896759033,
"logits/chosen": -2.237990617752075,
"logits/rejected": -2.2128589153289795,
"logps/chosen": -0.6646671295166016,
"logps/rejected": -0.44373393058776855,
"loss": 1.0906,
"nll_loss": 1.0673267841339111,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 140
},
{
"epoch": 0.08883624518803672,
"grad_norm": 26.5,
"learning_rate": 4.437869822485207e-07,
"log_odds_chosen": -0.46678367257118225,
"log_odds_ratio": -1.0147194862365723,
"logits/chosen": -2.167670488357544,
"logits/rejected": -2.1592793464660645,
"logps/chosen": -0.7025789618492126,
"logps/rejected": -0.47203493118286133,
"loss": 1.1008,
"nll_loss": 1.1650502681732178,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 150
},
{
"epoch": 0.09475866153390583,
"grad_norm": 12.0625,
"learning_rate": 4.733727810650887e-07,
"log_odds_chosen": -0.3221941888332367,
"log_odds_ratio": -0.9352226257324219,
"logits/chosen": -2.247824192047119,
"logits/rejected": -2.2287344932556152,
"logps/chosen": -0.6016725301742554,
"logps/rejected": -0.4531864523887634,
"loss": 1.0947,
"nll_loss": 1.0781590938568115,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 160
},
{
"epoch": 0.10068107787977496,
"grad_norm": 11.5625,
"learning_rate": 4.999994653198566e-07,
"log_odds_chosen": -0.4564700722694397,
"log_odds_ratio": -1.0602452754974365,
"logits/chosen": -2.2789835929870605,
"logits/rejected": -2.2523741722106934,
"logps/chosen": -0.7461049556732178,
"logps/rejected": -0.48730534315109253,
"loss": 1.1159,
"nll_loss": 1.0658115148544312,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 170
},
{
"epoch": 0.10660349422564407,
"grad_norm": 8.9375,
"learning_rate": 4.999353064699471e-07,
"log_odds_chosen": -0.5452951192855835,
"log_odds_ratio": -1.1454532146453857,
"logits/chosen": -2.237121820449829,
"logits/rejected": -2.202718496322632,
"logps/chosen": -0.827674388885498,
"logps/rejected": -0.49005183577537537,
"loss": 0.9748,
"nll_loss": 1.0014435052871704,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 180
},
{
"epoch": 0.11252591057151318,
"grad_norm": 10.375,
"learning_rate": 4.99764243036258e-07,
"log_odds_chosen": -0.4207037091255188,
"log_odds_ratio": -0.9974331855773926,
"logits/chosen": -2.27175235748291,
"logits/rejected": -2.242116689682007,
"logps/chosen": -0.6407202482223511,
"logps/rejected": -0.4429788589477539,
"loss": 1.0095,
"nll_loss": 1.017865777015686,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 190
},
{
"epoch": 0.11844832691738229,
"grad_norm": 9.5,
"learning_rate": 4.994863481875841e-07,
"log_odds_chosen": -0.4031923711299896,
"log_odds_ratio": -0.973800003528595,
"logits/chosen": -2.221717119216919,
"logits/rejected": -2.18719482421875,
"logps/chosen": -0.6306296586990356,
"logps/rejected": -0.43233147263526917,
"loss": 1.0045,
"nll_loss": 0.9697571992874146,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 200
},
{
"epoch": 0.12437074326325141,
"grad_norm": 9.8125,
"learning_rate": 4.991017407876165e-07,
"log_odds_chosen": -0.4411424994468689,
"log_odds_ratio": -1.0120642185211182,
"logits/chosen": -2.238583564758301,
"logits/rejected": -2.1919620037078857,
"logps/chosen": -0.7006498575210571,
"logps/rejected": -0.4852658808231354,
"loss": 0.9832,
"nll_loss": 1.0057976245880127,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 210
},
{
"epoch": 0.13029315960912052,
"grad_norm": 9.0,
"learning_rate": 4.98610585344102e-07,
"log_odds_chosen": -0.25588923692703247,
"log_odds_ratio": -0.9158498048782349,
"logits/chosen": -2.258283853530884,
"logits/rejected": -2.2223126888275146,
"logps/chosen": -0.5977104306221008,
"logps/rejected": -0.4761990010738373,
"loss": 1.02,
"nll_loss": 1.0466753244400024,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 220
},
{
"epoch": 0.13621557595498965,
"grad_norm": 8.4375,
"learning_rate": 4.980130919384768e-07,
"log_odds_chosen": -0.5824810266494751,
"log_odds_ratio": -1.1220190525054932,
"logits/chosen": -2.2531580924987793,
"logits/rejected": -2.2409615516662598,
"logps/chosen": -0.7504315972328186,
"logps/rejected": -0.43684881925582886,
"loss": 1.0183,
"nll_loss": 1.0061722993850708,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 230
},
{
"epoch": 0.14213799230085875,
"grad_norm": 8.6875,
"learning_rate": 4.973095161360105e-07,
"log_odds_chosen": -0.44555410742759705,
"log_odds_ratio": -1.0208032131195068,
"logits/chosen": -2.2470836639404297,
"logits/rejected": -2.214434862136841,
"logps/chosen": -0.6731461882591248,
"logps/rejected": -0.4670758843421936,
"loss": 1.0354,
"nll_loss": 1.0512316226959229,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 240
},
{
"epoch": 0.14806040864672787,
"grad_norm": 8.875,
"learning_rate": 4.965001588764913e-07,
"log_odds_chosen": -0.4621347486972809,
"log_odds_ratio": -1.0333962440490723,
"logits/chosen": -2.274649143218994,
"logits/rejected": -2.241596221923828,
"logps/chosen": -0.6809024214744568,
"logps/rejected": -0.4347941279411316,
"loss": 1.0076,
"nll_loss": 1.007010817527771,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 250
},
{
"epoch": 0.15398282499259697,
"grad_norm": 8.8125,
"learning_rate": 4.955853663455072e-07,
"log_odds_chosen": -0.3350891172885895,
"log_odds_ratio": -0.9613872766494751,
"logits/chosen": -2.260413885116577,
"logits/rejected": -2.2278614044189453,
"logps/chosen": -0.6426165699958801,
"logps/rejected": -0.45530933141708374,
"loss": 0.9607,
"nll_loss": 0.9523956179618835,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 260
},
{
"epoch": 0.1599052413384661,
"grad_norm": 9.125,
"learning_rate": 4.945655298263713e-07,
"log_odds_chosen": -0.4467865824699402,
"log_odds_ratio": -1.0078147649765015,
"logits/chosen": -2.2099037170410156,
"logits/rejected": -2.183701992034912,
"logps/chosen": -0.6576748490333557,
"logps/rejected": -0.4429934620857239,
"loss": 1.0429,
"nll_loss": 1.075627326965332,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 270
},
{
"epoch": 0.16582765768433522,
"grad_norm": 7.96875,
"learning_rate": 4.934410855327585e-07,
"log_odds_chosen": -0.38402479887008667,
"log_odds_ratio": -0.9679163098335266,
"logits/chosen": -2.292367458343506,
"logits/rejected": -2.2721431255340576,
"logps/chosen": -0.6379308104515076,
"logps/rejected": -0.442401647567749,
"loss": 0.9621,
"nll_loss": 1.016234278678894,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 280
},
{
"epoch": 0.1717500740302043,
"grad_norm": 9.0,
"learning_rate": 4.922125144221252e-07,
"log_odds_chosen": -0.4171718955039978,
"log_odds_ratio": -0.9991844296455383,
"logits/chosen": -2.259284257888794,
"logits/rejected": -2.205514430999756,
"logps/chosen": -0.6329622268676758,
"logps/rejected": -0.43889325857162476,
"loss": 1.014,
"nll_loss": 1.0359452962875366,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 290
},
{
"epoch": 0.17767249037607344,
"grad_norm": 8.125,
"learning_rate": 4.90880341989989e-07,
"log_odds_chosen": -0.33935636281967163,
"log_odds_ratio": -0.9420417547225952,
"logits/chosen": -2.2610156536102295,
"logits/rejected": -2.2359061241149902,
"logps/chosen": -0.6213563084602356,
"logps/rejected": -0.44430437684059143,
"loss": 0.9833,
"nll_loss": 0.9867600202560425,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 300
},
{
"epoch": 0.18359490672194256,
"grad_norm": 8.375,
"learning_rate": 4.894451380451589e-07,
"log_odds_chosen": -0.5468162298202515,
"log_odds_ratio": -1.0870132446289062,
"logits/chosen": -2.241508722305298,
"logits/rejected": -2.22690749168396,
"logps/chosen": -0.7115592360496521,
"logps/rejected": -0.43017569184303284,
"loss": 1.0006,
"nll_loss": 0.994620680809021,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 310
},
{
"epoch": 0.18951732306781166,
"grad_norm": 9.25,
"learning_rate": 4.879075164660124e-07,
"log_odds_chosen": -0.3401740491390228,
"log_odds_ratio": -0.9383065104484558,
"logits/chosen": -2.2438132762908936,
"logits/rejected": -2.209188938140869,
"logps/chosen": -0.5985551476478577,
"logps/rejected": -0.43559733033180237,
"loss": 0.94,
"nll_loss": 0.9133344888687134,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 320
},
{
"epoch": 0.19543973941368079,
"grad_norm": 10.5,
"learning_rate": 4.862681349379212e-07,
"log_odds_chosen": -0.3794914484024048,
"log_odds_ratio": -0.9728193283081055,
"logits/chosen": -2.2533066272735596,
"logits/rejected": -2.1980607509613037,
"logps/chosen": -0.6138342022895813,
"logps/rejected": -0.44097796082496643,
"loss": 1.0041,
"nll_loss": 1.0256803035736084,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 330
},
{
"epoch": 0.2013621557595499,
"grad_norm": 7.6875,
"learning_rate": 4.8452769467194e-07,
"log_odds_chosen": -0.40433868765830994,
"log_odds_ratio": -0.9825445413589478,
"logits/chosen": -2.2585511207580566,
"logits/rejected": -2.233630657196045,
"logps/chosen": -0.6160660982131958,
"logps/rejected": -0.4248103201389313,
"loss": 0.9778,
"nll_loss": 0.9514611959457397,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 340
},
{
"epoch": 0.207284572105419,
"grad_norm": 7.625,
"learning_rate": 4.82686940104879e-07,
"log_odds_chosen": -0.4215853214263916,
"log_odds_ratio": -1.01924729347229,
"logits/chosen": -2.30430269241333,
"logits/rejected": -2.272357702255249,
"logps/chosen": -0.645369291305542,
"logps/rejected": -0.4133967459201813,
"loss": 0.9287,
"nll_loss": 0.9160087704658508,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 350
},
{
"epoch": 0.21320698845128813,
"grad_norm": 8.125,
"learning_rate": 4.807466585808856e-07,
"log_odds_chosen": -0.3686332702636719,
"log_odds_ratio": -0.9627587199211121,
"logits/chosen": -2.282811403274536,
"logits/rejected": -2.2714035511016846,
"logps/chosen": -0.5806415677070618,
"logps/rejected": -0.4163896143436432,
"loss": 0.987,
"nll_loss": 0.9767228960990906,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 360
},
{
"epoch": 0.21912940479715723,
"grad_norm": 8.3125,
"learning_rate": 4.787076800146752e-07,
"log_odds_chosen": -0.34714585542678833,
"log_odds_ratio": -0.9853572845458984,
"logits/chosen": -2.2601521015167236,
"logits/rejected": -2.2084286212921143,
"logps/chosen": -0.6458638906478882,
"logps/rejected": -0.4349249005317688,
"loss": 0.908,
"nll_loss": 0.8895160555839539,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 370
},
{
"epoch": 0.22505182114302635,
"grad_norm": 8.1875,
"learning_rate": 4.765708765365526e-07,
"log_odds_chosen": -0.30534738302230835,
"log_odds_ratio": -0.9373781085014343,
"logits/chosen": -2.2653586864471436,
"logits/rejected": -2.254210948944092,
"logps/chosen": -0.576322615146637,
"logps/rejected": -0.4294815957546234,
"loss": 1.0005,
"nll_loss": 0.958886981010437,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 380
},
{
"epoch": 0.23097423748889548,
"grad_norm": 9.875,
"learning_rate": 4.7433716211937587e-07,
"log_odds_chosen": -0.5105515122413635,
"log_odds_ratio": -1.0566070079803467,
"logits/chosen": -2.328101396560669,
"logits/rejected": -2.302281141281128,
"logps/chosen": -0.6501199007034302,
"logps/rejected": -0.404310941696167,
"loss": 0.9396,
"nll_loss": 0.9967532157897949,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 390
},
{
"epoch": 0.23689665383476458,
"grad_norm": 7.8125,
"learning_rate": 4.720074921876245e-07,
"log_odds_chosen": -0.45067232847213745,
"log_odds_ratio": -1.0197547674179077,
"logits/chosen": -2.340407133102417,
"logits/rejected": -2.293402910232544,
"logps/chosen": -0.6130900382995605,
"logps/rejected": -0.41540417075157166,
"loss": 0.9442,
"nll_loss": 0.9423254132270813,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 400
},
{
"epoch": 0.2428190701806337,
"grad_norm": 8.0625,
"learning_rate": 4.6958286320873593e-07,
"log_odds_chosen": -0.43627676367759705,
"log_odds_ratio": -0.9863921403884888,
"logits/chosen": -2.2813560962677,
"logits/rejected": -2.275886058807373,
"logps/chosen": -0.6022886633872986,
"logps/rejected": -0.4021386504173279,
"loss": 0.9658,
"nll_loss": 0.9948114156723022,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 410
},
{
"epoch": 0.24874148652650283,
"grad_norm": 8.3125,
"learning_rate": 4.6706431226688804e-07,
"log_odds_chosen": -0.3637348413467407,
"log_odds_ratio": -0.9635465741157532,
"logits/chosen": -2.2663254737854004,
"logits/rejected": -2.2325570583343506,
"logps/chosen": -0.6079740524291992,
"logps/rejected": -0.42877498269081116,
"loss": 0.971,
"nll_loss": 0.9684462547302246,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 420
},
{
"epoch": 0.25466390287237195,
"grad_norm": 7.71875,
"learning_rate": 4.6445291661940777e-07,
"log_odds_chosen": -0.29998743534088135,
"log_odds_ratio": -0.9151178598403931,
"logits/chosen": -2.288652181625366,
"logits/rejected": -2.28438138961792,
"logps/chosen": -0.5727067589759827,
"logps/rejected": -0.43537649512290955,
"loss": 0.9344,
"nll_loss": 0.8895971179008484,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 430
},
{
"epoch": 0.26058631921824105,
"grad_norm": 9.6875,
"learning_rate": 4.6174979323599715e-07,
"log_odds_chosen": -0.5159381031990051,
"log_odds_ratio": -1.0749253034591675,
"logits/chosen": -2.2701315879821777,
"logits/rejected": -2.2190845012664795,
"logps/chosen": -0.7043232321739197,
"logps/rejected": -0.4317665696144104,
"loss": 0.9929,
"nll_loss": 1.0871878862380981,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 440
},
{
"epoch": 0.26650873556411014,
"grad_norm": 7.9375,
"learning_rate": 4.5895609832097277e-07,
"log_odds_chosen": -0.38775309920310974,
"log_odds_ratio": -1.0040466785430908,
"logits/chosen": -2.2794992923736572,
"logits/rejected": -2.2638792991638184,
"logps/chosen": -0.6565039157867432,
"logps/rejected": -0.43878334760665894,
"loss": 0.9716,
"nll_loss": 0.9555328488349915,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 450
},
{
"epoch": 0.2724311519099793,
"grad_norm": 8.25,
"learning_rate": 4.560730268187236e-07,
"log_odds_chosen": -0.3349025249481201,
"log_odds_ratio": -0.9378219842910767,
"logits/chosen": -2.282761812210083,
"logits/rejected": -2.244011878967285,
"logps/chosen": -0.5650533437728882,
"logps/rejected": -0.4169080853462219,
"loss": 0.9547,
"nll_loss": 0.9367356300354004,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 460
},
{
"epoch": 0.2783535682558484,
"grad_norm": 8.9375,
"learning_rate": 4.531018119025989e-07,
"log_odds_chosen": -0.24693968892097473,
"log_odds_ratio": -0.9230139851570129,
"logits/chosen": -2.338200807571411,
"logits/rejected": -2.3114407062530518,
"logps/chosen": -0.5866008996963501,
"logps/rejected": -0.498542845249176,
"loss": 0.9863,
"nll_loss": 1.0312178134918213,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 470
},
{
"epoch": 0.2842759846017175,
"grad_norm": 7.59375,
"learning_rate": 4.5004372444744376e-07,
"log_odds_chosen": -0.259705126285553,
"log_odds_ratio": -0.9033578634262085,
"logits/chosen": -2.281229257583618,
"logits/rejected": -2.259384870529175,
"logps/chosen": -0.6026913523674011,
"logps/rejected": -0.46954187750816345,
"loss": 0.9559,
"nll_loss": 0.9717810750007629,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 480
},
{
"epoch": 0.2901984009475866,
"grad_norm": 9.0,
"learning_rate": 4.4690007248600967e-07,
"log_odds_chosen": -0.3773840069770813,
"log_odds_ratio": -0.9825248718261719,
"logits/chosen": -2.2721426486968994,
"logits/rejected": -2.2558834552764893,
"logps/chosen": -0.629915714263916,
"logps/rejected": -0.43304410576820374,
"loss": 0.954,
"nll_loss": 0.9644275903701782,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 490
},
{
"epoch": 0.29612081729345574,
"grad_norm": 7.71875,
"learning_rate": 4.436722006494701e-07,
"log_odds_chosen": -0.5259193778038025,
"log_odds_ratio": -1.1190059185028076,
"logits/chosen": -2.266916275024414,
"logits/rejected": -2.243081569671631,
"logps/chosen": -0.7579408884048462,
"logps/rejected": -0.4302619397640228,
"loss": 0.9695,
"nll_loss": 0.9956067204475403,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 500
},
{
"epoch": 0.30204323363932484,
"grad_norm": 8.5,
"learning_rate": 4.4036148959228356e-07,
"log_odds_chosen": -0.4430968165397644,
"log_odds_ratio": -1.0375418663024902,
"logits/chosen": -2.300400733947754,
"logits/rejected": -2.2604432106018066,
"logps/chosen": -0.6584800481796265,
"logps/rejected": -0.4138873517513275,
"loss": 0.9756,
"nll_loss": 0.9368442296981812,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 510
},
{
"epoch": 0.30796564998519393,
"grad_norm": 9.4375,
"learning_rate": 4.3696935540164705e-07,
"log_odds_chosen": -0.3859555423259735,
"log_odds_ratio": -0.9752845764160156,
"logits/chosen": -2.2633957862854004,
"logits/rejected": -2.2417874336242676,
"logps/chosen": -0.6037057638168335,
"logps/rejected": -0.41955527663230896,
"loss": 0.9235,
"nll_loss": 0.9441665410995483,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 520
},
{
"epoch": 0.3138880663310631,
"grad_norm": 7.71875,
"learning_rate": 4.334972489917947e-07,
"log_odds_chosen": -0.29654431343078613,
"log_odds_ratio": -0.9258224368095398,
"logits/chosen": -2.3264002799987793,
"logits/rejected": -2.269259214401245,
"logps/chosen": -0.5935055017471313,
"logps/rejected": -0.4429333806037903,
"loss": 0.9497,
"nll_loss": 0.9263819456100464,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 530
},
{
"epoch": 0.3198104826769322,
"grad_norm": 9.0,
"learning_rate": 4.299466554833997e-07,
"log_odds_chosen": -0.400839239358902,
"log_odds_ratio": -0.9843107461929321,
"logits/chosen": -2.30580472946167,
"logits/rejected": -2.256434440612793,
"logps/chosen": -0.5819273591041565,
"logps/rejected": -0.408183753490448,
"loss": 0.9515,
"nll_loss": 0.92247474193573,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 540
},
{
"epoch": 0.3257328990228013,
"grad_norm": 7.46875,
"learning_rate": 4.263190935683449e-07,
"log_odds_chosen": -0.32894009351730347,
"log_odds_ratio": -0.9418984651565552,
"logits/chosen": -2.282500743865967,
"logits/rejected": -2.24668025970459,
"logps/chosen": -0.5584912896156311,
"logps/rejected": -0.4048989713191986,
"loss": 0.8853,
"nll_loss": 0.8602296113967896,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 550
},
{
"epoch": 0.33165531536867043,
"grad_norm": 9.0,
"learning_rate": 4.2261611486013437e-07,
"log_odds_chosen": -0.39398467540740967,
"log_odds_ratio": -0.9864169955253601,
"logits/chosen": -2.3277463912963867,
"logits/rejected": -2.2908778190612793,
"logps/chosen": -0.618613064289093,
"logps/rejected": -0.4362561106681824,
"loss": 0.961,
"nll_loss": 0.9670404195785522,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 560
},
{
"epoch": 0.33757773171453953,
"grad_norm": 9.4375,
"learning_rate": 4.188393032302233e-07,
"log_odds_chosen": -0.2161109894514084,
"log_odds_ratio": -0.8888469934463501,
"logits/chosen": -2.266890048980713,
"logits/rejected": -2.2078969478607178,
"logps/chosen": -0.5593982934951782,
"logps/rejected": -0.48008909821510315,
"loss": 0.9285,
"nll_loss": 0.9204473495483398,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 570
},
{
"epoch": 0.3435001480604086,
"grad_norm": 10.75,
"learning_rate": 4.1499027413055e-07,
"log_odds_chosen": -0.41526442766189575,
"log_odds_ratio": -0.9975423812866211,
"logits/chosen": -2.2734172344207764,
"logits/rejected": -2.2457797527313232,
"logps/chosen": -0.6249933838844299,
"logps/rejected": -0.42492228746414185,
"loss": 0.9404,
"nll_loss": 0.9193958044052124,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 580
},
{
"epoch": 0.3494225644062778,
"grad_norm": 9.3125,
"learning_rate": 4.1107067390256056e-07,
"log_odds_chosen": -0.45963993668556213,
"log_odds_ratio": -1.0648995637893677,
"logits/chosen": -2.3240678310394287,
"logits/rejected": -2.2981557846069336,
"logps/chosen": -0.723495364189148,
"logps/rejected": -0.454792320728302,
"loss": 0.9656,
"nll_loss": 1.0240063667297363,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 590
},
{
"epoch": 0.3553449807521469,
"grad_norm": 9.875,
"learning_rate": 4.0708217907302047e-07,
"log_odds_chosen": -0.4009949564933777,
"log_odds_ratio": -0.9855114221572876,
"logits/chosen": -2.2710177898406982,
"logits/rejected": -2.237403392791748,
"logps/chosen": -0.6120108366012573,
"logps/rejected": -0.43240681290626526,
"loss": 0.9575,
"nll_loss": 0.9712766408920288,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 600
},
{
"epoch": 0.361267397098016,
"grad_norm": 12.0,
"learning_rate": 4.030264956369157e-07,
"log_odds_chosen": -0.39438915252685547,
"log_odds_ratio": -0.9749253988265991,
"logits/chosen": -2.31217098236084,
"logits/rejected": -2.273338794708252,
"logps/chosen": -0.5822636485099792,
"logps/rejected": -0.4064372181892395,
"loss": 0.9477,
"nll_loss": 0.9778239130973816,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 610
},
{
"epoch": 0.3671898134438851,
"grad_norm": 9.375,
"learning_rate": 3.989053583277492e-07,
"log_odds_chosen": -0.5915114879608154,
"log_odds_ratio": -1.1409562826156616,
"logits/chosen": -2.3212368488311768,
"logits/rejected": -2.3024001121520996,
"logps/chosen": -0.7573744654655457,
"logps/rejected": -0.4205297827720642,
"loss": 0.9491,
"nll_loss": 0.9616823196411133,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 620
},
{
"epoch": 0.3731122297897542,
"grad_norm": 8.125,
"learning_rate": 3.947205298755447e-07,
"log_odds_chosen": -0.32023632526397705,
"log_odds_ratio": -0.9460951685905457,
"logits/chosen": -2.2791507244110107,
"logits/rejected": -2.2480525970458984,
"logps/chosen": -0.6137298345565796,
"logps/rejected": -0.4523869454860687,
"loss": 0.9577,
"nll_loss": 0.9420009851455688,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 630
},
{
"epoch": 0.3790346461356233,
"grad_norm": 8.3125,
"learning_rate": 3.9047380025287634e-07,
"log_odds_chosen": -0.31926944851875305,
"log_odds_ratio": -0.9340398907661438,
"logits/chosen": -2.288464069366455,
"logits/rejected": -2.257875442504883,
"logps/chosen": -0.5796951055526733,
"logps/rejected": -0.43441399931907654,
"loss": 0.9511,
"nll_loss": 0.935884952545166,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 640
},
{
"epoch": 0.3849570624814925,
"grad_norm": 10.125,
"learning_rate": 3.8616698590924523e-07,
"log_odds_chosen": -0.3541373610496521,
"log_odds_ratio": -0.9547072649002075,
"logits/chosen": -2.3075475692749023,
"logits/rejected": -2.261488437652588,
"logps/chosen": -0.6222845315933228,
"logps/rejected": -0.4457763135433197,
"loss": 0.9292,
"nll_loss": 0.941017746925354,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 650
},
{
"epoch": 0.39087947882736157,
"grad_norm": 8.6875,
"learning_rate": 3.8180192899413123e-07,
"log_odds_chosen": -0.373871773481369,
"log_odds_ratio": -0.963890552520752,
"logits/chosen": -2.3060686588287354,
"logits/rejected": -2.2961385250091553,
"logps/chosen": -0.5887154936790466,
"logps/rejected": -0.41268324851989746,
"loss": 0.9644,
"nll_loss": 0.9328317642211914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 660
},
{
"epoch": 0.39680189517323067,
"grad_norm": 8.1875,
"learning_rate": 3.7738049656905225e-07,
"log_odds_chosen": -0.3005954623222351,
"log_odds_ratio": -0.9146180152893066,
"logits/chosen": -2.241210460662842,
"logits/rejected": -2.197197437286377,
"logps/chosen": -0.5695523023605347,
"logps/rejected": -0.4374919533729553,
"loss": 0.9635,
"nll_loss": 0.9454113841056824,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 670
},
{
"epoch": 0.4027243115190998,
"grad_norm": 8.25,
"learning_rate": 3.7290457980896787e-07,
"log_odds_chosen": -0.2508184611797333,
"log_odds_ratio": -0.8950401544570923,
"logits/chosen": -2.310917377471924,
"logits/rejected": -2.2810654640197754,
"logps/chosen": -0.5575405955314636,
"logps/rejected": -0.44296175241470337,
"loss": 0.9245,
"nll_loss": 0.9060578346252441,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 680
},
{
"epoch": 0.4086467278649689,
"grad_norm": 8.375,
"learning_rate": 3.68376093193369e-07,
"log_odds_chosen": -0.35061341524124146,
"log_odds_ratio": -0.9449998140335083,
"logits/chosen": -2.3210480213165283,
"logits/rejected": -2.281230926513672,
"logps/chosen": -0.5540003776550293,
"logps/rejected": -0.4034114480018616,
"loss": 0.9207,
"nll_loss": 0.9037810564041138,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 690
},
{
"epoch": 0.414569144210838,
"grad_norm": 8.4375,
"learning_rate": 3.637969736873992e-07,
"log_odds_chosen": -0.29555535316467285,
"log_odds_ratio": -0.9367197155952454,
"logits/chosen": -2.2944698333740234,
"logits/rejected": -2.2611544132232666,
"logps/chosen": -0.5592113733291626,
"logps/rejected": -0.42597031593322754,
"loss": 0.9637,
"nll_loss": 0.9748933911323547,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 700
},
{
"epoch": 0.4204915605567071,
"grad_norm": 8.6875,
"learning_rate": 3.591691799133587e-07,
"log_odds_chosen": -0.27811819314956665,
"log_odds_ratio": -0.8972823023796082,
"logits/chosen": -2.3404221534729004,
"logits/rejected": -2.3104233741760254,
"logps/chosen": -0.5548882484436035,
"logps/rejected": -0.42241740226745605,
"loss": 0.9489,
"nll_loss": 0.9462203979492188,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 710
},
{
"epoch": 0.42641397690257626,
"grad_norm": 7.8125,
"learning_rate": 3.5449469131294476e-07,
"log_odds_chosen": -0.282146155834198,
"log_odds_ratio": -0.9153865575790405,
"logits/chosen": -2.3050596714019775,
"logits/rejected": -2.2582859992980957,
"logps/chosen": -0.5491407513618469,
"logps/rejected": -0.4175952970981598,
"loss": 0.9258,
"nll_loss": 0.9185633659362793,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 720
},
{
"epoch": 0.43233639324844536,
"grad_norm": 7.78125,
"learning_rate": 3.497755073005868e-07,
"log_odds_chosen": -0.17704807221889496,
"log_odds_ratio": -0.8550702333450317,
"logits/chosen": -2.304471254348755,
"logits/rejected": -2.2704811096191406,
"logps/chosen": -0.5407411456108093,
"logps/rejected": -0.43452388048171997,
"loss": 0.9148,
"nll_loss": 0.8776341676712036,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 730
},
{
"epoch": 0.43825880959431446,
"grad_norm": 14.0,
"learning_rate": 3.4501364640823926e-07,
"log_odds_chosen": -0.4160383343696594,
"log_odds_ratio": -0.9982725381851196,
"logits/chosen": -2.3177871704101562,
"logits/rejected": -2.291195869445801,
"logps/chosen": -0.6620553135871887,
"logps/rejected": -0.4509620666503906,
"loss": 0.9449,
"nll_loss": 0.9611420631408691,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 740
},
{
"epoch": 0.4441812259401836,
"grad_norm": 8.4375,
"learning_rate": 3.402111454219966e-07,
"log_odds_chosen": -0.2541792690753937,
"log_odds_ratio": -0.8975493311882019,
"logits/chosen": -2.3212180137634277,
"logits/rejected": -2.2709405422210693,
"logps/chosen": -0.5664907693862915,
"logps/rejected": -0.4353105127811432,
"loss": 0.9301,
"nll_loss": 0.9432824850082397,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 750
},
{
"epoch": 0.4501036422860527,
"grad_norm": 7.28125,
"learning_rate": 3.353700585109005e-07,
"log_odds_chosen": -0.2790587842464447,
"log_odds_ratio": -0.9118951559066772,
"logits/chosen": -2.3148138523101807,
"logits/rejected": -2.2849326133728027,
"logps/chosen": -0.5668213963508606,
"logps/rejected": -0.4337525963783264,
"loss": 0.9239,
"nll_loss": 0.9522818326950073,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 760
},
{
"epoch": 0.4560260586319218,
"grad_norm": 7.90625,
"learning_rate": 3.304924563483129e-07,
"log_odds_chosen": -0.31332454085350037,
"log_odds_ratio": -0.9554667472839355,
"logits/chosen": -2.329709053039551,
"logits/rejected": -2.3159825801849365,
"logps/chosen": -0.6328242421150208,
"logps/rejected": -0.4525510370731354,
"loss": 0.9725,
"nll_loss": 0.9982641935348511,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 770
},
{
"epoch": 0.46194847497779096,
"grad_norm": 8.6875,
"learning_rate": 3.255804252262283e-07,
"log_odds_chosen": -0.26954448223114014,
"log_odds_ratio": -0.9041155576705933,
"logits/chosen": -2.26902437210083,
"logits/rejected": -2.2395756244659424,
"logps/chosen": -0.5438047647476196,
"logps/rejected": -0.4186398386955261,
"loss": 0.9454,
"nll_loss": 0.9862927198410034,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 780
},
{
"epoch": 0.46787089132366005,
"grad_norm": 6.78125,
"learning_rate": 3.2063606616290626e-07,
"log_odds_chosen": -0.40437692403793335,
"log_odds_ratio": -0.991305947303772,
"logits/chosen": -2.2474241256713867,
"logits/rejected": -2.2076640129089355,
"logps/chosen": -0.5990616083145142,
"logps/rejected": -0.40946364402770996,
"loss": 0.8641,
"nll_loss": 0.827691376209259,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 790
},
{
"epoch": 0.47379330766952915,
"grad_norm": 12.25,
"learning_rate": 3.1566149400420523e-07,
"log_odds_chosen": -0.3424193859100342,
"log_odds_ratio": -0.9442498087882996,
"logits/chosen": -2.300968885421753,
"logits/rejected": -2.289825201034546,
"logps/chosen": -0.6068278551101685,
"logps/rejected": -0.4422214925289154,
"loss": 0.9572,
"nll_loss": 0.9411390423774719,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 800
},
{
"epoch": 0.4797157240153983,
"grad_norm": 8.75,
"learning_rate": 3.1065883651900087e-07,
"log_odds_chosen": -0.3020106852054596,
"log_odds_ratio": -0.9359525442123413,
"logits/chosen": -2.288480281829834,
"logits/rejected": -2.246896982192993,
"logps/chosen": -0.5823680758476257,
"logps/rejected": -0.44429031014442444,
"loss": 0.9677,
"nll_loss": 0.9093478918075562,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 810
},
{
"epoch": 0.4856381403612674,
"grad_norm": 8.0625,
"learning_rate": 3.056302334890786e-07,
"log_odds_chosen": -0.38523969054222107,
"log_odds_ratio": -0.9780759811401367,
"logits/chosen": -2.294841766357422,
"logits/rejected": -2.2723891735076904,
"logps/chosen": -0.6043334603309631,
"logps/rejected": -0.4185991883277893,
"loss": 0.9121,
"nll_loss": 0.9030720591545105,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 820
},
{
"epoch": 0.4915605567071365,
"grad_norm": 8.75,
"learning_rate": 3.0057783579388586e-07,
"log_odds_chosen": -0.24561011791229248,
"log_odds_ratio": -0.8836873173713684,
"logits/chosen": -2.2996482849121094,
"logits/rejected": -2.258457660675049,
"logps/chosen": -0.5478182435035706,
"logps/rejected": -0.440875768661499,
"loss": 0.928,
"nll_loss": 0.9274915456771851,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 830
},
{
"epoch": 0.49748297305300565,
"grad_norm": 7.8125,
"learning_rate": 2.9550380449053907e-07,
"log_odds_chosen": -0.26652732491493225,
"log_odds_ratio": -0.9008363485336304,
"logits/chosen": -2.2529563903808594,
"logits/rejected": -2.2309823036193848,
"logps/chosen": -0.5522275567054749,
"logps/rejected": -0.42188987135887146,
"loss": 0.914,
"nll_loss": 0.8120133280754089,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 840
},
{
"epoch": 0.5034053893988747,
"grad_norm": 6.4375,
"learning_rate": 2.904103098894767e-07,
"log_odds_chosen": -0.3553586006164551,
"log_odds_ratio": -0.9903032183647156,
"logits/chosen": -2.291224956512451,
"logits/rejected": -2.2469000816345215,
"logps/chosen": -0.6393681764602661,
"logps/rejected": -0.42175260186195374,
"loss": 0.9129,
"nll_loss": 0.9170019030570984,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 850
},
{
"epoch": 0.5093278057447439,
"grad_norm": 9.75,
"learning_rate": 2.852995306261545e-07,
"log_odds_chosen": -0.2889431416988373,
"log_odds_ratio": -0.9156063795089722,
"logits/chosen": -2.318115472793579,
"logits/rejected": -2.2808139324188232,
"logps/chosen": -0.5667640566825867,
"logps/rejected": -0.44533196091651917,
"loss": 0.9772,
"nll_loss": 0.9936600923538208,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 860
},
{
"epoch": 0.515250222090613,
"grad_norm": 8.75,
"learning_rate": 2.801736527291797e-07,
"log_odds_chosen": -0.3678986728191376,
"log_odds_ratio": -0.9755579233169556,
"logits/chosen": -2.2834322452545166,
"logits/rejected": -2.2398197650909424,
"logps/chosen": -0.6312032341957092,
"logps/rejected": -0.4318135380744934,
"loss": 0.9391,
"nll_loss": 0.8974191546440125,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 870
},
{
"epoch": 0.5211726384364821,
"grad_norm": 7.84375,
"learning_rate": 2.750348686852836e-07,
"log_odds_chosen": -0.40664905309677124,
"log_odds_ratio": -0.9781969785690308,
"logits/chosen": -2.3389241695404053,
"logits/rejected": -2.2737958431243896,
"logps/chosen": -0.6092024445533752,
"logps/rejected": -0.4250633120536804,
"loss": 0.971,
"nll_loss": 0.9957748651504517,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 880
},
{
"epoch": 0.5270950547823512,
"grad_norm": 9.5,
"learning_rate": 2.69885376501531e-07,
"log_odds_chosen": -0.31569716334342957,
"log_odds_ratio": -0.9389151334762573,
"logits/chosen": -2.2705588340759277,
"logits/rejected": -2.2545580863952637,
"logps/chosen": -0.6091697812080383,
"logps/rejected": -0.45014920830726624,
"loss": 0.9568,
"nll_loss": 0.9439749717712402,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 890
},
{
"epoch": 0.5330174711282203,
"grad_norm": 10.0625,
"learning_rate": 2.647273787651687e-07,
"log_odds_chosen": -0.27334731817245483,
"log_odds_ratio": -0.8902351260185242,
"logits/chosen": -2.3029747009277344,
"logits/rejected": -2.2809951305389404,
"logps/chosen": -0.5580970644950867,
"logps/rejected": -0.43120306730270386,
"loss": 0.9333,
"nll_loss": 0.9487207531929016,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 900
},
{
"epoch": 0.5389398874740894,
"grad_norm": 11.5,
"learning_rate": 2.5956308170151526e-07,
"log_odds_chosen": -0.5443618893623352,
"log_odds_ratio": -1.1218284368515015,
"logits/chosen": -2.275094985961914,
"logits/rejected": -2.2452805042266846,
"logps/chosen": -0.7426999807357788,
"logps/rejected": -0.4159156382083893,
"loss": 1.0105,
"nll_loss": 0.9728318452835083,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 910
},
{
"epoch": 0.5448623038199586,
"grad_norm": 8.0625,
"learning_rate": 2.543946942302944e-07,
"log_odds_chosen": -0.30027318000793457,
"log_odds_ratio": -0.9199014902114868,
"logits/chosen": -2.265780210494995,
"logits/rejected": -2.2284467220306396,
"logps/chosen": -0.5695372819900513,
"logps/rejected": -0.4176994264125824,
"loss": 0.9145,
"nll_loss": 0.9305332899093628,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 920
},
{
"epoch": 0.5507847201658277,
"grad_norm": 9.375,
"learning_rate": 2.492244270208158e-07,
"log_odds_chosen": -0.2501292824745178,
"log_odds_ratio": -0.8865777850151062,
"logits/chosen": -2.273998260498047,
"logits/rejected": -2.247119665145874,
"logps/chosen": -0.562545895576477,
"logps/rejected": -0.44244521856307983,
"loss": 0.9004,
"nll_loss": 0.9349013566970825,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 930
},
{
"epoch": 0.5567071365116968,
"grad_norm": 8.25,
"learning_rate": 2.440544915464078e-07,
"log_odds_chosen": -0.29280886054039,
"log_odds_ratio": -0.9178652763366699,
"logits/chosen": -2.304103374481201,
"logits/rejected": -2.2641754150390625,
"logps/chosen": -0.5466963052749634,
"logps/rejected": -0.41387224197387695,
"loss": 0.9221,
"nll_loss": 0.9058642387390137,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 940
},
{
"epoch": 0.5626295528575659,
"grad_norm": 7.875,
"learning_rate": 2.3888709913850593e-07,
"log_odds_chosen": -0.30006080865859985,
"log_odds_ratio": -0.9208847880363464,
"logits/chosen": -2.350160837173462,
"logits/rejected": -2.3101677894592285,
"logps/chosen": -0.5762113928794861,
"logps/rejected": -0.43552321195602417,
"loss": 0.9856,
"nll_loss": 0.938804030418396,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 950
},
{
"epoch": 0.568551969203435,
"grad_norm": 7.8125,
"learning_rate": 2.337244600408025e-07,
"log_odds_chosen": -0.39082369208335876,
"log_odds_ratio": -0.9911519289016724,
"logits/chosen": -2.3191308975219727,
"logits/rejected": -2.2875494956970215,
"logps/chosen": -0.6351069211959839,
"logps/rejected": -0.4366016387939453,
"loss": 0.9507,
"nll_loss": 0.9739691615104675,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 960
},
{
"epoch": 0.5744743855493041,
"grad_norm": 9.1875,
"learning_rate": 2.2856878246386085e-07,
"log_odds_chosen": -0.2834867537021637,
"log_odds_ratio": -0.9153987765312195,
"logits/chosen": -2.3153960704803467,
"logits/rejected": -2.2916574478149414,
"logps/chosen": -0.5750494003295898,
"logps/rejected": -0.4384193420410156,
"loss": 0.9944,
"nll_loss": 1.0159144401550293,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 970
},
{
"epoch": 0.5803968018951732,
"grad_norm": 8.8125,
"learning_rate": 2.2342227164060035e-07,
"log_odds_chosen": -0.37823957204818726,
"log_odds_ratio": -0.9799555540084839,
"logits/chosen": -2.2767786979675293,
"logits/rejected": -2.2212002277374268,
"logps/chosen": -0.6210430860519409,
"logps/rejected": -0.4381546378135681,
"loss": 0.9342,
"nll_loss": 0.9048612713813782,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 980
},
{
"epoch": 0.5863192182410424,
"grad_norm": 7.625,
"learning_rate": 2.182871288830533e-07,
"log_odds_chosen": -0.3980916738510132,
"log_odds_ratio": -0.9920517206192017,
"logits/chosen": -2.3002982139587402,
"logits/rejected": -2.237112522125244,
"logps/chosen": -0.6255283951759338,
"logps/rejected": -0.4390384256839752,
"loss": 0.9667,
"nll_loss": 0.9580439329147339,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 990
},
{
"epoch": 0.5922416345869115,
"grad_norm": 7.8125,
"learning_rate": 2.131655506408007e-07,
"log_odds_chosen": -0.3284297287464142,
"log_odds_ratio": -0.9404581785202026,
"logits/chosen": -2.3054287433624268,
"logits/rejected": -2.263627767562866,
"logps/chosen": -0.5948997139930725,
"logps/rejected": -0.4438301622867584,
"loss": 0.9222,
"nll_loss": 0.9223626852035522,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1000
},
{
"epoch": 0.5981640509327806,
"grad_norm": 7.65625,
"learning_rate": 2.0805972756148643e-07,
"log_odds_chosen": -0.48507261276245117,
"log_odds_ratio": -1.0847915410995483,
"logits/chosen": -2.2977192401885986,
"logits/rejected": -2.2829782962799072,
"logps/chosen": -0.748909592628479,
"logps/rejected": -0.43740910291671753,
"loss": 0.9675,
"nll_loss": 0.9915729761123657,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1010
},
{
"epoch": 0.6040864672786497,
"grad_norm": 7.59375,
"learning_rate": 2.0297184355381432e-07,
"log_odds_chosen": -0.3442012667655945,
"log_odds_ratio": -0.9447819590568542,
"logits/chosen": -2.3124001026153564,
"logits/rejected": -2.2729249000549316,
"logps/chosen": -0.5671228170394897,
"logps/rejected": -0.42450952529907227,
"loss": 0.9345,
"nll_loss": 0.9476312398910522,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1020
},
{
"epoch": 0.6100088836245188,
"grad_norm": 7.71875,
"learning_rate": 1.9790407485342638e-07,
"log_odds_chosen": -0.5493720769882202,
"log_odds_ratio": -1.1290843486785889,
"logits/chosen": -2.3353946208953857,
"logits/rejected": -2.2935006618499756,
"logps/chosen": -0.7500286102294922,
"logps/rejected": -0.4041396975517273,
"loss": 0.9066,
"nll_loss": 0.930087685585022,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1030
},
{
"epoch": 0.6159312999703879,
"grad_norm": 9.125,
"learning_rate": 1.928585890920641e-07,
"log_odds_chosen": -0.2760196626186371,
"log_odds_ratio": -0.9122495651245117,
"logits/chosen": -2.303187608718872,
"logits/rejected": -2.267937183380127,
"logps/chosen": -0.5642871856689453,
"logps/rejected": -0.42797571420669556,
"loss": 0.9504,
"nll_loss": 0.9056134223937988,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1040
},
{
"epoch": 0.6218537163162571,
"grad_norm": 8.25,
"learning_rate": 1.8783754437040902e-07,
"log_odds_chosen": -0.34286069869995117,
"log_odds_ratio": -0.9555438756942749,
"logits/chosen": -2.282454013824463,
"logits/rejected": -2.247560739517212,
"logps/chosen": -0.5602587461471558,
"logps/rejected": -0.41198721528053284,
"loss": 0.9096,
"nll_loss": 0.8938838243484497,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1050
},
{
"epoch": 0.6277761326621262,
"grad_norm": 7.53125,
"learning_rate": 1.8284308833500118e-07,
"log_odds_chosen": -0.30499863624572754,
"log_odds_ratio": -0.9316195249557495,
"logits/chosen": -2.2879326343536377,
"logits/rejected": -2.2628307342529297,
"logps/chosen": -0.5727280378341675,
"logps/rejected": -0.4329405725002289,
"loss": 0.9415,
"nll_loss": 0.9282618761062622,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1060
},
{
"epoch": 0.6336985490079953,
"grad_norm": 9.0,
"learning_rate": 1.7787735725962756e-07,
"log_odds_chosen": -0.371854692697525,
"log_odds_ratio": -0.9633452296257019,
"logits/chosen": -2.295264482498169,
"logits/rejected": -2.2589457035064697,
"logps/chosen": -0.608909010887146,
"logps/rejected": -0.43556636571884155,
"loss": 0.9874,
"nll_loss": 0.9842734336853027,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1070
},
{
"epoch": 0.6396209653538644,
"grad_norm": 7.4375,
"learning_rate": 1.7294247513157616e-07,
"log_odds_chosen": -0.30945563316345215,
"log_odds_ratio": -0.9170244932174683,
"logits/chosen": -2.32027530670166,
"logits/rejected": -2.2682743072509766,
"logps/chosen": -0.5618667006492615,
"logps/rejected": -0.42478686571121216,
"loss": 0.9273,
"nll_loss": 0.9494869112968445,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1080
},
{
"epoch": 0.6455433816997335,
"grad_norm": 8.25,
"learning_rate": 1.6804055274314494e-07,
"log_odds_chosen": -0.28144484758377075,
"log_odds_ratio": -0.9044340252876282,
"logits/chosen": -2.2794411182403564,
"logits/rejected": -2.256417989730835,
"logps/chosen": -0.5532391667366028,
"logps/rejected": -0.43312329053878784,
"loss": 0.9267,
"nll_loss": 0.8960529565811157,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1090
},
{
"epoch": 0.6514657980456026,
"grad_norm": 7.8125,
"learning_rate": 1.6317368678879496e-07,
"log_odds_chosen": -0.28822919726371765,
"log_odds_ratio": -0.9030183553695679,
"logits/chosen": -2.3176181316375732,
"logits/rejected": -2.2776379585266113,
"logps/chosen": -0.5691734552383423,
"logps/rejected": -0.4426758885383606,
"loss": 0.9774,
"nll_loss": 0.9514939188957214,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1100
},
{
"epoch": 0.6573882143914718,
"grad_norm": 10.0,
"learning_rate": 1.5834395896833281e-07,
"log_odds_chosen": -0.3918454051017761,
"log_odds_ratio": -0.9793996810913086,
"logits/chosen": -2.3301963806152344,
"logits/rejected": -2.274294376373291,
"logps/chosen": -0.6070703864097595,
"logps/rejected": -0.42063984274864197,
"loss": 0.9521,
"nll_loss": 0.956030547618866,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1110
},
{
"epoch": 0.6633106307373409,
"grad_norm": 8.3125,
"learning_rate": 1.535534350965075e-07,
"log_odds_chosen": -0.3459760546684265,
"log_odds_ratio": -0.9434119462966919,
"logits/chosen": -2.3291070461273193,
"logits/rejected": -2.3154056072235107,
"logps/chosen": -0.5571088790893555,
"logps/rejected": -0.3965280055999756,
"loss": 0.9099,
"nll_loss": 0.9126838445663452,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1120
},
{
"epoch": 0.66923304708321,
"grad_norm": 9.1875,
"learning_rate": 1.4880416421940154e-07,
"log_odds_chosen": -0.35412847995758057,
"log_odds_ratio": -0.9552983045578003,
"logits/chosen": -2.2702512741088867,
"logits/rejected": -2.246307849884033,
"logps/chosen": -0.6233401298522949,
"logps/rejected": -0.4397760033607483,
"loss": 1.0082,
"nll_loss": 1.0226290225982666,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1130
},
{
"epoch": 0.6751554634290791,
"grad_norm": 10.8125,
"learning_rate": 1.4409817773799459e-07,
"log_odds_chosen": -0.30409640073776245,
"log_odds_ratio": -0.933831512928009,
"logits/chosen": -2.2992305755615234,
"logits/rejected": -2.2570438385009766,
"logps/chosen": -0.6016424894332886,
"logps/rejected": -0.44892677664756775,
"loss": 0.9551,
"nll_loss": 0.9244022369384766,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1140
},
{
"epoch": 0.6810778797749482,
"grad_norm": 8.8125,
"learning_rate": 1.3943748853927385e-07,
"log_odds_chosen": -0.41090458631515503,
"log_odds_ratio": -1.001075029373169,
"logits/chosen": -2.299729824066162,
"logits/rejected": -2.2916903495788574,
"logps/chosen": -0.6525920033454895,
"logps/rejected": -0.4317931532859802,
"loss": 0.9303,
"nll_loss": 0.9261299967765808,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1150
},
{
"epoch": 0.6870002961208173,
"grad_norm": 8.3125,
"learning_rate": 1.3482409013526436e-07,
"log_odds_chosen": -0.42632365226745605,
"log_odds_ratio": -1.0060193538665771,
"logits/chosen": -2.2864601612091064,
"logits/rejected": -2.2797439098358154,
"logps/chosen": -0.615047037601471,
"logps/rejected": -0.4228528141975403,
"loss": 0.9621,
"nll_loss": 0.9737777709960938,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1160
},
{
"epoch": 0.6929227124666865,
"grad_norm": 7.65625,
"learning_rate": 1.302599558103456e-07,
"log_odds_chosen": -0.3416286110877991,
"log_odds_ratio": -0.971416175365448,
"logits/chosen": -2.3440544605255127,
"logits/rejected": -2.3086702823638916,
"logps/chosen": -0.6329351663589478,
"logps/rejected": -0.44783586263656616,
"loss": 0.9418,
"nll_loss": 0.9567440152168274,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1170
},
{
"epoch": 0.6988451288125556,
"grad_norm": 9.1875,
"learning_rate": 1.257470377772214e-07,
"log_odds_chosen": -0.37471523880958557,
"log_odds_ratio": -0.9742682576179504,
"logits/chosen": -2.320568323135376,
"logits/rejected": -2.2932517528533936,
"logps/chosen": -0.5956822633743286,
"logps/rejected": -0.410876601934433,
"loss": 0.9619,
"nll_loss": 0.9405835270881653,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1180
},
{
"epoch": 0.7047675451584247,
"grad_norm": 7.96875,
"learning_rate": 1.2128726634190046e-07,
"log_odds_chosen": -0.3462384343147278,
"log_odds_ratio": -0.9395328760147095,
"logits/chosen": -2.3269436359405518,
"logits/rejected": -2.2818374633789062,
"logps/chosen": -0.5803397297859192,
"logps/rejected": -0.4099668860435486,
"loss": 0.9172,
"nll_loss": 0.9042008519172668,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1190
},
{
"epoch": 0.7106899615042938,
"grad_norm": 9.5625,
"learning_rate": 1.1688254907804992e-07,
"log_odds_chosen": -0.3338465392589569,
"log_odds_ratio": -0.9492910504341125,
"logits/chosen": -2.282212495803833,
"logits/rejected": -2.2418830394744873,
"logps/chosen": -0.6125479340553284,
"logps/rejected": -0.45005935430526733,
"loss": 0.9674,
"nll_loss": 0.9735835194587708,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1200
},
{
"epoch": 0.7166123778501629,
"grad_norm": 7.5,
"learning_rate": 1.1253477001106956e-07,
"log_odds_chosen": -0.26436474919319153,
"log_odds_ratio": -0.8985050916671753,
"logits/chosen": -2.259978771209717,
"logits/rejected": -2.223177433013916,
"logps/chosen": -0.5662964582443237,
"logps/rejected": -0.4440518915653229,
"loss": 0.9408,
"nll_loss": 0.90367591381073,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1210
},
{
"epoch": 0.722534794196032,
"grad_norm": 10.4375,
"learning_rate": 1.0824578881224065e-07,
"log_odds_chosen": -0.24436886608600616,
"log_odds_ratio": -0.8822824358940125,
"logits/chosen": -2.332968235015869,
"logits/rejected": -2.3182759284973145,
"logps/chosen": -0.539296567440033,
"logps/rejected": -0.42076578736305237,
"loss": 0.8899,
"nll_loss": 0.8598573803901672,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1220
},
{
"epoch": 0.728457210541901,
"grad_norm": 8.125,
"learning_rate": 1.0401744000328918e-07,
"log_odds_chosen": -0.28977444767951965,
"log_odds_ratio": -0.9303587675094604,
"logits/chosen": -2.2798142433166504,
"logits/rejected": -2.2761147022247314,
"logps/chosen": -0.5908230543136597,
"logps/rejected": -0.449887216091156,
"loss": 0.9195,
"nll_loss": 0.9145529866218567,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1230
},
{
"epoch": 0.7343796268877703,
"grad_norm": 9.375,
"learning_rate": 9.985153217170902e-08,
"log_odds_chosen": -0.357065886259079,
"log_odds_ratio": -0.9588850140571594,
"logits/chosen": -2.3385989665985107,
"logits/rejected": -2.323024034500122,
"logps/chosen": -0.6093414425849915,
"logps/rejected": -0.4347008168697357,
"loss": 1.008,
"nll_loss": 1.0087924003601074,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1240
},
{
"epoch": 0.7403020432336394,
"grad_norm": 10.5,
"learning_rate": 9.574984719717553e-08,
"log_odds_chosen": -0.3260490894317627,
"log_odds_ratio": -0.9441172480583191,
"logits/chosen": -2.321216344833374,
"logits/rejected": -2.302063226699829,
"logps/chosen": -0.5826759338378906,
"logps/rejected": -0.4284025728702545,
"loss": 0.9308,
"nll_loss": 0.9714264869689941,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1250
},
{
"epoch": 0.7462244595795084,
"grad_norm": 8.25,
"learning_rate": 9.171413948938459e-08,
"log_odds_chosen": -0.3101581037044525,
"log_odds_ratio": -0.929049015045166,
"logits/chosen": -2.310981273651123,
"logits/rejected": -2.2564587593078613,
"logps/chosen": -0.6019686460494995,
"logps/rejected": -0.45062392950057983,
"loss": 0.9576,
"nll_loss": 0.9787800908088684,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1260
},
{
"epoch": 0.7521468759253775,
"grad_norm": 7.53125,
"learning_rate": 8.774613523764049e-08,
"log_odds_chosen": -0.369983971118927,
"log_odds_ratio": -0.9563344120979309,
"logits/chosen": -2.2968955039978027,
"logits/rejected": -2.248944044113159,
"logps/chosen": -0.5875356197357178,
"logps/rejected": -0.4163094162940979,
"loss": 0.9305,
"nll_loss": 0.900018572807312,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1270
},
{
"epoch": 0.7580692922712466,
"grad_norm": 9.125,
"learning_rate": 8.384753167251412e-08,
"log_odds_chosen": -0.32507094740867615,
"log_odds_ratio": -0.9397505521774292,
"logits/chosen": -2.2516260147094727,
"logits/rejected": -2.226477861404419,
"logps/chosen": -0.5629323124885559,
"logps/rejected": -0.41151052713394165,
"loss": 0.8916,
"nll_loss": 0.8657590746879578,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1280
},
{
"epoch": 0.7639917086171157,
"grad_norm": 8.375,
"learning_rate": 8.001999633988942e-08,
"log_odds_chosen": -0.3611569106578827,
"log_odds_ratio": -0.957983672618866,
"logits/chosen": -2.3322999477386475,
"logits/rejected": -2.283409833908081,
"logps/chosen": -0.5784574747085571,
"logps/rejected": -0.4157342314720154,
"loss": 0.8973,
"nll_loss": 0.8929991722106934,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1290
},
{
"epoch": 0.769914124962985,
"grad_norm": 8.6875,
"learning_rate": 7.62651663877042e-08,
"log_odds_chosen": -0.26533371210098267,
"log_odds_ratio": -0.9089031219482422,
"logits/chosen": -2.2688136100769043,
"logits/rejected": -2.2409274578094482,
"logps/chosen": -0.5685082077980042,
"logps/rejected": -0.4421761631965637,
"loss": 0.9965,
"nll_loss": 0.9676351547241211,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1300
},
{
"epoch": 0.775836541308854,
"grad_norm": 7.1875,
"learning_rate": 7.258464786569549e-08,
"log_odds_chosen": -0.28731244802474976,
"log_odds_ratio": -0.9186748266220093,
"logits/chosen": -2.3306045532226562,
"logits/rejected": -2.2782888412475586,
"logps/chosen": -0.5666372776031494,
"logps/rejected": -0.43749627470970154,
"loss": 0.9659,
"nll_loss": 0.9544159770011902,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1310
},
{
"epoch": 0.7817589576547231,
"grad_norm": 9.75,
"learning_rate": 6.898001503844483e-08,
"log_odds_chosen": -0.5405977964401245,
"log_odds_ratio": -1.1180508136749268,
"logits/chosen": -2.3619232177734375,
"logits/rejected": -2.3188281059265137,
"logps/chosen": -0.7727476358413696,
"logps/rejected": -0.4373859465122223,
"loss": 0.9429,
"nll_loss": 0.9921876192092896,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1320
},
{
"epoch": 0.7876813740005922,
"grad_norm": 9.3125,
"learning_rate": 6.545280971202014e-08,
"log_odds_chosen": -0.2534041702747345,
"log_odds_ratio": -0.8981307744979858,
"logits/chosen": -2.320126533508301,
"logits/rejected": -2.289376974105835,
"logps/chosen": -0.5534666180610657,
"logps/rejected": -0.4292474687099457,
"loss": 0.9168,
"nll_loss": 0.9440558552742004,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1330
},
{
"epoch": 0.7936037903464613,
"grad_norm": 8.6875,
"learning_rate": 6.200454057450022e-08,
"log_odds_chosen": -0.36177825927734375,
"log_odds_ratio": -0.9495649337768555,
"logits/chosen": -2.2736241817474365,
"logits/rejected": -2.226933479309082,
"logps/chosen": -0.59937584400177,
"logps/rejected": -0.42542099952697754,
"loss": 0.9704,
"nll_loss": 0.9059191942214966,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1340
},
{
"epoch": 0.7995262066923304,
"grad_norm": 9.0,
"learning_rate": 5.863668255066492e-08,
"log_odds_chosen": -0.313324511051178,
"log_odds_ratio": -0.919102668762207,
"logits/chosen": -2.270073890686035,
"logits/rejected": -2.2372827529907227,
"logps/chosen": -0.5768779516220093,
"logps/rejected": -0.43518179655075073,
"loss": 0.9124,
"nll_loss": 0.9345908164978027,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1350
},
{
"epoch": 0.8054486230381996,
"grad_norm": 8.375,
"learning_rate": 5.53506761711274e-08,
"log_odds_chosen": -0.2887657880783081,
"log_odds_ratio": -0.912114143371582,
"logits/chosen": -2.305987596511841,
"logits/rejected": -2.2752127647399902,
"logps/chosen": -0.587549090385437,
"logps/rejected": -0.4469973146915436,
"loss": 0.9423,
"nll_loss": 0.9892560243606567,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1360
},
{
"epoch": 0.8113710393840687,
"grad_norm": 8.6875,
"learning_rate": 5.2147926956177174e-08,
"log_odds_chosen": -0.45511436462402344,
"log_odds_ratio": -1.0450800657272339,
"logits/chosen": -2.294468641281128,
"logits/rejected": -2.283860445022583,
"logps/chosen": -0.6607165336608887,
"logps/rejected": -0.4220455586910248,
"loss": 0.9395,
"nll_loss": 0.9603630304336548,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1370
},
{
"epoch": 0.8172934557299378,
"grad_norm": 15.3125,
"learning_rate": 4.902980481459834e-08,
"log_odds_chosen": -0.26191025972366333,
"log_odds_ratio": -0.9043244123458862,
"logits/chosen": -2.277843475341797,
"logits/rejected": -2.248347520828247,
"logps/chosen": -0.5755423307418823,
"logps/rejected": -0.4490273594856262,
"loss": 0.9003,
"nll_loss": 0.9170975685119629,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1380
},
{
"epoch": 0.8232158720758069,
"grad_norm": 9.25,
"learning_rate": 4.5997643457719646e-08,
"log_odds_chosen": -0.35434719920158386,
"log_odds_ratio": -0.9506848454475403,
"logits/chosen": -2.295780658721924,
"logits/rejected": -2.2894127368927,
"logps/chosen": -0.5851874351501465,
"logps/rejected": -0.4212135672569275,
"loss": 0.8958,
"nll_loss": 0.9056984186172485,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1390
},
{
"epoch": 0.829138288421676,
"grad_norm": 8.75,
"learning_rate": 4.305273982894772e-08,
"log_odds_chosen": -0.33616143465042114,
"log_odds_ratio": -0.9503694772720337,
"logits/chosen": -2.3287465572357178,
"logits/rejected": -2.286414623260498,
"logps/chosen": -0.6181541681289673,
"logps/rejected": -0.4454525113105774,
"loss": 0.9425,
"nll_loss": 0.9352006912231445,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1400
},
{
"epoch": 0.8350607047675451,
"grad_norm": 9.8125,
"learning_rate": 4.0196353549026786e-08,
"log_odds_chosen": -0.30044835805892944,
"log_odds_ratio": -0.9167086482048035,
"logits/chosen": -2.298393487930298,
"logits/rejected": -2.26066255569458,
"logps/chosen": -0.5805574655532837,
"logps/rejected": -0.43902960419654846,
"loss": 0.9712,
"nll_loss": 1.0039526224136353,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1410
},
{
"epoch": 0.8409831211134142,
"grad_norm": 7.75,
"learning_rate": 3.742970637726181e-08,
"log_odds_chosen": -0.179987370967865,
"log_odds_ratio": -0.8582404255867004,
"logits/chosen": -2.3168177604675293,
"logits/rejected": -2.269207000732422,
"logps/chosen": -0.5285545587539673,
"logps/rejected": -0.44142407178878784,
"loss": 0.9201,
"nll_loss": 0.9034355878829956,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1420
},
{
"epoch": 0.8469055374592834,
"grad_norm": 8.3125,
"learning_rate": 3.4753981688937284e-08,
"log_odds_chosen": -0.3474799394607544,
"log_odds_ratio": -0.9500767588615417,
"logits/chosen": -2.2948362827301025,
"logits/rejected": -2.2666220664978027,
"logps/chosen": -0.5843050479888916,
"logps/rejected": -0.4246344566345215,
"loss": 0.9549,
"nll_loss": 0.9555429220199585,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1430
},
{
"epoch": 0.8528279538051525,
"grad_norm": 8.25,
"learning_rate": 3.217032396915265e-08,
"log_odds_chosen": -0.40568438172340393,
"log_odds_ratio": -1.0082272291183472,
"logits/chosen": -2.3024380207061768,
"logits/rejected": -2.268986701965332,
"logps/chosen": -0.6786967515945435,
"logps/rejected": -0.44073349237442017,
"loss": 0.9636,
"nll_loss": 0.9871211051940918,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1440
},
{
"epoch": 0.8587503701510216,
"grad_norm": 9.75,
"learning_rate": 2.9679838323293404e-08,
"log_odds_chosen": -0.4226885437965393,
"log_odds_ratio": -1.0269486904144287,
"logits/chosen": -2.2947676181793213,
"logits/rejected": -2.2655680179595947,
"logps/chosen": -0.6772679090499878,
"logps/rejected": -0.44576793909072876,
"loss": 0.9064,
"nll_loss": 0.9240104556083679,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1450
},
{
"epoch": 0.8646727864968907,
"grad_norm": 9.6875,
"learning_rate": 2.728359000434488e-08,
"log_odds_chosen": -0.34337377548217773,
"log_odds_ratio": -0.9470311403274536,
"logits/chosen": -2.3264529705047607,
"logits/rejected": -2.290132761001587,
"logps/chosen": -0.5465956926345825,
"logps/rejected": -0.41503897309303284,
"loss": 0.9481,
"nll_loss": 0.8961936831474304,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1460
},
{
"epoch": 0.8705952028427598,
"grad_norm": 8.8125,
"learning_rate": 2.498260395725302e-08,
"log_odds_chosen": -0.3448273539543152,
"log_odds_ratio": -0.9480770230293274,
"logits/chosen": -2.293290376663208,
"logits/rejected": -2.278653144836426,
"logps/chosen": -0.5983850955963135,
"logps/rejected": -0.44902753829956055,
"loss": 0.9406,
"nll_loss": 0.93559330701828,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1470
},
{
"epoch": 0.8765176191886289,
"grad_norm": 8.5,
"learning_rate": 2.2777864380525426e-08,
"log_odds_chosen": -0.29847949743270874,
"log_odds_ratio": -0.9293072819709778,
"logits/chosen": -2.2964632511138916,
"logits/rejected": -2.2744767665863037,
"logps/chosen": -0.59266597032547,
"logps/rejected": -0.4337979853153229,
"loss": 0.9143,
"nll_loss": 0.8765565752983093,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1480
},
{
"epoch": 0.8824400355344981,
"grad_norm": 8.4375,
"learning_rate": 2.0670314305261423e-08,
"log_odds_chosen": -0.2956581115722656,
"log_odds_ratio": -0.914827823638916,
"logits/chosen": -2.312617540359497,
"logits/rejected": -2.2852249145507812,
"logps/chosen": -0.5551884770393372,
"logps/rejected": -0.42470401525497437,
"loss": 0.8906,
"nll_loss": 0.9051159620285034,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1490
},
{
"epoch": 0.8883624518803672,
"grad_norm": 8.5,
"learning_rate": 1.866085519178995e-08,
"log_odds_chosen": -0.3115543723106384,
"log_odds_ratio": -0.9432921409606934,
"logits/chosen": -2.294912815093994,
"logits/rejected": -2.2731730937957764,
"logps/chosen": -0.6179423332214355,
"logps/rejected": -0.47528520226478577,
"loss": 0.9807,
"nll_loss": 0.9959957003593445,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1500
},
{
"epoch": 0.8942848682262363,
"grad_norm": 8.0625,
"learning_rate": 1.675034654408894e-08,
"log_odds_chosen": -0.3954925239086151,
"log_odds_ratio": -0.966839611530304,
"logits/chosen": -2.331923007965088,
"logits/rejected": -2.3019633293151855,
"logps/chosen": -0.5650435090065002,
"logps/rejected": -0.40297931432724,
"loss": 0.9211,
"nll_loss": 0.9447514414787292,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1510
},
{
"epoch": 0.9002072845721054,
"grad_norm": 8.5625,
"learning_rate": 1.4939605542150595e-08,
"log_odds_chosen": -0.2961687445640564,
"log_odds_ratio": -0.9432598352432251,
"logits/chosen": -2.31669545173645,
"logits/rejected": -2.2790303230285645,
"logps/chosen": -0.6368409395217896,
"logps/rejected": -0.466596782207489,
"loss": 0.9972,
"nll_loss": 0.9845758676528931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1520
},
{
"epoch": 0.9061297009179745,
"grad_norm": 8.8125,
"learning_rate": 1.3229406692449791e-08,
"log_odds_chosen": -0.22676777839660645,
"log_odds_ratio": -0.8955879211425781,
"logits/chosen": -2.2553787231445312,
"logits/rejected": -2.228121280670166,
"logps/chosen": -0.5664690136909485,
"logps/rejected": -0.45375269651412964,
"loss": 0.9651,
"nll_loss": 0.9422292709350586,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1530
},
{
"epoch": 0.9120521172638436,
"grad_norm": 7.6875,
"learning_rate": 1.162048149666503e-08,
"log_odds_chosen": -0.28153032064437866,
"log_odds_ratio": -0.9496873617172241,
"logits/chosen": -2.3055193424224854,
"logits/rejected": -2.2621009349823,
"logps/chosen": -0.6041845083236694,
"logps/rejected": -0.4735984802246094,
"loss": 0.9464,
"nll_loss": 0.9381273984909058,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1540
},
{
"epoch": 0.9179745336097128,
"grad_norm": 8.3125,
"learning_rate": 1.0113518138794047e-08,
"log_odds_chosen": -0.3274211287498474,
"log_odds_ratio": -0.9445363283157349,
"logits/chosen": -2.2565391063690186,
"logits/rejected": -2.233027935028076,
"logps/chosen": -0.5867388844490051,
"logps/rejected": -0.4352657198905945,
"loss": 0.9594,
"nll_loss": 0.9287152290344238,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1550
},
{
"epoch": 0.9238969499555819,
"grad_norm": 8.625,
"learning_rate": 8.709161190797565e-09,
"log_odds_chosen": -0.23092766106128693,
"log_odds_ratio": -0.8911072611808777,
"logits/chosen": -2.3257815837860107,
"logits/rejected": -2.29530668258667,
"logps/chosen": -0.5526595115661621,
"logps/rejected": -0.4391084611415863,
"loss": 0.9337,
"nll_loss": 0.9090098142623901,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1560
},
{
"epoch": 0.929819366301451,
"grad_norm": 9.0,
"learning_rate": 7.408011336897141e-09,
"log_odds_chosen": -0.4632336497306824,
"log_odds_ratio": -1.089444875717163,
"logits/chosen": -2.346909284591675,
"logits/rejected": -2.334372043609619,
"logps/chosen": -0.7571093440055847,
"logps/rejected": -0.4586968421936035,
"loss": 0.9736,
"nll_loss": 1.0062029361724854,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1570
},
{
"epoch": 0.9357417826473201,
"grad_norm": 7.46875,
"learning_rate": 6.210625116645135e-09,
"log_odds_chosen": -0.41088682413101196,
"log_odds_ratio": -0.9913327097892761,
"logits/chosen": -2.3505208492279053,
"logits/rejected": -2.3092150688171387,
"logps/chosen": -0.6176980137825012,
"logps/rejected": -0.42517074942588806,
"loss": 0.9028,
"nll_loss": 0.8675041198730469,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1580
},
{
"epoch": 0.9416641989931892,
"grad_norm": 8.9375,
"learning_rate": 5.117514686876378e-09,
"log_odds_chosen": -0.29983749985694885,
"log_odds_ratio": -0.9341946840286255,
"logits/chosen": -2.3163905143737793,
"logits/rejected": -2.281881809234619,
"logps/chosen": -0.569345235824585,
"logps/rejected": -0.4335504174232483,
"loss": 0.946,
"nll_loss": 0.9458128809928894,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1590
},
{
"epoch": 0.9475866153390583,
"grad_norm": 8.25,
"learning_rate": 4.1291476026441565e-09,
"log_odds_chosen": -0.21767720580101013,
"log_odds_ratio": -0.8741191029548645,
"logits/chosen": -2.2778666019439697,
"logits/rejected": -2.2554242610931396,
"logps/chosen": -0.560379147529602,
"logps/rejected": -0.44631558656692505,
"loss": 0.897,
"nll_loss": 0.8650028109550476,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1600
},
{
"epoch": 0.9535090316849274,
"grad_norm": 7.96875,
"learning_rate": 3.2459466172331253e-09,
"log_odds_chosen": -0.35443753004074097,
"log_odds_ratio": -0.9958807229995728,
"logits/chosen": -2.2850985527038574,
"logits/rejected": -2.264432430267334,
"logps/chosen": -0.6543992757797241,
"logps/rejected": -0.436093807220459,
"loss": 0.9838,
"nll_loss": 0.9750429391860962,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1610
},
{
"epoch": 0.9594314480307966,
"grad_norm": 10.25,
"learning_rate": 2.4682895013354854e-09,
"log_odds_chosen": -0.3622002899646759,
"log_odds_ratio": -0.9864064455032349,
"logits/chosen": -2.287553548812866,
"logits/rejected": -2.2655410766601562,
"logps/chosen": -0.6380153894424438,
"logps/rejected": -0.4244503378868103,
"loss": 0.9213,
"nll_loss": 0.9489747881889343,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1620
},
{
"epoch": 0.9653538643766657,
"grad_norm": 9.1875,
"learning_rate": 1.7965088814675677e-09,
"log_odds_chosen": -0.4782753586769104,
"log_odds_ratio": -1.0607492923736572,
"logits/chosen": -2.290717840194702,
"logits/rejected": -2.272459030151367,
"logps/chosen": -0.6781035661697388,
"logps/rejected": -0.4276870787143707,
"loss": 0.9333,
"nll_loss": 0.9644565582275391,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1630
},
{
"epoch": 0.9712762807225348,
"grad_norm": 9.125,
"learning_rate": 1.2308920976958348e-09,
"log_odds_chosen": -0.29858607053756714,
"log_odds_ratio": -0.9460258483886719,
"logits/chosen": -2.269747734069824,
"logits/rejected": -2.247730016708374,
"logps/chosen": -0.6245580911636353,
"logps/rejected": -0.45077449083328247,
"loss": 0.906,
"nll_loss": 0.9039252996444702,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1640
},
{
"epoch": 0.9771986970684039,
"grad_norm": 9.5625,
"learning_rate": 7.716810807330276e-10,
"log_odds_chosen": -0.4411376416683197,
"log_odds_ratio": -1.0094521045684814,
"logits/chosen": -2.2869138717651367,
"logits/rejected": -2.24787974357605,
"logps/chosen": -0.6290577054023743,
"logps/rejected": -0.4051317572593689,
"loss": 0.9304,
"nll_loss": 0.9296571016311646,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1650
},
{
"epoch": 0.983121113414273,
"grad_norm": 10.125,
"learning_rate": 4.190722484575804e-10,
"log_odds_chosen": -0.3509382903575897,
"log_odds_ratio": -0.9882933497428894,
"logits/chosen": -2.2927916049957275,
"logits/rejected": -2.262193202972412,
"logps/chosen": -0.6660831570625305,
"logps/rejected": -0.4522073268890381,
"loss": 0.9544,
"nll_loss": 0.9784467816352844,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1660
},
{
"epoch": 0.9890435297601421,
"grad_norm": 7.71875,
"learning_rate": 1.732164218998522e-10,
"log_odds_chosen": -0.35314035415649414,
"log_odds_ratio": -0.9515780210494995,
"logits/chosen": -2.265188694000244,
"logits/rejected": -2.2218968868255615,
"logps/chosen": -0.5966526865959167,
"logps/rejected": -0.4354891777038574,
"loss": 0.9111,
"nll_loss": 0.9058610796928406,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1670
},
{
"epoch": 0.9949659461060113,
"grad_norm": 9.6875,
"learning_rate": 3.4218760731730136e-11,
"log_odds_chosen": -0.3034502863883972,
"log_odds_ratio": -0.9285211563110352,
"logits/chosen": -2.339616298675537,
"logits/rejected": -2.2971951961517334,
"logps/chosen": -0.5794862508773804,
"logps/rejected": -0.43610063195228577,
"loss": 0.9681,
"nll_loss": 0.9744264483451843,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1680
},
{
"epoch": 0.9997038791827065,
"step": 1688,
"total_flos": 0.0,
"train_loss": 0.9736523162132191,
"train_runtime": 25409.1611,
"train_samples_per_second": 2.126,
"train_steps_per_second": 0.066
}
],
"logging_steps": 10,
"max_steps": 1688,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}