|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997038791827065, |
|
"eval_steps": 500, |
|
"global_step": 1688, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005922416345869114, |
|
"grad_norm": 25.375, |
|
"learning_rate": 2.9585798816568044e-08, |
|
"log_odds_chosen": -0.4997142255306244, |
|
"log_odds_ratio": -1.0621646642684937, |
|
"logits/chosen": -2.2295050621032715, |
|
"logits/rejected": -2.215860366821289, |
|
"logps/chosen": -0.7159513235092163, |
|
"logps/rejected": -0.47170203924179077, |
|
"loss": 1.2686, |
|
"nll_loss": 1.285839319229126, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.011844832691738229, |
|
"grad_norm": 25.25, |
|
"learning_rate": 5.917159763313609e-08, |
|
"log_odds_chosen": -0.6078722476959229, |
|
"log_odds_ratio": -1.1548207998275757, |
|
"logits/chosen": -2.1872293949127197, |
|
"logits/rejected": -2.1639022827148438, |
|
"logps/chosen": -0.8250460624694824, |
|
"logps/rejected": -0.4715689718723297, |
|
"loss": 1.2301, |
|
"nll_loss": 1.2283066511154175, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.017767249037607343, |
|
"grad_norm": 27.125, |
|
"learning_rate": 8.875739644970414e-08, |
|
"log_odds_chosen": -0.5964034199714661, |
|
"log_odds_ratio": -1.1720728874206543, |
|
"logits/chosen": -2.155057191848755, |
|
"logits/rejected": -2.146630048751831, |
|
"logps/chosen": -0.8543933033943176, |
|
"logps/rejected": -0.4923427104949951, |
|
"loss": 1.2398, |
|
"nll_loss": 1.313323736190796, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.023689665383476458, |
|
"grad_norm": 28.25, |
|
"learning_rate": 1.1834319526627217e-07, |
|
"log_odds_chosen": -0.5351605415344238, |
|
"log_odds_ratio": -1.0927046537399292, |
|
"logits/chosen": -2.2190463542938232, |
|
"logits/rejected": -2.206223964691162, |
|
"logps/chosen": -0.7575310468673706, |
|
"logps/rejected": -0.4635254740715027, |
|
"loss": 1.2735, |
|
"nll_loss": 1.2356092929840088, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.029612081729345572, |
|
"grad_norm": 26.0, |
|
"learning_rate": 1.4792899408284022e-07, |
|
"log_odds_chosen": -0.4550475478172302, |
|
"log_odds_ratio": -1.0262255668640137, |
|
"logits/chosen": -2.163825750350952, |
|
"logits/rejected": -2.148223400115967, |
|
"logps/chosen": -0.7005314826965332, |
|
"logps/rejected": -0.47106480598449707, |
|
"loss": 1.2103, |
|
"nll_loss": 1.2403192520141602, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.035534498075214686, |
|
"grad_norm": 21.375, |
|
"learning_rate": 1.7751479289940827e-07, |
|
"log_odds_chosen": -0.6598173975944519, |
|
"log_odds_ratio": -1.2315865755081177, |
|
"logits/chosen": -2.2192461490631104, |
|
"logits/rejected": -2.1879701614379883, |
|
"logps/chosen": -0.8897407650947571, |
|
"logps/rejected": -0.4609861969947815, |
|
"loss": 1.2416, |
|
"nll_loss": 1.2300336360931396, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.041456914421083804, |
|
"grad_norm": 27.375, |
|
"learning_rate": 2.0710059171597633e-07, |
|
"log_odds_chosen": -0.5370969772338867, |
|
"log_odds_ratio": -1.1101651191711426, |
|
"logits/chosen": -2.233755588531494, |
|
"logits/rejected": -2.201343297958374, |
|
"logps/chosen": -0.7967244386672974, |
|
"logps/rejected": -0.4630069136619568, |
|
"loss": 1.2546, |
|
"nll_loss": 1.2548679113388062, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.047379330766952915, |
|
"grad_norm": 26.625, |
|
"learning_rate": 2.3668639053254435e-07, |
|
"log_odds_chosen": -0.5750253796577454, |
|
"log_odds_ratio": -1.1556330919265747, |
|
"logits/chosen": -2.19846773147583, |
|
"logits/rejected": -2.187711715698242, |
|
"logps/chosen": -0.7946293950080872, |
|
"logps/rejected": -0.4594718813896179, |
|
"loss": 1.2238, |
|
"nll_loss": 1.2166999578475952, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05330174711282203, |
|
"grad_norm": 22.375, |
|
"learning_rate": 2.662721893491124e-07, |
|
"log_odds_chosen": -0.502492368221283, |
|
"log_odds_ratio": -1.0737704038619995, |
|
"logits/chosen": -2.18656063079834, |
|
"logits/rejected": -2.1636054515838623, |
|
"logps/chosen": -0.7198958992958069, |
|
"logps/rejected": -0.4653542935848236, |
|
"loss": 1.1987, |
|
"nll_loss": 1.2203375101089478, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.059224163458691144, |
|
"grad_norm": 22.375, |
|
"learning_rate": 2.9585798816568045e-07, |
|
"log_odds_chosen": -0.41660839319229126, |
|
"log_odds_ratio": -0.9962056279182434, |
|
"logits/chosen": -2.247572422027588, |
|
"logits/rejected": -2.2023332118988037, |
|
"logps/chosen": -0.6892199516296387, |
|
"logps/rejected": -0.4913715422153473, |
|
"loss": 1.1737, |
|
"nll_loss": 1.2142550945281982, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06514657980456026, |
|
"grad_norm": 17.25, |
|
"learning_rate": 3.254437869822485e-07, |
|
"log_odds_chosen": -0.4817970395088196, |
|
"log_odds_ratio": -1.0484726428985596, |
|
"logits/chosen": -2.1959776878356934, |
|
"logits/rejected": -2.172440767288208, |
|
"logps/chosen": -0.7387205958366394, |
|
"logps/rejected": -0.47952842712402344, |
|
"loss": 1.1196, |
|
"nll_loss": 1.1109485626220703, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07106899615042937, |
|
"grad_norm": 17.75, |
|
"learning_rate": 3.5502958579881655e-07, |
|
"log_odds_chosen": -0.5072614550590515, |
|
"log_odds_ratio": -1.073188066482544, |
|
"logits/chosen": -2.2234084606170654, |
|
"logits/rejected": -2.212110996246338, |
|
"logps/chosen": -0.7518635988235474, |
|
"logps/rejected": -0.4725222587585449, |
|
"loss": 1.1538, |
|
"nll_loss": 1.1456319093704224, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07699141249629848, |
|
"grad_norm": 22.875, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"log_odds_chosen": -0.6446342468261719, |
|
"log_odds_ratio": -1.2178680896759033, |
|
"logits/chosen": -2.2080233097076416, |
|
"logits/rejected": -2.1998672485351562, |
|
"logps/chosen": -0.8730036020278931, |
|
"logps/rejected": -0.4481457769870758, |
|
"loss": 1.1644, |
|
"nll_loss": 1.1509124040603638, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08291382884216761, |
|
"grad_norm": 16.25, |
|
"learning_rate": 4.1420118343195265e-07, |
|
"log_odds_chosen": -0.45662721991539, |
|
"log_odds_ratio": -1.0147430896759033, |
|
"logits/chosen": -2.237990617752075, |
|
"logits/rejected": -2.2128589153289795, |
|
"logps/chosen": -0.6646671295166016, |
|
"logps/rejected": -0.44373393058776855, |
|
"loss": 1.0906, |
|
"nll_loss": 1.0673267841339111, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08883624518803672, |
|
"grad_norm": 26.5, |
|
"learning_rate": 4.437869822485207e-07, |
|
"log_odds_chosen": -0.46678367257118225, |
|
"log_odds_ratio": -1.0147194862365723, |
|
"logits/chosen": -2.167670488357544, |
|
"logits/rejected": -2.1592793464660645, |
|
"logps/chosen": -0.7025789618492126, |
|
"logps/rejected": -0.47203493118286133, |
|
"loss": 1.1008, |
|
"nll_loss": 1.1650502681732178, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09475866153390583, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 4.733727810650887e-07, |
|
"log_odds_chosen": -0.3221941888332367, |
|
"log_odds_ratio": -0.9352226257324219, |
|
"logits/chosen": -2.247824192047119, |
|
"logits/rejected": -2.2287344932556152, |
|
"logps/chosen": -0.6016725301742554, |
|
"logps/rejected": -0.4531864523887634, |
|
"loss": 1.0947, |
|
"nll_loss": 1.0781590938568115, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.10068107787977496, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 4.999994653198566e-07, |
|
"log_odds_chosen": -0.4564700722694397, |
|
"log_odds_ratio": -1.0602452754974365, |
|
"logits/chosen": -2.2789835929870605, |
|
"logits/rejected": -2.2523741722106934, |
|
"logps/chosen": -0.7461049556732178, |
|
"logps/rejected": -0.48730534315109253, |
|
"loss": 1.1159, |
|
"nll_loss": 1.0658115148544312, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.10660349422564407, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 4.999353064699471e-07, |
|
"log_odds_chosen": -0.5452951192855835, |
|
"log_odds_ratio": -1.1454532146453857, |
|
"logits/chosen": -2.237121820449829, |
|
"logits/rejected": -2.202718496322632, |
|
"logps/chosen": -0.827674388885498, |
|
"logps/rejected": -0.49005183577537537, |
|
"loss": 0.9748, |
|
"nll_loss": 1.0014435052871704, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11252591057151318, |
|
"grad_norm": 10.375, |
|
"learning_rate": 4.99764243036258e-07, |
|
"log_odds_chosen": -0.4207037091255188, |
|
"log_odds_ratio": -0.9974331855773926, |
|
"logits/chosen": -2.27175235748291, |
|
"logits/rejected": -2.242116689682007, |
|
"logps/chosen": -0.6407202482223511, |
|
"logps/rejected": -0.4429788589477539, |
|
"loss": 1.0095, |
|
"nll_loss": 1.017865777015686, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11844832691738229, |
|
"grad_norm": 9.5, |
|
"learning_rate": 4.994863481875841e-07, |
|
"log_odds_chosen": -0.4031923711299896, |
|
"log_odds_ratio": -0.973800003528595, |
|
"logits/chosen": -2.221717119216919, |
|
"logits/rejected": -2.18719482421875, |
|
"logps/chosen": -0.6306296586990356, |
|
"logps/rejected": -0.43233147263526917, |
|
"loss": 1.0045, |
|
"nll_loss": 0.9697571992874146, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12437074326325141, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 4.991017407876165e-07, |
|
"log_odds_chosen": -0.4411424994468689, |
|
"log_odds_ratio": -1.0120642185211182, |
|
"logits/chosen": -2.238583564758301, |
|
"logits/rejected": -2.1919620037078857, |
|
"logps/chosen": -0.7006498575210571, |
|
"logps/rejected": -0.4852658808231354, |
|
"loss": 0.9832, |
|
"nll_loss": 1.0057976245880127, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13029315960912052, |
|
"grad_norm": 9.0, |
|
"learning_rate": 4.98610585344102e-07, |
|
"log_odds_chosen": -0.25588923692703247, |
|
"log_odds_ratio": -0.9158498048782349, |
|
"logits/chosen": -2.258283853530884, |
|
"logits/rejected": -2.2223126888275146, |
|
"logps/chosen": -0.5977104306221008, |
|
"logps/rejected": -0.4761990010738373, |
|
"loss": 1.02, |
|
"nll_loss": 1.0466753244400024, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13621557595498965, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 4.980130919384768e-07, |
|
"log_odds_chosen": -0.5824810266494751, |
|
"log_odds_ratio": -1.1220190525054932, |
|
"logits/chosen": -2.2531580924987793, |
|
"logits/rejected": -2.2409615516662598, |
|
"logps/chosen": -0.7504315972328186, |
|
"logps/rejected": -0.43684881925582886, |
|
"loss": 1.0183, |
|
"nll_loss": 1.0061722993850708, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14213799230085875, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 4.973095161360105e-07, |
|
"log_odds_chosen": -0.44555410742759705, |
|
"log_odds_ratio": -1.0208032131195068, |
|
"logits/chosen": -2.2470836639404297, |
|
"logits/rejected": -2.214434862136841, |
|
"logps/chosen": -0.6731461882591248, |
|
"logps/rejected": -0.4670758843421936, |
|
"loss": 1.0354, |
|
"nll_loss": 1.0512316226959229, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14806040864672787, |
|
"grad_norm": 8.875, |
|
"learning_rate": 4.965001588764913e-07, |
|
"log_odds_chosen": -0.4621347486972809, |
|
"log_odds_ratio": -1.0333962440490723, |
|
"logits/chosen": -2.274649143218994, |
|
"logits/rejected": -2.241596221923828, |
|
"logps/chosen": -0.6809024214744568, |
|
"logps/rejected": -0.4347941279411316, |
|
"loss": 1.0076, |
|
"nll_loss": 1.007010817527771, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15398282499259697, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 4.955853663455072e-07, |
|
"log_odds_chosen": -0.3350891172885895, |
|
"log_odds_ratio": -0.9613872766494751, |
|
"logits/chosen": -2.260413885116577, |
|
"logits/rejected": -2.2278614044189453, |
|
"logps/chosen": -0.6426165699958801, |
|
"logps/rejected": -0.45530933141708374, |
|
"loss": 0.9607, |
|
"nll_loss": 0.9523956179618835, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1599052413384661, |
|
"grad_norm": 9.125, |
|
"learning_rate": 4.945655298263713e-07, |
|
"log_odds_chosen": -0.4467865824699402, |
|
"log_odds_ratio": -1.0078147649765015, |
|
"logits/chosen": -2.2099037170410156, |
|
"logits/rejected": -2.183701992034912, |
|
"logps/chosen": -0.6576748490333557, |
|
"logps/rejected": -0.4429934620857239, |
|
"loss": 1.0429, |
|
"nll_loss": 1.075627326965332, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16582765768433522, |
|
"grad_norm": 7.96875, |
|
"learning_rate": 4.934410855327585e-07, |
|
"log_odds_chosen": -0.38402479887008667, |
|
"log_odds_ratio": -0.9679163098335266, |
|
"logits/chosen": -2.292367458343506, |
|
"logits/rejected": -2.2721431255340576, |
|
"logps/chosen": -0.6379308104515076, |
|
"logps/rejected": -0.442401647567749, |
|
"loss": 0.9621, |
|
"nll_loss": 1.016234278678894, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1717500740302043, |
|
"grad_norm": 9.0, |
|
"learning_rate": 4.922125144221252e-07, |
|
"log_odds_chosen": -0.4171718955039978, |
|
"log_odds_ratio": -0.9991844296455383, |
|
"logits/chosen": -2.259284257888794, |
|
"logits/rejected": -2.205514430999756, |
|
"logps/chosen": -0.6329622268676758, |
|
"logps/rejected": -0.43889325857162476, |
|
"loss": 1.014, |
|
"nll_loss": 1.0359452962875366, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17767249037607344, |
|
"grad_norm": 8.125, |
|
"learning_rate": 4.90880341989989e-07, |
|
"log_odds_chosen": -0.33935636281967163, |
|
"log_odds_ratio": -0.9420417547225952, |
|
"logits/chosen": -2.2610156536102295, |
|
"logits/rejected": -2.2359061241149902, |
|
"logps/chosen": -0.6213563084602356, |
|
"logps/rejected": -0.44430437684059143, |
|
"loss": 0.9833, |
|
"nll_loss": 0.9867600202560425, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18359490672194256, |
|
"grad_norm": 8.375, |
|
"learning_rate": 4.894451380451589e-07, |
|
"log_odds_chosen": -0.5468162298202515, |
|
"log_odds_ratio": -1.0870132446289062, |
|
"logits/chosen": -2.241508722305298, |
|
"logits/rejected": -2.22690749168396, |
|
"logps/chosen": -0.7115592360496521, |
|
"logps/rejected": -0.43017569184303284, |
|
"loss": 1.0006, |
|
"nll_loss": 0.994620680809021, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.18951732306781166, |
|
"grad_norm": 9.25, |
|
"learning_rate": 4.879075164660124e-07, |
|
"log_odds_chosen": -0.3401740491390228, |
|
"log_odds_ratio": -0.9383065104484558, |
|
"logits/chosen": -2.2438132762908936, |
|
"logits/rejected": -2.209188938140869, |
|
"logps/chosen": -0.5985551476478577, |
|
"logps/rejected": -0.43559733033180237, |
|
"loss": 0.94, |
|
"nll_loss": 0.9133344888687134, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19543973941368079, |
|
"grad_norm": 10.5, |
|
"learning_rate": 4.862681349379212e-07, |
|
"log_odds_chosen": -0.3794914484024048, |
|
"log_odds_ratio": -0.9728193283081055, |
|
"logits/chosen": -2.2533066272735596, |
|
"logits/rejected": -2.1980607509613037, |
|
"logps/chosen": -0.6138342022895813, |
|
"logps/rejected": -0.44097796082496643, |
|
"loss": 1.0041, |
|
"nll_loss": 1.0256803035736084, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2013621557595499, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 4.8452769467194e-07, |
|
"log_odds_chosen": -0.40433868765830994, |
|
"log_odds_ratio": -0.9825445413589478, |
|
"logits/chosen": -2.2585511207580566, |
|
"logits/rejected": -2.233630657196045, |
|
"logps/chosen": -0.6160660982131958, |
|
"logps/rejected": -0.4248103201389313, |
|
"loss": 0.9778, |
|
"nll_loss": 0.9514611959457397, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.207284572105419, |
|
"grad_norm": 7.625, |
|
"learning_rate": 4.82686940104879e-07, |
|
"log_odds_chosen": -0.4215853214263916, |
|
"log_odds_ratio": -1.01924729347229, |
|
"logits/chosen": -2.30430269241333, |
|
"logits/rejected": -2.272357702255249, |
|
"logps/chosen": -0.645369291305542, |
|
"logps/rejected": -0.4133967459201813, |
|
"loss": 0.9287, |
|
"nll_loss": 0.9160087704658508, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21320698845128813, |
|
"grad_norm": 8.125, |
|
"learning_rate": 4.807466585808856e-07, |
|
"log_odds_chosen": -0.3686332702636719, |
|
"log_odds_ratio": -0.9627587199211121, |
|
"logits/chosen": -2.282811403274536, |
|
"logits/rejected": -2.2714035511016846, |
|
"logps/chosen": -0.5806415677070618, |
|
"logps/rejected": -0.4163896143436432, |
|
"loss": 0.987, |
|
"nll_loss": 0.9767228960990906, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.21912940479715723, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 4.787076800146752e-07, |
|
"log_odds_chosen": -0.34714585542678833, |
|
"log_odds_ratio": -0.9853572845458984, |
|
"logits/chosen": -2.2601521015167236, |
|
"logits/rejected": -2.2084286212921143, |
|
"logps/chosen": -0.6458638906478882, |
|
"logps/rejected": -0.4349249005317688, |
|
"loss": 0.908, |
|
"nll_loss": 0.8895160555839539, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22505182114302635, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 4.765708765365526e-07, |
|
"log_odds_chosen": -0.30534738302230835, |
|
"log_odds_ratio": -0.9373781085014343, |
|
"logits/chosen": -2.2653586864471436, |
|
"logits/rejected": -2.254210948944092, |
|
"logps/chosen": -0.576322615146637, |
|
"logps/rejected": -0.4294815957546234, |
|
"loss": 1.0005, |
|
"nll_loss": 0.958886981010437, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.23097423748889548, |
|
"grad_norm": 9.875, |
|
"learning_rate": 4.7433716211937587e-07, |
|
"log_odds_chosen": -0.5105515122413635, |
|
"log_odds_ratio": -1.0566070079803467, |
|
"logits/chosen": -2.328101396560669, |
|
"logits/rejected": -2.302281141281128, |
|
"logps/chosen": -0.6501199007034302, |
|
"logps/rejected": -0.404310941696167, |
|
"loss": 0.9396, |
|
"nll_loss": 0.9967532157897949, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23689665383476458, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 4.720074921876245e-07, |
|
"log_odds_chosen": -0.45067232847213745, |
|
"log_odds_ratio": -1.0197547674179077, |
|
"logits/chosen": -2.340407133102417, |
|
"logits/rejected": -2.293402910232544, |
|
"logps/chosen": -0.6130900382995605, |
|
"logps/rejected": -0.41540417075157166, |
|
"loss": 0.9442, |
|
"nll_loss": 0.9423254132270813, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2428190701806337, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 4.6958286320873593e-07, |
|
"log_odds_chosen": -0.43627676367759705, |
|
"log_odds_ratio": -0.9863921403884888, |
|
"logits/chosen": -2.2813560962677, |
|
"logits/rejected": -2.275886058807373, |
|
"logps/chosen": -0.6022886633872986, |
|
"logps/rejected": -0.4021386504173279, |
|
"loss": 0.9658, |
|
"nll_loss": 0.9948114156723022, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24874148652650283, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 4.6706431226688804e-07, |
|
"log_odds_chosen": -0.3637348413467407, |
|
"log_odds_ratio": -0.9635465741157532, |
|
"logits/chosen": -2.2663254737854004, |
|
"logits/rejected": -2.2325570583343506, |
|
"logps/chosen": -0.6079740524291992, |
|
"logps/rejected": -0.42877498269081116, |
|
"loss": 0.971, |
|
"nll_loss": 0.9684462547302246, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25466390287237195, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 4.6445291661940777e-07, |
|
"log_odds_chosen": -0.29998743534088135, |
|
"log_odds_ratio": -0.9151178598403931, |
|
"logits/chosen": -2.288652181625366, |
|
"logits/rejected": -2.28438138961792, |
|
"logps/chosen": -0.5727067589759827, |
|
"logps/rejected": -0.43537649512290955, |
|
"loss": 0.9344, |
|
"nll_loss": 0.8895971179008484, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.26058631921824105, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.6174979323599715e-07, |
|
"log_odds_chosen": -0.5159381031990051, |
|
"log_odds_ratio": -1.0749253034591675, |
|
"logits/chosen": -2.2701315879821777, |
|
"logits/rejected": -2.2190845012664795, |
|
"logps/chosen": -0.7043232321739197, |
|
"logps/rejected": -0.4317665696144104, |
|
"loss": 0.9929, |
|
"nll_loss": 1.0871878862380981, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26650873556411014, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 4.5895609832097277e-07, |
|
"log_odds_chosen": -0.38775309920310974, |
|
"log_odds_ratio": -1.0040466785430908, |
|
"logits/chosen": -2.2794992923736572, |
|
"logits/rejected": -2.2638792991638184, |
|
"logps/chosen": -0.6565039157867432, |
|
"logps/rejected": -0.43878334760665894, |
|
"loss": 0.9716, |
|
"nll_loss": 0.9555328488349915, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2724311519099793, |
|
"grad_norm": 8.25, |
|
"learning_rate": 4.560730268187236e-07, |
|
"log_odds_chosen": -0.3349025249481201, |
|
"log_odds_ratio": -0.9378219842910767, |
|
"logits/chosen": -2.282761812210083, |
|
"logits/rejected": -2.244011878967285, |
|
"logps/chosen": -0.5650533437728882, |
|
"logps/rejected": -0.4169080853462219, |
|
"loss": 0.9547, |
|
"nll_loss": 0.9367356300354004, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2783535682558484, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 4.531018119025989e-07, |
|
"log_odds_chosen": -0.24693968892097473, |
|
"log_odds_ratio": -0.9230139851570129, |
|
"logits/chosen": -2.338200807571411, |
|
"logits/rejected": -2.3114407062530518, |
|
"logps/chosen": -0.5866008996963501, |
|
"logps/rejected": -0.498542845249176, |
|
"loss": 0.9863, |
|
"nll_loss": 1.0312178134918213, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2842759846017175, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 4.5004372444744376e-07, |
|
"log_odds_chosen": -0.259705126285553, |
|
"log_odds_ratio": -0.9033578634262085, |
|
"logits/chosen": -2.281229257583618, |
|
"logits/rejected": -2.259384870529175, |
|
"logps/chosen": -0.6026913523674011, |
|
"logps/rejected": -0.46954187750816345, |
|
"loss": 0.9559, |
|
"nll_loss": 0.9717810750007629, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2901984009475866, |
|
"grad_norm": 9.0, |
|
"learning_rate": 4.4690007248600967e-07, |
|
"log_odds_chosen": -0.3773840069770813, |
|
"log_odds_ratio": -0.9825248718261719, |
|
"logits/chosen": -2.2721426486968994, |
|
"logits/rejected": -2.2558834552764893, |
|
"logps/chosen": -0.629915714263916, |
|
"logps/rejected": -0.43304410576820374, |
|
"loss": 0.954, |
|
"nll_loss": 0.9644275903701782, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.29612081729345574, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 4.436722006494701e-07, |
|
"log_odds_chosen": -0.5259193778038025, |
|
"log_odds_ratio": -1.1190059185028076, |
|
"logits/chosen": -2.266916275024414, |
|
"logits/rejected": -2.243081569671631, |
|
"logps/chosen": -0.7579408884048462, |
|
"logps/rejected": -0.4302619397640228, |
|
"loss": 0.9695, |
|
"nll_loss": 0.9956067204475403, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.30204323363932484, |
|
"grad_norm": 8.5, |
|
"learning_rate": 4.4036148959228356e-07, |
|
"log_odds_chosen": -0.4430968165397644, |
|
"log_odds_ratio": -1.0375418663024902, |
|
"logits/chosen": -2.300400733947754, |
|
"logits/rejected": -2.2604432106018066, |
|
"logps/chosen": -0.6584800481796265, |
|
"logps/rejected": -0.4138873517513275, |
|
"loss": 0.9756, |
|
"nll_loss": 0.9368442296981812, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.30796564998519393, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 4.3696935540164705e-07, |
|
"log_odds_chosen": -0.3859555423259735, |
|
"log_odds_ratio": -0.9752845764160156, |
|
"logits/chosen": -2.2633957862854004, |
|
"logits/rejected": -2.2417874336242676, |
|
"logps/chosen": -0.6037057638168335, |
|
"logps/rejected": -0.41955527663230896, |
|
"loss": 0.9235, |
|
"nll_loss": 0.9441665410995483, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3138880663310631, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 4.334972489917947e-07, |
|
"log_odds_chosen": -0.29654431343078613, |
|
"log_odds_ratio": -0.9258224368095398, |
|
"logits/chosen": -2.3264002799987793, |
|
"logits/rejected": -2.269259214401245, |
|
"logps/chosen": -0.5935055017471313, |
|
"logps/rejected": -0.4429333806037903, |
|
"loss": 0.9497, |
|
"nll_loss": 0.9263819456100464, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3198104826769322, |
|
"grad_norm": 9.0, |
|
"learning_rate": 4.299466554833997e-07, |
|
"log_odds_chosen": -0.400839239358902, |
|
"log_odds_ratio": -0.9843107461929321, |
|
"logits/chosen": -2.30580472946167, |
|
"logits/rejected": -2.256434440612793, |
|
"logps/chosen": -0.5819273591041565, |
|
"logps/rejected": -0.408183753490448, |
|
"loss": 0.9515, |
|
"nll_loss": 0.92247474193573, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3257328990228013, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 4.263190935683449e-07, |
|
"log_odds_chosen": -0.32894009351730347, |
|
"log_odds_ratio": -0.9418984651565552, |
|
"logits/chosen": -2.282500743865967, |
|
"logits/rejected": -2.24668025970459, |
|
"logps/chosen": -0.5584912896156311, |
|
"logps/rejected": -0.4048989713191986, |
|
"loss": 0.8853, |
|
"nll_loss": 0.8602296113967896, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.33165531536867043, |
|
"grad_norm": 9.0, |
|
"learning_rate": 4.2261611486013437e-07, |
|
"log_odds_chosen": -0.39398467540740967, |
|
"log_odds_ratio": -0.9864169955253601, |
|
"logits/chosen": -2.3277463912963867, |
|
"logits/rejected": -2.2908778190612793, |
|
"logps/chosen": -0.618613064289093, |
|
"logps/rejected": -0.4362561106681824, |
|
"loss": 0.961, |
|
"nll_loss": 0.9670404195785522, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.33757773171453953, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 4.188393032302233e-07, |
|
"log_odds_chosen": -0.2161109894514084, |
|
"log_odds_ratio": -0.8888469934463501, |
|
"logits/chosen": -2.266890048980713, |
|
"logits/rejected": -2.2078969478607178, |
|
"logps/chosen": -0.5593982934951782, |
|
"logps/rejected": -0.48008909821510315, |
|
"loss": 0.9285, |
|
"nll_loss": 0.9204473495483398, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3435001480604086, |
|
"grad_norm": 10.75, |
|
"learning_rate": 4.1499027413055e-07, |
|
"log_odds_chosen": -0.41526442766189575, |
|
"log_odds_ratio": -0.9975423812866211, |
|
"logits/chosen": -2.2734172344207764, |
|
"logits/rejected": -2.2457797527313232, |
|
"logps/chosen": -0.6249933838844299, |
|
"logps/rejected": -0.42492228746414185, |
|
"loss": 0.9404, |
|
"nll_loss": 0.9193958044052124, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3494225644062778, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 4.1107067390256056e-07, |
|
"log_odds_chosen": -0.45963993668556213, |
|
"log_odds_ratio": -1.0648995637893677, |
|
"logits/chosen": -2.3240678310394287, |
|
"logits/rejected": -2.2981557846069336, |
|
"logps/chosen": -0.723495364189148, |
|
"logps/rejected": -0.454792320728302, |
|
"loss": 0.9656, |
|
"nll_loss": 1.0240063667297363, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3553449807521469, |
|
"grad_norm": 9.875, |
|
"learning_rate": 4.0708217907302047e-07, |
|
"log_odds_chosen": -0.4009949564933777, |
|
"log_odds_ratio": -0.9855114221572876, |
|
"logits/chosen": -2.2710177898406982, |
|
"logits/rejected": -2.237403392791748, |
|
"logps/chosen": -0.6120108366012573, |
|
"logps/rejected": -0.43240681290626526, |
|
"loss": 0.9575, |
|
"nll_loss": 0.9712766408920288, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.361267397098016, |
|
"grad_norm": 12.0, |
|
"learning_rate": 4.030264956369157e-07, |
|
"log_odds_chosen": -0.39438915252685547, |
|
"log_odds_ratio": -0.9749253988265991, |
|
"logits/chosen": -2.31217098236084, |
|
"logits/rejected": -2.273338794708252, |
|
"logps/chosen": -0.5822636485099792, |
|
"logps/rejected": -0.4064372181892395, |
|
"loss": 0.9477, |
|
"nll_loss": 0.9778239130973816, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3671898134438851, |
|
"grad_norm": 9.375, |
|
"learning_rate": 3.989053583277492e-07, |
|
"log_odds_chosen": -0.5915114879608154, |
|
"log_odds_ratio": -1.1409562826156616, |
|
"logits/chosen": -2.3212368488311768, |
|
"logits/rejected": -2.3024001121520996, |
|
"logps/chosen": -0.7573744654655457, |
|
"logps/rejected": -0.4205297827720642, |
|
"loss": 0.9491, |
|
"nll_loss": 0.9616823196411133, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3731122297897542, |
|
"grad_norm": 8.125, |
|
"learning_rate": 3.947205298755447e-07, |
|
"log_odds_chosen": -0.32023632526397705, |
|
"log_odds_ratio": -0.9460951685905457, |
|
"logits/chosen": -2.2791507244110107, |
|
"logits/rejected": -2.2480525970458984, |
|
"logps/chosen": -0.6137298345565796, |
|
"logps/rejected": -0.4523869454860687, |
|
"loss": 0.9577, |
|
"nll_loss": 0.9420009851455688, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3790346461356233, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 3.9047380025287634e-07, |
|
"log_odds_chosen": -0.31926944851875305, |
|
"log_odds_ratio": -0.9340398907661438, |
|
"logits/chosen": -2.288464069366455, |
|
"logits/rejected": -2.257875442504883, |
|
"logps/chosen": -0.5796951055526733, |
|
"logps/rejected": -0.43441399931907654, |
|
"loss": 0.9511, |
|
"nll_loss": 0.935884952545166, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3849570624814925, |
|
"grad_norm": 10.125, |
|
"learning_rate": 3.8616698590924523e-07, |
|
"log_odds_chosen": -0.3541373610496521, |
|
"log_odds_ratio": -0.9547072649002075, |
|
"logits/chosen": -2.3075475692749023, |
|
"logits/rejected": -2.261488437652588, |
|
"logps/chosen": -0.6222845315933228, |
|
"logps/rejected": -0.4457763135433197, |
|
"loss": 0.9292, |
|
"nll_loss": 0.941017746925354, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.39087947882736157, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 3.8180192899413123e-07, |
|
"log_odds_chosen": -0.373871773481369, |
|
"log_odds_ratio": -0.963890552520752, |
|
"logits/chosen": -2.3060686588287354, |
|
"logits/rejected": -2.2961385250091553, |
|
"logps/chosen": -0.5887154936790466, |
|
"logps/rejected": -0.41268324851989746, |
|
"loss": 0.9644, |
|
"nll_loss": 0.9328317642211914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.39680189517323067, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 3.7738049656905225e-07, |
|
"log_odds_chosen": -0.3005954623222351, |
|
"log_odds_ratio": -0.9146180152893066, |
|
"logits/chosen": -2.241210460662842, |
|
"logits/rejected": -2.197197437286377, |
|
"logps/chosen": -0.5695523023605347, |
|
"logps/rejected": -0.4374919533729553, |
|
"loss": 0.9635, |
|
"nll_loss": 0.9454113841056824, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4027243115190998, |
|
"grad_norm": 8.25, |
|
"learning_rate": 3.7290457980896787e-07, |
|
"log_odds_chosen": -0.2508184611797333, |
|
"log_odds_ratio": -0.8950401544570923, |
|
"logits/chosen": -2.310917377471924, |
|
"logits/rejected": -2.2810654640197754, |
|
"logps/chosen": -0.5575405955314636, |
|
"logps/rejected": -0.44296175241470337, |
|
"loss": 0.9245, |
|
"nll_loss": 0.9060578346252441, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4086467278649689, |
|
"grad_norm": 8.375, |
|
"learning_rate": 3.68376093193369e-07, |
|
"log_odds_chosen": -0.35061341524124146, |
|
"log_odds_ratio": -0.9449998140335083, |
|
"logits/chosen": -2.3210480213165283, |
|
"logits/rejected": -2.281230926513672, |
|
"logps/chosen": -0.5540003776550293, |
|
"logps/rejected": -0.4034114480018616, |
|
"loss": 0.9207, |
|
"nll_loss": 0.9037810564041138, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.414569144210838, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 3.637969736873992e-07, |
|
"log_odds_chosen": -0.29555535316467285, |
|
"log_odds_ratio": -0.9367197155952454, |
|
"logits/chosen": -2.2944698333740234, |
|
"logits/rejected": -2.2611544132232666, |
|
"logps/chosen": -0.5592113733291626, |
|
"logps/rejected": -0.42597031593322754, |
|
"loss": 0.9637, |
|
"nll_loss": 0.9748933911323547, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4204915605567071, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 3.591691799133587e-07, |
|
"log_odds_chosen": -0.27811819314956665, |
|
"log_odds_ratio": -0.8972823023796082, |
|
"logits/chosen": -2.3404221534729004, |
|
"logits/rejected": -2.3104233741760254, |
|
"logps/chosen": -0.5548882484436035, |
|
"logps/rejected": -0.42241740226745605, |
|
"loss": 0.9489, |
|
"nll_loss": 0.9462203979492188, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.42641397690257626, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 3.5449469131294476e-07, |
|
"log_odds_chosen": -0.282146155834198, |
|
"log_odds_ratio": -0.9153865575790405, |
|
"logits/chosen": -2.3050596714019775, |
|
"logits/rejected": -2.2582859992980957, |
|
"logps/chosen": -0.5491407513618469, |
|
"logps/rejected": -0.4175952970981598, |
|
"loss": 0.9258, |
|
"nll_loss": 0.9185633659362793, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.43233639324844536, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 3.497755073005868e-07, |
|
"log_odds_chosen": -0.17704807221889496, |
|
"log_odds_ratio": -0.8550702333450317, |
|
"logits/chosen": -2.304471254348755, |
|
"logits/rejected": -2.2704811096191406, |
|
"logps/chosen": -0.5407411456108093, |
|
"logps/rejected": -0.43452388048171997, |
|
"loss": 0.9148, |
|
"nll_loss": 0.8776341676712036, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.43825880959431446, |
|
"grad_norm": 14.0, |
|
"learning_rate": 3.4501364640823926e-07, |
|
"log_odds_chosen": -0.4160383343696594, |
|
"log_odds_ratio": -0.9982725381851196, |
|
"logits/chosen": -2.3177871704101562, |
|
"logits/rejected": -2.291195869445801, |
|
"logps/chosen": -0.6620553135871887, |
|
"logps/rejected": -0.4509620666503906, |
|
"loss": 0.9449, |
|
"nll_loss": 0.9611420631408691, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4441812259401836, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 3.402111454219966e-07, |
|
"log_odds_chosen": -0.2541792690753937, |
|
"log_odds_ratio": -0.8975493311882019, |
|
"logits/chosen": -2.3212180137634277, |
|
"logits/rejected": -2.2709405422210693, |
|
"logps/chosen": -0.5664907693862915, |
|
"logps/rejected": -0.4353105127811432, |
|
"loss": 0.9301, |
|
"nll_loss": 0.9432824850082397, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4501036422860527, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 3.353700585109005e-07, |
|
"log_odds_chosen": -0.2790587842464447, |
|
"log_odds_ratio": -0.9118951559066772, |
|
"logits/chosen": -2.3148138523101807, |
|
"logits/rejected": -2.2849326133728027, |
|
"logps/chosen": -0.5668213963508606, |
|
"logps/rejected": -0.4337525963783264, |
|
"loss": 0.9239, |
|
"nll_loss": 0.9522818326950073, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4560260586319218, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 3.304924563483129e-07, |
|
"log_odds_chosen": -0.31332454085350037, |
|
"log_odds_ratio": -0.9554667472839355, |
|
"logits/chosen": -2.329709053039551, |
|
"logits/rejected": -2.3159825801849365, |
|
"logps/chosen": -0.6328242421150208, |
|
"logps/rejected": -0.4525510370731354, |
|
"loss": 0.9725, |
|
"nll_loss": 0.9982641935348511, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.46194847497779096, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 3.255804252262283e-07, |
|
"log_odds_chosen": -0.26954448223114014, |
|
"log_odds_ratio": -0.9041155576705933, |
|
"logits/chosen": -2.26902437210083, |
|
"logits/rejected": -2.2395756244659424, |
|
"logps/chosen": -0.5438047647476196, |
|
"logps/rejected": -0.4186398386955261, |
|
"loss": 0.9454, |
|
"nll_loss": 0.9862927198410034, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.46787089132366005, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 3.2063606616290626e-07, |
|
"log_odds_chosen": -0.40437692403793335, |
|
"log_odds_ratio": -0.991305947303772, |
|
"logits/chosen": -2.2474241256713867, |
|
"logits/rejected": -2.2076640129089355, |
|
"logps/chosen": -0.5990616083145142, |
|
"logps/rejected": -0.40946364402770996, |
|
"loss": 0.8641, |
|
"nll_loss": 0.827691376209259, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.47379330766952915, |
|
"grad_norm": 12.25, |
|
"learning_rate": 3.1566149400420523e-07, |
|
"log_odds_chosen": -0.3424193859100342, |
|
"log_odds_ratio": -0.9442498087882996, |
|
"logits/chosen": -2.300968885421753, |
|
"logits/rejected": -2.289825201034546, |
|
"logps/chosen": -0.6068278551101685, |
|
"logps/rejected": -0.4422214925289154, |
|
"loss": 0.9572, |
|
"nll_loss": 0.9411390423774719, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4797157240153983, |
|
"grad_norm": 8.75, |
|
"learning_rate": 3.1065883651900087e-07, |
|
"log_odds_chosen": -0.3020106852054596, |
|
"log_odds_ratio": -0.9359525442123413, |
|
"logits/chosen": -2.288480281829834, |
|
"logits/rejected": -2.246896982192993, |
|
"logps/chosen": -0.5823680758476257, |
|
"logps/rejected": -0.44429031014442444, |
|
"loss": 0.9677, |
|
"nll_loss": 0.9093478918075562, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4856381403612674, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 3.056302334890786e-07, |
|
"log_odds_chosen": -0.38523969054222107, |
|
"log_odds_ratio": -0.9780759811401367, |
|
"logits/chosen": -2.294841766357422, |
|
"logits/rejected": -2.2723891735076904, |
|
"logps/chosen": -0.6043334603309631, |
|
"logps/rejected": -0.4185991883277893, |
|
"loss": 0.9121, |
|
"nll_loss": 0.9030720591545105, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4915605567071365, |
|
"grad_norm": 8.75, |
|
"learning_rate": 3.0057783579388586e-07, |
|
"log_odds_chosen": -0.24561011791229248, |
|
"log_odds_ratio": -0.8836873173713684, |
|
"logits/chosen": -2.2996482849121094, |
|
"logits/rejected": -2.258457660675049, |
|
"logps/chosen": -0.5478182435035706, |
|
"logps/rejected": -0.440875768661499, |
|
"loss": 0.928, |
|
"nll_loss": 0.9274915456771851, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.49748297305300565, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 2.9550380449053907e-07, |
|
"log_odds_chosen": -0.26652732491493225, |
|
"log_odds_ratio": -0.9008363485336304, |
|
"logits/chosen": -2.2529563903808594, |
|
"logits/rejected": -2.2309823036193848, |
|
"logps/chosen": -0.5522275567054749, |
|
"logps/rejected": -0.42188987135887146, |
|
"loss": 0.914, |
|
"nll_loss": 0.8120133280754089, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5034053893988747, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 2.904103098894767e-07, |
|
"log_odds_chosen": -0.3553586006164551, |
|
"log_odds_ratio": -0.9903032183647156, |
|
"logits/chosen": -2.291224956512451, |
|
"logits/rejected": -2.2469000816345215, |
|
"logps/chosen": -0.6393681764602661, |
|
"logps/rejected": -0.42175260186195374, |
|
"loss": 0.9129, |
|
"nll_loss": 0.9170019030570984, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5093278057447439, |
|
"grad_norm": 9.75, |
|
"learning_rate": 2.852995306261545e-07, |
|
"log_odds_chosen": -0.2889431416988373, |
|
"log_odds_ratio": -0.9156063795089722, |
|
"logits/chosen": -2.318115472793579, |
|
"logits/rejected": -2.2808139324188232, |
|
"logps/chosen": -0.5667640566825867, |
|
"logps/rejected": -0.44533196091651917, |
|
"loss": 0.9772, |
|
"nll_loss": 0.9936600923538208, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.515250222090613, |
|
"grad_norm": 8.75, |
|
"learning_rate": 2.801736527291797e-07, |
|
"log_odds_chosen": -0.3678986728191376, |
|
"log_odds_ratio": -0.9755579233169556, |
|
"logits/chosen": -2.2834322452545166, |
|
"logits/rejected": -2.2398197650909424, |
|
"logps/chosen": -0.6312032341957092, |
|
"logps/rejected": -0.4318135380744934, |
|
"loss": 0.9391, |
|
"nll_loss": 0.8974191546440125, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5211726384364821, |
|
"grad_norm": 7.84375, |
|
"learning_rate": 2.750348686852836e-07, |
|
"log_odds_chosen": -0.40664905309677124, |
|
"log_odds_ratio": -0.9781969785690308, |
|
"logits/chosen": -2.3389241695404053, |
|
"logits/rejected": -2.2737958431243896, |
|
"logps/chosen": -0.6092024445533752, |
|
"logps/rejected": -0.4250633120536804, |
|
"loss": 0.971, |
|
"nll_loss": 0.9957748651504517, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5270950547823512, |
|
"grad_norm": 9.5, |
|
"learning_rate": 2.69885376501531e-07, |
|
"log_odds_chosen": -0.31569716334342957, |
|
"log_odds_ratio": -0.9389151334762573, |
|
"logits/chosen": -2.2705588340759277, |
|
"logits/rejected": -2.2545580863952637, |
|
"logps/chosen": -0.6091697812080383, |
|
"logps/rejected": -0.45014920830726624, |
|
"loss": 0.9568, |
|
"nll_loss": 0.9439749717712402, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5330174711282203, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 2.647273787651687e-07, |
|
"log_odds_chosen": -0.27334731817245483, |
|
"log_odds_ratio": -0.8902351260185242, |
|
"logits/chosen": -2.3029747009277344, |
|
"logits/rejected": -2.2809951305389404, |
|
"logps/chosen": -0.5580970644950867, |
|
"logps/rejected": -0.43120306730270386, |
|
"loss": 0.9333, |
|
"nll_loss": 0.9487207531929016, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5389398874740894, |
|
"grad_norm": 11.5, |
|
"learning_rate": 2.5956308170151526e-07, |
|
"log_odds_chosen": -0.5443618893623352, |
|
"log_odds_ratio": -1.1218284368515015, |
|
"logits/chosen": -2.275094985961914, |
|
"logits/rejected": -2.2452805042266846, |
|
"logps/chosen": -0.7426999807357788, |
|
"logps/rejected": -0.4159156382083893, |
|
"loss": 1.0105, |
|
"nll_loss": 0.9728318452835083, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5448623038199586, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 2.543946942302944e-07, |
|
"log_odds_chosen": -0.30027318000793457, |
|
"log_odds_ratio": -0.9199014902114868, |
|
"logits/chosen": -2.265780210494995, |
|
"logits/rejected": -2.2284467220306396, |
|
"logps/chosen": -0.5695372819900513, |
|
"logps/rejected": -0.4176994264125824, |
|
"loss": 0.9145, |
|
"nll_loss": 0.9305332899093628, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5507847201658277, |
|
"grad_norm": 9.375, |
|
"learning_rate": 2.492244270208158e-07, |
|
"log_odds_chosen": -0.2501292824745178, |
|
"log_odds_ratio": -0.8865777850151062, |
|
"logits/chosen": -2.273998260498047, |
|
"logits/rejected": -2.247119665145874, |
|
"logps/chosen": -0.562545895576477, |
|
"logps/rejected": -0.44244521856307983, |
|
"loss": 0.9004, |
|
"nll_loss": 0.9349013566970825, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5567071365116968, |
|
"grad_norm": 8.25, |
|
"learning_rate": 2.440544915464078e-07, |
|
"log_odds_chosen": -0.29280886054039, |
|
"log_odds_ratio": -0.9178652763366699, |
|
"logits/chosen": -2.304103374481201, |
|
"logits/rejected": -2.2641754150390625, |
|
"logps/chosen": -0.5466963052749634, |
|
"logps/rejected": -0.41387224197387695, |
|
"loss": 0.9221, |
|
"nll_loss": 0.9058642387390137, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5626295528575659, |
|
"grad_norm": 7.875, |
|
"learning_rate": 2.3888709913850593e-07, |
|
"log_odds_chosen": -0.30006080865859985, |
|
"log_odds_ratio": -0.9208847880363464, |
|
"logits/chosen": -2.350160837173462, |
|
"logits/rejected": -2.3101677894592285, |
|
"logps/chosen": -0.5762113928794861, |
|
"logps/rejected": -0.43552321195602417, |
|
"loss": 0.9856, |
|
"nll_loss": 0.938804030418396, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.568551969203435, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 2.337244600408025e-07, |
|
"log_odds_chosen": -0.39082369208335876, |
|
"log_odds_ratio": -0.9911519289016724, |
|
"logits/chosen": -2.3191308975219727, |
|
"logits/rejected": -2.2875494956970215, |
|
"logps/chosen": -0.6351069211959839, |
|
"logps/rejected": -0.4366016387939453, |
|
"loss": 0.9507, |
|
"nll_loss": 0.9739691615104675, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5744743855493041, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 2.2856878246386085e-07, |
|
"log_odds_chosen": -0.2834867537021637, |
|
"log_odds_ratio": -0.9153987765312195, |
|
"logits/chosen": -2.3153960704803467, |
|
"logits/rejected": -2.2916574478149414, |
|
"logps/chosen": -0.5750494003295898, |
|
"logps/rejected": -0.4384193420410156, |
|
"loss": 0.9944, |
|
"nll_loss": 1.0159144401550293, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5803968018951732, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 2.2342227164060035e-07, |
|
"log_odds_chosen": -0.37823957204818726, |
|
"log_odds_ratio": -0.9799555540084839, |
|
"logits/chosen": -2.2767786979675293, |
|
"logits/rejected": -2.2212002277374268, |
|
"logps/chosen": -0.6210430860519409, |
|
"logps/rejected": -0.4381546378135681, |
|
"loss": 0.9342, |
|
"nll_loss": 0.9048612713813782, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5863192182410424, |
|
"grad_norm": 7.625, |
|
"learning_rate": 2.182871288830533e-07, |
|
"log_odds_chosen": -0.3980916738510132, |
|
"log_odds_ratio": -0.9920517206192017, |
|
"logits/chosen": -2.3002982139587402, |
|
"logits/rejected": -2.237112522125244, |
|
"logps/chosen": -0.6255283951759338, |
|
"logps/rejected": -0.4390384256839752, |
|
"loss": 0.9667, |
|
"nll_loss": 0.9580439329147339, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5922416345869115, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 2.131655506408007e-07, |
|
"log_odds_chosen": -0.3284297287464142, |
|
"log_odds_ratio": -0.9404581785202026, |
|
"logits/chosen": -2.3054287433624268, |
|
"logits/rejected": -2.263627767562866, |
|
"logps/chosen": -0.5948997139930725, |
|
"logps/rejected": -0.4438301622867584, |
|
"loss": 0.9222, |
|
"nll_loss": 0.9223626852035522, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5981640509327806, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 2.0805972756148643e-07, |
|
"log_odds_chosen": -0.48507261276245117, |
|
"log_odds_ratio": -1.0847915410995483, |
|
"logits/chosen": -2.2977192401885986, |
|
"logits/rejected": -2.2829782962799072, |
|
"logps/chosen": -0.748909592628479, |
|
"logps/rejected": -0.43740910291671753, |
|
"loss": 0.9675, |
|
"nll_loss": 0.9915729761123657, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6040864672786497, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 2.0297184355381432e-07, |
|
"log_odds_chosen": -0.3442012667655945, |
|
"log_odds_ratio": -0.9447819590568542, |
|
"logits/chosen": -2.3124001026153564, |
|
"logits/rejected": -2.2729249000549316, |
|
"logps/chosen": -0.5671228170394897, |
|
"logps/rejected": -0.42450952529907227, |
|
"loss": 0.9345, |
|
"nll_loss": 0.9476312398910522, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6100088836245188, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 1.9790407485342638e-07, |
|
"log_odds_chosen": -0.5493720769882202, |
|
"log_odds_ratio": -1.1290843486785889, |
|
"logits/chosen": -2.3353946208953857, |
|
"logits/rejected": -2.2935006618499756, |
|
"logps/chosen": -0.7500286102294922, |
|
"logps/rejected": -0.4041396975517273, |
|
"loss": 0.9066, |
|
"nll_loss": 0.930087685585022, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6159312999703879, |
|
"grad_norm": 9.125, |
|
"learning_rate": 1.928585890920641e-07, |
|
"log_odds_chosen": -0.2760196626186371, |
|
"log_odds_ratio": -0.9122495651245117, |
|
"logits/chosen": -2.303187608718872, |
|
"logits/rejected": -2.267937183380127, |
|
"logps/chosen": -0.5642871856689453, |
|
"logps/rejected": -0.42797571420669556, |
|
"loss": 0.9504, |
|
"nll_loss": 0.9056134223937988, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6218537163162571, |
|
"grad_norm": 8.25, |
|
"learning_rate": 1.8783754437040902e-07, |
|
"log_odds_chosen": -0.34286069869995117, |
|
"log_odds_ratio": -0.9555438756942749, |
|
"logits/chosen": -2.282454013824463, |
|
"logits/rejected": -2.247560739517212, |
|
"logps/chosen": -0.5602587461471558, |
|
"logps/rejected": -0.41198721528053284, |
|
"loss": 0.9096, |
|
"nll_loss": 0.8938838243484497, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6277761326621262, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 1.8284308833500118e-07, |
|
"log_odds_chosen": -0.30499863624572754, |
|
"log_odds_ratio": -0.9316195249557495, |
|
"logits/chosen": -2.2879326343536377, |
|
"logits/rejected": -2.2628307342529297, |
|
"logps/chosen": -0.5727280378341675, |
|
"logps/rejected": -0.4329405725002289, |
|
"loss": 0.9415, |
|
"nll_loss": 0.9282618761062622, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6336985490079953, |
|
"grad_norm": 9.0, |
|
"learning_rate": 1.7787735725962756e-07, |
|
"log_odds_chosen": -0.371854692697525, |
|
"log_odds_ratio": -0.9633452296257019, |
|
"logits/chosen": -2.295264482498169, |
|
"logits/rejected": -2.2589457035064697, |
|
"logps/chosen": -0.608909010887146, |
|
"logps/rejected": -0.43556636571884155, |
|
"loss": 0.9874, |
|
"nll_loss": 0.9842734336853027, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6396209653538644, |
|
"grad_norm": 7.4375, |
|
"learning_rate": 1.7294247513157616e-07, |
|
"log_odds_chosen": -0.30945563316345215, |
|
"log_odds_ratio": -0.9170244932174683, |
|
"logits/chosen": -2.32027530670166, |
|
"logits/rejected": -2.2682743072509766, |
|
"logps/chosen": -0.5618667006492615, |
|
"logps/rejected": -0.42478686571121216, |
|
"loss": 0.9273, |
|
"nll_loss": 0.9494869112968445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6455433816997335, |
|
"grad_norm": 8.25, |
|
"learning_rate": 1.6804055274314494e-07, |
|
"log_odds_chosen": -0.28144484758377075, |
|
"log_odds_ratio": -0.9044340252876282, |
|
"logits/chosen": -2.2794411182403564, |
|
"logits/rejected": -2.256417989730835, |
|
"logps/chosen": -0.5532391667366028, |
|
"logps/rejected": -0.43312329053878784, |
|
"loss": 0.9267, |
|
"nll_loss": 0.8960529565811157, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6514657980456026, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 1.6317368678879496e-07, |
|
"log_odds_chosen": -0.28822919726371765, |
|
"log_odds_ratio": -0.9030183553695679, |
|
"logits/chosen": -2.3176181316375732, |
|
"logits/rejected": -2.2776379585266113, |
|
"logps/chosen": -0.5691734552383423, |
|
"logps/rejected": -0.4426758885383606, |
|
"loss": 0.9774, |
|
"nll_loss": 0.9514939188957214, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6573882143914718, |
|
"grad_norm": 10.0, |
|
"learning_rate": 1.5834395896833281e-07, |
|
"log_odds_chosen": -0.3918454051017761, |
|
"log_odds_ratio": -0.9793996810913086, |
|
"logits/chosen": -2.3301963806152344, |
|
"logits/rejected": -2.274294376373291, |
|
"logps/chosen": -0.6070703864097595, |
|
"logps/rejected": -0.42063984274864197, |
|
"loss": 0.9521, |
|
"nll_loss": 0.956030547618866, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6633106307373409, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 1.535534350965075e-07, |
|
"log_odds_chosen": -0.3459760546684265, |
|
"log_odds_ratio": -0.9434119462966919, |
|
"logits/chosen": -2.3291070461273193, |
|
"logits/rejected": -2.3154056072235107, |
|
"logps/chosen": -0.5571088790893555, |
|
"logps/rejected": -0.3965280055999756, |
|
"loss": 0.9099, |
|
"nll_loss": 0.9126838445663452, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66923304708321, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.4880416421940154e-07, |
|
"log_odds_chosen": -0.35412847995758057, |
|
"log_odds_ratio": -0.9552983045578003, |
|
"logits/chosen": -2.2702512741088867, |
|
"logits/rejected": -2.246307849884033, |
|
"logps/chosen": -0.6233401298522949, |
|
"logps/rejected": -0.4397760033607483, |
|
"loss": 1.0082, |
|
"nll_loss": 1.0226290225982666, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6751554634290791, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 1.4409817773799459e-07, |
|
"log_odds_chosen": -0.30409640073776245, |
|
"log_odds_ratio": -0.933831512928009, |
|
"logits/chosen": -2.2992305755615234, |
|
"logits/rejected": -2.2570438385009766, |
|
"logps/chosen": -0.6016424894332886, |
|
"logps/rejected": -0.44892677664756775, |
|
"loss": 0.9551, |
|
"nll_loss": 0.9244022369384766, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6810778797749482, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 1.3943748853927385e-07, |
|
"log_odds_chosen": -0.41090458631515503, |
|
"log_odds_ratio": -1.001075029373169, |
|
"logits/chosen": -2.299729824066162, |
|
"logits/rejected": -2.2916903495788574, |
|
"logps/chosen": -0.6525920033454895, |
|
"logps/rejected": -0.4317931532859802, |
|
"loss": 0.9303, |
|
"nll_loss": 0.9261299967765808, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6870002961208173, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 1.3482409013526436e-07, |
|
"log_odds_chosen": -0.42632365226745605, |
|
"log_odds_ratio": -1.0060193538665771, |
|
"logits/chosen": -2.2864601612091064, |
|
"logits/rejected": -2.2797439098358154, |
|
"logps/chosen": -0.615047037601471, |
|
"logps/rejected": -0.4228528141975403, |
|
"loss": 0.9621, |
|
"nll_loss": 0.9737777709960938, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6929227124666865, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 1.302599558103456e-07, |
|
"log_odds_chosen": -0.3416286110877991, |
|
"log_odds_ratio": -0.971416175365448, |
|
"logits/chosen": -2.3440544605255127, |
|
"logits/rejected": -2.3086702823638916, |
|
"logps/chosen": -0.6329351663589478, |
|
"logps/rejected": -0.44783586263656616, |
|
"loss": 0.9418, |
|
"nll_loss": 0.9567440152168274, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6988451288125556, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.257470377772214e-07, |
|
"log_odds_chosen": -0.37471523880958557, |
|
"log_odds_ratio": -0.9742682576179504, |
|
"logits/chosen": -2.320568323135376, |
|
"logits/rejected": -2.2932517528533936, |
|
"logps/chosen": -0.5956822633743286, |
|
"logps/rejected": -0.410876601934433, |
|
"loss": 0.9619, |
|
"nll_loss": 0.9405835270881653, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7047675451584247, |
|
"grad_norm": 7.96875, |
|
"learning_rate": 1.2128726634190046e-07, |
|
"log_odds_chosen": -0.3462384343147278, |
|
"log_odds_ratio": -0.9395328760147095, |
|
"logits/chosen": -2.3269436359405518, |
|
"logits/rejected": -2.2818374633789062, |
|
"logps/chosen": -0.5803397297859192, |
|
"logps/rejected": -0.4099668860435486, |
|
"loss": 0.9172, |
|
"nll_loss": 0.9042008519172668, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7106899615042938, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 1.1688254907804992e-07, |
|
"log_odds_chosen": -0.3338465392589569, |
|
"log_odds_ratio": -0.9492910504341125, |
|
"logits/chosen": -2.282212495803833, |
|
"logits/rejected": -2.2418830394744873, |
|
"logps/chosen": -0.6125479340553284, |
|
"logps/rejected": -0.45005935430526733, |
|
"loss": 0.9674, |
|
"nll_loss": 0.9735835194587708, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7166123778501629, |
|
"grad_norm": 7.5, |
|
"learning_rate": 1.1253477001106956e-07, |
|
"log_odds_chosen": -0.26436474919319153, |
|
"log_odds_ratio": -0.8985050916671753, |
|
"logits/chosen": -2.259978771209717, |
|
"logits/rejected": -2.223177433013916, |
|
"logps/chosen": -0.5662964582443237, |
|
"logps/rejected": -0.4440518915653229, |
|
"loss": 0.9408, |
|
"nll_loss": 0.90367591381073, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.722534794196032, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 1.0824578881224065e-07, |
|
"log_odds_chosen": -0.24436886608600616, |
|
"log_odds_ratio": -0.8822824358940125, |
|
"logits/chosen": -2.332968235015869, |
|
"logits/rejected": -2.3182759284973145, |
|
"logps/chosen": -0.539296567440033, |
|
"logps/rejected": -0.42076578736305237, |
|
"loss": 0.8899, |
|
"nll_loss": 0.8598573803901672, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.728457210541901, |
|
"grad_norm": 8.125, |
|
"learning_rate": 1.0401744000328918e-07, |
|
"log_odds_chosen": -0.28977444767951965, |
|
"log_odds_ratio": -0.9303587675094604, |
|
"logits/chosen": -2.2798142433166504, |
|
"logits/rejected": -2.2761147022247314, |
|
"logps/chosen": -0.5908230543136597, |
|
"logps/rejected": -0.449887216091156, |
|
"loss": 0.9195, |
|
"nll_loss": 0.9145529866218567, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7343796268877703, |
|
"grad_norm": 9.375, |
|
"learning_rate": 9.985153217170902e-08, |
|
"log_odds_chosen": -0.357065886259079, |
|
"log_odds_ratio": -0.9588850140571594, |
|
"logits/chosen": -2.3385989665985107, |
|
"logits/rejected": -2.323024034500122, |
|
"logps/chosen": -0.6093414425849915, |
|
"logps/rejected": -0.4347008168697357, |
|
"loss": 1.008, |
|
"nll_loss": 1.0087924003601074, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7403020432336394, |
|
"grad_norm": 10.5, |
|
"learning_rate": 9.574984719717553e-08, |
|
"log_odds_chosen": -0.3260490894317627, |
|
"log_odds_ratio": -0.9441172480583191, |
|
"logits/chosen": -2.321216344833374, |
|
"logits/rejected": -2.302063226699829, |
|
"logps/chosen": -0.5826759338378906, |
|
"logps/rejected": -0.4284025728702545, |
|
"loss": 0.9308, |
|
"nll_loss": 0.9714264869689941, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7462244595795084, |
|
"grad_norm": 8.25, |
|
"learning_rate": 9.171413948938459e-08, |
|
"log_odds_chosen": -0.3101581037044525, |
|
"log_odds_ratio": -0.929049015045166, |
|
"logits/chosen": -2.310981273651123, |
|
"logits/rejected": -2.2564587593078613, |
|
"logps/chosen": -0.6019686460494995, |
|
"logps/rejected": -0.45062392950057983, |
|
"loss": 0.9576, |
|
"nll_loss": 0.9787800908088684, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7521468759253775, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 8.774613523764049e-08, |
|
"log_odds_chosen": -0.369983971118927, |
|
"log_odds_ratio": -0.9563344120979309, |
|
"logits/chosen": -2.2968955039978027, |
|
"logits/rejected": -2.248944044113159, |
|
"logps/chosen": -0.5875356197357178, |
|
"logps/rejected": -0.4163094162940979, |
|
"loss": 0.9305, |
|
"nll_loss": 0.900018572807312, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.7580692922712466, |
|
"grad_norm": 9.125, |
|
"learning_rate": 8.384753167251412e-08, |
|
"log_odds_chosen": -0.32507094740867615, |
|
"log_odds_ratio": -0.9397505521774292, |
|
"logits/chosen": -2.2516260147094727, |
|
"logits/rejected": -2.226477861404419, |
|
"logps/chosen": -0.5629323124885559, |
|
"logps/rejected": -0.41151052713394165, |
|
"loss": 0.8916, |
|
"nll_loss": 0.8657590746879578, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.7639917086171157, |
|
"grad_norm": 8.375, |
|
"learning_rate": 8.001999633988942e-08, |
|
"log_odds_chosen": -0.3611569106578827, |
|
"log_odds_ratio": -0.957983672618866, |
|
"logits/chosen": -2.3322999477386475, |
|
"logits/rejected": -2.283409833908081, |
|
"logps/chosen": -0.5784574747085571, |
|
"logps/rejected": -0.4157342314720154, |
|
"loss": 0.8973, |
|
"nll_loss": 0.8929991722106934, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.769914124962985, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 7.62651663877042e-08, |
|
"log_odds_chosen": -0.26533371210098267, |
|
"log_odds_ratio": -0.9089031219482422, |
|
"logits/chosen": -2.2688136100769043, |
|
"logits/rejected": -2.2409274578094482, |
|
"logps/chosen": -0.5685082077980042, |
|
"logps/rejected": -0.4421761631965637, |
|
"loss": 0.9965, |
|
"nll_loss": 0.9676351547241211, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.775836541308854, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 7.258464786569549e-08, |
|
"log_odds_chosen": -0.28731244802474976, |
|
"log_odds_ratio": -0.9186748266220093, |
|
"logits/chosen": -2.3306045532226562, |
|
"logits/rejected": -2.2782888412475586, |
|
"logps/chosen": -0.5666372776031494, |
|
"logps/rejected": -0.43749627470970154, |
|
"loss": 0.9659, |
|
"nll_loss": 0.9544159770011902, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7817589576547231, |
|
"grad_norm": 9.75, |
|
"learning_rate": 6.898001503844483e-08, |
|
"log_odds_chosen": -0.5405977964401245, |
|
"log_odds_ratio": -1.1180508136749268, |
|
"logits/chosen": -2.3619232177734375, |
|
"logits/rejected": -2.3188281059265137, |
|
"logps/chosen": -0.7727476358413696, |
|
"logps/rejected": -0.4373859465122223, |
|
"loss": 0.9429, |
|
"nll_loss": 0.9921876192092896, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7876813740005922, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 6.545280971202014e-08, |
|
"log_odds_chosen": -0.2534041702747345, |
|
"log_odds_ratio": -0.8981307744979858, |
|
"logits/chosen": -2.320126533508301, |
|
"logits/rejected": -2.289376974105835, |
|
"logps/chosen": -0.5534666180610657, |
|
"logps/rejected": -0.4292474687099457, |
|
"loss": 0.9168, |
|
"nll_loss": 0.9440558552742004, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7936037903464613, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 6.200454057450022e-08, |
|
"log_odds_chosen": -0.36177825927734375, |
|
"log_odds_ratio": -0.9495649337768555, |
|
"logits/chosen": -2.2736241817474365, |
|
"logits/rejected": -2.226933479309082, |
|
"logps/chosen": -0.59937584400177, |
|
"logps/rejected": -0.42542099952697754, |
|
"loss": 0.9704, |
|
"nll_loss": 0.9059191942214966, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7995262066923304, |
|
"grad_norm": 9.0, |
|
"learning_rate": 5.863668255066492e-08, |
|
"log_odds_chosen": -0.313324511051178, |
|
"log_odds_ratio": -0.919102668762207, |
|
"logits/chosen": -2.270073890686035, |
|
"logits/rejected": -2.2372827529907227, |
|
"logps/chosen": -0.5768779516220093, |
|
"logps/rejected": -0.43518179655075073, |
|
"loss": 0.9124, |
|
"nll_loss": 0.9345908164978027, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8054486230381996, |
|
"grad_norm": 8.375, |
|
"learning_rate": 5.53506761711274e-08, |
|
"log_odds_chosen": -0.2887657880783081, |
|
"log_odds_ratio": -0.912114143371582, |
|
"logits/chosen": -2.305987596511841, |
|
"logits/rejected": -2.2752127647399902, |
|
"logps/chosen": -0.587549090385437, |
|
"logps/rejected": -0.4469973146915436, |
|
"loss": 0.9423, |
|
"nll_loss": 0.9892560243606567, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8113710393840687, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 5.2147926956177174e-08, |
|
"log_odds_chosen": -0.45511436462402344, |
|
"log_odds_ratio": -1.0450800657272339, |
|
"logits/chosen": -2.294468641281128, |
|
"logits/rejected": -2.283860445022583, |
|
"logps/chosen": -0.6607165336608887, |
|
"logps/rejected": -0.4220455586910248, |
|
"loss": 0.9395, |
|
"nll_loss": 0.9603630304336548, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8172934557299378, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 4.902980481459834e-08, |
|
"log_odds_chosen": -0.26191025972366333, |
|
"log_odds_ratio": -0.9043244123458862, |
|
"logits/chosen": -2.277843475341797, |
|
"logits/rejected": -2.248347520828247, |
|
"logps/chosen": -0.5755423307418823, |
|
"logps/rejected": -0.4490273594856262, |
|
"loss": 0.9003, |
|
"nll_loss": 0.9170975685119629, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.8232158720758069, |
|
"grad_norm": 9.25, |
|
"learning_rate": 4.5997643457719646e-08, |
|
"log_odds_chosen": -0.35434719920158386, |
|
"log_odds_ratio": -0.9506848454475403, |
|
"logits/chosen": -2.295780658721924, |
|
"logits/rejected": -2.2894127368927, |
|
"logps/chosen": -0.5851874351501465, |
|
"logps/rejected": -0.4212135672569275, |
|
"loss": 0.8958, |
|
"nll_loss": 0.9056984186172485, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.829138288421676, |
|
"grad_norm": 8.75, |
|
"learning_rate": 4.305273982894772e-08, |
|
"log_odds_chosen": -0.33616143465042114, |
|
"log_odds_ratio": -0.9503694772720337, |
|
"logits/chosen": -2.3287465572357178, |
|
"logits/rejected": -2.286414623260498, |
|
"logps/chosen": -0.6181541681289673, |
|
"logps/rejected": -0.4454525113105774, |
|
"loss": 0.9425, |
|
"nll_loss": 0.9352006912231445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8350607047675451, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 4.0196353549026786e-08, |
|
"log_odds_chosen": -0.30044835805892944, |
|
"log_odds_ratio": -0.9167086482048035, |
|
"logits/chosen": -2.298393487930298, |
|
"logits/rejected": -2.26066255569458, |
|
"logps/chosen": -0.5805574655532837, |
|
"logps/rejected": -0.43902960419654846, |
|
"loss": 0.9712, |
|
"nll_loss": 1.0039526224136353, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8409831211134142, |
|
"grad_norm": 7.75, |
|
"learning_rate": 3.742970637726181e-08, |
|
"log_odds_chosen": -0.179987370967865, |
|
"log_odds_ratio": -0.8582404255867004, |
|
"logits/chosen": -2.3168177604675293, |
|
"logits/rejected": -2.269207000732422, |
|
"logps/chosen": -0.5285545587539673, |
|
"logps/rejected": -0.44142407178878784, |
|
"loss": 0.9201, |
|
"nll_loss": 0.9034355878829956, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.8469055374592834, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 3.4753981688937284e-08, |
|
"log_odds_chosen": -0.3474799394607544, |
|
"log_odds_ratio": -0.9500767588615417, |
|
"logits/chosen": -2.2948362827301025, |
|
"logits/rejected": -2.2666220664978027, |
|
"logps/chosen": -0.5843050479888916, |
|
"logps/rejected": -0.4246344566345215, |
|
"loss": 0.9549, |
|
"nll_loss": 0.9555429220199585, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.8528279538051525, |
|
"grad_norm": 8.25, |
|
"learning_rate": 3.217032396915265e-08, |
|
"log_odds_chosen": -0.40568438172340393, |
|
"log_odds_ratio": -1.0082272291183472, |
|
"logits/chosen": -2.3024380207061768, |
|
"logits/rejected": -2.268986701965332, |
|
"logps/chosen": -0.6786967515945435, |
|
"logps/rejected": -0.44073349237442017, |
|
"loss": 0.9636, |
|
"nll_loss": 0.9871211051940918, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.8587503701510216, |
|
"grad_norm": 9.75, |
|
"learning_rate": 2.9679838323293404e-08, |
|
"log_odds_chosen": -0.4226885437965393, |
|
"log_odds_ratio": -1.0269486904144287, |
|
"logits/chosen": -2.2947676181793213, |
|
"logits/rejected": -2.2655680179595947, |
|
"logps/chosen": -0.6772679090499878, |
|
"logps/rejected": -0.44576793909072876, |
|
"loss": 0.9064, |
|
"nll_loss": 0.9240104556083679, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8646727864968907, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 2.728359000434488e-08, |
|
"log_odds_chosen": -0.34337377548217773, |
|
"log_odds_ratio": -0.9470311403274536, |
|
"logits/chosen": -2.3264529705047607, |
|
"logits/rejected": -2.290132761001587, |
|
"logps/chosen": -0.5465956926345825, |
|
"logps/rejected": -0.41503897309303284, |
|
"loss": 0.9481, |
|
"nll_loss": 0.8961936831474304, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.8705952028427598, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 2.498260395725302e-08, |
|
"log_odds_chosen": -0.3448273539543152, |
|
"log_odds_ratio": -0.9480770230293274, |
|
"logits/chosen": -2.293290376663208, |
|
"logits/rejected": -2.278653144836426, |
|
"logps/chosen": -0.5983850955963135, |
|
"logps/rejected": -0.44902753829956055, |
|
"loss": 0.9406, |
|
"nll_loss": 0.93559330701828, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8765176191886289, |
|
"grad_norm": 8.5, |
|
"learning_rate": 2.2777864380525426e-08, |
|
"log_odds_chosen": -0.29847949743270874, |
|
"log_odds_ratio": -0.9293072819709778, |
|
"logits/chosen": -2.2964632511138916, |
|
"logits/rejected": -2.2744767665863037, |
|
"logps/chosen": -0.59266597032547, |
|
"logps/rejected": -0.4337979853153229, |
|
"loss": 0.9143, |
|
"nll_loss": 0.8765565752983093, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.8824400355344981, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 2.0670314305261423e-08, |
|
"log_odds_chosen": -0.2956581115722656, |
|
"log_odds_ratio": -0.914827823638916, |
|
"logits/chosen": -2.312617540359497, |
|
"logits/rejected": -2.2852249145507812, |
|
"logps/chosen": -0.5551884770393372, |
|
"logps/rejected": -0.42470401525497437, |
|
"loss": 0.8906, |
|
"nll_loss": 0.9051159620285034, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8883624518803672, |
|
"grad_norm": 8.5, |
|
"learning_rate": 1.866085519178995e-08, |
|
"log_odds_chosen": -0.3115543723106384, |
|
"log_odds_ratio": -0.9432921409606934, |
|
"logits/chosen": -2.294912815093994, |
|
"logits/rejected": -2.2731730937957764, |
|
"logps/chosen": -0.6179423332214355, |
|
"logps/rejected": -0.47528520226478577, |
|
"loss": 0.9807, |
|
"nll_loss": 0.9959957003593445, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8942848682262363, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 1.675034654408894e-08, |
|
"log_odds_chosen": -0.3954925239086151, |
|
"log_odds_ratio": -0.966839611530304, |
|
"logits/chosen": -2.331923007965088, |
|
"logits/rejected": -2.3019633293151855, |
|
"logps/chosen": -0.5650435090065002, |
|
"logps/rejected": -0.40297931432724, |
|
"loss": 0.9211, |
|
"nll_loss": 0.9447514414787292, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.9002072845721054, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 1.4939605542150595e-08, |
|
"log_odds_chosen": -0.2961687445640564, |
|
"log_odds_ratio": -0.9432598352432251, |
|
"logits/chosen": -2.31669545173645, |
|
"logits/rejected": -2.2790303230285645, |
|
"logps/chosen": -0.6368409395217896, |
|
"logps/rejected": -0.466596782207489, |
|
"loss": 0.9972, |
|
"nll_loss": 0.9845758676528931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.9061297009179745, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 1.3229406692449791e-08, |
|
"log_odds_chosen": -0.22676777839660645, |
|
"log_odds_ratio": -0.8955879211425781, |
|
"logits/chosen": -2.2553787231445312, |
|
"logits/rejected": -2.228121280670166, |
|
"logps/chosen": -0.5664690136909485, |
|
"logps/rejected": -0.45375269651412964, |
|
"loss": 0.9651, |
|
"nll_loss": 0.9422292709350586, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9120521172638436, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 1.162048149666503e-08, |
|
"log_odds_chosen": -0.28153032064437866, |
|
"log_odds_ratio": -0.9496873617172241, |
|
"logits/chosen": -2.3055193424224854, |
|
"logits/rejected": -2.2621009349823, |
|
"logps/chosen": -0.6041845083236694, |
|
"logps/rejected": -0.4735984802246094, |
|
"loss": 0.9464, |
|
"nll_loss": 0.9381273984909058, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9179745336097128, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 1.0113518138794047e-08, |
|
"log_odds_chosen": -0.3274211287498474, |
|
"log_odds_ratio": -0.9445363283157349, |
|
"logits/chosen": -2.2565391063690186, |
|
"logits/rejected": -2.233027935028076, |
|
"logps/chosen": -0.5867388844490051, |
|
"logps/rejected": -0.4352657198905945, |
|
"loss": 0.9594, |
|
"nll_loss": 0.9287152290344238, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9238969499555819, |
|
"grad_norm": 8.625, |
|
"learning_rate": 8.709161190797565e-09, |
|
"log_odds_chosen": -0.23092766106128693, |
|
"log_odds_ratio": -0.8911072611808777, |
|
"logits/chosen": -2.3257815837860107, |
|
"logits/rejected": -2.29530668258667, |
|
"logps/chosen": -0.5526595115661621, |
|
"logps/rejected": -0.4391084611415863, |
|
"loss": 0.9337, |
|
"nll_loss": 0.9090098142623901, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.929819366301451, |
|
"grad_norm": 9.0, |
|
"learning_rate": 7.408011336897141e-09, |
|
"log_odds_chosen": -0.4632336497306824, |
|
"log_odds_ratio": -1.089444875717163, |
|
"logits/chosen": -2.346909284591675, |
|
"logits/rejected": -2.334372043609619, |
|
"logps/chosen": -0.7571093440055847, |
|
"logps/rejected": -0.4586968421936035, |
|
"loss": 0.9736, |
|
"nll_loss": 1.0062029361724854, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.9357417826473201, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 6.210625116645135e-09, |
|
"log_odds_chosen": -0.41088682413101196, |
|
"log_odds_ratio": -0.9913327097892761, |
|
"logits/chosen": -2.3505208492279053, |
|
"logits/rejected": -2.3092150688171387, |
|
"logps/chosen": -0.6176980137825012, |
|
"logps/rejected": -0.42517074942588806, |
|
"loss": 0.9028, |
|
"nll_loss": 0.8675041198730469, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.9416641989931892, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 5.117514686876378e-09, |
|
"log_odds_chosen": -0.29983749985694885, |
|
"log_odds_ratio": -0.9341946840286255, |
|
"logits/chosen": -2.3163905143737793, |
|
"logits/rejected": -2.281881809234619, |
|
"logps/chosen": -0.569345235824585, |
|
"logps/rejected": -0.4335504174232483, |
|
"loss": 0.946, |
|
"nll_loss": 0.9458128809928894, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.9475866153390583, |
|
"grad_norm": 8.25, |
|
"learning_rate": 4.1291476026441565e-09, |
|
"log_odds_chosen": -0.21767720580101013, |
|
"log_odds_ratio": -0.8741191029548645, |
|
"logits/chosen": -2.2778666019439697, |
|
"logits/rejected": -2.2554242610931396, |
|
"logps/chosen": -0.560379147529602, |
|
"logps/rejected": -0.44631558656692505, |
|
"loss": 0.897, |
|
"nll_loss": 0.8650028109550476, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9535090316849274, |
|
"grad_norm": 7.96875, |
|
"learning_rate": 3.2459466172331253e-09, |
|
"log_odds_chosen": -0.35443753004074097, |
|
"log_odds_ratio": -0.9958807229995728, |
|
"logits/chosen": -2.2850985527038574, |
|
"logits/rejected": -2.264432430267334, |
|
"logps/chosen": -0.6543992757797241, |
|
"logps/rejected": -0.436093807220459, |
|
"loss": 0.9838, |
|
"nll_loss": 0.9750429391860962, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.9594314480307966, |
|
"grad_norm": 10.25, |
|
"learning_rate": 2.4682895013354854e-09, |
|
"log_odds_chosen": -0.3622002899646759, |
|
"log_odds_ratio": -0.9864064455032349, |
|
"logits/chosen": -2.287553548812866, |
|
"logits/rejected": -2.2655410766601562, |
|
"logps/chosen": -0.6380153894424438, |
|
"logps/rejected": -0.4244503378868103, |
|
"loss": 0.9213, |
|
"nll_loss": 0.9489747881889343, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.9653538643766657, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.7965088814675677e-09, |
|
"log_odds_chosen": -0.4782753586769104, |
|
"log_odds_ratio": -1.0607492923736572, |
|
"logits/chosen": -2.290717840194702, |
|
"logits/rejected": -2.272459030151367, |
|
"logps/chosen": -0.6781035661697388, |
|
"logps/rejected": -0.4276870787143707, |
|
"loss": 0.9333, |
|
"nll_loss": 0.9644565582275391, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.9712762807225348, |
|
"grad_norm": 9.125, |
|
"learning_rate": 1.2308920976958348e-09, |
|
"log_odds_chosen": -0.29858607053756714, |
|
"log_odds_ratio": -0.9460258483886719, |
|
"logits/chosen": -2.269747734069824, |
|
"logits/rejected": -2.247730016708374, |
|
"logps/chosen": -0.6245580911636353, |
|
"logps/rejected": -0.45077449083328247, |
|
"loss": 0.906, |
|
"nll_loss": 0.9039252996444702, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.9771986970684039, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 7.716810807330276e-10, |
|
"log_odds_chosen": -0.4411376416683197, |
|
"log_odds_ratio": -1.0094521045684814, |
|
"logits/chosen": -2.2869138717651367, |
|
"logits/rejected": -2.24787974357605, |
|
"logps/chosen": -0.6290577054023743, |
|
"logps/rejected": -0.4051317572593689, |
|
"loss": 0.9304, |
|
"nll_loss": 0.9296571016311646, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.983121113414273, |
|
"grad_norm": 10.125, |
|
"learning_rate": 4.190722484575804e-10, |
|
"log_odds_chosen": -0.3509382903575897, |
|
"log_odds_ratio": -0.9882933497428894, |
|
"logits/chosen": -2.2927916049957275, |
|
"logits/rejected": -2.262193202972412, |
|
"logps/chosen": -0.6660831570625305, |
|
"logps/rejected": -0.4522073268890381, |
|
"loss": 0.9544, |
|
"nll_loss": 0.9784467816352844, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.9890435297601421, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 1.732164218998522e-10, |
|
"log_odds_chosen": -0.35314035415649414, |
|
"log_odds_ratio": -0.9515780210494995, |
|
"logits/chosen": -2.265188694000244, |
|
"logits/rejected": -2.2218968868255615, |
|
"logps/chosen": -0.5966526865959167, |
|
"logps/rejected": -0.4354891777038574, |
|
"loss": 0.9111, |
|
"nll_loss": 0.9058610796928406, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.9949659461060113, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 3.4218760731730136e-11, |
|
"log_odds_chosen": -0.3034502863883972, |
|
"log_odds_ratio": -0.9285211563110352, |
|
"logits/chosen": -2.339616298675537, |
|
"logits/rejected": -2.2971951961517334, |
|
"logps/chosen": -0.5794862508773804, |
|
"logps/rejected": -0.43610063195228577, |
|
"loss": 0.9681, |
|
"nll_loss": 0.9744264483451843, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9997038791827065, |
|
"step": 1688, |
|
"total_flos": 0.0, |
|
"train_loss": 0.9736523162132191, |
|
"train_runtime": 25409.1611, |
|
"train_samples_per_second": 2.126, |
|
"train_steps_per_second": 0.066 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1688, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|