|
{ |
|
"best_metric": 0.005181881133466959, |
|
"best_model_checkpoint": "./Zephyr/28-03-24-Weni-WeniGPT-QA-Zephyr-7B-4.0.2-KTO_WeniGPT Experiment using KTO trainer with no collator, Zephyr model and random system prompt.-2_max_steps-786_batch_32_2024-03-28_ppid_9/checkpoint-700", |
|
"epoch": 5.32825880114177, |
|
"eval_steps": 50, |
|
"global_step": 700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.973484516143799, |
|
"kl": 0.07447954267263412, |
|
"learning_rate": 0.00015833333333333332, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3654, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.1150100231170654, |
|
"kl": 0.24954533576965332, |
|
"learning_rate": 0.00019606299212598428, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.1911, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -157.132080078125, |
|
"eval_logps/rejected": -431.1063232421875, |
|
"eval_loss": 0.019795970991253853, |
|
"eval_rewards/chosen": 5.025023460388184, |
|
"eval_rewards/margins": 28.0367374420166, |
|
"eval_rewards/rejected": -23.0117130279541, |
|
"eval_runtime": 209.568, |
|
"eval_samples_per_second": 2.252, |
|
"eval_steps_per_second": 0.563, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.8193832039833069, |
|
"kl": 0.0, |
|
"learning_rate": 0.00019081364829396326, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.1212, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.1438875198364258, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018556430446194227, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0613, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.5549229383468628, |
|
"kl": 0.30086809396743774, |
|
"learning_rate": 0.00018031496062992125, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0525, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -151.0825958251953, |
|
"eval_logps/rejected": -502.1325378417969, |
|
"eval_loss": 0.015516282990574837, |
|
"eval_rewards/chosen": 5.629973411560059, |
|
"eval_rewards/margins": 35.744300842285156, |
|
"eval_rewards/rejected": -30.11433219909668, |
|
"eval_runtime": 209.5342, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.16322359442710876, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001750656167979003, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.037, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.8122725486755371, |
|
"kl": 0.0, |
|
"learning_rate": 0.00016981627296587927, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0684, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -145.42347717285156, |
|
"eval_logps/rejected": -530.3358154296875, |
|
"eval_loss": 0.010358058847486973, |
|
"eval_rewards/chosen": 6.19588565826416, |
|
"eval_rewards/margins": 39.130550384521484, |
|
"eval_rewards/rejected": -32.934661865234375, |
|
"eval_runtime": 209.5283, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.11464398354291916, |
|
"kl": 0.0, |
|
"learning_rate": 0.00016456692913385828, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0167, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 5.260560989379883, |
|
"kl": 0.0, |
|
"learning_rate": 0.00015931758530183726, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.131, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.15409517288208008, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001540682414698163, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0253, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -147.8525390625, |
|
"eval_logps/rejected": -489.1932678222656, |
|
"eval_loss": 0.012590945698320866, |
|
"eval_rewards/chosen": 5.952979564666748, |
|
"eval_rewards/margins": 34.77338790893555, |
|
"eval_rewards/rejected": -28.82040786743164, |
|
"eval_runtime": 209.5044, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.025852208957076073, |
|
"kl": 0.0, |
|
"learning_rate": 0.00014908136482939634, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0101, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.013888155110180378, |
|
"kl": 0.1387307345867157, |
|
"learning_rate": 0.00014383202099737535, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0177, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -142.92942810058594, |
|
"eval_logps/rejected": -740.499267578125, |
|
"eval_loss": 0.01631937175989151, |
|
"eval_rewards/chosen": 6.445290565490723, |
|
"eval_rewards/margins": 60.39630126953125, |
|
"eval_rewards/rejected": -53.951011657714844, |
|
"eval_runtime": 209.4415, |
|
"eval_samples_per_second": 2.254, |
|
"eval_steps_per_second": 0.563, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.02793472446501255, |
|
"kl": 0.0, |
|
"learning_rate": 0.00013858267716535433, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0112, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.0313153937458992, |
|
"kl": 0.0, |
|
"learning_rate": 0.00013333333333333334, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0067, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.030554356053471565, |
|
"kl": 0.0, |
|
"learning_rate": 0.00012808398950131235, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0126, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -145.38067626953125, |
|
"eval_logps/rejected": -851.4735717773438, |
|
"eval_loss": 0.020638220012187958, |
|
"eval_rewards/chosen": 6.200164794921875, |
|
"eval_rewards/margins": 71.24861145019531, |
|
"eval_rewards/rejected": -65.04844665527344, |
|
"eval_runtime": 209.5591, |
|
"eval_samples_per_second": 2.252, |
|
"eval_steps_per_second": 0.563, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.01577291637659073, |
|
"kl": 0.0, |
|
"learning_rate": 0.00012283464566929136, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0134, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.017728939652442932, |
|
"kl": 0.0, |
|
"learning_rate": 0.00011758530183727034, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.007, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -140.51016235351562, |
|
"eval_logps/rejected": -616.1578369140625, |
|
"eval_loss": 0.006825109478086233, |
|
"eval_rewards/chosen": 6.687215328216553, |
|
"eval_rewards/margins": 48.20407485961914, |
|
"eval_rewards/rejected": -41.51686477661133, |
|
"eval_runtime": 209.4969, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 0.025538522750139236, |
|
"kl": 0.2244974821805954, |
|
"learning_rate": 0.00011233595800524934, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0034, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 0.009798596613109112, |
|
"kl": 0.0, |
|
"learning_rate": 0.00010708661417322836, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0081, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.021639486774802208, |
|
"kl": 0.0, |
|
"learning_rate": 0.00010183727034120735, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0085, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -139.7056884765625, |
|
"eval_logps/rejected": -727.2117309570312, |
|
"eval_loss": 0.007565339561551809, |
|
"eval_rewards/chosen": 6.767664432525635, |
|
"eval_rewards/margins": 59.389923095703125, |
|
"eval_rewards/rejected": -52.62226104736328, |
|
"eval_runtime": 209.4194, |
|
"eval_samples_per_second": 2.254, |
|
"eval_steps_per_second": 0.563, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.2425842434167862, |
|
"kl": 0.0, |
|
"learning_rate": 9.658792650918635e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0029, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"grad_norm": 0.01065791118890047, |
|
"kl": 0.0, |
|
"learning_rate": 9.133858267716536e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0057, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -141.96299743652344, |
|
"eval_logps/rejected": -582.8775024414062, |
|
"eval_loss": 0.005958274472504854, |
|
"eval_rewards/chosen": 6.541934967041016, |
|
"eval_rewards/margins": 44.73076629638672, |
|
"eval_rewards/rejected": -38.18882751464844, |
|
"eval_runtime": 209.4524, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 0.015213225036859512, |
|
"kl": 0.0, |
|
"learning_rate": 8.608923884514435e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.008, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 0.016072571277618408, |
|
"kl": 0.0, |
|
"learning_rate": 8.083989501312336e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0119, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 0.007641986012458801, |
|
"kl": 0.16172848641872406, |
|
"learning_rate": 7.559055118110236e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0029, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -140.1928253173828, |
|
"eval_logps/rejected": -571.4075927734375, |
|
"eval_loss": 0.0060875071212649345, |
|
"eval_rewards/chosen": 6.718951225280762, |
|
"eval_rewards/margins": 43.76079177856445, |
|
"eval_rewards/rejected": -37.041839599609375, |
|
"eval_runtime": 209.4408, |
|
"eval_samples_per_second": 2.254, |
|
"eval_steps_per_second": 0.563, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"grad_norm": 0.009444206021726131, |
|
"kl": 0.0, |
|
"learning_rate": 7.034120734908137e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0061, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 0.008130188100039959, |
|
"kl": 0.0, |
|
"learning_rate": 6.509186351706036e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.003, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -138.9207000732422, |
|
"eval_logps/rejected": -567.5726928710938, |
|
"eval_loss": 0.00618448993191123, |
|
"eval_rewards/chosen": 6.84616231918335, |
|
"eval_rewards/margins": 43.504512786865234, |
|
"eval_rewards/rejected": -36.658355712890625, |
|
"eval_runtime": 209.4628, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"grad_norm": 0.007174960803240538, |
|
"kl": 0.0, |
|
"learning_rate": 5.984251968503938e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0043, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"grad_norm": 0.009455788880586624, |
|
"kl": 0.0, |
|
"learning_rate": 5.4593175853018376e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0034, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 0.010507142171263695, |
|
"kl": 0.0, |
|
"learning_rate": 4.934383202099738e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0023, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -138.7514190673828, |
|
"eval_logps/rejected": -583.14013671875, |
|
"eval_loss": 0.005779278464615345, |
|
"eval_rewards/chosen": 6.863093376159668, |
|
"eval_rewards/margins": 45.078189849853516, |
|
"eval_rewards/rejected": -38.2150993347168, |
|
"eval_runtime": 209.7359, |
|
"eval_samples_per_second": 2.25, |
|
"eval_steps_per_second": 0.563, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 0.0048731728456914425, |
|
"kl": 0.16939109563827515, |
|
"learning_rate": 4.409448818897638e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0076, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"grad_norm": 0.006191925145685673, |
|
"kl": 0.0, |
|
"learning_rate": 3.8845144356955383e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0028, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -139.12095642089844, |
|
"eval_logps/rejected": -604.9989624023438, |
|
"eval_loss": 0.0049984036013484, |
|
"eval_rewards/chosen": 6.826137065887451, |
|
"eval_rewards/margins": 47.227115631103516, |
|
"eval_rewards/rejected": -40.400978088378906, |
|
"eval_runtime": 209.7256, |
|
"eval_samples_per_second": 2.251, |
|
"eval_steps_per_second": 0.563, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.006048476789146662, |
|
"kl": 0.0, |
|
"learning_rate": 3.3595800524934386e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0099, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"grad_norm": 0.0051725320518016815, |
|
"kl": 0.0, |
|
"learning_rate": 2.8346456692913388e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0033, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"grad_norm": 0.009665340185165405, |
|
"kl": 0.0, |
|
"learning_rate": 2.309711286089239e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0029, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -138.82569885253906, |
|
"eval_logps/rejected": -603.3299560546875, |
|
"eval_loss": 0.005181881133466959, |
|
"eval_rewards/chosen": 6.855663776397705, |
|
"eval_rewards/margins": 47.089744567871094, |
|
"eval_rewards/rejected": -40.234073638916016, |
|
"eval_runtime": 209.7627, |
|
"eval_samples_per_second": 2.25, |
|
"eval_steps_per_second": 0.563, |
|
"step": 700 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 786, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|