{ "best_metric": 0.0060875071212649345, "best_model_checkpoint": "./Zephyr/28-03-24-Weni-WeniGPT-QA-Zephyr-7B-4.0.2-KTO_WeniGPT Experiment using KTO trainer with no collator, Zephyr model and random system prompt.-2_max_steps-786_batch_32_2024-03-28_ppid_9/checkpoint-500", "epoch": 3.8058991436726926, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "grad_norm": 3.973484516143799, "kl": 0.07447954267263412, "learning_rate": 0.00015833333333333332, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3654, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 20 }, { "epoch": 0.3, "grad_norm": 2.1150100231170654, "kl": 0.24954533576965332, "learning_rate": 0.00019606299212598428, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.1911, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 40 }, { "epoch": 0.38, "eval_kl": 0.0, "eval_logps/chosen": -157.132080078125, "eval_logps/rejected": -431.1063232421875, "eval_loss": 0.019795970991253853, "eval_rewards/chosen": 5.025023460388184, "eval_rewards/margins": 28.0367374420166, "eval_rewards/rejected": -23.0117130279541, "eval_runtime": 209.568, "eval_samples_per_second": 2.252, "eval_steps_per_second": 0.563, "step": 50 }, { "epoch": 0.46, "grad_norm": 0.8193832039833069, "kl": 0.0, "learning_rate": 0.00019081364829396326, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.1212, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 60 }, { "epoch": 0.61, "grad_norm": 1.1438875198364258, "kl": 0.0, "learning_rate": 0.00018556430446194227, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0613, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 80 }, { "epoch": 0.76, "grad_norm": 1.5549229383468628, "kl": 0.30086809396743774, "learning_rate": 0.00018031496062992125, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0525, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 100 }, { "epoch": 0.76, "eval_kl": 0.0, "eval_logps/chosen": -151.0825958251953, "eval_logps/rejected": -502.1325378417969, "eval_loss": 0.015516282990574837, "eval_rewards/chosen": 5.629973411560059, "eval_rewards/margins": 35.744300842285156, "eval_rewards/rejected": -30.11433219909668, "eval_runtime": 209.5342, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.563, "step": 100 }, { "epoch": 0.91, "grad_norm": 0.16322359442710876, "kl": 0.0, "learning_rate": 0.0001750656167979003, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.037, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 120 }, { "epoch": 1.07, "grad_norm": 0.8122725486755371, "kl": 0.0, "learning_rate": 0.00016981627296587927, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0684, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 140 }, { "epoch": 1.14, "eval_kl": 0.0, "eval_logps/chosen": -145.42347717285156, "eval_logps/rejected": -530.3358154296875, "eval_loss": 0.010358058847486973, "eval_rewards/chosen": 6.19588565826416, "eval_rewards/margins": 39.130550384521484, "eval_rewards/rejected": -32.934661865234375, "eval_runtime": 209.5283, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.563, "step": 150 }, { "epoch": 1.22, "grad_norm": 0.11464398354291916, "kl": 0.0, "learning_rate": 0.00016456692913385828, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0167, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 160 }, { "epoch": 1.37, "grad_norm": 5.260560989379883, "kl": 0.0, "learning_rate": 0.00015931758530183726, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.131, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 180 }, { "epoch": 1.52, "grad_norm": 0.15409517288208008, "kl": 0.0, "learning_rate": 0.0001540682414698163, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0253, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 200 }, { "epoch": 1.52, "eval_kl": 0.0, "eval_logps/chosen": -147.8525390625, "eval_logps/rejected": -489.1932678222656, "eval_loss": 0.012590945698320866, "eval_rewards/chosen": 5.952979564666748, "eval_rewards/margins": 34.77338790893555, "eval_rewards/rejected": -28.82040786743164, "eval_runtime": 209.5044, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.563, "step": 200 }, { "epoch": 1.67, "grad_norm": 0.025852208957076073, "kl": 0.0, "learning_rate": 0.00014908136482939634, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0101, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 220 }, { "epoch": 1.83, "grad_norm": 0.013888155110180378, "kl": 0.1387307345867157, "learning_rate": 0.00014383202099737535, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0177, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 240 }, { "epoch": 1.9, "eval_kl": 0.0, "eval_logps/chosen": -142.92942810058594, "eval_logps/rejected": -740.499267578125, "eval_loss": 0.01631937175989151, "eval_rewards/chosen": 6.445290565490723, "eval_rewards/margins": 60.39630126953125, "eval_rewards/rejected": -53.951011657714844, "eval_runtime": 209.4415, "eval_samples_per_second": 2.254, "eval_steps_per_second": 0.563, "step": 250 }, { "epoch": 1.98, "grad_norm": 0.02793472446501255, "kl": 0.0, "learning_rate": 0.00013858267716535433, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0112, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 260 }, { "epoch": 2.13, "grad_norm": 0.0313153937458992, "kl": 0.0, "learning_rate": 0.00013333333333333334, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0067, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 280 }, { "epoch": 2.28, "grad_norm": 0.030554356053471565, "kl": 0.0, "learning_rate": 0.00012808398950131235, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0126, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 300 }, { "epoch": 2.28, "eval_kl": 0.0, "eval_logps/chosen": -145.38067626953125, "eval_logps/rejected": -851.4735717773438, "eval_loss": 0.020638220012187958, "eval_rewards/chosen": 6.200164794921875, "eval_rewards/margins": 71.24861145019531, "eval_rewards/rejected": -65.04844665527344, "eval_runtime": 209.5591, "eval_samples_per_second": 2.252, "eval_steps_per_second": 0.563, "step": 300 }, { "epoch": 2.44, "grad_norm": 0.01577291637659073, "kl": 0.0, "learning_rate": 0.00012283464566929136, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0134, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 320 }, { "epoch": 2.59, "grad_norm": 0.017728939652442932, "kl": 0.0, "learning_rate": 0.00011758530183727034, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.007, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 340 }, { "epoch": 2.66, "eval_kl": 0.0, "eval_logps/chosen": -140.51016235351562, "eval_logps/rejected": -616.1578369140625, "eval_loss": 0.006825109478086233, "eval_rewards/chosen": 6.687215328216553, "eval_rewards/margins": 48.20407485961914, "eval_rewards/rejected": -41.51686477661133, "eval_runtime": 209.4969, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.563, "step": 350 }, { "epoch": 2.74, "grad_norm": 0.025538522750139236, "kl": 0.2244974821805954, "learning_rate": 0.00011233595800524934, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0034, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 360 }, { "epoch": 2.89, "grad_norm": 0.009798596613109112, "kl": 0.0, "learning_rate": 0.00010708661417322836, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0081, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 380 }, { "epoch": 3.04, "grad_norm": 0.021639486774802208, "kl": 0.0, "learning_rate": 0.00010183727034120735, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0085, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 400 }, { "epoch": 3.04, "eval_kl": 0.0, "eval_logps/chosen": -139.7056884765625, "eval_logps/rejected": -727.2117309570312, "eval_loss": 0.007565339561551809, "eval_rewards/chosen": 6.767664432525635, "eval_rewards/margins": 59.389923095703125, "eval_rewards/rejected": -52.62226104736328, "eval_runtime": 209.4194, "eval_samples_per_second": 2.254, "eval_steps_per_second": 0.563, "step": 400 }, { "epoch": 3.2, "grad_norm": 0.2425842434167862, "kl": 0.0, "learning_rate": 9.658792650918635e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0029, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 420 }, { "epoch": 3.35, "grad_norm": 0.01065791118890047, "kl": 0.0, "learning_rate": 9.133858267716536e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0057, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 440 }, { "epoch": 3.43, "eval_kl": 0.0, "eval_logps/chosen": -141.96299743652344, "eval_logps/rejected": -582.8775024414062, "eval_loss": 0.005958274472504854, "eval_rewards/chosen": 6.541934967041016, "eval_rewards/margins": 44.73076629638672, "eval_rewards/rejected": -38.18882751464844, "eval_runtime": 209.4524, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.563, "step": 450 }, { "epoch": 3.5, "grad_norm": 0.015213225036859512, "kl": 0.0, "learning_rate": 8.608923884514435e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.008, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 460 }, { "epoch": 3.65, "grad_norm": 0.016072571277618408, "kl": 0.0, "learning_rate": 8.083989501312336e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0119, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 480 }, { "epoch": 3.81, "grad_norm": 0.007641986012458801, "kl": 0.16172848641872406, "learning_rate": 7.559055118110236e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0029, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 500 }, { "epoch": 3.81, "eval_kl": 0.0, "eval_logps/chosen": -140.1928253173828, "eval_logps/rejected": -571.4075927734375, "eval_loss": 0.0060875071212649345, "eval_rewards/chosen": 6.718951225280762, "eval_rewards/margins": 43.76079177856445, "eval_rewards/rejected": -37.041839599609375, "eval_runtime": 209.4408, "eval_samples_per_second": 2.254, "eval_steps_per_second": 0.563, "step": 500 } ], "logging_steps": 20, "max_steps": 786, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }