|
{ |
|
"best_metric": 0.012590945698320866, |
|
"best_model_checkpoint": "./Zephyr/28-03-24-Weni-WeniGPT-QA-Zephyr-7B-4.0.2-KTO_WeniGPT Experiment using KTO trainer with no collator, Zephyr model and random system prompt.-2_max_steps-786_batch_32_2024-03-28_ppid_9/checkpoint-200", |
|
"epoch": 2.283539486203616, |
|
"eval_steps": 50, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.973484516143799, |
|
"kl": 0.07447954267263412, |
|
"learning_rate": 0.00015833333333333332, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3654, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.1150100231170654, |
|
"kl": 0.24954533576965332, |
|
"learning_rate": 0.00019606299212598428, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.1911, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -157.132080078125, |
|
"eval_logps/rejected": -431.1063232421875, |
|
"eval_loss": 0.019795970991253853, |
|
"eval_rewards/chosen": 5.025023460388184, |
|
"eval_rewards/margins": 28.0367374420166, |
|
"eval_rewards/rejected": -23.0117130279541, |
|
"eval_runtime": 209.568, |
|
"eval_samples_per_second": 2.252, |
|
"eval_steps_per_second": 0.563, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.8193832039833069, |
|
"kl": 0.0, |
|
"learning_rate": 0.00019081364829396326, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.1212, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.1438875198364258, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018556430446194227, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0613, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.5549229383468628, |
|
"kl": 0.30086809396743774, |
|
"learning_rate": 0.00018031496062992125, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0525, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -151.0825958251953, |
|
"eval_logps/rejected": -502.1325378417969, |
|
"eval_loss": 0.015516282990574837, |
|
"eval_rewards/chosen": 5.629973411560059, |
|
"eval_rewards/margins": 35.744300842285156, |
|
"eval_rewards/rejected": -30.11433219909668, |
|
"eval_runtime": 209.5342, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.16322359442710876, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001750656167979003, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.037, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.8122725486755371, |
|
"kl": 0.0, |
|
"learning_rate": 0.00016981627296587927, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0684, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -145.42347717285156, |
|
"eval_logps/rejected": -530.3358154296875, |
|
"eval_loss": 0.010358058847486973, |
|
"eval_rewards/chosen": 6.19588565826416, |
|
"eval_rewards/margins": 39.130550384521484, |
|
"eval_rewards/rejected": -32.934661865234375, |
|
"eval_runtime": 209.5283, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.11464398354291916, |
|
"kl": 0.0, |
|
"learning_rate": 0.00016456692913385828, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0167, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 5.260560989379883, |
|
"kl": 0.0, |
|
"learning_rate": 0.00015931758530183726, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.131, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.15409517288208008, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001540682414698163, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0253, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -147.8525390625, |
|
"eval_logps/rejected": -489.1932678222656, |
|
"eval_loss": 0.012590945698320866, |
|
"eval_rewards/chosen": 5.952979564666748, |
|
"eval_rewards/margins": 34.77338790893555, |
|
"eval_rewards/rejected": -28.82040786743164, |
|
"eval_runtime": 209.5044, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.025852208957076073, |
|
"kl": 0.0, |
|
"learning_rate": 0.00014908136482939634, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0101, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.013888155110180378, |
|
"kl": 0.1387307345867157, |
|
"learning_rate": 0.00014383202099737535, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0177, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -142.92942810058594, |
|
"eval_logps/rejected": -740.499267578125, |
|
"eval_loss": 0.01631937175989151, |
|
"eval_rewards/chosen": 6.445290565490723, |
|
"eval_rewards/margins": 60.39630126953125, |
|
"eval_rewards/rejected": -53.951011657714844, |
|
"eval_runtime": 209.4415, |
|
"eval_samples_per_second": 2.254, |
|
"eval_steps_per_second": 0.563, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.02793472446501255, |
|
"kl": 0.0, |
|
"learning_rate": 0.00013858267716535433, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0112, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.0313153937458992, |
|
"kl": 0.0, |
|
"learning_rate": 0.00013333333333333334, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0067, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.030554356053471565, |
|
"kl": 0.0, |
|
"learning_rate": 0.00012808398950131235, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0126, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -145.38067626953125, |
|
"eval_logps/rejected": -851.4735717773438, |
|
"eval_loss": 0.020638220012187958, |
|
"eval_rewards/chosen": 6.200164794921875, |
|
"eval_rewards/margins": 71.24861145019531, |
|
"eval_rewards/rejected": -65.04844665527344, |
|
"eval_runtime": 209.5591, |
|
"eval_samples_per_second": 2.252, |
|
"eval_steps_per_second": 0.563, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 786, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|