{ "epoch": 0.96, "eval_logits/chosen": -0.6184229850769043, "eval_logits/rejected": -0.4482918381690979, "eval_logps/chosen": -0.7622865438461304, "eval_logps/rejected": -0.8116828799247742, "eval_loss": 4.593369960784912, "eval_rewards/accuracies": 0.5859375, "eval_rewards/chosen": -7.622865200042725, "eval_rewards/margins": 0.49396347999572754, "eval_rewards/rejected": -8.116828918457031, "eval_runtime": 1.1825, "eval_samples_per_second": 105.71, "eval_steps_per_second": 3.383, "total_flos": 17535837601792.0, "train_loss": 4.453756756252712, "train_runtime": 178.4889, "train_samples_per_second": 13.306, "train_steps_per_second": 0.101 }