|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9905956112852664, |
|
"eval_steps": 500, |
|
"global_step": 79, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012539184952978056, |
|
"grad_norm": 9.408438376530524, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.9077322483062744, |
|
"logits/rejected": -2.8318910598754883, |
|
"logps/chosen": -351.8885498046875, |
|
"logps/pi_response": -76.32845306396484, |
|
"logps/ref_response": -76.32845306396484, |
|
"logps/rejected": -169.29762268066406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12539184952978055, |
|
"grad_norm": 8.382734262861335, |
|
"learning_rate": 4.990217055187362e-07, |
|
"logits/chosen": -2.7861804962158203, |
|
"logits/rejected": -2.762985944747925, |
|
"logps/chosen": -234.13641357421875, |
|
"logps/pi_response": -70.14045715332031, |
|
"logps/ref_response": -70.02328491210938, |
|
"logps/rejected": -167.99607849121094, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.5347222089767456, |
|
"rewards/chosen": 0.006987536326050758, |
|
"rewards/margins": 0.004230231046676636, |
|
"rewards/rejected": 0.0027573055122047663, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2507836990595611, |
|
"grad_norm": 6.754671268873102, |
|
"learning_rate": 4.655786431300069e-07, |
|
"logits/chosen": -2.7409508228302, |
|
"logits/rejected": -2.689985752105713, |
|
"logps/chosen": -245.8350067138672, |
|
"logps/pi_response": -75.90806579589844, |
|
"logps/ref_response": -67.40553283691406, |
|
"logps/rejected": -170.30355834960938, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.04610520228743553, |
|
"rewards/margins": 0.0813165009021759, |
|
"rewards/rejected": -0.03521129488945007, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3761755485893417, |
|
"grad_norm": 6.575914102976567, |
|
"learning_rate": 3.9061232191019517e-07, |
|
"logits/chosen": -2.6522345542907715, |
|
"logits/rejected": -2.618391275405884, |
|
"logps/chosen": -233.65853881835938, |
|
"logps/pi_response": -102.7225112915039, |
|
"logps/ref_response": -65.888427734375, |
|
"logps/rejected": -187.14553833007812, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.10190945863723755, |
|
"rewards/margins": 0.1844368726015091, |
|
"rewards/rejected": -0.28634634613990784, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5015673981191222, |
|
"grad_norm": 7.971938899453216, |
|
"learning_rate": 2.8856223324132555e-07, |
|
"logits/chosen": -2.6599223613739014, |
|
"logits/rejected": -2.6376564502716064, |
|
"logps/chosen": -255.1501007080078, |
|
"logps/pi_response": -138.35655212402344, |
|
"logps/ref_response": -70.97199249267578, |
|
"logps/rejected": -225.760498046875, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.22394080460071564, |
|
"rewards/margins": 0.34193405508995056, |
|
"rewards/rejected": -0.5658749341964722, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6269592476489029, |
|
"grad_norm": 10.038764882605939, |
|
"learning_rate": 1.7908455541642582e-07, |
|
"logits/chosen": -2.6536831855773926, |
|
"logits/rejected": -2.6201894283294678, |
|
"logps/chosen": -285.1458435058594, |
|
"logps/pi_response": -155.6894989013672, |
|
"logps/ref_response": -69.12784576416016, |
|
"logps/rejected": -250.23306274414062, |
|
"loss": 0.5459, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3502059578895569, |
|
"rewards/margins": 0.4657812714576721, |
|
"rewards/rejected": -0.815987229347229, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7523510971786834, |
|
"grad_norm": 10.581283000102326, |
|
"learning_rate": 8.32661172908373e-08, |
|
"logits/chosen": -2.649432897567749, |
|
"logits/rejected": -2.614516019821167, |
|
"logps/chosen": -256.6278076171875, |
|
"logps/pi_response": -162.4719696044922, |
|
"logps/ref_response": -62.94016647338867, |
|
"logps/rejected": -266.3846435546875, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.44566231966018677, |
|
"rewards/margins": 0.5812581181526184, |
|
"rewards/rejected": -1.0269205570220947, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.877742946708464, |
|
"grad_norm": 10.940165896983189, |
|
"learning_rate": 1.956279997278043e-08, |
|
"logits/chosen": -2.6477224826812744, |
|
"logits/rejected": -2.610698699951172, |
|
"logps/chosen": -302.5779724121094, |
|
"logps/pi_response": -183.13412475585938, |
|
"logps/ref_response": -70.71024322509766, |
|
"logps/rejected": -277.17822265625, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.5014069676399231, |
|
"rewards/margins": 0.6355606913566589, |
|
"rewards/rejected": -1.1369677782058716, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9905956112852664, |
|
"step": 79, |
|
"total_flos": 0.0, |
|
"train_loss": 0.583715951895412, |
|
"train_runtime": 3516.8514, |
|
"train_samples_per_second": 5.794, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 79, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|