AmberYifan committed (verified)
Commit 1aa8e46 · 1 parent: 7964402

Training in progress, epoch 3, checkpoint

last-checkpoint/global_step933/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c5919b126a340ddb7a50161a0e0d65bbe3be190e4ab525579c3082f91aaa15b0
+ size 14483468456
last-checkpoint/global_step933/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00252ba1c8b63314d1ac6d165065caa87e9b017275a4f443cb02e484d7b19de8
+ size 14483468456
last-checkpoint/global_step933/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b988a1e003f4a620e4eba0eb61e86abefd0911ad4967c6a40447079b3712860
+ size 14483468456
last-checkpoint/global_step933/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:86c2872ed7a6526515d561ebc0a8aca67a2576b1d188e361b80e477ac2731f37
+ size 14483468456
last-checkpoint/global_step933/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:200ddb0c96143160059beff8ebdfcf2444b707d466305d82a88f61b480b6f952
+ size 150629
last-checkpoint/global_step933/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5768a8767b4b1a2054c6c0c473894e4bbcb30dd0abb4a098699c9d1b5dd4877
+ size 150629
last-checkpoint/global_step933/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62d3c5bce5c117ba5e0447db7f6cf7c3f1c29fc4baedf998ceabed269a7e5d13
+ size 150629
last-checkpoint/global_step933/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdcdb05666c4b616f841630e2c937298754384e11d2e854146bcc8da76d832f6
+ size 150629
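
The eight files above are DeepSpeed ZeRO shards saved at global_step933: one bf16 optimizer-state partition (~14.5 GB) plus one small model_states file per data-parallel rank (four ranks). To fold them back into a single fp32 state dict, a minimal sketch using DeepSpeed's zero_to_fp32 utility, assuming the last-checkpoint folder has been downloaded locally (paths are illustrative, not part of this commit):

import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# "last-checkpoint" must contain the global_step933/ folder and the "latest" tag file.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint", tag="global_step933")
torch.save(state_dict, "pytorch_model_fp32.bin")  # optional: persist the consolidated weights
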
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step622
+ global_step933
last-checkpoint/model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4fc00ca9dd9338df2a6996a4bfb3cd0281a9c68c12f0d064e11d56562e2e7368
+ oid sha256:9bcf533b0a8a867302c33cb820a31498c777b926b457489ac6c5516f30fa5fac
  size 4943162336
last-checkpoint/model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:be5f0e9fe4774f0ac11b29b7ab95efce766854de10e2eab5bbbe41787c1b9a67
+ oid sha256:e48ee38d0e794e0f393104da1cbbe5aa8fc3307fff01e9196177b44b6bfefce3
  size 4999819336
last-checkpoint/model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c4396bdce91e7cbac57f1a4bba559d28f006bd640236ec041466e87c0ead8b8b
+ oid sha256:59c9b741f899af2f98c945f4d988fb0050828aa8117ba25ca6050febf6ee5cdc
  size 4540516344
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2307c03867cef25b5028feb9a23f80e784b9af9a615de13ddca560a6a90fb593
+ oid sha256:ab7cf0d34d60dfda516cf9661904550e2e294e723edd07c25c738f05e8ba92d1
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:50218cfaecdd818354e567b7167c13899e3b42297e7d8f58bd7e732cfa547800
+ oid sha256:f7b183324e8227a51a9556d86b2ad893a8c4c52205ed4a737356c6611dac7353
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9814a66b49861f5495b06dae3be12ddf7185b88e2cae1fb808ca9efd99d5807f
+ oid sha256:ac69e994090f4818cb1fa6f6cefa363178552c3c731c6507ff195bcb07fd5bef
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a7be93040a65e0a29975f6c70b94418e1fdf88423a50c58aa572141d3c92fbfc
+ oid sha256:f68e61b63402f8afb1f69c960f7944965655dac11e3ccf29919c282f23931f86
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dfd6053a8450423b1508589b01aad6f55fbc0403955d073b555ad0b028220c09
+ oid sha256:0caf22595aba0b7671e98a74f0bfad5519cb9cbb39889404f38180bfd0fe28e6
  size 1064
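
Every entry in this commit is a Git LFS pointer rather than the binary itself: the three-line payload records the pointer spec version, the sha256 oid of the stored blob, and its size in bytes. A minimal sketch for materializing the real files with huggingface_hub (the repo_id is a placeholder; it is not part of this diff):

from huggingface_hub import snapshot_download

# Fetches only the checkpoint directory touched by this commit and resolves
# the LFS pointers to the actual .pt / .safetensors / .pth payloads.
local_dir = snapshot_download(
    repo_id="<user>/<repo>",               # placeholder -- substitute the actual repository id
    allow_patterns=["last-checkpoint/*"],
)
print(local_dir)
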
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.0,
+ "epoch": 3.0,
  "eval_steps": 500,
- "global_step": 622,
+ "global_step": 933,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -984,6 +984,487 @@
  "eval_samples_per_second": 14.052,
  "eval_steps_per_second": 0.494,
  "step": 622
+ },
+ {
+ "epoch": 2.0257234726688105,
+ "grad_norm": 1.0084688310317689,
+ "learning_rate": 1.805721096543504e-07,
+ "logits/chosen": -3.265625,
+ "logits/rejected": -3.53125,
+ "logps/chosen": -138.0,
+ "logps/rejected": -296.0,
+ "loss": 0.0133,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.82421875,
+ "rewards/margins": 10.125,
+ "rewards/rejected": -9.3125,
+ "step": 630
+ },
+ {
+ "epoch": 2.057877813504823,
+ "grad_norm": 1.0564688005496572,
+ "learning_rate": 1.7461263408820024e-07,
+ "logits/chosen": -3.28125,
+ "logits/rejected": -3.46875,
+ "logps/chosen": -123.5,
+ "logps/rejected": -278.0,
+ "loss": 0.0128,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.5,
+ "rewards/margins": 10.0,
+ "rewards/rejected": -9.5,
+ "step": 640
+ },
+ {
+ "epoch": 2.090032154340836,
+ "grad_norm": 2.814703376282905,
+ "learning_rate": 1.6865315852205006e-07,
+ "logits/chosen": -3.171875,
+ "logits/rejected": -3.390625,
+ "logps/chosen": -122.5,
+ "logps/rejected": -330.0,
+ "loss": 0.0096,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.4296875,
+ "rewards/margins": 13.4375,
+ "rewards/rejected": -12.0,
+ "step": 650
+ },
+ {
+ "epoch": 2.122186495176849,
+ "grad_norm": 9.832993297771846,
+ "learning_rate": 1.626936829558999e-07,
+ "logits/chosen": -3.25,
+ "logits/rejected": -3.53125,
+ "logps/chosen": -138.0,
+ "logps/rejected": -330.0,
+ "loss": 0.0111,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.1591796875,
+ "rewards/margins": 12.3125,
+ "rewards/rejected": -12.1875,
+ "step": 660
+ },
+ {
+ "epoch": 2.154340836012862,
+ "grad_norm": 0.21098065783878342,
+ "learning_rate": 1.5673420738974968e-07,
+ "logits/chosen": -3.234375,
+ "logits/rejected": -3.34375,
+ "logps/chosen": -109.0,
+ "logps/rejected": -276.0,
+ "loss": 0.0034,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.03125,
+ "rewards/margins": 12.3125,
+ "rewards/rejected": -11.3125,
+ "step": 670
+ },
+ {
+ "epoch": 2.1864951768488745,
+ "grad_norm": 2.476443587212778,
+ "learning_rate": 1.5077473182359952e-07,
+ "logits/chosen": -3.09375,
+ "logits/rejected": -3.375,
+ "logps/chosen": -106.5,
+ "logps/rejected": -320.0,
+ "loss": 0.0043,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.546875,
+ "rewards/margins": 12.75,
+ "rewards/rejected": -12.1875,
+ "step": 680
+ },
+ {
+ "epoch": 2.2186495176848875,
+ "grad_norm": 11.363132455431685,
+ "learning_rate": 1.4481525625744933e-07,
+ "logits/chosen": -3.125,
+ "logits/rejected": -3.40625,
+ "logps/chosen": -134.0,
+ "logps/rejected": -348.0,
+ "loss": 0.0122,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.7421875,
+ "rewards/margins": 12.9375,
+ "rewards/rejected": -12.1875,
+ "step": 690
+ },
+ {
+ "epoch": 2.2508038585209005,
+ "grad_norm": 2.5108344224003334,
+ "learning_rate": 1.3885578069129917e-07,
+ "logits/chosen": -3.203125,
+ "logits/rejected": -3.5,
+ "logps/chosen": -123.5,
+ "logps/rejected": -318.0,
+ "loss": 0.0063,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.13671875,
+ "rewards/margins": 12.5625,
+ "rewards/rejected": -12.375,
+ "step": 700
+ },
+ {
+ "epoch": 2.282958199356913,
+ "grad_norm": 0.16002189769618574,
+ "learning_rate": 1.3289630512514898e-07,
+ "logits/chosen": -3.0625,
+ "logits/rejected": -3.28125,
+ "logps/chosen": -119.0,
+ "logps/rejected": -340.0,
+ "loss": 0.0073,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.96484375,
+ "rewards/margins": 13.3125,
+ "rewards/rejected": -12.3125,
+ "step": 710
+ },
+ {
+ "epoch": 2.315112540192926,
+ "grad_norm": 0.24278926553916283,
+ "learning_rate": 1.2693682955899882e-07,
+ "logits/chosen": -3.125,
+ "logits/rejected": -3.296875,
+ "logps/chosen": -136.0,
+ "logps/rejected": -342.0,
+ "loss": 0.001,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.1201171875,
+ "rewards/margins": 12.875,
+ "rewards/rejected": -13.0,
+ "step": 720
+ },
+ {
+ "epoch": 2.347266881028939,
+ "grad_norm": 8.462048605522668,
+ "learning_rate": 1.2097735399284863e-07,
+ "logits/chosen": -3.15625,
+ "logits/rejected": -3.328125,
+ "logps/chosen": -114.5,
+ "logps/rejected": -298.0,
+ "loss": 0.0085,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.5546875,
+ "rewards/margins": 12.125,
+ "rewards/rejected": -11.5625,
+ "step": 730
+ },
+ {
+ "epoch": 2.379421221864952,
+ "grad_norm": 0.2836935848609144,
+ "learning_rate": 1.1501787842669844e-07,
+ "logits/chosen": -3.1875,
+ "logits/rejected": -3.4375,
+ "logps/chosen": -149.0,
+ "logps/rejected": -342.0,
+ "loss": 0.0095,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.388671875,
+ "rewards/margins": 12.625,
+ "rewards/rejected": -12.25,
+ "step": 740
+ },
+ {
+ "epoch": 2.4115755627009645,
+ "grad_norm": 0.20905746208537096,
+ "learning_rate": 1.0905840286054827e-07,
+ "logits/chosen": -3.21875,
+ "logits/rejected": -3.4375,
+ "logps/chosen": -144.0,
+ "logps/rejected": -336.0,
+ "loss": 0.0024,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.66015625,
+ "rewards/margins": 13.125,
+ "rewards/rejected": -12.5,
+ "step": 750
+ },
+ {
+ "epoch": 2.4437299035369775,
+ "grad_norm": 0.13532730518065741,
+ "learning_rate": 1.030989272943981e-07,
+ "logits/chosen": -3.15625,
+ "logits/rejected": -3.4375,
+ "logps/chosen": -135.0,
+ "logps/rejected": -340.0,
+ "loss": 0.0052,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.07666015625,
+ "rewards/margins": 12.9375,
+ "rewards/rejected": -12.875,
+ "step": 760
+ },
+ {
+ "epoch": 2.4758842443729905,
+ "grad_norm": 1.0359164426855658,
+ "learning_rate": 9.713945172824791e-08,
+ "logits/chosen": -3.09375,
+ "logits/rejected": -3.40625,
+ "logps/chosen": -124.5,
+ "logps/rejected": -330.0,
+ "loss": 0.0052,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.034912109375,
+ "rewards/margins": 13.3125,
+ "rewards/rejected": -13.375,
+ "step": 770
+ },
+ {
+ "epoch": 2.508038585209003,
+ "grad_norm": 0.02411362444063136,
+ "learning_rate": 9.117997616209773e-08,
+ "logits/chosen": -3.140625,
+ "logits/rejected": -3.390625,
+ "logps/chosen": -146.0,
+ "logps/rejected": -356.0,
+ "loss": 0.0025,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.3046875,
+ "rewards/margins": 13.75,
+ "rewards/rejected": -13.5,
+ "step": 780
+ },
+ {
+ "epoch": 2.540192926045016,
+ "grad_norm": 0.14361577541299492,
+ "learning_rate": 8.522050059594756e-08,
+ "logits/chosen": -3.15625,
+ "logits/rejected": -3.40625,
+ "logps/chosen": -148.0,
+ "logps/rejected": -340.0,
+ "loss": 0.0008,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.2734375,
+ "rewards/margins": 13.5,
+ "rewards/rejected": -13.25,
+ "step": 790
+ },
+ {
+ "epoch": 2.572347266881029,
+ "grad_norm": 3.7709142939626923,
+ "learning_rate": 7.926102502979737e-08,
+ "logits/chosen": -3.171875,
+ "logits/rejected": -3.40625,
+ "logps/chosen": -126.0,
+ "logps/rejected": -344.0,
+ "loss": 0.0013,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.62890625,
+ "rewards/margins": 13.6875,
+ "rewards/rejected": -13.0625,
+ "step": 800
+ },
+ {
+ "epoch": 2.604501607717042,
+ "grad_norm": 0.6815929170420622,
+ "learning_rate": 7.33015494636472e-08,
+ "logits/chosen": -3.171875,
+ "logits/rejected": -3.421875,
+ "logps/chosen": -122.0,
+ "logps/rejected": -306.0,
+ "loss": 0.0094,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.006866455078125,
+ "rewards/margins": 12.75,
+ "rewards/rejected": -12.75,
+ "step": 810
+ },
+ {
+ "epoch": 2.6366559485530545,
+ "grad_norm": 1.1968332787733935,
+ "learning_rate": 6.734207389749702e-08,
+ "logits/chosen": -3.09375,
+ "logits/rejected": -3.4375,
+ "logps/chosen": -110.0,
+ "logps/rejected": -320.0,
+ "loss": 0.0075,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.94140625,
+ "rewards/margins": 13.75,
+ "rewards/rejected": -12.75,
+ "step": 820
+ },
+ {
+ "epoch": 2.6688102893890675,
+ "grad_norm": 0.2654328339493102,
+ "learning_rate": 6.138259833134683e-08,
+ "logits/chosen": -3.15625,
+ "logits/rejected": -3.484375,
+ "logps/chosen": -127.5,
+ "logps/rejected": -328.0,
+ "loss": 0.0081,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.453125,
+ "rewards/margins": 13.9375,
+ "rewards/rejected": -13.5,
+ "step": 830
+ },
+ {
+ "epoch": 2.7009646302250805,
+ "grad_norm": 0.2744508216663156,
+ "learning_rate": 5.542312276519666e-08,
+ "logits/chosen": -3.140625,
+ "logits/rejected": -3.375,
+ "logps/chosen": -119.5,
+ "logps/rejected": -346.0,
+ "loss": 0.005,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.10107421875,
+ "rewards/margins": 13.125,
+ "rewards/rejected": -13.0625,
+ "step": 840
+ },
+ {
+ "epoch": 2.733118971061093,
+ "grad_norm": 0.03947959588228437,
+ "learning_rate": 4.9463647199046485e-08,
+ "logits/chosen": -3.203125,
+ "logits/rejected": -3.34375,
+ "logps/chosen": -120.5,
+ "logps/rejected": -316.0,
+ "loss": 0.0073,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.224609375,
+ "rewards/margins": 12.875,
+ "rewards/rejected": -12.6875,
+ "step": 850
+ },
+ {
+ "epoch": 2.765273311897106,
+ "grad_norm": 0.6863210482116098,
+ "learning_rate": 4.3504171632896303e-08,
+ "logits/chosen": -3.25,
+ "logits/rejected": -3.5,
+ "logps/chosen": -136.0,
+ "logps/rejected": -354.0,
+ "loss": 0.0105,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.59375,
+ "rewards/margins": 13.75,
+ "rewards/rejected": -13.125,
+ "step": 860
+ },
+ {
+ "epoch": 2.797427652733119,
+ "grad_norm": 2.8280463279363106,
+ "learning_rate": 3.754469606674612e-08,
+ "logits/chosen": -3.0625,
+ "logits/rejected": -3.390625,
+ "logps/chosen": -116.5,
+ "logps/rejected": -318.0,
+ "loss": 0.0078,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.255859375,
+ "rewards/margins": 13.25,
+ "rewards/rejected": -13.0,
+ "step": 870
+ },
+ {
+ "epoch": 2.829581993569132,
+ "grad_norm": 4.677533654072525,
+ "learning_rate": 3.158522050059595e-08,
+ "logits/chosen": -3.21875,
+ "logits/rejected": -3.421875,
+ "logps/chosen": -147.0,
+ "logps/rejected": -312.0,
+ "loss": 0.0227,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.1044921875,
+ "rewards/margins": 12.375,
+ "rewards/rejected": -12.25,
+ "step": 880
+ },
+ {
+ "epoch": 2.861736334405145,
+ "grad_norm": 4.075516801733505,
+ "learning_rate": 2.562574493444577e-08,
+ "logits/chosen": -3.140625,
+ "logits/rejected": -3.390625,
+ "logps/chosen": -130.0,
+ "logps/rejected": -328.0,
+ "loss": 0.0112,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.1201171875,
+ "rewards/margins": 13.125,
+ "rewards/rejected": -13.0,
+ "step": 890
+ },
+ {
+ "epoch": 2.8938906752411575,
+ "grad_norm": 0.31091685277937586,
+ "learning_rate": 1.966626936829559e-08,
+ "logits/chosen": -3.171875,
+ "logits/rejected": -3.453125,
+ "logps/chosen": -114.5,
+ "logps/rejected": -316.0,
+ "loss": 0.0116,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.498046875,
+ "rewards/margins": 13.3125,
+ "rewards/rejected": -12.8125,
+ "step": 900
+ },
+ {
+ "epoch": 2.9260450160771705,
+ "grad_norm": 0.008132094709855322,
+ "learning_rate": 1.370679380214541e-08,
+ "logits/chosen": -3.125,
+ "logits/rejected": -3.5,
+ "logps/chosen": -124.0,
+ "logps/rejected": -356.0,
+ "loss": 0.0056,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.0111083984375,
+ "rewards/margins": 13.625,
+ "rewards/rejected": -13.625,
+ "step": 910
+ },
+ {
+ "epoch": 2.958199356913183,
+ "grad_norm": 10.508744046977588,
+ "learning_rate": 7.747318235995233e-09,
+ "logits/chosen": -3.28125,
+ "logits/rejected": -3.5625,
+ "logps/chosen": -115.0,
+ "logps/rejected": -316.0,
+ "loss": 0.0031,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.1298828125,
+ "rewards/margins": 13.125,
+ "rewards/rejected": -13.0,
+ "step": 920
+ },
+ {
+ "epoch": 2.990353697749196,
+ "grad_norm": 0.053140421396559286,
+ "learning_rate": 1.7878426698450536e-09,
+ "logits/chosen": -3.1875,
+ "logits/rejected": -3.40625,
+ "logps/chosen": -117.5,
+ "logps/rejected": -308.0,
+ "loss": 0.0051,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.203125,
+ "rewards/margins": 12.9375,
+ "rewards/rejected": -12.75,
+ "step": 930
+ },
+ {
+ "epoch": 3.0,
+ "eval_logits/chosen": -3.515625,
+ "eval_logits/rejected": -3.59375,
+ "eval_logps/chosen": -245.0,
+ "eval_logps/rejected": -292.0,
+ "eval_loss": 1.0541672706604004,
+ "eval_rewards/accuracies": 0.6964285969734192,
+ "eval_rewards/chosen": -5.96875,
+ "eval_rewards/margins": 4.125,
+ "eval_rewards/rejected": -10.0625,
+ "eval_runtime": 15.892,
+ "eval_samples_per_second": 12.522,
+ "eval_steps_per_second": 0.44,
+ "step": 933
  }
  ],
  "logging_steps": 10,
@@ -998,7 +1479,7 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
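
The added log entries carry preference-optimization metrics (loss, rewards/chosen, rewards/rejected, rewards/margins) for steps 630 through 930, followed by the epoch-3 evaluation at step 933, and the run is marked finished via should_training_stop. A minimal sketch for pulling those rows back out of the checkpoint, assuming the standard Hugging Face Trainer layout in which they live under the "log_history" key (the local path is illustrative):

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training rows carry rewards/margins; evaluation rows use eval_-prefixed keys instead.
for entry in state["log_history"]:
    if "rewards/margins" in entry:
        print(entry["step"], entry["loss"], entry["rewards/margins"])
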