dixedus commited on
Commit
652bd12
·
verified ·
1 Parent(s): 4646a59

Training in progress, step 51, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa72c69e81bba821cc4c73a7f7844bf111cb1b22c425a42f8940d2a620786f87
3
  size 19552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c40abc5ddbc8cce4c38252033f65546ef0ed695d12c0979ab0e6e53b8ddff3d
3
  size 19552
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dc66b34c385cd244db2547d4c348b3df1eb8dc43f9577663661efef47fbad88
3
  size 44354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6131a0b83a062e037bac2e68d74d4a9c1690df55d265b19520f04c71f8c51943
3
  size 44354
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8401f39bd167cf8ab44ce58e7de9c3739721eec7ba9360b71bc778d12332a672
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7bd16cd408bd3dd2676b8823dfc63738eec458eb445f09c19422fb33f5a1acb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0af27ed5b5e5c3013f1da7a97e494138e751399ceff1f8e7486b6e269bc7092f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9fa892ecd236e652150058649b13d9161331d85e374c4bec1d60ad6a556c3a9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.02568945976577257,
5
  "eval_steps": 17,
6
- "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -108,6 +108,56 @@
108
  "eval_samples_per_second": 36.079,
109
  "eval_steps_per_second": 4.514,
110
  "step": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  }
112
  ],
113
  "logging_steps": 3,
@@ -127,7 +177,7 @@
127
  "attributes": {}
128
  }
129
  },
130
- "total_flos": 73987522560.0,
131
  "train_batch_size": 8,
132
  "trial_name": null,
133
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03853418964865886,
5
  "eval_steps": 17,
6
+ "global_step": 51,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
108
  "eval_samples_per_second": 36.079,
109
  "eval_steps_per_second": 4.514,
110
  "step": 34
111
+ },
112
+ {
113
+ "epoch": 0.02720060445787684,
114
+ "grad_norm": 0.013970930129289627,
115
+ "learning_rate": 9.545032675245813e-05,
116
+ "loss": 12.4541,
117
+ "step": 36
118
+ },
119
+ {
120
+ "epoch": 0.029467321496033247,
121
+ "grad_norm": 0.014797762967646122,
122
+ "learning_rate": 9.43611409721806e-05,
123
+ "loss": 12.4476,
124
+ "step": 39
125
+ },
126
+ {
127
+ "epoch": 0.03173403853418965,
128
+ "grad_norm": 0.01638209819793701,
129
+ "learning_rate": 9.316282404787871e-05,
130
+ "loss": 12.4498,
131
+ "step": 42
132
+ },
133
+ {
134
+ "epoch": 0.03400075557234605,
135
+ "grad_norm": 0.017414981499314308,
136
+ "learning_rate": 9.185832391312644e-05,
137
+ "loss": 12.4489,
138
+ "step": 45
139
+ },
140
+ {
141
+ "epoch": 0.036267472610502456,
142
+ "grad_norm": 0.01976948417723179,
143
+ "learning_rate": 9.045084971874738e-05,
144
+ "loss": 12.4498,
145
+ "step": 48
146
+ },
147
+ {
148
+ "epoch": 0.03853418964865886,
149
+ "grad_norm": 0.017705973237752914,
150
+ "learning_rate": 8.894386393810563e-05,
151
+ "loss": 12.4478,
152
+ "step": 51
153
+ },
154
+ {
155
+ "epoch": 0.03853418964865886,
156
+ "eval_loss": 12.449511528015137,
157
+ "eval_runtime": 61.8317,
158
+ "eval_samples_per_second": 36.066,
159
+ "eval_steps_per_second": 4.512,
160
+ "step": 51
161
  }
162
  ],
163
  "logging_steps": 3,
 
177
  "attributes": {}
178
  }
179
  },
180
+ "total_flos": 111289565184.0,
181
  "train_batch_size": 8,
182
  "trial_name": null,
183
  "trial_params": null