eddysang committed
Commit 83eed8b · verified · 1 Parent(s): 6dc33c4

Training in progress, step 26, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c4c57ed7563c381ecea9a5a3742625f1fbd7b95cfba1a59af1fbdcfb15fa7b4
+oid sha256:798291a48541d51ec27dccfb04969ed84a240c4b5aad0fb27cc0e2669bfd528d
 size 500770656
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b828ae868e266585515a010b81284dc9974bb2d9d5380076fc4b1829f212ba01
+oid sha256:3cc8f86ad37f0cf51611f9aeecc8a395f82787608cb2ce6f7ccf27330ce2e6ee
 size 1001863522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32dce9a620fb887bf8c21a022ec964dc4bdc29e99cf526da3edd74c931dc7985
+oid sha256:55ea9ad2466bee9501938172cccd6c85022832f061db4916ca506511c63fd6ce
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18566815dcf6671d9e6506c7faf4f1e794eb811de4804054ae3a16b2108e6c1f
+oid sha256:34f9c194f14d5c67265ac71b8d34cb6f960441ca9a99e80d33d78e02859e649b
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.18205689277899342,
+  "epoch": 0.36411378555798685,
   "eval_steps": 50,
-  "global_step": 13,
+  "global_step": 26,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -106,6 +106,97 @@
       "learning_rate": 9.75e-05,
       "loss": 16.2046,
       "step": 13
+    },
+    {
+      "epoch": 0.19606126914660832,
+      "grad_norm": 64.03682708740234,
+      "learning_rate": 0.00010499999999999999,
+      "loss": 13.2331,
+      "step": 14
+    },
+    {
+      "epoch": 0.2100656455142232,
+      "grad_norm": 34.19150924682617,
+      "learning_rate": 0.0001125,
+      "loss": 11.2453,
+      "step": 15
+    },
+    {
+      "epoch": 0.22407002188183808,
+      "grad_norm": 34.346343994140625,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 9.5668,
+      "step": 16
+    },
+    {
+      "epoch": 0.23807439824945295,
+      "grad_norm": 26.109838485717773,
+      "learning_rate": 0.00012749999999999998,
+      "loss": 6.7749,
+      "step": 17
+    },
+    {
+      "epoch": 0.25207877461706785,
+      "grad_norm": 17.14702606201172,
+      "learning_rate": 0.000135,
+      "loss": 6.2887,
+      "step": 18
+    },
+    {
+      "epoch": 0.2660831509846827,
+      "grad_norm": 15.40426254272461,
+      "learning_rate": 0.0001425,
+      "loss": 5.1345,
+      "step": 19
+    },
+    {
+      "epoch": 0.2800875273522976,
+      "grad_norm": 13.663360595703125,
+      "learning_rate": 0.00015,
+      "loss": 4.3361,
+      "step": 20
+    },
+    {
+      "epoch": 0.29409190371991245,
+      "grad_norm": 14.994868278503418,
+      "learning_rate": 0.00014997810105601446,
+      "loss": 3.2669,
+      "step": 21
+    },
+    {
+      "epoch": 0.3080962800875274,
+      "grad_norm": 17.923627853393555,
+      "learning_rate": 0.0001499124170124245,
+      "loss": 2.8837,
+      "step": 22
+    },
+    {
+      "epoch": 0.32210065645514224,
+      "grad_norm": 10.781777381896973,
+      "learning_rate": 0.00014980298622686183,
+      "loss": 2.5867,
+      "step": 23
+    },
+    {
+      "epoch": 0.3361050328227571,
+      "grad_norm": 17.933012008666992,
+      "learning_rate": 0.00014964987260382363,
+      "loss": 3.79,
+      "step": 24
+    },
+    {
+      "epoch": 0.350109409190372,
+      "grad_norm": 14.919720649719238,
+      "learning_rate": 0.00014945316555735403,
+      "loss": 3.5252,
+      "step": 25
+    },
+    {
+      "epoch": 0.36411378555798685,
+      "grad_norm": 13.19090747833252,
+      "learning_rate": 0.0001492129799588288,
+      "loss": 2.2783,
+      "step": 26
     }
   ],
   "logging_steps": 1,
@@ -125,7 +216,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3267472744448e+17,
+  "total_flos": 2.6534945488896e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null