eddysang commited on
Commit
8ae6dcf
·
verified ·
1 Parent(s): 3b43856

Training in progress, step 51, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2191d75ff3a3cbc5977ee6aa1fa814e46a578f07c9693689328ea208bbf4be39
3
  size 640009682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:face95f531991d1c4ebc8ae39b39d8db4111d815f0a30edbf3c7d0e93d644be1
3
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff7d13d5439a9e3c4d151b127166791f2357c478fbaa7a80e57f5222b10418b9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1707c9ccf7c6746c4a9c679bcbea2dd0711edc333e819eb762e01abed56271bb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e1983b20d7ce0214623b79adb071ed1f5c168cabcab4cc0ff2c0c61c63ddce9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86702c0a3caad6c51746e54805a7289de03dff9cc5abc148a58966cf1f4d339
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03736007142366596,
5
  "eval_steps": 50,
6
- "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -253,6 +253,133 @@
253
  "learning_rate": 0.0001477721794706997,
254
  "loss": 0.0,
255
  "step": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  }
257
  ],
258
  "logging_steps": 1,
@@ -272,7 +399,7 @@
272
  "attributes": {}
273
  }
274
  },
275
- "total_flos": 2.0001228862154342e+17,
276
  "train_batch_size": 2,
277
  "trial_name": null,
278
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.05604010713549894,
5
  "eval_steps": 50,
6
+ "global_step": 51,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
253
  "learning_rate": 0.0001477721794706997,
254
  "loss": 0.0,
255
  "step": 34
256
+ },
257
+ {
258
+ "epoch": 0.038458897053773776,
259
+ "grad_norm": NaN,
260
+ "learning_rate": 0.0001474444369716801,
261
+ "loss": 0.0,
262
+ "step": 35
263
+ },
264
+ {
265
+ "epoch": 0.0395577226838816,
266
+ "grad_norm": NaN,
267
+ "learning_rate": 0.0001470946271953739,
268
+ "loss": 0.0,
269
+ "step": 36
270
+ },
271
+ {
272
+ "epoch": 0.040656548313989425,
273
+ "grad_norm": NaN,
274
+ "learning_rate": 0.00014672285669722765,
275
+ "loss": 0.0,
276
+ "step": 37
277
+ },
278
+ {
279
+ "epoch": 0.04175537394409724,
280
+ "grad_norm": NaN,
281
+ "learning_rate": 0.00014632923872213652,
282
+ "loss": 0.0,
283
+ "step": 38
284
+ },
285
+ {
286
+ "epoch": 0.04285419957420507,
287
+ "grad_norm": NaN,
288
+ "learning_rate": 0.00014591389316994876,
289
+ "loss": 0.0,
290
+ "step": 39
291
+ },
292
+ {
293
+ "epoch": 0.04395302520431289,
294
+ "grad_norm": NaN,
295
+ "learning_rate": 0.0001454769465589431,
296
+ "loss": 0.0,
297
+ "step": 40
298
+ },
299
+ {
300
+ "epoch": 0.04505185083442071,
301
+ "grad_norm": NaN,
302
+ "learning_rate": 0.00014501853198729012,
303
+ "loss": 0.0,
304
+ "step": 41
305
+ },
306
+ {
307
+ "epoch": 0.046150676464528535,
308
+ "grad_norm": NaN,
309
+ "learning_rate": 0.00014453878909250904,
310
+ "loss": 0.0,
311
+ "step": 42
312
+ },
313
+ {
314
+ "epoch": 0.04724950209463636,
315
+ "grad_norm": NaN,
316
+ "learning_rate": 0.00014403786400893302,
317
+ "loss": 0.0,
318
+ "step": 43
319
+ },
320
+ {
321
+ "epoch": 0.04834832772474418,
322
+ "grad_norm": NaN,
323
+ "learning_rate": 0.00014351590932319504,
324
+ "loss": 0.0,
325
+ "step": 44
326
+ },
327
+ {
328
+ "epoch": 0.049447153354852,
329
+ "grad_norm": NaN,
330
+ "learning_rate": 0.00014297308402774875,
331
+ "loss": 0.0,
332
+ "step": 45
333
+ },
334
+ {
335
+ "epoch": 0.05054597898495983,
336
+ "grad_norm": NaN,
337
+ "learning_rate": 0.0001424095534724375,
338
+ "loss": 0.0,
339
+ "step": 46
340
+ },
341
+ {
342
+ "epoch": 0.051644804615067645,
343
+ "grad_norm": NaN,
344
+ "learning_rate": 0.00014182548931412757,
345
+ "loss": 0.0,
346
+ "step": 47
347
+ },
348
+ {
349
+ "epoch": 0.05274363024517547,
350
+ "grad_norm": NaN,
351
+ "learning_rate": 0.0001412210694644195,
352
+ "loss": 0.0,
353
+ "step": 48
354
+ },
355
+ {
356
+ "epoch": 0.05384245587528329,
357
+ "grad_norm": NaN,
358
+ "learning_rate": 0.00014059647803545467,
359
+ "loss": 0.0,
360
+ "step": 49
361
+ },
362
+ {
363
+ "epoch": 0.05494128150539111,
364
+ "grad_norm": NaN,
365
+ "learning_rate": 0.0001399519052838329,
366
+ "loss": 0.0,
367
+ "step": 50
368
+ },
369
+ {
370
+ "epoch": 0.05494128150539111,
371
+ "eval_loss": NaN,
372
+ "eval_runtime": 638.0326,
373
+ "eval_samples_per_second": 4.805,
374
+ "eval_steps_per_second": 2.403,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 0.05604010713549894,
379
+ "grad_norm": NaN,
380
+ "learning_rate": 0.00013928754755265842,
381
+ "loss": 0.0,
382
+ "step": 51
383
  }
384
  ],
385
  "logging_steps": 1,
 
399
  "attributes": {}
400
  }
401
  },
402
+ "total_flos": 3.0075800178917376e+17,
403
  "train_batch_size": 2,
404
  "trial_name": null,
405
  "trial_params": null