eddysang committed on
Commit
e5a1e58
·
verified ·
1 Parent(s): 75330cc

Training in progress, step 51, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1b54cccbbed2f75eaef804b9662f6ef8e1c82290e1144dee6ea63fb77aeb26f
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2690da81d83e89e9ee7b7dc7113de883b592b46176c59f118aaff5ad1b1c42cc
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90bcb817a546831a005a9504935df07250d9cc27fd981d0add1cf5aa973ee4f9
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580ef17b80875f7190859d2da30842b5c7fe663cfb653f5c46ee771354997856
3
  size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7abcd57538b0647768495b9aeb3b444e111ce136e810d289cc8fe830dca41b95
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02f54eb84b382db409dccc97cebf46240cf5b6285939222273bf59b8c0558286
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e1983b20d7ce0214623b79adb071ed1f5c168cabcab4cc0ff2c0c61c63ddce9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86702c0a3caad6c51746e54805a7289de03dff9cc5abc148a58966cf1f4d339
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.050057510927076145,
5
  "eval_steps": 50,
6
- "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -253,6 +253,133 @@
253
  "learning_rate": 0.0001477721794706997,
254
  "loss": 0.0717,
255
  "step": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  }
257
  ],
258
  "logging_steps": 1,
@@ -272,7 +399,7 @@
272
  "attributes": {}
273
  }
274
  },
275
- "total_flos": 3.597635405563822e+17,
276
  "train_batch_size": 2,
277
  "trial_name": null,
278
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.07508626639061422,
5
  "eval_steps": 50,
6
+ "global_step": 51,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
253
  "learning_rate": 0.0001477721794706997,
254
  "loss": 0.0717,
255
  "step": 34
256
+ },
257
+ {
258
+ "epoch": 0.05152979066022544,
259
+ "grad_norm": 0.15129899978637695,
260
+ "learning_rate": 0.0001474444369716801,
261
+ "loss": 0.1103,
262
+ "step": 35
263
+ },
264
+ {
265
+ "epoch": 0.05300207039337474,
266
+ "grad_norm": 0.14446967840194702,
267
+ "learning_rate": 0.0001470946271953739,
268
+ "loss": 0.1674,
269
+ "step": 36
270
+ },
271
+ {
272
+ "epoch": 0.05447435012652404,
273
+ "grad_norm": 0.10094312578439713,
274
+ "learning_rate": 0.00014672285669722765,
275
+ "loss": 0.0696,
276
+ "step": 37
277
+ },
278
+ {
279
+ "epoch": 0.05594662985967334,
280
+ "grad_norm": 0.17120350897312164,
281
+ "learning_rate": 0.00014632923872213652,
282
+ "loss": 0.2139,
283
+ "step": 38
284
+ },
285
+ {
286
+ "epoch": 0.057418909592822635,
287
+ "grad_norm": 0.14146435260772705,
288
+ "learning_rate": 0.00014591389316994876,
289
+ "loss": 0.0925,
290
+ "step": 39
291
+ },
292
+ {
293
+ "epoch": 0.058891189325971934,
294
+ "grad_norm": 0.14251448214054108,
295
+ "learning_rate": 0.0001454769465589431,
296
+ "loss": 0.1002,
297
+ "step": 40
298
+ },
299
+ {
300
+ "epoch": 0.06036346905912123,
301
+ "grad_norm": 0.07004090398550034,
302
+ "learning_rate": 0.00014501853198729012,
303
+ "loss": 0.0538,
304
+ "step": 41
305
+ },
306
+ {
307
+ "epoch": 0.06183574879227053,
308
+ "grad_norm": 0.14318852126598358,
309
+ "learning_rate": 0.00014453878909250904,
310
+ "loss": 0.1316,
311
+ "step": 42
312
+ },
313
+ {
314
+ "epoch": 0.06330802852541983,
315
+ "grad_norm": 0.10623105615377426,
316
+ "learning_rate": 0.00014403786400893302,
317
+ "loss": 0.0866,
318
+ "step": 43
319
+ },
320
+ {
321
+ "epoch": 0.06478030825856913,
322
+ "grad_norm": 0.10893028974533081,
323
+ "learning_rate": 0.00014351590932319504,
324
+ "loss": 0.0518,
325
+ "step": 44
326
+ },
327
+ {
328
+ "epoch": 0.06625258799171843,
329
+ "grad_norm": 0.1411529928445816,
330
+ "learning_rate": 0.00014297308402774875,
331
+ "loss": 0.1357,
332
+ "step": 45
333
+ },
334
+ {
335
+ "epoch": 0.06772486772486773,
336
+ "grad_norm": 0.10105417668819427,
337
+ "learning_rate": 0.0001424095534724375,
338
+ "loss": 0.0654,
339
+ "step": 46
340
+ },
341
+ {
342
+ "epoch": 0.06919714745801703,
343
+ "grad_norm": 0.14420634508132935,
344
+ "learning_rate": 0.00014182548931412757,
345
+ "loss": 0.0935,
346
+ "step": 47
347
+ },
348
+ {
349
+ "epoch": 0.07066942719116633,
350
+ "grad_norm": 0.12569449841976166,
351
+ "learning_rate": 0.0001412210694644195,
352
+ "loss": 0.0848,
353
+ "step": 48
354
+ },
355
+ {
356
+ "epoch": 0.07214170692431562,
357
+ "grad_norm": 0.09209802001714706,
358
+ "learning_rate": 0.00014059647803545467,
359
+ "loss": 0.0473,
360
+ "step": 49
361
+ },
362
+ {
363
+ "epoch": 0.07361398665746492,
364
+ "grad_norm": 0.12560804188251495,
365
+ "learning_rate": 0.0001399519052838329,
366
+ "loss": 0.0785,
367
+ "step": 50
368
+ },
369
+ {
370
+ "epoch": 0.07361398665746492,
371
+ "eval_loss": 0.08997488021850586,
372
+ "eval_runtime": 784.5444,
373
+ "eval_samples_per_second": 2.916,
374
+ "eval_steps_per_second": 1.458,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 0.07508626639061422,
379
+ "grad_norm": 0.15386323630809784,
380
+ "learning_rate": 0.00013928754755265842,
381
+ "loss": 0.1427,
382
+ "step": 51
383
  }
384
  ],
385
  "logging_steps": 1,
 
399
  "attributes": {}
400
  }
401
  },
402
+ "total_flos": 5.401115673671762e+17,
403
  "train_batch_size": 2,
404
  "trial_name": null,
405
  "trial_params": null