rak-r05 committed on
Commit
cf7cc6f
·
verified ·
1 Parent(s): 9b45510

Training in progress, step 76, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a5e97ee51415c44dfd066e85165f682995c33d73ed3f15e25a657e740c5379
3
  size 598799664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5184a9f855392b369617a571c4fe8c407c7dbd8eba55d46eea4846faa6eef1da
3
  size 598799664
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db94711a7ee8f479fb121ace99a38fd30aafe607ad9e798de567f72cbd6eff2a
3
  size 42898516
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:123b917ce2349c1a74d8844eac29f8c6cf2dc99b9a864c096be4ff65462b6328
3
  size 42898516
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23f9d6ab1ccc7fe8b31acc1af34cd4adf9b85aa8b786d6c73f339fc8f77922ac
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51a053afc2b69b6892ee05d33f3fb3716eac75fd7fbab8c36a6324da423adb8d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2c308cc707c2dec21968b9880f5c59e591a14c9a4dfeed75e541107cfb707ca
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc8e5f98da37dd5d6e76ad1dcd9560295d5ddc12e8c320cbe967800911c3d6c0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03544776119402985,
5
  "eval_steps": 38,
6
- "global_step": 38,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -289,6 +289,280 @@
289
  "eval_samples_per_second": 7.503,
290
  "eval_steps_per_second": 3.751,
291
  "step": 38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  }
293
  ],
294
  "logging_steps": 1,
@@ -308,7 +582,7 @@
308
  "attributes": {}
309
  }
310
  },
311
- "total_flos": 4981999911567360.0,
312
  "train_batch_size": 2,
313
  "trial_name": null,
314
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0708955223880597,
5
  "eval_steps": 38,
6
+ "global_step": 76,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
289
  "eval_samples_per_second": 7.503,
290
  "eval_steps_per_second": 3.751,
291
  "step": 38
292
+ },
293
+ {
294
+ "epoch": 0.036380597014925374,
295
+ "grad_norm": 1.4865802526474,
296
+ "learning_rate": 0.00035912490977635625,
297
+ "loss": 2.2656,
298
+ "step": 39
299
+ },
300
+ {
301
+ "epoch": 0.03731343283582089,
302
+ "grad_norm": 1.3488837480545044,
303
+ "learning_rate": 0.000356366296493606,
304
+ "loss": 1.9688,
305
+ "step": 40
306
+ },
307
+ {
308
+ "epoch": 0.03824626865671642,
309
+ "grad_norm": 1.4332904815673828,
310
+ "learning_rate": 0.0003535289480716022,
311
+ "loss": 2.2615,
312
+ "step": 41
313
+ },
314
+ {
315
+ "epoch": 0.03917910447761194,
316
+ "grad_norm": 1.516402006149292,
317
+ "learning_rate": 0.00035061429320072223,
318
+ "loss": 2.059,
319
+ "step": 42
320
+ },
321
+ {
322
+ "epoch": 0.04011194029850746,
323
+ "grad_norm": 1.4039419889450073,
324
+ "learning_rate": 0.00034762379949746815,
325
+ "loss": 2.1295,
326
+ "step": 43
327
+ },
328
+ {
329
+ "epoch": 0.041044776119402986,
330
+ "grad_norm": 1.2742645740509033,
331
+ "learning_rate": 0.0003445589727654783,
332
+ "loss": 2.1964,
333
+ "step": 44
334
+ },
335
+ {
336
+ "epoch": 0.04197761194029851,
337
+ "grad_norm": 1.115033507347107,
338
+ "learning_rate": 0.0003414213562373095,
339
+ "loss": 1.8306,
340
+ "step": 45
341
+ },
342
+ {
343
+ "epoch": 0.04291044776119403,
344
+ "grad_norm": 1.1708894968032837,
345
+ "learning_rate": 0.00033821252979737297,
346
+ "loss": 1.9652,
347
+ "step": 46
348
+ },
349
+ {
350
+ "epoch": 0.043843283582089554,
351
+ "grad_norm": 1.185393214225769,
352
+ "learning_rate": 0.0003349341091864149,
353
+ "loss": 1.8172,
354
+ "step": 47
355
+ },
356
+ {
357
+ "epoch": 0.04477611940298507,
358
+ "grad_norm": 1.1099275350570679,
359
+ "learning_rate": 0.00033158774518794254,
360
+ "loss": 1.7576,
361
+ "step": 48
362
+ },
363
+ {
364
+ "epoch": 0.0457089552238806,
365
+ "grad_norm": 1.0703908205032349,
366
+ "learning_rate": 0.0003281751227970048,
367
+ "loss": 1.9036,
368
+ "step": 49
369
+ },
370
+ {
371
+ "epoch": 0.04664179104477612,
372
+ "grad_norm": 1.1421232223510742,
373
+ "learning_rate": 0.00032469796037174674,
374
+ "loss": 1.9759,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 0.04757462686567164,
379
+ "grad_norm": 1.1319009065628052,
380
+ "learning_rate": 0.000321158008768164,
381
+ "loss": 1.7476,
382
+ "step": 51
383
+ },
384
+ {
385
+ "epoch": 0.048507462686567165,
386
+ "grad_norm": 1.1899311542510986,
387
+ "learning_rate": 0.00031755705045849464,
388
+ "loss": 1.8308,
389
+ "step": 52
390
+ },
391
+ {
392
+ "epoch": 0.049440298507462684,
393
+ "grad_norm": 1.246800184249878,
394
+ "learning_rate": 0.0003138968986336904,
395
+ "loss": 1.9183,
396
+ "step": 53
397
+ },
398
+ {
399
+ "epoch": 0.05037313432835821,
400
+ "grad_norm": 1.207138180732727,
401
+ "learning_rate": 0.0003101793962904205,
402
+ "loss": 1.8768,
403
+ "step": 54
404
+ },
405
+ {
406
+ "epoch": 0.051305970149253734,
407
+ "grad_norm": 0.9686072468757629,
408
+ "learning_rate": 0.00030640641530306733,
409
+ "loss": 1.6107,
410
+ "step": 55
411
+ },
412
+ {
413
+ "epoch": 0.05223880597014925,
414
+ "grad_norm": 1.1268424987792969,
415
+ "learning_rate": 0.00030257985548118126,
416
+ "loss": 1.9169,
417
+ "step": 56
418
+ },
419
+ {
420
+ "epoch": 0.05317164179104478,
421
+ "grad_norm": 1.3243463039398193,
422
+ "learning_rate": 0.0002987016436128694,
423
+ "loss": 2.1651,
424
+ "step": 57
425
+ },
426
+ {
427
+ "epoch": 0.054104477611940295,
428
+ "grad_norm": 1.230141043663025,
429
+ "learning_rate": 0.0002947737324945997,
430
+ "loss": 2.0871,
431
+ "step": 58
432
+ },
433
+ {
434
+ "epoch": 0.05503731343283582,
435
+ "grad_norm": 1.0468418598175049,
436
+ "learning_rate": 0.00029079809994790937,
437
+ "loss": 1.7348,
438
+ "step": 59
439
+ },
440
+ {
441
+ "epoch": 0.055970149253731345,
442
+ "grad_norm": 1.3394778966903687,
443
+ "learning_rate": 0.00028677674782351165,
444
+ "loss": 2.3597,
445
+ "step": 60
446
+ },
447
+ {
448
+ "epoch": 0.05690298507462686,
449
+ "grad_norm": 1.2126855850219727,
450
+ "learning_rate": 0.00028271170099330415,
451
+ "loss": 2.0063,
452
+ "step": 61
453
+ },
454
+ {
455
+ "epoch": 0.05783582089552239,
456
+ "grad_norm": 1.2102502584457397,
457
+ "learning_rate": 0.00027860500633078477,
458
+ "loss": 2.0048,
459
+ "step": 62
460
+ },
461
+ {
462
+ "epoch": 0.058768656716417914,
463
+ "grad_norm": 1.2987444400787354,
464
+ "learning_rate": 0.00027445873168038907,
465
+ "loss": 2.6436,
466
+ "step": 63
467
+ },
468
+ {
469
+ "epoch": 0.05970149253731343,
470
+ "grad_norm": 1.1055057048797607,
471
+ "learning_rate": 0.0002702749648162686,
472
+ "loss": 1.8015,
473
+ "step": 64
474
+ },
475
+ {
476
+ "epoch": 0.06063432835820896,
477
+ "grad_norm": 1.1894230842590332,
478
+ "learning_rate": 0.00026605581239103347,
479
+ "loss": 2.0075,
480
+ "step": 65
481
+ },
482
+ {
483
+ "epoch": 0.061567164179104475,
484
+ "grad_norm": 0.9544552564620972,
485
+ "learning_rate": 0.00026180339887498953,
486
+ "loss": 1.5043,
487
+ "step": 66
488
+ },
489
+ {
490
+ "epoch": 0.0625,
491
+ "grad_norm": 1.2134445905685425,
492
+ "learning_rate": 0.00025751986548640346,
493
+ "loss": 1.9537,
494
+ "step": 67
495
+ },
496
+ {
497
+ "epoch": 0.06343283582089553,
498
+ "grad_norm": 1.061187505722046,
499
+ "learning_rate": 0.00025320736911333503,
500
+ "loss": 1.6231,
501
+ "step": 68
502
+ },
503
+ {
504
+ "epoch": 0.06436567164179105,
505
+ "grad_norm": 1.1641594171524048,
506
+ "learning_rate": 0.0002488680812275788,
507
+ "loss": 1.6016,
508
+ "step": 69
509
+ },
510
+ {
511
+ "epoch": 0.06529850746268656,
512
+ "grad_norm": 1.1650375127792358,
513
+ "learning_rate": 0.0002445041867912629,
514
+ "loss": 1.8811,
515
+ "step": 70
516
+ },
517
+ {
518
+ "epoch": 0.06623134328358209,
519
+ "grad_norm": 1.319548487663269,
520
+ "learning_rate": 0.00024011788315665458,
521
+ "loss": 1.7969,
522
+ "step": 71
523
+ },
524
+ {
525
+ "epoch": 0.06716417910447761,
526
+ "grad_norm": 1.009516954421997,
527
+ "learning_rate": 0.00023571137895972733,
528
+ "loss": 1.4261,
529
+ "step": 72
530
+ },
531
+ {
532
+ "epoch": 0.06809701492537314,
533
+ "grad_norm": 1.1219674348831177,
534
+ "learning_rate": 0.0002312868930080462,
535
+ "loss": 1.5247,
536
+ "step": 73
537
+ },
538
+ {
539
+ "epoch": 0.06902985074626866,
540
+ "grad_norm": 1.0498907566070557,
541
+ "learning_rate": 0.0002268466531635311,
542
+ "loss": 1.7131,
543
+ "step": 74
544
+ },
545
+ {
546
+ "epoch": 0.06996268656716417,
547
+ "grad_norm": 1.0986140966415405,
548
+ "learning_rate": 0.00022239289522066157,
549
+ "loss": 1.7584,
550
+ "step": 75
551
+ },
552
+ {
553
+ "epoch": 0.0708955223880597,
554
+ "grad_norm": 1.2214365005493164,
555
+ "learning_rate": 0.00021792786178068672,
556
+ "loss": 1.7782,
557
+ "step": 76
558
+ },
559
+ {
560
+ "epoch": 0.0708955223880597,
561
+ "eval_loss": 0.47117629647254944,
562
+ "eval_runtime": 60.2201,
563
+ "eval_samples_per_second": 7.506,
564
+ "eval_steps_per_second": 3.753,
565
+ "step": 76
566
  }
567
  ],
568
  "logging_steps": 1,
 
582
  "attributes": {}
583
  }
584
  },
585
+ "total_flos": 9963999823134720.0,
586
  "train_batch_size": 2,
587
  "trial_name": null,
588
  "trial_params": null