oldiday commited on
Commit
515a463
·
verified ·
1 Parent(s): 37e9c7f

Training in progress, step 18, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b64b7c359ca227bb13358640c76c139663e217ce067c05e58e13ebd6ca340968
3
  size 50503544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eea54761ed1607b29f7e0b424b89c991f493177288ab99163e8f5c7de02a698e
3
  size 50503544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3eeea3f2ee44c47bd8e5d7df938552560a33fda9dfc983aebbfe282fadb54677
3
  size 25986148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bcb2d27aefe0220cf79cbc53fece45cbbb4dd13e0ac6ce47cd65e38f138bd4c
3
  size 25986148
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:475e61e5257ea8c29bd19f1f5541657fadbaf18eca05eb1b812c0d29deba88c1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53850beb550ee439f39dd9a26674a0401c26f4be397fed92d41d13b36036343f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1eca0ddb3ec890c90e8e6e7325bec40cdb8b59e4bebf5c0da8473b02809de875
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed723721bc2dad2381ab5522e40123ba4aa738fa3e449c48809fa9c2697eb95
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0031526403362816357,
5
  "eval_steps": 9,
6
- "global_step": 9,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -44,6 +44,35 @@
44
  "eval_samples_per_second": 27.87,
45
  "eval_steps_per_second": 3.484,
46
  "step": 9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
  ],
49
  "logging_steps": 3,
@@ -63,7 +92,7 @@
63
  "attributes": {}
64
  }
65
  },
66
- "total_flos": 3705727579324416.0,
67
  "train_batch_size": 8,
68
  "trial_name": null,
69
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0063052806725632715,
5
  "eval_steps": 9,
6
+ "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
44
  "eval_samples_per_second": 27.87,
45
  "eval_steps_per_second": 3.484,
46
  "step": 9
47
+ },
48
+ {
49
+ "epoch": 0.004203520448375514,
50
+ "grad_norm": 0.2796209156513214,
51
+ "learning_rate": 9.987820251299122e-05,
52
+ "loss": 1.2384,
53
+ "step": 12
54
+ },
55
+ {
56
+ "epoch": 0.005254400560469393,
57
+ "grad_norm": 0.28329601883888245,
58
+ "learning_rate": 9.924038765061042e-05,
59
+ "loss": 1.2393,
60
+ "step": 15
61
+ },
62
+ {
63
+ "epoch": 0.0063052806725632715,
64
+ "grad_norm": 0.22264641523361206,
65
+ "learning_rate": 9.806308479691595e-05,
66
+ "loss": 1.1474,
67
+ "step": 18
68
+ },
69
+ {
70
+ "epoch": 0.0063052806725632715,
71
+ "eval_loss": 1.1840261220932007,
72
+ "eval_runtime": 172.502,
73
+ "eval_samples_per_second": 27.872,
74
+ "eval_steps_per_second": 3.484,
75
+ "step": 18
76
  }
77
  ],
78
  "logging_steps": 3,
 
92
  "attributes": {}
93
  }
94
  },
95
+ "total_flos": 7411455158648832.0,
96
  "train_batch_size": 8,
97
  "trial_name": null,
98
  "trial_params": null