tryingpro committed on
Commit
5536774
·
verified ·
1 Parent(s): 29d8248

Training in progress, step 32, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66c5ec15f84223b0454fad1686c66abb74e48a721b8aae07fc873f6d7362a795
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bd253373d14792b1c72fb1f26a2d3c0c7f58984e6c01eef57ea13b2eb9056d0
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2cd2dafdb17e6a0bdc82605c0c2a15c7a1d2b8e95044ed00b6bf9d046f49dfe
3
  size 640009682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:964ee8b98d3ada3b620642817a8e7ea24b88d32d6912af8dedff76fedac349f9
3
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fba7b3aa24187728c3eecfbdd90586f69803afe4ee661ac3d5463e4c2381ab0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3c89468324c1a809962ad3dc3aa38e4a778e3d65a72b8d32588df9de3b7b85a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20b6afe9f4b9513bda96be941880d24726826d6e0d0960bca314ae3f1d65fc06
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:512576cc203164442a0e0284b99a04b73f2839ef4315a8aea93837a11a880d8d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.014977767376550433,
5
  "eval_steps": 8,
6
- "global_step": 24,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -95,6 +95,28 @@
95
  "eval_samples_per_second": 9.397,
96
  "eval_steps_per_second": 4.699,
97
  "step": 24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  }
99
  ],
100
  "logging_steps": 3,
@@ -114,7 +136,7 @@
114
  "attributes": {}
115
  }
116
  },
117
- "total_flos": 1.2768461029638144e+17,
118
  "train_batch_size": 2,
119
  "trial_name": null,
120
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.019970356502067244,
5
  "eval_steps": 8,
6
+ "global_step": 32,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
95
  "eval_samples_per_second": 9.397,
96
  "eval_steps_per_second": 4.699,
97
  "step": 24
98
+ },
99
+ {
100
+ "epoch": 0.016849988298619237,
101
+ "grad_norm": 0.40756919980049133,
102
+ "learning_rate": 0.00019510565162951537,
103
+ "loss": 0.8186,
104
+ "step": 27
105
+ },
106
+ {
107
+ "epoch": 0.01872220922068804,
108
+ "grad_norm": 0.5308417081832886,
109
+ "learning_rate": 0.0001900968867902419,
110
+ "loss": 0.8643,
111
+ "step": 30
112
+ },
113
+ {
114
+ "epoch": 0.019970356502067244,
115
+ "eval_loss": 0.7987203001976013,
116
+ "eval_runtime": 575.0353,
117
+ "eval_samples_per_second": 9.387,
118
+ "eval_steps_per_second": 4.694,
119
+ "step": 32
120
  }
121
  ],
122
  "logging_steps": 3,
 
136
  "attributes": {}
137
  }
138
  },
139
+ "total_flos": 1.7024614706184192e+17,
140
  "train_batch_size": 2,
141
  "trial_name": null,
142
  "trial_params": null