baby-dev committed on
Commit
1997b07
·
verified ·
1 Parent(s): 7000ffb

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:819e785a9980818c83c4b249bb714c2dbb71810466554b0ca9f0d2e1ec9c0c40
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4e9ba1929ee972b77bcd72d468a37fe2ae415207ef7e59323b5fbe61463f4e4
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d8676b70101b430215e22b340ce45cc7b617d94d343716ed381586f9b1bb225
3
  size 170920532
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af29e74f68512be973befebf6448552f1b28a5dcab1e75907db8bfa13fa2ff3
3
  size 170920532
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:113738d73b2993a738a284b6571514b8789145a9c46430bfbd59e9ec6ea0f8d7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab3d18275216c730a64e666d0a0b15c9be1210bce446ef49cbf8dbc3a28973b2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b222f50cf4019bf2a4af9801ee61ced983584829f5aab356a024b6284fbfaea
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a64b03d0773f91f522e81c4f703747e71530a9ddde23207251d238f2ace6db2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.3906490802764893,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.05461123626186088,
5
  "eval_steps": 100,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -103,6 +103,28 @@
103
  "eval_samples_per_second": 35.144,
104
  "eval_steps_per_second": 8.786,
105
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  }
107
  ],
108
  "logging_steps": 50,
@@ -117,7 +139,7 @@
117
  "early_stopping_threshold": 0.0
118
  },
119
  "attributes": {
120
- "early_stopping_patience_counter": 1
121
  }
122
  },
123
  "TrainerControl": {
@@ -131,7 +153,7 @@
131
  "attributes": {}
132
  }
133
  },
134
- "total_flos": 1.414505334571008e+17,
135
  "train_batch_size": 4,
136
  "trial_name": null,
137
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.30838942527771,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.06826404532732609,
5
  "eval_steps": 100,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
103
  "eval_samples_per_second": 35.144,
104
  "eval_steps_per_second": 8.786,
105
  "step": 400
106
+ },
107
+ {
108
+ "epoch": 0.06143764079459349,
109
+ "grad_norm": 64.72782135009766,
110
+ "learning_rate": 0.00025,
111
+ "loss": 8.8676,
112
+ "step": 450
113
+ },
114
+ {
115
+ "epoch": 0.06826404532732609,
116
+ "grad_norm": 61.32781219482422,
117
+ "learning_rate": 0.00025,
118
+ "loss": 8.8788,
119
+ "step": 500
120
+ },
121
+ {
122
+ "epoch": 0.06826404532732609,
123
+ "eval_loss": 2.30838942527771,
124
+ "eval_runtime": 175.6465,
125
+ "eval_samples_per_second": 35.116,
126
+ "eval_steps_per_second": 8.779,
127
+ "step": 500
128
  }
129
  ],
130
  "logging_steps": 50,
 
139
  "early_stopping_threshold": 0.0
140
  },
141
  "attributes": {
142
+ "early_stopping_patience_counter": 0
143
  }
144
  },
145
  "TrainerControl": {
 
153
  "attributes": {}
154
  }
155
  },
156
+ "total_flos": 1.76813166821376e+17,
157
  "train_batch_size": 4,
158
  "trial_name": null,
159
  "trial_params": null