Training in progress, step 500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 335604696
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4e9ba1929ee972b77bcd72d468a37fe2ae415207ef7e59323b5fbe61463f4e4
|
3 |
size 335604696
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 170920532
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6af29e74f68512be973befebf6448552f1b28a5dcab1e75907db8bfa13fa2ff3
|
3 |
size 170920532
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab3d18275216c730a64e666d0a0b15c9be1210bce446ef49cbf8dbc3a28973b2
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a64b03d0773f91f522e81c4f703747e71530a9ddde23207251d238f2ace6db2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 2.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -103,6 +103,28 @@
|
|
103 |
"eval_samples_per_second": 35.144,
|
104 |
"eval_steps_per_second": 8.786,
|
105 |
"step": 400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
}
|
107 |
],
|
108 |
"logging_steps": 50,
|
@@ -117,7 +139,7 @@
|
|
117 |
"early_stopping_threshold": 0.0
|
118 |
},
|
119 |
"attributes": {
|
120 |
-
"early_stopping_patience_counter":
|
121 |
}
|
122 |
},
|
123 |
"TrainerControl": {
|
@@ -131,7 +153,7 @@
|
|
131 |
"attributes": {}
|
132 |
}
|
133 |
},
|
134 |
-
"total_flos": 1.
|
135 |
"train_batch_size": 4,
|
136 |
"trial_name": null,
|
137 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 2.30838942527771,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-500",
|
4 |
+
"epoch": 0.06826404532732609,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
103 |
"eval_samples_per_second": 35.144,
|
104 |
"eval_steps_per_second": 8.786,
|
105 |
"step": 400
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 0.06143764079459349,
|
109 |
+
"grad_norm": 64.72782135009766,
|
110 |
+
"learning_rate": 0.00025,
|
111 |
+
"loss": 8.8676,
|
112 |
+
"step": 450
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"epoch": 0.06826404532732609,
|
116 |
+
"grad_norm": 61.32781219482422,
|
117 |
+
"learning_rate": 0.00025,
|
118 |
+
"loss": 8.8788,
|
119 |
+
"step": 500
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 0.06826404532732609,
|
123 |
+
"eval_loss": 2.30838942527771,
|
124 |
+
"eval_runtime": 175.6465,
|
125 |
+
"eval_samples_per_second": 35.116,
|
126 |
+
"eval_steps_per_second": 8.779,
|
127 |
+
"step": 500
|
128 |
}
|
129 |
],
|
130 |
"logging_steps": 50,
|
|
|
139 |
"early_stopping_threshold": 0.0
|
140 |
},
|
141 |
"attributes": {
|
142 |
+
"early_stopping_patience_counter": 0
|
143 |
}
|
144 |
},
|
145 |
"TrainerControl": {
|
|
|
153 |
"attributes": {}
|
154 |
}
|
155 |
},
|
156 |
+
"total_flos": 1.76813166821376e+17,
|
157 |
"train_batch_size": 4,
|
158 |
"trial_name": null,
|
159 |
"trial_params": null
|