oldiday committed on
Commit
f4d236d
·
verified ·
1 Parent(s): 3e9404c

Training in progress, step 143, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4482a0859dbc0c09cbecf9061a8c6e9d1ed609f6fba7a0310b762775f34ce28a
3
  size 50624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:028e9c324935382412c9e6de805da452757d7ae87fc44b169ee02e65ea311592
3
  size 50624
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f42b8bea893c351df7f112f14588f1ee6e7183cdc8cc49b7464a0d1cb6cfb48
3
  size 111142
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db54a4426452b64b4027e4dfa9ef0535906c38efe4d080f388c742a719a5fac0
3
  size 111142
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49a69175eebd96d9abef5a70c95df2a121f5eab8841cb5b743e18797c301adb9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ace8e8b5fa5c901a16c2d027e66f97b4fcf6e88e1f35448babcdafe63e22ff7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f898282f5441a4001bfc295ec53b08bdc8d00e4f4d60b2e54baad9e1e0754a20
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544ca722df0421adc4c8d7e37de997de6c391ac56739a37eab2f58b5d53e7f1f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 11.731181144714355,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.7017543859649122,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -101,6 +101,34 @@
101
  "eval_samples_per_second": 316.765,
102
  "eval_steps_per_second": 79.191,
103
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  }
105
  ],
106
  "logging_steps": 10,
@@ -124,12 +152,12 @@
124
  "should_evaluate": false,
125
  "should_log": false,
126
  "should_save": true,
127
- "should_training_stop": false
128
  },
129
  "attributes": {}
130
  }
131
  },
132
- "total_flos": 22865329913856.0,
133
  "train_batch_size": 8,
134
  "trial_name": null,
135
  "trial_params": null
 
1
  {
2
  "best_metric": 11.731181144714355,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 1.0035087719298246,
5
  "eval_steps": 50,
6
+ "global_step": 143,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
101
  "eval_samples_per_second": 316.765,
102
  "eval_steps_per_second": 79.191,
103
  "step": 100
104
+ },
105
+ {
106
+ "epoch": 0.7719298245614035,
107
+ "grad_norm": 0.06784291565418243,
108
+ "learning_rate": 2.8872993029040508e-05,
109
+ "loss": 11.7303,
110
+ "step": 110
111
+ },
112
+ {
113
+ "epoch": 0.8421052631578947,
114
+ "grad_norm": 0.05094173923134804,
115
+ "learning_rate": 1.439838153227e-05,
116
+ "loss": 11.73,
117
+ "step": 120
118
+ },
119
+ {
120
+ "epoch": 0.9122807017543859,
121
+ "grad_norm": 0.06301553547382355,
122
+ "learning_rate": 4.6777594000230855e-06,
123
+ "loss": 11.731,
124
+ "step": 130
125
+ },
126
+ {
127
+ "epoch": 0.9824561403508771,
128
+ "grad_norm": 0.14561209082603455,
129
+ "learning_rate": 2.509731335744281e-07,
130
+ "loss": 11.7305,
131
+ "step": 140
132
  }
133
  ],
134
  "logging_steps": 10,
 
152
  "should_evaluate": false,
153
  "should_log": false,
154
  "should_save": true,
155
+ "should_training_stop": true
156
  },
157
  "attributes": {}
158
  }
159
  },
160
+ "total_flos": 32693857026048.0,
161
  "train_batch_size": 8,
162
  "trial_name": null,
163
  "trial_params": null