baby-dev committed on
Commit
b24aeb3
·
verified ·
1 Parent(s): 8882555

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4be90fbc579345dea17bf3682299dee655d26fe4488209d80228e39825c3ec04
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:819e785a9980818c83c4b249bb714c2dbb71810466554b0ca9f0d2e1ec9c0c40
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:635065664a8d9468424850c4c2bad755fc89629549ca37ae5f11fdc9b71e58fa
3
  size 170920532
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d8676b70101b430215e22b340ce45cc7b617d94d343716ed381586f9b1bb225
3
  size 170920532
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7574c4df2d781c0c0af3dbe90aaac583bcf7d60d9284c123001605967a6ba7e7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:113738d73b2993a738a284b6571514b8789145a9c46430bfbd59e9ec6ea0f8d7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9c4f24febed8904edd77d7c3dfeceec80ce8ac46f187c13cc543808d5fa2a7b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b222f50cf4019bf2a4af9801ee61ced983584829f5aab356a024b6284fbfaea
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.3906490802764893,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.040958427196395655,
5
  "eval_steps": 100,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -81,6 +81,28 @@
81
  "eval_samples_per_second": 35.183,
82
  "eval_steps_per_second": 8.796,
83
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  }
85
  ],
86
  "logging_steps": 50,
@@ -95,7 +117,7 @@
95
  "early_stopping_threshold": 0.0
96
  },
97
  "attributes": {
98
- "early_stopping_patience_counter": 0
99
  }
100
  },
101
  "TrainerControl": {
@@ -109,7 +131,7 @@
109
  "attributes": {}
110
  }
111
  },
112
- "total_flos": 1.060879000928256e+17,
113
  "train_batch_size": 4,
114
  "trial_name": null,
115
  "trial_params": null
 
1
  {
2
  "best_metric": 2.3906490802764893,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.05461123626186088,
5
  "eval_steps": 100,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
81
  "eval_samples_per_second": 35.183,
82
  "eval_steps_per_second": 8.796,
83
  "step": 300
84
+ },
85
+ {
86
+ "epoch": 0.04778483172912827,
87
+ "grad_norm": 48.440155029296875,
88
+ "learning_rate": 0.00025,
89
+ "loss": 8.7073,
90
+ "step": 350
91
+ },
92
+ {
93
+ "epoch": 0.05461123626186088,
94
+ "grad_norm": 143.95608520507812,
95
+ "learning_rate": 0.00025,
96
+ "loss": 8.7399,
97
+ "step": 400
98
+ },
99
+ {
100
+ "epoch": 0.05461123626186088,
101
+ "eval_loss": 2.447601318359375,
102
+ "eval_runtime": 175.5057,
103
+ "eval_samples_per_second": 35.144,
104
+ "eval_steps_per_second": 8.786,
105
+ "step": 400
106
  }
107
  ],
108
  "logging_steps": 50,
 
117
  "early_stopping_threshold": 0.0
118
  },
119
  "attributes": {
120
+ "early_stopping_patience_counter": 1
121
  }
122
  },
123
  "TrainerControl": {
 
131
  "attributes": {}
132
  }
133
  },
134
+ "total_flos": 1.414505334571008e+17,
135
  "train_batch_size": 4,
136
  "trial_name": null,
137
  "trial_params": null