mrferr3t committed on
Commit
3e7a62e
·
verified ·
1 Parent(s): d3b8dae

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,9 +20,9 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "layer",
24
  "Wqkv",
25
- "out_proj"
 
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "Wqkv",
24
+ "out_proj",
25
+ "layer"
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:868a3f917a5878e2b6944b4ac3f3554c8fa88ed3406f72ba3442e357efae7293
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbec2fd4afd82e2174d8e90843450aa9cfec17114609e0daaec109e20feb58b9
3
  size 5752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e951c6db3d322f827b37e2420f3724bf14039e3709cbf9e3ba4c8f422f38c721
3
  size 15814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdef43d2b373c43f94b1dfcbd9016d90a7c196769b7519c0951291dc44b8ca91
3
  size 15814
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeab1516692d8735d1cf9eb8242b9bfc315e61f76d8cb7444790c4e87ea94971
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ae59499d6fa89d93656a32994352236f76ecdb3c0d5d7d01bbf5e497aa8ee6c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72b1decdef7148b48fd5a7101075992e3183d005c57731608543a60017a8211b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066ba65f4953fa1b75b645f57a930b28fbba82ba12586f9907c767ebd5948500
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 11.5,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-20",
4
- "epoch": 0.7843137254901961,
5
  "eval_steps": 20,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -31,17 +31,25 @@
31
  "eval_samples_per_second": 390.671,
32
  "eval_steps_per_second": 6.246,
33
  "step": 40
 
 
 
 
 
 
 
 
34
  }
35
  ],
36
  "logging_steps": 100,
37
- "max_steps": 51,
38
  "num_input_tokens_seen": 0,
39
- "num_train_epochs": 1,
40
  "save_steps": 20,
41
  "stateful_callbacks": {
42
  "EarlyStoppingCallback": {
43
  "args": {
44
- "early_stopping_patience": 1,
45
  "early_stopping_threshold": 0.0
46
  },
47
  "attributes": {
@@ -54,12 +62,12 @@
54
  "should_evaluate": false,
55
  "should_log": false,
56
  "should_save": true,
57
- "should_training_stop": true
58
  },
59
  "attributes": {}
60
  }
61
  },
62
- "total_flos": 25501284433920.0,
63
  "train_batch_size": 64,
64
  "trial_name": null,
65
  "trial_params": null
 
1
  {
2
  "best_metric": 11.5,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-20",
4
+ "epoch": 0.5687203791469194,
5
  "eval_steps": 20,
6
+ "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
31
  "eval_samples_per_second": 390.671,
32
  "eval_steps_per_second": 6.246,
33
  "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.5687203791469194,
37
+ "eval_loss": 11.5,
38
+ "eval_runtime": 1.643,
39
+ "eval_samples_per_second": 167.381,
40
+ "eval_steps_per_second": 5.478,
41
+ "step": 60
42
  }
43
  ],
44
  "logging_steps": 100,
45
+ "max_steps": 5250,
46
  "num_input_tokens_seen": 0,
47
+ "num_train_epochs": 50,
48
  "save_steps": 20,
49
  "stateful_callbacks": {
50
  "EarlyStoppingCallback": {
51
  "args": {
52
+ "early_stopping_patience": 3,
53
  "early_stopping_threshold": 0.0
54
  },
55
  "attributes": {
 
62
  "should_evaluate": false,
63
  "should_log": false,
64
  "should_save": true,
65
+ "should_training_stop": false
66
  },
67
  "attributes": {}
68
  }
69
  },
70
+ "total_flos": 31876605542400.0,
71
  "train_batch_size": 64,
72
  "trial_name": null,
73
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6ad55a4e2150c662533f989b30551151cd3acdbedc1c1aeda6af73b49cffe40
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d59480f4cc4c2b69c07a440b6b3314d38f79b7adeca28982abe6546fc7cb33d
3
  size 6776