mrferr3t committed
Commit d3b8dae · verified · 1 Parent(s): f88a758

Training in progress, step 60

README.md CHANGED
@@ -2,10 +2,9 @@
 library_name: peft
 base_model: katuni4ka/tiny-random-dbrx
 tags:
-- axolotl
 - generated_from_trainer
 model-index:
-- name: 74090ab0-1891-42b3-b53d-e6ca30f24c6a
+- name: miner_id_24
   results: []
 ---
 
@@ -39,26 +38,27 @@ datasets:
 system_prompt: ''
 debug: null
 deepspeed: null
-early_stopping_patience: 1
+early_stopping_patience:
 early_stopping_threshold: 0.001
 eval_max_new_tokens: 128
-eval_steps: 20
-flash_attention: false
+eval_steps:
+eval_strategy: null
+flash_attention: true
 fp16: null
 fsdp: null
 fsdp_config: null
-gradient_accumulation_steps: 4
-gradient_checkpointing: true
+gradient_accumulation_steps: 2
+gradient_checkpointing: false
 group_by_length: false
-hub_model_id: mrferr3t/74090ab0-1891-42b3-b53d-e6ca30f24c6a
+hub_model_id:
 hub_repo: null
 hub_strategy: checkpoint
 hub_token: null
-learning_rate: 0.0005
+learning_rate: 0.0003
 load_in_4bit: false
 load_in_8bit: false
 local_rank: null
-logging_steps: 100
+logging_steps:
 lora_alpha: 16
 lora_dropout: 0.05
 lora_fan_in_fan_out: null
@@ -66,16 +66,18 @@ lora_model_dir: null
 lora_r: 8
 lora_target_linear: true
 lr_scheduler: cosine
-micro_batch_size: 64
+max_steps: 20
+micro_batch_size: 32
 mlflow_experiment_name: /tmp/285bfd19833f31b9_train_data.json
 model_type: AutoModelForCausalLM
-num_epochs: 1
+num_epochs: 50
 optimizer: adamw_bnb_8bit
 output_dir: miner_id_24
 pad_to_sequence_len: true
+resume_from_checkpoint: /workspace/hub_repo/last-checkpoint
 s2_attention: null
 sample_packing: false
-save_steps: 20
+save_steps:
 saves_per_epoch: 0
 sequence_len: 512
 strict: false
@@ -83,9 +85,9 @@ tf32: false
 tokenizer_type: AutoTokenizer
 train_on_inputs: false
 trust_remote_code: true
-val_set_size: 0.05
+val_set_size: 0.0
 wandb_entity: null
-wandb_mode: online
+wandb_mode: disabled
 wandb_name: 2947f66f-fa76-448b-9e40-d51beb5bff45
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
@@ -98,7 +100,7 @@ xformers_attention: null
 
 </details><br>
 
-# 74090ab0-1891-42b3-b53d-e6ca30f24c6a
+# miner_id_24
 
 This model is a fine-tuned version of [katuni4ka/tiny-random-dbrx](https://huggingface.co/katuni4ka/tiny-random-dbrx) on the None dataset.
 It achieves the following results on the evaluation set:
@@ -121,16 +123,16 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.0005
-- train_batch_size: 64
-- eval_batch_size: 64
+- learning_rate: 0.0003
+- train_batch_size: 32
+- eval_batch_size: 32
 - seed: 42
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 256
-- optimizer: Use adamw_bnb_8bit with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 64
+- optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 2
-- num_epochs: 1
+- training_steps: 20
 
 ### Training results
 
@@ -145,6 +147,6 @@ The following hyperparameters were used during training:
 
 - PEFT 0.13.2
 - Transformers 4.46.0
-- Pytorch 2.3.1+cu121
+- Pytorch 2.5.0+cu124
 - Datasets 3.0.1
 - Tokenizers 0.20.1
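
For context on how the artifacts in this commit are consumed, a minimal sketch of loading the checkpointed LoRA adapter onto the base model named in the config, using standard PEFT and Transformers APIs. The local directory name is a placeholder, not something taken from this repo.

```python
# Minimal sketch, not part of this repo: attach the adapter weights from this
# commit to the base model. `adapter_dir` is a placeholder; point it at wherever
# this repo (or its checkpoint directory) has been downloaded.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "katuni4ka/tiny-random-dbrx"   # base_model from the config above
adapter_dir = "./checkpoint-60"          # placeholder local path

tokenizer = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
base = AutoModelForCausalLM.from_pretrained(base_id, trust_remote_code=True)
model = PeftModel.from_pretrained(base, adapter_dir)  # loads adapter_model.safetensors
```
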
adapter_config.json CHANGED
@@ -20,9 +20,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "layer",
     "Wqkv",
-    "out_proj"
+    "out_proj",
+    "layer"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e07e9e28c70ab277e23441895e35704f0ad897f37ea8bf10916ef4667a991c89
+oid sha256:ae7f219471d0a1364524ee07a2f4bd60b6bc5c20e153378c026510462b8c4d67
 size 9170
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea3380b0be74b0ce6dabe1077e83ac5836018f2525a79c1e92d952724d869846
+oid sha256:dbec2fd4afd82e2174d8e90843450aa9cfec17114609e0daaec109e20feb58b9
 size 5752
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6ad55a4e2150c662533f989b30551151cd3acdbedc1c1aeda6af73b49cffe40
+oid sha256:4d59480f4cc4c2b69c07a440b6b3314d38f79b7adeca28982abe6546fc7cb33d
 size 6776