Training in progress, step 60
- README.md +26 -24
- adapter_config.json +2 -2
- adapter_model.bin +1 -1
- adapter_model.safetensors +1 -1
- training_args.bin +1 -1
README.md
CHANGED
@@ -2,10 +2,9 @@
 library_name: peft
 base_model: katuni4ka/tiny-random-dbrx
 tags:
-- axolotl
 - generated_from_trainer
 model-index:
-- name:
+- name: miner_id_24
 results: []
 ---
 
@@ -39,26 +38,27 @@ datasets:
 system_prompt: ''
 debug: null
 deepspeed: null
-early_stopping_patience:
+early_stopping_patience:
 early_stopping_threshold: 0.001
 eval_max_new_tokens: 128
-eval_steps:
-
+eval_steps:
+eval_strategy: null
+flash_attention: true
 fp16: null
 fsdp: null
 fsdp_config: null
-gradient_accumulation_steps:
-gradient_checkpointing:
+gradient_accumulation_steps: 2
+gradient_checkpointing: false
 group_by_length: false
-hub_model_id:
+hub_model_id:
 hub_repo: null
 hub_strategy: checkpoint
 hub_token: null
-learning_rate: 0.
+learning_rate: 0.0003
 load_in_4bit: false
 load_in_8bit: false
 local_rank: null
-logging_steps:
+logging_steps:
 lora_alpha: 16
 lora_dropout: 0.05
 lora_fan_in_fan_out: null
@@ -66,16 +66,18 @@ lora_model_dir: null
 lora_r: 8
 lora_target_linear: true
 lr_scheduler: cosine
-
+max_steps: 20
+micro_batch_size: 32
 mlflow_experiment_name: /tmp/285bfd19833f31b9_train_data.json
 model_type: AutoModelForCausalLM
-num_epochs:
+num_epochs: 50
 optimizer: adamw_bnb_8bit
 output_dir: miner_id_24
 pad_to_sequence_len: true
+resume_from_checkpoint: /workspace/hub_repo/last-checkpoint
 s2_attention: null
 sample_packing: false
-save_steps:
+save_steps:
 saves_per_epoch: 0
 sequence_len: 512
 strict: false
@@ -83,9 +85,9 @@ tf32: false
 tokenizer_type: AutoTokenizer
 train_on_inputs: false
 trust_remote_code: true
-val_set_size: 0.
+val_set_size: 0.0
 wandb_entity: null
-wandb_mode:
+wandb_mode: disabled
 wandb_name: 2947f66f-fa76-448b-9e40-d51beb5bff45
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
@@ -98,7 +100,7 @@ xformers_attention: null
 
 </details><br>
 
-#
+# miner_id_24
 
 This model is a fine-tuned version of [katuni4ka/tiny-random-dbrx](https://huggingface.co/katuni4ka/tiny-random-dbrx) on the None dataset.
 It achieves the following results on the evaluation set:
@@ -121,16 +123,16 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.
-- train_batch_size:
-- eval_batch_size:
+- learning_rate: 0.0003
+- train_batch_size: 32
+- eval_batch_size: 32
 - seed: 42
-- gradient_accumulation_steps:
-- total_train_batch_size:
-- optimizer: Use
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 64
+- optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 2
--
+- training_steps: 20
 
 ### Training results
 
@@ -145,6 +147,6 @@ The following hyperparameters were used during training:
 
 - PEFT 0.13.2
 - Transformers 4.46.0
-- Pytorch 2.
+- Pytorch 2.5.0+cu124
 - Datasets 3.0.1
 - Tokenizers 0.20.1
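The README changes above fill in the resolved axolotl training settings (learning_rate 0.0003, micro_batch_size 32 with gradient_accumulation_steps 2 for a total train batch size of 64, training capped at 20 steps). As a rough usage sketch, and assuming the adapter files from this commit have been downloaded locally (the hub repo id is not spelled out in the diff, so the adapter path below is a placeholder), the checkpoint could be attached to the base model like this:

```python
# Minimal sketch: load the base model named in the config and attach the LoRA
# adapter produced by this run. "path/to/adapter" is a hypothetical local path
# containing adapter_config.json and adapter_model.safetensors from this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "katuni4ka/tiny-random-dbrx",
    trust_remote_code=True,  # matches trust_remote_code: true in the config
)
tokenizer = AutoTokenizer.from_pretrained(
    "katuni4ka/tiny-random-dbrx", trust_remote_code=True
)

model = PeftModel.from_pretrained(base, "path/to/adapter")  # placeholder path
model.eval()
```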
adapter_config.json
CHANGED
@@ -20,9 +20,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "layer",
     "Wqkv",
-    "out_proj"
+    "out_proj",
+    "layer"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
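The adapter_config.json change only reorders the target_modules list; PEFT treats target_modules as an unordered collection, so the serialization order can shift between saves. For reference, a minimal sketch of a LoraConfig consistent with this adapter, taking r, lora_alpha and lora_dropout from the README config (everything else is illustrative):

```python
# Sketch of a LoraConfig matching the adapter settings in this repo.
# r / lora_alpha / lora_dropout come from the README config; the module list
# mirrors target_modules in adapter_config.json.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["Wqkv", "out_proj", "layer"],
    task_type="CAUSAL_LM",
)
```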
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ae7f219471d0a1364524ee07a2f4bd60b6bc5c20e153378c026510462b8c4d67
 size 9170
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dbec2fd4afd82e2174d8e90843450aa9cfec17114609e0daaec109e20feb58b9
 size 5752
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4d59480f4cc4c2b69c07a440b6b3314d38f79b7adeca28982abe6546fc7cb33d
 size 6776
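The three binary files above are Git LFS pointers, so only the sha256 oid changes in each diff (the sizes stay the same). A small sketch, assuming the real objects have been pulled (for example with `git lfs pull`), for checking a downloaded file against the oid recorded in its pointer:

```python
# Verify that a downloaded LFS object matches the sha256 oid in its pointer file.
# The file name and expected hash below are taken from this commit's
# adapter_model.bin pointer as an example.
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "ae7f219471d0a1364524ee07a2f4bd60b6bc5c20e153378c026510462b8c4d67"
print(sha256_of("adapter_model.bin") == expected)
```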