# File size: 1,976 Bytes
# Commit: acd73c2
---
_config_type: haystacks.embeddings.train_batch.TrainConfig
accelerator: gpu
accumulate_grad_batches: 1
adam_beta1: 0.9
adam_beta2: 0.95
base_save_dir: /home/sabri/code/haystacks/checkpoints
check_val_every_n_epoch: null
ckpt_path: null
devices: 8
dtype: bfloat16
embedding_module:
_config_type: haystacks.embeddings.modeling.TokenEmbeddingModuleConfig
batch_attention_layers: []
embedding_dim: 8192
kwargs: {}
target:
_is_type: true
name: haystacks.embeddings.modeling.TokenEmbeddingModule
enable_checkpointing: false
foreach: null
gradient_clip_val: null
launch_id: null
learnable_bias: true
learnable_temp: true
limit_train_batches: 1.0
limit_val_batches: 1.0
load_hub: true
log_every_n_steps: 4
log_grad_norms: false
loss_comparison: matched
loss_token_idxs:
- 64
- 128
- 256
- 512
- 1024
lr: 0.0001
lr_scheduler: null
manual_save_epochs: null
manual_save_steps: 8192
max_epochs: 512
max_hidden_layers: null
max_problems: null
max_seq_len: 1024
max_steps: -1
model_name: meta-llama/Llama-3.2-1B-Instruct
name: no_batch-attention-lr0.0001-bs32-d8192-new
num_sanity_val_steps: null
num_workers: 0
objective: cross_entropy
output_dir: null
overfit_batches: 0.0
precision: bf16
reload_dataloaders_every_n_epochs: 0
run_dir: null
run_id: null
samples_per_batch: 32
save_intermediates: true
script_id: null
seed: 42
train_batch_size: 1
train_data_path: ScalingIntelligence/math-train-l3.2-3Bi-meta-n128
use_wandb: false
val_batch_size: 1
val_check_interval: 64
val_data_path: ScalingIntelligence/math-test-l3.2-3Bi-meta-n128
val_rollout_data_path: null
val_samples_per_batch: 32
validate_before_train: true
wandb:
_config_type: haystacks.embeddings.train_batch.WandbLoggerConfig
group: ''
id: null
job_type: train
kwargs: {}
log_model: false
mode: online
name: null
prefix: ''
project: haystacks
save_dir: .
tags: []
target:
_is_type: true
name: pytorch_lightning.loggers.wandb.WandbLogger
weight_decay: 0.1
weights_only: true