---
# LLaMA-Factory supervised fine-tuning (SFT) config.
# NOTE(review): the original file had every key fused onto one physical line,
# which is not parseable YAML (plain scalars may not contain ": "); this
# rewrite restores one key per line with all values unchanged.

### model
model_name_or_path: XeTute/Phantasor_V0.2-137M
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: full
flash_attn: auto

### dataset
# Comma-separated dataset names, resolved against dataset_dir —
# this string format is LLaMA-Factory's own convention.
dataset: XeTute/SStory-Gen-EN_ZH,MatanP/emotion_mapped_story_dataset,webnovel,jaydenccc/AI_Storyteller_Dataset
dataset_dir: data
template: alpaca
cutoff_len: 1024
max_samples: 1000000000
preprocessing_num_workers: 16
packing: false

### output
# Windows-style path; single-quoted so the backslashes and the date-like
# final segment stay a literal string.
output_dir: 'saves\GPT-2-Small\full\10-02-2025'
logging_steps: 100
save_steps: 5000
plot_loss: true
report_to: none
include_num_input_tokens_seen: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
learning_rate: 1.0e-06
num_train_epochs: 12.0
lr_scheduler_type: cosine
warmup_steps: 10
max_grad_norm: 1.0
optim: sgd
bf16: true
ddp_timeout: 180000000