{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from huggingface_hub import login\n", "from dotenv import load_dotenv\n", "import os\n", "load_dotenv()\n", "\n", "# Login to Hugging Face Hub\n", "login(token=os.getenv(\"HUGGINGFACE_TOKEN\"))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a39e6120cbea4462999cfa5f887a8015", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/288 [00:00= 8:\n", " print(\"Setting BF16 to True\")\n", " hyperparams['bf16'] = True\n", " else:\n", " hyperparams['bf16'] = False" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9ab84ef6c43249de9726940a78f2717f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00,\n", "ignore_data_skip=False,\n", "include_for_metrics=[],\n", "include_inputs_for_metrics=False,\n", "include_num_input_tokens_seen=False,\n", "include_tokens_per_second=False,\n", "jit_mode_eval=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=0.0002,\n", "length_column_name=length,\n", "load_best_model_at_end=False,\n", "local_rank=0,\n", "log_level=passive,\n", "log_level_replica=warning,\n", "log_on_each_node=True,\n", "logging_dir=./results\\runs\\Nov15_13-14-10_FutureGadgetLab,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=25,\n", "logging_strategy=IntervalStrategy.STEPS,\n", "lr_scheduler_kwargs={},\n", "lr_scheduler_type=SchedulerType.CONSTANT,\n", "max_grad_norm=0.3,\n", "max_steps=-1,\n", "metric_for_best_model=None,\n", "mp_parameters=,\n", "neftune_noise_alpha=None,\n", "no_cuda=False,\n", "num_train_epochs=1,\n", "optim=OptimizerNames.PAGED_ADAMW,\n", "optim_args=None,\n", "optim_target_modules=None,\n", "output_dir=./results,\n", "overwrite_output_dir=False,\n", "past_index=-1,\n", "per_device_eval_batch_size=8,\n", "per_device_train_batch_size=2,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=,\n", "ray_scope=last,\n", "remove_unused_columns=True,\n", "report_to=['tensorboard'],\n", "restore_callback_states_from_checkpoint=False,\n", "resume_from_checkpoint=None,\n", "run_name=./results,\n", "save_on_each_node=False,\n", "save_only_model=False,\n", "save_safetensors=True,\n", "save_steps=25,\n", "save_strategy=IntervalStrategy.STEPS,\n", "save_total_limit=None,\n", "seed=42,\n", "skip_memory_metrics=True,\n", "split_batches=None,\n", "tf32=None,\n", "torch_compile=False,\n", "torch_compile_backend=None,\n", "torch_compile_mode=None,\n", "torch_empty_cache_steps=None,\n", "torchdynamo=None,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_cpu=False,\n", "use_ipex=False,\n", "use_legacy_prediction_loop=False,\n", "use_liger_kernel=False,\n", "use_mps_device=False,\n", "warmup_ratio=0.03,\n", "warmup_steps=0,\n", "weight_decay=0.001,\n", ")" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Set training parameters\n", "training_arguments = TrainingArguments(\n", " output_dir=hyperparams['output_dir'],\n", " num_train_epochs=hyperparams['num_train_epochs'],\n", " per_device_train_batch_size=hyperparams['per_device_train_batch_size'],\n", " gradient_accumulation_steps=hyperparams['gradient_accumulation_steps'],\n", " optim=hyperparams['optimizer'],\n", " save_steps=hyperparams['save_steps'],\n", " logging_steps=hyperparams['logging_steps'],\n", " learning_rate=float(hyperparams['learning_rate']),\n", " weight_decay=hyperparams['weight_decay'],\n", " fp16=hyperparams['fp16'],\n", " bf16=hyperparams['bf16'],\n", " max_grad_norm=hyperparams['max_grad_norm'],\n", " max_steps=hyperparams['max_steps'],\n", " warmup_ratio=hyperparams['warmup_ratio'],\n", " group_by_length=hyperparams['group_by_length'],\n", " lr_scheduler_type=hyperparams['lr_scheduler_type'],\n", " report_to=\"tensorboard\",\n", ")\n", "training_arguments" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "f:\\TADBot\\.venv\\Lib\\site-packages\\huggingface_hub\\utils\\_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': dataset_text_field, max_seq_length, packing. Will not be supported from version '0.13.0'.\n", "\n", "Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.\n", " warnings.warn(message, FutureWarning)\n", "f:\\TADBot\\.venv\\Lib\\site-packages\\trl\\trainer\\sft_trainer.py:212: UserWarning: You passed a `packing` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n", " warnings.warn(\n", "f:\\TADBot\\.venv\\Lib\\site-packages\\trl\\trainer\\sft_trainer.py:300: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n", " warnings.warn(\n", "f:\\TADBot\\.venv\\Lib\\site-packages\\trl\\trainer\\sft_trainer.py:328: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n", " warnings.warn(\n" ] } ], "source": [ "trainer = SFTTrainer(\n", " model=model,\n", " train_dataset=dataset,\n", " peft_config=peft_config,\n", " dataset_text_field=\"text\",\n", " # formatting_func=format_prompts_fn,\n", " max_seq_length=hyperparams['max_seq_length'],\n", " tokenizer=tokenizer,\n", " args=training_arguments,\n", " packing=hyperparams['packing'],\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0033f5bb31a7416facfd8a3fd3bd5ad1", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1340 [00:00