sdiazlor HF staff committed on
Commit
a68cd13
·
1 Parent(s): e3b32d4

add fine-tuning deepseek example

Browse files
examples/fine-tune-deepseek-reasoning-sft.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
src/synthetic_dataset_generator/apps/chat.py CHANGED
@@ -309,7 +309,7 @@ def generate_dataset_from_seed(
309
  progress(
310
  step_progress * n_processed / num_rows,
311
  total=total_steps,
312
- desc="Generating questions",
313
  )
314
  remaining_rows = num_rows - n_processed
315
  batch_size = min(batch_size, remaining_rows)
@@ -781,7 +781,7 @@ with gr.Blocks() as app:
781
  )
782
  document_column = gr.Dropdown(
783
  label="Document Column",
784
- info="Select the document column to generate the RAG dataset",
785
  choices=["Load your data first in step 1."],
786
  value="Load your data first in step 1.",
787
  interactive=False,
 
309
  progress(
310
  step_progress * n_processed / num_rows,
311
  total=total_steps,
312
+ desc="Generating instructions",
313
  )
314
  remaining_rows = num_rows - n_processed
315
  batch_size = min(batch_size, remaining_rows)
 
781
  )
782
  document_column = gr.Dropdown(
783
  label="Document Column",
784
+ info="Select the document column to generate the chat data",
785
  choices=["Load your data first in step 1."],
786
  value="Load your data first in step 1.",
787
  interactive=False,
src/synthetic_dataset_generator/pipelines/textcat.py CHANGED
@@ -109,7 +109,7 @@ def get_labeller_generator(system_prompt: str, labels: List[str], multi_label: b
109
  "temperature": 0.01,
110
  "max_new_tokens": MAX_NUM_TOKENS,
111
  }
112
- llm = _get_llm(is_completion=True, generation_kwargs=generation_kwargs)
113
  labeller_generator = TextClassification(
114
  llm=llm,
115
  context=system_prompt,
 
109
  "temperature": 0.01,
110
  "max_new_tokens": MAX_NUM_TOKENS,
111
  }
112
+ llm = _get_llm(generation_kwargs=generation_kwargs)
113
  labeller_generator = TextClassification(
114
  llm=llm,
115
  context=system_prompt,