add fine-tuning deepseek example
examples/fine-tune-deepseek-reasoning-sft.ipynb
ADDED
The diff for this file is too large to render; see the raw diff.
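The notebook itself is not rendered here. As rough orientation only, supervised fine-tuning on a reasoning dataset typically follows the pattern sketched below; the model id, dataset name, and TRL settings are placeholders rather than the notebook's actual values, and TRL APIs vary between versions.

```python
# Hypothetical SFT sketch (placeholder ids; not the notebook's actual code).
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"  # placeholder model id
dataset = load_dataset("username/my-reasoning-dataset", split="train")  # placeholder dataset

trainer = SFTTrainer(
    model=model_id,          # TRL resolves a Hub id to a causal LM + tokenizer
    train_dataset=dataset,   # expects a "messages" or "text" column by default
    args=SFTConfig(
        output_dir="deepseek-reasoning-sft",
        num_train_epochs=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        logging_steps=10,
    ),
)
trainer.train()
trainer.save_model("deepseek-reasoning-sft")
```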
src/synthetic_dataset_generator/apps/chat.py
CHANGED
@@ -309,7 +309,7 @@ def generate_dataset_from_seed(
         progress(
             step_progress * n_processed / num_rows,
             total=total_steps,
-            desc="Generating
+            desc="Generating instructions",
         )
         remaining_rows = num_rows - n_processed
         batch_size = min(batch_size, remaining_rows)
@@ -781,7 +781,7 @@ with gr.Blocks() as app:
                 )
                 document_column = gr.Dropdown(
                     label="Document Column",
-                    info="Select the document column to generate the
+                    info="Select the document column to generate the chat data",
                     choices=["Load your data first in step 1."],
                     value="Load your data first in step 1.",
                     interactive=False,
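For reference on the first hunk, a minimal, generic Gradio sketch (not code from this repository) showing how a `gr.Progress` callback consumes the `desc` keyword that the fix completes:

```python
# Generic Gradio sketch: `desc` sets the label shown next to the progress bar
# while the event handler runs.
import gradio as gr

def generate(num_rows: float, progress=gr.Progress()):
    total = int(num_rows)
    for i in range(total):
        # fraction complete, optional total, and the description text
        progress((i + 1) / total, total=total, desc="Generating instructions")
    return f"Generated {total} rows"

with gr.Blocks() as demo:
    rows = gr.Number(value=10, label="Rows")
    status = gr.Textbox(label="Status")
    gr.Button("Generate").click(generate, inputs=rows, outputs=status)

# demo.launch()
```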
src/synthetic_dataset_generator/pipelines/textcat.py
CHANGED
@@ -109,7 +109,7 @@ def get_labeller_generator(system_prompt: str, labels: List[str], multi_label: b
         "temperature": 0.01,
         "max_new_tokens": MAX_NUM_TOKENS,
     }
-    llm = _get_llm(
+    llm = _get_llm(generation_kwargs=generation_kwargs)
     labeller_generator = TextClassification(
         llm=llm,
         context=system_prompt,
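For reference on this hunk, a standalone sketch of wiring an LLM into distilabel's `TextClassification` task, which the completed `_get_llm(...)` call feeds here; the model id and labels are placeholders, and import paths can vary between distilabel versions:

```python
# Standalone distilabel sketch (placeholder model id and labels).
from distilabel.llms import InferenceEndpointsLLM
from distilabel.steps.tasks import TextClassification

llm = InferenceEndpointsLLM(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model
    generation_kwargs={"temperature": 0.01, "max_new_tokens": 256},
)
labeller = TextClassification(
    llm=llm,
    context="Classify customer support tickets.",  # plays the role of system_prompt
    available_labels=["billing", "technical", "other"],
)
labeller.load()

# Tasks consume batches of dicts; TextClassification reads the "text" column
# and adds a "labels" column to each row.
result = next(labeller.process([{"text": "My invoice shows the wrong amount."}]))
print(result)
```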