import pytest

from llmdataparser.ifeval_parser import IFEvalDatasetParser, IFEvalParseEntry


@pytest.fixture
def sample_ifeval_entries():
    """Create sample IFEval dataset entries for testing."""
    return [
        {
            "key": 1,
            "prompt": "Write a function to calculate factorial.",
            "instruction_id_list": ["math_001", "programming_001"],
            "kwargs": {"difficulty": "medium", "category": "mathematics"},
        },
        {
            "key": 2,
            "prompt": "Explain quantum computing.",
            "instruction_id_list": ["physics_001"],
            "kwargs": {"difficulty": "hard", "category": "physics"},
        },
    ]


@pytest.fixture
def ifeval_parser():
    """Create an IFEval parser instance."""
    return IFEvalDatasetParser()


def test_ifeval_parse_entry_creation_valid():
    """Test valid creation of IFEvalParseEntry."""
    entry = IFEvalParseEntry.create(
        question="Test instruction",
        answer="",
        raw_question="Test instruction",
        raw_answer="",
        key=1,
        instruction_id_list=["test_001", "test_002"],
        kwargs={"difficulty": "easy"},
        task_name="default",
    )

    assert isinstance(entry, IFEvalParseEntry)
    assert entry.question == "Test instruction"
    assert entry.answer == ""
    assert entry.key == 1
    assert entry.instruction_id_list == ["test_001", "test_002"]
    assert entry.kwargs == {"difficulty": "easy"}
    assert entry.task_name == "default"


def test_process_entry_ifeval(ifeval_parser, sample_ifeval_entries):
    """Test processing entries in IFEval parser."""
    entry = ifeval_parser.process_entry(sample_ifeval_entries[0])

    assert isinstance(entry, IFEvalParseEntry)
    assert entry.key == 1
    assert entry.instruction_id_list == ["math_001", "programming_001"]
    assert entry.kwargs == {"difficulty": "medium", "category": "mathematics"}
    assert entry.raw_question == "Write a function to calculate factorial."
    assert entry.answer == ""
    assert entry.task_name == "default"
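

# A companion check over the second fixture entry; a minimal sketch assuming
# process_entry treats every IFEval record uniformly (empty answer, "default"
# task name). The test name is ours, not part of the original suite.
def test_process_entry_ifeval_second_entry(ifeval_parser, sample_ifeval_entries):
    """Test processing the second sample entry in IFEval parser."""
    entry = ifeval_parser.process_entry(sample_ifeval_entries[1])

    assert isinstance(entry, IFEvalParseEntry)
    assert entry.key == 2
    assert entry.instruction_id_list == ["physics_001"]
    assert entry.kwargs == {"difficulty": "hard", "category": "physics"}
    assert entry.raw_question == "Explain quantum computing."
    assert entry.answer == ""
    assert entry.task_name == "default"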


def test_parser_initialization(ifeval_parser):
    """Test initialization of IFEval parser."""
    assert ifeval_parser._data_source == "google/IFEval"
    assert ifeval_parser._default_task == "default"
    assert ifeval_parser.task_names == ["default"]
    assert (
        ifeval_parser.get_huggingface_link
        == "https://huggingface.co/datasets/google/IFEval"
    )
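

# The "integration" marker below is assumed to be registered in the project's
# pytest configuration; the test hits the real Hugging Face dataset, so it can
# be deselected with `pytest -m "not integration"`.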
@pytest.mark.integration
def test_load_dataset(ifeval_parser):
    """Test loading the IFEval dataset."""
    ifeval_parser.load(split="train")
    assert ifeval_parser.raw_data is not None
    assert ifeval_parser.split_names == ["train"]
    assert ifeval_parser._current_task == "default"


def test_parser_string_representation(ifeval_parser):
    """Test string representation of IFEval parser."""
    repr_str = str(ifeval_parser)
    assert "IFEvalDatasetParser" in repr_str
    assert "google/IFEval" in repr_str
    assert "not loaded" in repr_str


def test_get_dataset_description(ifeval_parser):
    """Test dataset description generation for IFEval."""
    description = ifeval_parser.get_dataset_description()

    assert description.name == "IFEval"
    assert description.source == "Google Research"
    assert description.language == "English (BCP-47 en)"


def test_get_evaluation_metrics(ifeval_parser):
    """Test evaluation metrics generation for IFEval."""
    metrics = ifeval_parser.get_evaluation_metrics()

    # Five metrics are defined for IFEval in total.
    assert len(metrics) == 5

    # Three of them are marked as primary.
    primary_metrics = [m for m in metrics if m.primary]
    assert len(primary_metrics) == 3

    # Every expected metric name is present.
    metric_names = {m.name for m in metrics}
    assert "format_compliance" in metric_names
    assert "length_constraints" in metric_names
    assert "punctuation_rules" in metric_names
    assert "keyword_usage" in metric_names
    assert "structural_requirements" in metric_names
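

# Usage sketch (the file path is an assumption about the repository layout):
#   pytest tests/test_ifeval_parser.py -m "not integration" -q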