# LLMEval-Dataset-Parser: tests/test_ifeval_parser.py
import pytest

from llmdataparser.ifeval_parser import IFEvalDatasetParser, IFEvalParseEntry


@pytest.fixture
def sample_ifeval_entries():
"""Create sample IFEval dataset entries for testing."""
return [
{
"key": 1,
"prompt": "Write a function to calculate factorial.",
"instruction_id_list": ["math_001", "programming_001"],
"kwargs": {"difficulty": "medium", "category": "mathematics"},
},
{
"key": 2,
"prompt": "Explain quantum computing.",
"instruction_id_list": ["physics_001"],
"kwargs": {"difficulty": "hard", "category": "physics"},
},
    ]


@pytest.fixture
def ifeval_parser():
"""Create an IFEval parser instance."""
    return IFEvalDatasetParser()


def test_ifeval_parse_entry_creation_valid():
"""Test valid creation of IFEvalParseEntry."""
entry = IFEvalParseEntry.create(
question="Test instruction",
answer="", # IFEval doesn't have answers
raw_question="Test instruction",
raw_answer="",
key=1,
instruction_id_list=["test_001", "test_002"],
kwargs={"difficulty": "easy"},
task_name="default",
)
assert isinstance(entry, IFEvalParseEntry)
assert entry.question == "Test instruction"
assert entry.answer == ""
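    # Assumed behavior: create() stores the raw_* arguments verbatim;
    # raw_question is checked the same way in test_process_entry_ifeval below.
    assert entry.raw_question == "Test instruction"
    assert entry.raw_answer == ""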
assert entry.key == 1
assert entry.instruction_id_list == ["test_001", "test_002"]
assert entry.kwargs == {"difficulty": "easy"}
    assert entry.task_name == "default"


def test_process_entry_ifeval(ifeval_parser, sample_ifeval_entries):
"""Test processing entries in IFEval parser."""
entry = ifeval_parser.process_entry(sample_ifeval_entries[0])
assert isinstance(entry, IFEvalParseEntry)
assert entry.key == 1
assert entry.instruction_id_list == ["math_001", "programming_001"]
assert entry.kwargs == {"difficulty": "medium", "category": "mathematics"}
assert entry.raw_question == "Write a function to calculate factorial."
assert entry.answer == "" # IFEval doesn't have answers
assert entry.task_name == "default"
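

# Companion sketch: the same field-preservation checks, run over every fixture
# entry rather than just the first. Assumes process_entry maps the raw "prompt"
# to raw_question and copies key, instruction_id_list and kwargs verbatim, as
# the single-entry test above demonstrates for entry 0.
def test_process_entry_ifeval_all_samples(ifeval_parser, sample_ifeval_entries):
    """Test that process_entry preserves core fields for all sample entries."""
    for raw in sample_ifeval_entries:
        entry = ifeval_parser.process_entry(raw)
        assert isinstance(entry, IFEvalParseEntry)
        assert entry.key == raw["key"]
        assert entry.instruction_id_list == raw["instruction_id_list"]
        assert entry.kwargs == raw["kwargs"]
        assert entry.raw_question == raw["prompt"]

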
def test_parser_initialization(ifeval_parser):
"""Test initialization of IFEval parser."""
assert ifeval_parser._data_source == "google/IFEval"
assert ifeval_parser._default_task == "default"
assert ifeval_parser.task_names == ["default"]
assert (
ifeval_parser.get_huggingface_link
== "https://huggingface.co/datasets/google/IFEval"
    )


@pytest.mark.integration
def test_load_dataset(ifeval_parser):
"""Test loading the IFEval dataset."""
ifeval_parser.load(split="train")
assert ifeval_parser.raw_data is not None
assert ifeval_parser.split_names == ["train"]
assert ifeval_parser._current_task == "default"
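

# Minimal integration sketch layered on the loader test above. It assumes the
# loaded raw_data supports integer indexing and yields dict-like rows with the
# same schema as the fixture entries; those are this sketch's assumptions, not
# guarantees of the parser API.
@pytest.mark.integration
def test_load_and_process_first_entry(ifeval_parser):
    """Test that a freshly loaded raw row can be processed end to end."""
    ifeval_parser.load(split="train")
    entry = ifeval_parser.process_entry(ifeval_parser.raw_data[0])
    assert isinstance(entry, IFEvalParseEntry)
    assert entry.task_name == "default"

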
def test_parser_string_representation(ifeval_parser):
"""Test string representation of IFEval parser."""
repr_str = str(ifeval_parser)
assert "IFEvalDatasetParser" in repr_str
assert "google/IFEval" in repr_str
assert "not loaded" in repr_str
def test_get_dataset_description(ifeval_parser):
"""Test dataset description generation for IFEval."""
description = ifeval_parser.get_dataset_description()
assert description.name == "IFEval"
assert description.source == "Google Research"
    assert description.language == "English (BCP-47 en)"


def test_get_evaluation_metrics(ifeval_parser):
"""Test evaluation metrics generation for IFEval."""
metrics = ifeval_parser.get_evaluation_metrics()
# Should have 5 metrics total
assert len(metrics) == 5
# Check primary metrics
primary_metrics = [m for m in metrics if m.primary]
assert len(primary_metrics) == 3
# Verify specific metrics exist and have correct properties
metric_names = {m.name for m in metrics}
assert "format_compliance" in metric_names
assert "length_constraints" in metric_names
assert "punctuation_rules" in metric_names
assert "keyword_usage" in metric_names
assert "structural_requirements" in metric_names