Spaces:
Running
Running
import fire

from DABench import DABench
from metagpt.logs import logger
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.utils.recovery_util import save_history
async def main(id: int = 0) -> None:
    """Evaluate one DABench task with a DataInterpreter and log the verdict.

    The task prompt is built by ``DABench.generate_formatted_prompt``, handed
    to a fresh ``DataInterpreter``, and the stringified result is scored with
    ``DABench.eval``.

    Args:
        id: Identifier of the benchmark task to run. The name shadows the
            builtin ``id`` on purpose: ``fire.Fire(main)`` exposes parameter
            names as command-line arguments, so renaming it would change the
            CLI.
    """
    bench = DABench()
    requirement = bench.generate_formatted_prompt(id)

    di = DataInterpreter()
    result = await di.run(requirement)
    logger.info(result)

    # Persist the role's history before scoring the answer.
    save_history(role=di)

    # ``eval`` returns a pair; only the second item (correctness flag) is used.
    _, is_correct = bench.eval(id, str(result))
    logger.info(f"Prediction is {'correct' if is_correct else 'incorrect'}.")
# Script entry point: expose ``main`` (and its ``id`` parameter) as a CLI.
if __name__ == "__main__":
    fire.Fire(main)