# SPO/examples/di/InfiAgent-DABench/run_InfiAgent-DABench_single.py
# XiangJinYu's picture
# add metagpt
# fe5c39d verified
# raw
# history blame
# 607 Bytes
import fire
from DABench import DABench
from metagpt.logs import logger
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.utils.recovery_util import save_history
async def main(id=0):
    """Run the Data Interpreter on a single DABench task and log the verdict.

    Args:
        id: Value forwarded to ``DABench.generate_formatted_prompt`` and
            ``DABench.eval`` to select the task (presumably the task's
            identifier in the benchmark; confirm against ``DABench``).
            Defaults to 0.
    """
    benchmark = DABench()
    task_prompt = benchmark.generate_formatted_prompt(id)

    interpreter = DataInterpreter()
    answer = await interpreter.run(task_prompt)
    logger.info(answer)

    # Persist the role's state before scoring, mirroring the original order.
    save_history(role=interpreter)

    # eval() yields a pair; only its second element (the correctness flag)
    # is needed here.
    _, is_correct = benchmark.eval(id, str(answer))
    verdict = "correct" if is_correct else "incorrect"
    logger.info(f"Prediction is {verdict}.")
if __name__ == "__main__":
    # Hand the coroutine to Python Fire so the script can be driven from the
    # command line; `id` becomes a command-line argument of the script.
    fire.Fire(main)