import fire
from DABench import DABench

from metagpt.logs import logger
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.utils.recovery_util import save_history


async def main(id: int = 0) -> None:
    """Evaluate one task.

    Builds the formatted prompt for the task from ``DABench``, runs a
    ``DataInterpreter`` on it, logs the result, saves the interpreter's
    history, and finally logs whether the benchmark judged the prediction
    correct.

    Args:
        id: Identifier of the benchmark task, forwarded unchanged to
            ``DABench.generate_formatted_prompt`` and ``DABench.eval``.
            Defaults to 0. The name shadows the ``id`` builtin but is kept
            because it is the argument name exposed on the command line.
    """
    bench = DABench()
    requirement = bench.generate_formatted_prompt(id)

    di = DataInterpreter()
    result = await di.run(requirement)
    logger.info(result)

    # History is saved before scoring, so it is persisted even if the
    # evaluation step below raises.
    save_history(role=di)

    # Only the second element of the eval result is used here; the result is
    # stringified before being handed to the benchmark.
    _, is_correct = bench.eval(id, str(result))
    logger.info(f"Prediction is {'correct' if is_correct else 'incorrect'}.")


if __name__ == "__main__":
    # Expose ``main`` (and its ``id`` argument) as a command-line interface.
    fire.Fire(main)