|
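"""Batch generation of model answers for math QA datasets with vLLM.

Loads questions from JSON/JSONL files, wraps them in a model-specific chat
template, samples completions with vLLM, and appends the results to per-shard
JSON-lines files under --output_dir.

Illustrative launch (the script name and paths are placeholders):
    python generate.py --model_path /path/to/model --model_type mistral \
        --data_path data/test.json --method zero_shot_cot \
        --cuda_ind 0 --cuda_num 8 --tensor_parallel 1
"""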
import argparse
import glob
import json
import os
import random
import re
import sys
from collections import Counter

import numpy as np
import torch
from tqdm import tqdm
from vllm import LLM, SamplingParams
|
|
few_shot_string = """Question: Find the domain of the expression $\frac{\sqrt{x-2}}{\sqrt{5-x}}$.} |
|
Let's think step by step. The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $[2,5)$. Final Answer: The answer is $[2,5)$. I hope it is correct. |
|
|
|
Question: If $\det \mathbf{A} = 2$ and $\det \mathbf{B} = 12,$ then find $\det (\mathbf{A} \mathbf{B}).$ |
|
Let's think step by step. We have that $\det (\mathbf{A} \mathbf{B}) = (\det \mathbf{A})(\det \mathbf{B}) = (2)(12) = 24.$ Final Answer: The answer is $24$. I hope it is correct. |
|
|
|
Question: Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight? |
|
Let's think step by step. If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$:\begin{align*} |
|
30n&=480\ |
|
\Rightarrow\qquad n&=480/30=16 |
|
\end{align*} |
|
Final Answer: The answer is $16$. I hope it is correct. |
|
|
|
Question: If the system of equations |
|
|
|
\begin{align*} |
|
6x-4y&=a,\ |
|
6y-9x &=b. |
|
\end{align*} |
|
has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{a}{b},$ assuming $b$ is nonzero. |
|
Let's think step by step. If we multiply the first equation by $-\frac{3}{2}$, we obtain $$6y-9x=-\frac{3}{2}a.$$Since we also know that $6y-9x=b$, we have |
|
$$-\frac{3}{2}a=b\Rightarrow\frac{a}{b}=-\frac{2}{3}.$$ |
|
Final Answer: The answer is $-\frac{2}{3}$. I hope it is correct. |
|
|
|
""" |
|
|
|
PROMPT_DICT = {
    "lean4": (
        "Statement and proof in natural language:\n\n"
        "statement:\n{nl_statement}\n\n"
        "proof:\n{nl_proof}\n\n"
        "Translate the statement and proof in natural language to lean4:"
    ),
    "prompt_no_input": (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Response:"
    ),
    "old_prompt_bd": """Question: {question}
Let's think step by step.""",
    "vanilla": """{question}""",
}
|
|
def batchify(pairs, batch_size):
    """Split a list into batches of the given size."""
    for i in range(0, len(pairs), batch_size):
        yield pairs[i : i + batch_size]
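# Example: list(batchify([1, 2, 3, 4, 5], 2)) yields [[1, 2], [3, 4], [5]];
# the last batch may be shorter than batch_size.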
|
|
def generate_prompts(questions, args):
    """Build one prompt per question."""
    prompts = [generate_prompt_generation(args, question) for question in questions]
    return prompts
|
|
def generate_prompt_generation(args, question):
    if args.method == "few_shot_cot":
        # Restores the default method: prepend the worked examples above and
        # pose the new question with the CoT trigger (assumed pairing of
        # few_shot_string with PROMPT_DICT["old_prompt_bd"]).
        content = few_shot_string + PROMPT_DICT["old_prompt_bd"].format(
            question=question
        )
    elif args.method == "zero_shot_cot":
        content = question + " Let's think step by step."
    elif args.method == "zero_shot":
        content = question
    else:
        raise ValueError("unsupported method: {}".format(args.method))
|
if "generator" not in args.model_type: |
|
MODEL_DICT = { |
|
"llama": ("[INST] \n{content}\n [/INST]"), |
|
"mistral": ("<s>[INST] {content} [/INST]"), |
|
"chatglm": ("<|user|> \n{content}\n <|assistant|>"), |
|
"qianwen": ( |
|
"<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n" |
|
), |
|
"deepseek-math": ("User: {content}\n\nAssistant: "), |
|
"internlm2-math": ("<|im_start|>system\n{content}<|im_end|>\n"), |
|
"llemma": ( |
|
"### System Prompt\nYou are an intelligent mathematical assistant.\n\n### User Message\n{content}\n\n### Assistant" |
|
), |
|
} |
|
|
|
if args.model_type in ["qianwen", "qianwen-13b", "qianwen-70b"]: |
|
content = MODEL_DICT["qianwen"].format_map({"content": content}) |
|
|
|
elif args.model_type in ["chatglm", "deepseek-math-7b-base"]: |
|
pass |
|
|
|
elif args.model_type in ["llama2-7b-chat"]: |
|
content = MODEL_DICT["llama"].format_map({"content": content}) |
|
|
|
elif args.model_type in ["mistral", "mixtral", "Mistral-7B-Instruct-v0.2"]: |
|
content = MODEL_DICT["mistral"].format_map({"content": content}) |
|
|
|
elif args.model_type in ["internlm2-math-20b", "internlm2-math-7b"]: |
|
content = MODEL_DICT["internlm2-math"].format_map({"content": content}) |
|
elif args.model_type in ["llemma_34b", "llemma_7b"]: |
|
content = MODEL_DICT["llemma"].format_map({"content": content}) |
|
elif args.model_type in ["deepseek-math-7b-instruct"]: |
|
content = MODEL_DICT["deepseek-math"].format_map({"content": content}) |
|
|
|
return content |
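# For example, with --model_type mistral and --method zero_shot_cot, a question
# q is sent to the model as "<s>[INST] q Let's think step by step. [/INST]".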
|
|
def self_consistency(pairs):
    """Return the first key whose value is the most common value in pairs."""
    val_counts = Counter(value for key, value in pairs)
    most = val_counts.most_common(1)[0][0]
    for key, value in pairs:
        if value == most:
            return key
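# Example: self_consistency([(resp_a, "16"), (resp_b, "15"), (resp_c, "16")])
# returns resp_a: "16" is the majority answer and resp_a is its first response.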
|
|
def str2bool(s):
    s = s.lower()
    if s == "true":
        return True
    elif s == "false":
        return False
    else:
        raise ValueError("invalid value: {}, must be true or false".format(s))
|
|
def parse_arguments():
    parser = argparse.ArgumentParser(description="Zero-shot-CoT")

    parser.add_argument(
        "--cot_trigger_no",
        type=int,
        default=1,
        help="A trigger sentence that elicits a model to execute chain of thought",
    )
    parser.add_argument("--dataset", type=str, default="")
    parser.add_argument("--data_path", type=str, default="")
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--eval_method", type=str, default="")

    parser.add_argument("--model_path", type=str, default="")
    parser.add_argument("--model_type", type=str, default="chatglm")

    parser.add_argument("--output_dir", type=str, default="generation_test")

    parser.add_argument("--lora_path", type=str, default="")

    parser.add_argument("--method", type=str, default="few_shot_cot")
    parser.add_argument("--data_question_key", type=str, default="question")
    parser.add_argument("--data_answer_key", type=str, default="answer")

    parser.add_argument("--sample_num", type=int, default=1)

    parser.add_argument("--cuda_ind", type=int, default=0)
    parser.add_argument("--tensor_parallel", type=int, default=1)
    parser.add_argument("--cuda_start", type=int, default=0)
    parser.add_argument("--cuda_num", type=int, default=8)

    parser.add_argument("--load_in_8bit", type=str2bool, default=False)
    parser.add_argument("--rewrite", type=str2bool, default=False)

    parser.add_argument("--use_typewriter", type=int, default=0)

    parser.add_argument("--temperature", type=float, default=0.0)
    parser.add_argument("--top_p", type=float, default=1)
    parser.add_argument("--iter_max_new_tokens", type=int, default=512)
    parser.add_argument("--init_max_new_tokens", type=int, default=2048)
    parser.add_argument("--min_new_tokens", type=int, default=1)
    parser.add_argument(
        "--correct_response_format", type=str, default="The correct response is:"
    )

    args = parser.parse_args()
    # Note: the dataset name, not the CLI flags above, decides the JSON keys.
    if "lean" in args.dataset:
        args.data_question_key = "nl_problem"
        args.data_answer_key = "nl_proof"
    else:
        args.data_question_key = "question"
        args.data_answer_key = "answer"

    print(args.model_type)
    assert len(args.model_path), "--model_path must be set"

    if args.cot_trigger_no == 1:
        args.cot_trigger = "Let's think step by step."

    return args
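# Sharding: with --cuda_num GPUs and --tensor_parallel GPUs per model replica,
# the data is split into cuda_num // tensor_parallel shards; this process
# handles shard cuda_start + cuda_ind // tensor_parallel (see generation()).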
|
|
def get_question_answer(args):
    allfilepath = args.data_path
    questions = []
    answers = []

    # Each path may hold a JSON list, a single JSON object, or JSONL.
    for filepath in allfilepath.split(","):
        try:
            with open(filepath, "r") as file:
                data = json.load(file)

            if isinstance(data, list):
                for json_item in data:
                    answers.append(json_item)
            elif isinstance(data, dict):
                answers.append(data)

        except json.JSONDecodeError:
            # Fall back to JSONL: one JSON object per line.
            with open(filepath, "r") as file:
                for line in file:
                    json_item = json.loads(line)
                    answers.append(json_item)

    questions = [
        PROMPT_DICT["vanilla"].format(
            question=item[args.data_question_key],
        )
        for item in answers
    ]

    sampled_question = random.choice(questions)
    print("Sampled Question:")
    print(sampled_question)

    return questions, answers
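# Input records are dicts keyed by args.data_question_key / args.data_answer_key,
# e.g. {"question": "...", "answer": "..."} (or "nl_problem"/"nl_proof" for lean).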
|
|
def generation(args):
    model = LLM(
        model=args.model_path,
        dtype="bfloat16",
        trust_remote_code=True,
        tensor_parallel_size=args.tensor_parallel,
        gpu_memory_utilization=0.95,
    )
    print(args.model_path)

    if "qianwen" in args.model_type:
        # Qwen marks end-of-turn with <|im_end|> (token id 151645).
        model.llm_engine.tokenizer.eos_token_id = 151645

    model.llm_engine.tokenizer.pad_token_id = None

    print("load data")
    questions, answers = get_question_answer(args)

    # Unless --rewrite is set, skip questions already present in earlier shards.
    question_exist_list = []
    write_pattern = "w" if args.rewrite else "a+"
    if os.path.exists(args.output_dir) and not args.rewrite:
        file_pattern = os.path.join(args.output_dir, "[0-9]*.json")
        for file_path in glob.glob(file_pattern):
            with open(file_path, "r") as fp:
                for line in fp.readlines():
                    question_exist_list.append(json.loads(line)["question"])
    else:
        os.makedirs(args.output_dir, exist_ok=True)
    qa_pairs = [
        (questions[idx], answers[idx])
        for idx in range(len(questions))
        if questions[idx] not in question_exist_list
    ]
    # Split the remaining pairs evenly across the model replicas.
    cuda_pieces = np.array_split(
        range(len(qa_pairs)), args.cuda_num // args.tensor_parallel
    )
    print(f"filtered {len(questions) - len(qa_pairs)} already-generated questions")

    with open(
        f"{args.output_dir}/{args.cuda_ind // args.tensor_parallel + args.cuda_start}.json",
        write_pattern,
        encoding="utf-8",
    ) as wf:
        start = cuda_pieces[args.cuda_start + args.cuda_ind // args.tensor_parallel][0]
        end = (
            cuda_pieces[args.cuda_start + args.cuda_ind // args.tensor_parallel][-1] + 1
        )
        subset_length = end - start
        total_batches = (subset_length + args.batch_size - 1) // args.batch_size
        for batch in tqdm(
            batchify(qa_pairs[start:end], args.batch_size), total=total_batches
        ):
            questions, answers = zip(*batch)
            prompts = generate_prompts(questions, args)

            with torch.no_grad():
                output_all = []
                try:
                    for i in range(args.sample_num):
                        sample_list = []
                        sampling_params = SamplingParams(
                            temperature=args.temperature,
                            top_p=args.top_p,
                            max_tokens=args.init_max_new_tokens,
                        )
                        generations = model.generate(
                            prompts, sampling_params, use_tqdm=False
                        )
                        for generation_output in generations:
                            output = generation_output.outputs[0].text
                            sample_list.append(output)
                        output_all.append(sample_list)

                    # Transpose from sample-major to question-major order.
                    output_all = list(map(list, zip(*output_all)))
                except Exception as e:
                    print(str(e))
                    sys.exit(1)

                dicts = []
                for question, answer, output, prompt in zip(
                    questions, answers, output_all, prompts
                ):
                    dicts.append(
                        {
                            "question": question,
                            "prompt": prompt,
                            "content": answer,
                            "total output": output,
                        }
                    )

                for record in dicts:
                    wf.write(json.dumps(record, ensure_ascii=False) + "\n")

                wf.flush()
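# Each output line is one JSON record of the form
# {"question": ..., "prompt": ..., "content": <input item>, "total output": [samples]}.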
|
|
def main(argv=None):
    args = parse_arguments()
    print("*****************************")
    print(args)
    print("*****************************")
    generation(args)


if __name__ == "__main__":
    main()