|
import json |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
import torch |
|
|
|
|
|
# Toy relational schema given to the model as prompt context.
# Maps each table name to the ordered list of its column names.
db_schema = {
    "products": ["product_id", "name", "price", "description", "type"],
    "orders": ["order_id", "product_id", "quantity", "order_date"],
    "customers": ["customer_id", "name", "email", "phone_number"],
}
|
|
|
|
|
# Checkpoint used for NL-to-SQL generation.
# NOTE(review): gpt-neox-20b is ~40 GB of fp16 weights and needs multi-GPU /
# large-GPU memory even with device_map="auto" — confirm this size is intended.
model_name = "EleutherAI/gpt-neox-20b"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# device_map="auto" lets accelerate place/shard layers across available devices;
# torch.float16 halves memory versus the default fp32 weights.
# NOTE(review): this download/load runs at import time — every importer pays it.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
|
|
|
def generate_sql_query(context, question):
    """
    Generate an SQL query for a natural-language question using the module-level LM.

    Args:
        context (str): Description of the database schema or table relationships.
        question (str): User's natural language query.

    Returns:
        str: Generated SQL query — the text following the final "Query:" marker
        in the decoded model output, stripped of surrounding whitespace.
    """
    prompt = f"""
Context: {context}

Question: {question}

Write an SQL query to address the question based on the context.
Query:
"""

    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=1024
    ).to(device)

    # Use max_new_tokens rather than max_length: max_length=512 counted the
    # prompt tokens too, so any prompt near the 1024-token truncation limit
    # left no budget to generate (or raised an error). Passing attention_mask
    # explicitly avoids pad-token ambiguity in batched/padded decoding.
    output = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=256,
        num_beams=5,
        early_stopping=True,
    )
    query = tokenizer.decode(output[0], skip_special_tokens=True)

    # Decoded text echoes the whole prompt; keep only what follows "Query:".
    sql_query = query.split("Query:")[-1].strip()
    return sql_query
|
|
|
|
|
def _main():
    """Demo entry point: generate SQL for one sample question over db_schema."""
    # Serialize the schema so the model sees table/column names in the prompt.
    schema_description = json.dumps(db_schema, indent=4)

    user_question = 'Show all products that cost more than $50'

    sql_query = generate_sql_query(schema_description, user_question)
    print(f"Generated SQL Query:\n{sql_query}\n")


# Guard the demo so importing this module does not trigger a beam-search
# generation run; previously these statements executed unconditionally.
if __name__ == "__main__":
    _main()
|
|